cpp-common/bt2c: add `UnicodeConv` class

author Simon Marchi <simon.marchi@efficios.com>

Fri, 3 May 2024 16:54:10 +0000 (12:54 -0400)

committer Simon Marchi <simon.marchi@efficios.com>

Wed, 4 Sep 2024 19:05:14 +0000 (15:05 -0400)
author Simon Marchi <simon.marchi@efficios.com>
Fri, 3 May 2024 16:54:10 +0000 (12:54 -0400)
committer Simon Marchi <simon.marchi@efficios.com>
Wed, 4 Sep 2024 19:05:14 +0000 (15:05 -0400)
diff --git a/src/Makefile.am b/src/Makefile.am

index eeb50a9561bc758eda0c1a60c06cde909e9b5093..1fffd088c84936c27297a87ef12f244dbc5acc33 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -195,6 +195,8 @@ cpp_common_libcpp_common_la_SOURCES = \
         cpp-common/bt2c/text-loc-str.cpp \
         cpp-common/bt2c/text-loc-str.hpp \
         cpp-common/bt2c/type-traits.hpp \
+       cpp-common/bt2c/unicode-conv.cpp \
+       cpp-common/bt2c/unicode-conv.hpp \
         cpp-common/bt2c/uuid.hpp \
         cpp-common/bt2c/val-req.hpp \
         cpp-common/bt2c/vector.hpp \
diff --git a/src/cpp-common/bt2c/unicode-conv.cpp b/src/cpp-common/bt2c/unicode-conv.cpp

new file mode 100644 (file)

index 0000000..11cdd51
--- /dev/null
+++ b/src/cpp-common/bt2c/unicode-conv.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2024 EfficiOS, Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <glib.h>
+
+#include "common/assert.h"
+#include "cpp-common/bt2/exc.hpp"
+
+#include "unicode-conv.hpp"
+
+namespace bt2c {
+namespace {
+
+/*
+ * Sentinel conversion descriptor: the value which g_iconv_open()
+ * returns on failure, also used in this file to mark a descriptor
+ * which isn't open.
+ */
+const GIConv invalidGIConv = reinterpret_cast<GIConv>(-1);
+
+} /* namespace */
+
+/*
+ * Builds a Unicode converter of which the logger is initialized from
+ * `parentLogger` and the `UNICODE-CONV` string.
+ *
+ * All the conversion descriptors start as invalid: _justDoIt() opens
+ * each one the first time it needs it.
+ */
+UnicodeConv::UnicodeConv(const bt2c::Logger& parentLogger) :
+    _mLogger {parentLogger, "UNICODE-CONV"},
+    _mUtf16BeToUtf8IConv {invalidGIConv},
+    _mUtf16LeToUtf8IConv {invalidGIConv},
+    _mUtf32BeToUtf8IConv {invalidGIConv},
+    _mUtf32LeToUtf8IConv {invalidGIConv}
+{
+}
+
+namespace {
+
+/*
+ * Closes the conversion descriptor `conv` unless it's the invalid
+ * sentinel, that is, unless it was never successfully opened.
+ */
+void tryCloseGIConv(const GIConv conv) noexcept
+{
+    if (conv != invalidGIConv) {
+        g_iconv_close(conv);
+    }
+}
+
+} /* namespace */
+
+UnicodeConv::~UnicodeConv()
+{
+    /* Close whichever conversion descriptors were actually opened */
+    for (const auto conv : {_mUtf16BeToUtf8IConv, _mUtf16LeToUtf8IConv, _mUtf32BeToUtf8IConv,
+                            _mUtf32LeToUtf8IConv}) {
+        tryCloseGIConv(conv);
+    }
+}
+
+/*
+ * Converts `data`, encoded as `srcEncoding`, to UTF-8 using the
+ * conversion descriptor `conv`, opening `conv` first if needed.
+ *
+ * `codeUnitSize` is the size (bytes) of one `srcEncoding` code unit;
+ * it's only used to size the output buffer.
+ *
+ * Returns a view of the UTF-8 bytes within `_mBuf`.
+ *
+ * Logs, appends an error cause, and throws `bt2::Error` when opening
+ * `conv` or converting fails.
+ */
+ConstBytes UnicodeConv::_justDoIt(const char * const srcEncoding, GIConv& conv,
+                                  const ConstBytes data, const std::size_t codeUnitSize)
+{
+    /* Open the iconv conversion descriptor on first use */
+    if (conv == invalidGIConv) {
+        conv = g_iconv_open("UTF-8", srcEncoding);
+
+        if (conv == invalidGIConv) {
+            BT_CPPLOGE_ERRNO_APPEND_CAUSE_AND_THROW(bt2::Error, "g_iconv_open() failed",
+                                                    ": from-encoding={}, to-encoding=UTF-8",
+                                                    srcEncoding);
+        }
+    }
+
+    /*
+     * Size the UTF-8 output buffer with a crude, but safe upper bound:
+     * the input holds at most `data.size() / codeUnitSize` code points,
+     * and any single code point needs at most four UTF-8 bytes.
+     */
+    const auto maxCodePointCount = data.size() / codeUnitSize;
+
+    _mBuf.resize(maxCodePointCount * 4);
+
+    /*
+     * Convert: g_iconv() updates both byte counters as it consumes
+     * input and produces output.
+     */
+    auto srcPtr = const_cast<gchar *>(reinterpret_cast<const gchar *>(data.data()));
+    auto dstPtr = reinterpret_cast<gchar *>(_mBuf.data());
+    gsize srcBytesLeft = data.size();
+    gsize dstBytesLeft = _mBuf.size();
+    const auto convRet = g_iconv(conv, &srcPtr, &srcBytesLeft, &dstPtr, &dstBytesLeft);
+
+    if (convRet == static_cast<gsize>(-1)) {
+        BT_CPPLOGE_ERRNO_APPEND_CAUSE_AND_THROW(
+            bt2::Error, "g_iconv() failed",
+            ": input-byte-offset={}, from-encoding={}, to-encoding=UTF-8",
+            data.size() - srcBytesLeft, srcEncoding);
+    }
+
+    /*
+     * g_iconv() succeeded: assert that it consumed the whole input.
+     *
+     * The documentation of the underlying iconv() lists three failure
+     * modes:
+     *
+     * 1. Insufficient output buffer space.
+     * 2. Invalid multibyte sequence in input.
+     * 3. Incomplete multibyte sequence in input.
+     *
+     * Any malformed input makes iconv() fail with 2 or 3, which
+     * suggests that a successful conversion always consumes all the
+     * input bytes.
+     */
+    BT_ASSERT(srcBytesLeft == 0);
+
+    const auto utf8Size = _mBuf.size() - dstBytesLeft;
+
+    return {_mBuf.data(), utf8Size};
+}
+
+/* Converts the UTF-16BE data `data` to UTF-8 (two-byte code units) */
+ConstBytes UnicodeConv::utf8FromUtf16Be(const ConstBytes data)
+{
+    constexpr std::size_t codeUnitSize = 2;
+
+    return this->_justDoIt("UTF-16BE", _mUtf16BeToUtf8IConv, data, codeUnitSize);
+}
+
+/* Converts the UTF-16LE data `data` to UTF-8 (two-byte code units) */
+ConstBytes UnicodeConv::utf8FromUtf16Le(const ConstBytes data)
+{
+    constexpr std::size_t codeUnitSize = 2;
+
+    return this->_justDoIt("UTF-16LE", _mUtf16LeToUtf8IConv, data, codeUnitSize);
+}
+
+/* Converts the UTF-32BE data `data` to UTF-8 (four-byte code units) */
+ConstBytes UnicodeConv::utf8FromUtf32Be(const ConstBytes data)
+{
+    constexpr std::size_t codeUnitSize = 4;
+
+    return this->_justDoIt("UTF-32BE", _mUtf32BeToUtf8IConv, data, codeUnitSize);
+}
+
+/* Converts the UTF-32LE data `data` to UTF-8 (four-byte code units) */
+ConstBytes UnicodeConv::utf8FromUtf32Le(const ConstBytes data)
+{
+    constexpr std::size_t codeUnitSize = 4;
+
+    return this->_justDoIt("UTF-32LE", _mUtf32LeToUtf8IConv, data, codeUnitSize);
+}
+
+} /* namespace bt2c */
diff --git a/src/cpp-common/bt2c/unicode-conv.hpp b/src/cpp-common/bt2c/unicode-conv.hpp

new file mode 100644 (file)

index 0000000..2590097
--- /dev/null
+++ b/src/cpp-common/bt2c/unicode-conv.hpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2024 EfficiOS, Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef BABELTRACE_CPP_COMMON_BT2C_UNICODE_CONV_HPP
+#define BABELTRACE_CPP_COMMON_BT2C_UNICODE_CONV_HPP
+
+#include <cstddef>
+#include <vector>
+
+#include <glib.h>
+
+#include "logging.hpp"
+
+#include "aliases.hpp"
+
+namespace bt2c {
+
+/*
+ * A Unicode converter offers the utf8FromUtf*() methods to convert
+ * UTF-16 and UTF-32 data to UTF-8.
+ *
+ * IMPORTANT: The conversion methods aren't thread-safe: a `UnicodeConv`
+ * instance keeps an internal buffer where it writes the resulting UTF-8
+ * data.
+ *
+ * An instance owns its iconv conversion descriptors and closes them on
+ * destruction: it's therefore neither copyable nor movable.
+ */
+class UnicodeConv final
+{
+public:
+    explicit UnicodeConv(const bt2c::Logger& parentLogger);
+    ~UnicodeConv();
+
+    /*
+     * Not copyable: a copy would hold the same conversion descriptors
+     * and both destructors would close them.
+     */
+    UnicodeConv(const UnicodeConv&) = delete;
+    UnicodeConv& operator=(const UnicodeConv&) = delete;
+
+    /*
+     * Converts the UTF-16BE data `data` to UTF-8 and returns it.
+     *
+     * `data.data()` must not return `nullptr`.
+     *
+     * The returned data belongs to this Unicode converter and remains
+     * valid as long as you don't call another conversion method of
+     * this converter.
+     *
+     * Logs a message, appends a cause to the error of the current
+     * thread, and throws an error if any conversion error occurs,
+     * including incomplete data in `data`.
+     */
+    ConstBytes utf8FromUtf16Be(ConstBytes data);
+
+    /*
+     * Converts the UTF-16LE data `data` to UTF-8 and returns it.
+     *
+     * `data.data()` must not return `nullptr`.
+     *
+     * The returned data belongs to this Unicode converter and remains
+     * valid as long as you don't call another conversion method of
+     * this converter.
+     *
+     * Logs a message, appends a cause to the error of the current
+     * thread, and throws an error if any conversion error occurs,
+     * including incomplete data in `data`.
+     */
+    ConstBytes utf8FromUtf16Le(ConstBytes data);
+
+    /*
+     * Converts the UTF-32BE data `data` to UTF-8 and returns it.
+     *
+     * `data.data()` must not return `nullptr`.
+     *
+     * The returned data belongs to this Unicode converter and remains
+     * valid as long as you don't call another conversion method of
+     * this converter.
+     *
+     * Logs a message, appends a cause to the error of the current
+     * thread, and throws an error if any conversion error occurs,
+     * including incomplete data in `data`.
+     */
+    ConstBytes utf8FromUtf32Be(ConstBytes data);
+
+    /*
+     * Converts the UTF-32LE data `data` to UTF-8 and returns it.
+     *
+     * `data.data()` must not return `nullptr`.
+     *
+     * The returned data belongs to this Unicode converter and remains
+     * valid as long as you don't call another conversion method of
+     * this converter.
+     *
+     * Logs a message, appends a cause to the error of the current
+     * thread, and throws an error if any conversion error occurs,
+     * including incomplete data in `data`.
+     */
+    ConstBytes utf8FromUtf32Le(ConstBytes data);
+
+private:
+    /*
+     * Converts `data` from the encoding named `srcEncoding` to UTF-8
+     * using the conversion descriptor `conv`, and returns a view of
+     * the result within `_mBuf`.
+     *
+     * `codeUnitSize` is the size (bytes) of one code unit of
+     * `srcEncoding`.
+     */
+    ConstBytes _justDoIt(const char *srcEncoding, GIConv& conv, ConstBytes data,
+                         std::size_t codeUnitSize);
+
+    bt2c::Logger _mLogger;
+
+    /* Conversion descriptors, one per supported source encoding */
+    GIConv _mUtf16BeToUtf8IConv;
+    GIConv _mUtf16LeToUtf8IConv;
+    GIConv _mUtf32BeToUtf8IConv;
+    GIConv _mUtf32LeToUtf8IConv;
+
+    /* Output buffer which the returned UTF-8 data refers to */
+    std::vector<std::uint8_t> _mBuf;
+};
+
+} /* namespace bt2c */
+
+#endif /* BABELTRACE_CPP_COMMON_BT2C_UNICODE_CONV_HPP */
diff --git a/tests/Makefile.am b/tests/Makefile.am

index b1953af9df95eef896e2c4404f08847e5baf1ff2..0b63ad9dd58079523ab890a0eb2f7c995c9381c7 100644 (file)
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -110,9 +110,22 @@ cpp_common_test_uuid_LDADD = \
         $(COMMON_TEST_LDADD) \
         $(top_builddir)/src/cpp-common/vendor/fmt/libfmt.la
  
+# Test program built from cpp-common/test-unicode-conv.cpp
+noinst_PROGRAMS += \
+       cpp-common/test-unicode-conv
+
+cpp_common_test_unicode_conv_SOURCES = \
+       cpp-common/test-unicode-conv.cpp
+
+cpp_common_test_unicode_conv_LDADD = \
+       $(top_builddir)/src/cpp-common/vendor/fmt/libfmt.la \
+       $(top_builddir)/src/cpp-common/libcpp-common.la \
+       $(top_builddir)/src/lib/libbabeltrace2.la \
+       $(COMMON_TEST_LDADD)
+
  TESTS_CPP_COMMON = \
         cpp-common/test-c-string-view \
-       cpp-common/test-uuid
+       cpp-common/test-uuid \
+       cpp-common/test-unicode-conv
  
  TESTS_LIB = \
         lib/test-bt-uuid \
diff --git a/tests/cpp-common/test-unicode-conv.cpp b/tests/cpp-common/test-unicode-conv.cpp

new file mode 100644 (file)

index 0000000..b7d910f
--- /dev/null
+++ b/tests/cpp-common/test-unicode-conv.cpp
@@ -0,0 +1,193 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2024 EfficiOS, Inc.
+ */
+
+#include <cstring>
+
+#include "cpp-common/bt2c/call.hpp"
+#include "cpp-common/bt2c/unicode-conv.hpp"
+#include "cpp-common/vendor/fmt/core.h"
+
+#include "tap/tap.h"
+
+namespace {
+
+/*
+ * Reference UTF-8 data which checkPass() compares conversion results
+ * against.
+ *
+ * The text starts with ASCII ("Hello, World!"), continues with two-,
+ * three-, and four-byte UTF-8 sequences, and ends with a null byte.
+ */
+constexpr std::uint8_t refUtf8String[] = {
+    0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21, 0x20, 0xc3, 0x85,
+    0xc3, 0xa5, 0xc3, 0x89, 0xc3, 0xa9, 0xc3, 0x9c, 0xc3, 0xbc, 0x20, 0xf0, 0x9f, 0x8c, 0x8d, 0xf0,
+    0x9f, 0x9a, 0x80, 0x20, 0xd0, 0x9f, 0xd1, 0x80, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82,
+    0x20, 0xce, 0x93, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xac, 0x20, 0xcf, 0x83, 0xce, 0xbf, 0xcf, 0x85,
+    0x20, 0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd, 0x20, 0xe2, 0x88, 0x91, 0xe2, 0x88, 0x8f, 0x00,
+};
+
+/*
+ * UTF-16BE input of the conversion test, expected to convert to
+ * `refUtf8String`.
+ *
+ * Includes surrogate pairs (0xd83c 0xdf0d and 0xd83d 0xde80) and a
+ * terminating null code unit.
+ */
+constexpr std::uint8_t utf16BeString[] = {
+    0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x57,
+    0x00, 0x6f, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x64, 0x00, 0x21, 0x00, 0x20, 0x00, 0xc5, 0x00, 0xe5,
+    0x00, 0xc9, 0x00, 0xe9, 0x00, 0xdc, 0x00, 0xfc, 0x00, 0x20, 0xd8, 0x3c, 0xdf, 0x0d, 0xd8, 0x3d,
+    0xde, 0x80, 0x00, 0x20, 0x04, 0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, 0x42,
+    0x00, 0x20, 0x03, 0x93, 0x03, 0xb5, 0x03, 0xb9, 0x03, 0xac, 0x00, 0x20, 0x03, 0xc3, 0x03, 0xbf,
+    0x03, 0xc5, 0x00, 0x20, 0x4f, 0x60, 0x59, 0x7d, 0x00, 0x20, 0x22, 0x11, 0x22, 0x0f, 0x00, 0x00,
+};
+
+/*
+ * UTF-16LE input of the conversion test, expected to convert to
+ * `refUtf8String`: same code units as `utf16BeString`, with the two
+ * bytes of each code unit swapped.
+ */
+constexpr std::uint8_t utf16LeString[] = {
+    0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x57, 0x00,
+    0x6f, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x64, 0x00, 0x21, 0x00, 0x20, 0x00, 0xc5, 0x00, 0xe5, 0x00,
+    0xc9, 0x00, 0xe9, 0x00, 0xdc, 0x00, 0xfc, 0x00, 0x20, 0x00, 0x3c, 0xd8, 0x0d, 0xdf, 0x3d, 0xd8,
+    0x80, 0xde, 0x20, 0x00, 0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, 0x42, 0x04,
+    0x20, 0x00, 0x93, 0x03, 0xb5, 0x03, 0xb9, 0x03, 0xac, 0x03, 0x20, 0x00, 0xc3, 0x03, 0xbf, 0x03,
+    0xc5, 0x03, 0x20, 0x00, 0x60, 0x4f, 0x7d, 0x59, 0x20, 0x00, 0x11, 0x22, 0x0f, 0x22, 0x00, 0x00,
+};
+
+/*
+ * UTF-32BE input of the conversion test, expected to convert to
+ * `refUtf8String`.
+ *
+ * Includes code points beyond U+FFFF (0x0001f30d and 0x0001f680) and a
+ * terminating null code unit.
+ */
+constexpr std::uint8_t utf32BeString[] = {
+    0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x6c,
+    0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x57,
+    0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x64,
+    0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, 0xe5,
+    0x00, 0x00, 0x00, 0xc9, 0x00, 0x00, 0x00, 0xe9, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0xfc,
+    0x00, 0x00, 0x00, 0x20, 0x00, 0x01, 0xf3, 0x0d, 0x00, 0x01, 0xf6, 0x80, 0x00, 0x00, 0x00, 0x20,
+    0x00, 0x00, 0x04, 0x1f, 0x00, 0x00, 0x04, 0x40, 0x00, 0x00, 0x04, 0x38, 0x00, 0x00, 0x04, 0x32,
+    0x00, 0x00, 0x04, 0x35, 0x00, 0x00, 0x04, 0x42, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x03, 0x93,
+    0x00, 0x00, 0x03, 0xb5, 0x00, 0x00, 0x03, 0xb9, 0x00, 0x00, 0x03, 0xac, 0x00, 0x00, 0x00, 0x20,
+    0x00, 0x00, 0x03, 0xc3, 0x00, 0x00, 0x03, 0xbf, 0x00, 0x00, 0x03, 0xc5, 0x00, 0x00, 0x00, 0x20,
+    0x00, 0x00, 0x4f, 0x60, 0x00, 0x00, 0x59, 0x7d, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x22, 0x11,
+    0x00, 0x00, 0x22, 0x0f, 0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * UTF-32LE input of the conversion test, expected to convert to
+ * `refUtf8String`: same code units as `utf32BeString`, with the four
+ * bytes of each code unit reversed.
+ */
+constexpr std::uint8_t utf32LeString[] = {
+    0x48, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
+    0x6f, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00,
+    0x6f, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+    0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xc5, 0x00, 0x00, 0x00, 0xe5, 0x00, 0x00, 0x00,
+    0xc9, 0x00, 0x00, 0x00, 0xe9, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x0d, 0xf3, 0x01, 0x00, 0x80, 0xf6, 0x01, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x1f, 0x04, 0x00, 0x00, 0x40, 0x04, 0x00, 0x00, 0x38, 0x04, 0x00, 0x00, 0x32, 0x04, 0x00, 0x00,
+    0x35, 0x04, 0x00, 0x00, 0x42, 0x04, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x93, 0x03, 0x00, 0x00,
+    0xb5, 0x03, 0x00, 0x00, 0xb9, 0x03, 0x00, 0x00, 0xac, 0x03, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0xc3, 0x03, 0x00, 0x00, 0xbf, 0x03, 0x00, 0x00, 0xc5, 0x03, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x60, 0x4f, 0x00, 0x00, 0x7d, 0x59, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x22, 0x00, 0x00,
+    0x0f, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * A UTF-16BE string that abruptly ends in the middle of a code point
+ * (but with complete code units): the last code unit, 0xd83d, is a
+ * high surrogate without the low surrogate which must follow it.
+ */
+constexpr std::uint8_t utf16BeTruncCodePoint[] = {
+    0x00, 0x43, 0x00, 0x68, 0x00, 0x61, 0x00, 0x74, 0x00, 0x6f, 0x00, 0x6e, 0x00, 0x20, 0xd8, 0x3d,
+};
+
+/*
+ * A UTF-16BE string that abruptly ends in the middle of a code unit:
+ * six complete code units followed by a single trailing byte.
+ */
+constexpr std::uint8_t utf16BeTruncCodeUnit[] = {
+    0x00, 0x43, 0x00, 0x68, 0x00, 0x61, 0x00, 0x74, 0x00, 0x6f, 0x00, 0x6e, 0x00,
+};
+
+/*
+ * A UTF-32BE string that abruptly ends in the middle of a code unit:
+ * eight complete code units followed by only three of the four bytes
+ * of the next one.
+ */
+constexpr std::uint8_t utf32BeTruncCodeUnit[] = {
+    0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x6f,
+    0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x64,
+    0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x01, 0xf9,
+};
+
+/*
+ * Returns the bytes of `bytes` as lowercase, two-digit hexadecimal
+ * numbers, each one followed by a space.
+ */
+std::string dump(const bt2c::ConstBytes bytes)
+{
+    std::string hexStr;
+
+    for (const std::uint8_t byte : bytes) {
+        hexStr.append(fmt::format("{:02x} ", byte));
+    }
+
+    return hexStr;
+}
+
+/*
+ * Emits two TAP results checking that `result`, the outcome of a
+ * conversion from the encoding named `sourceEncoding`, has the size
+ * and the contents of `refUtf8String`.
+ *
+ * Dumps both the expected and the actual bytes as diagnostics if any
+ * of the two checks fails.
+ */
+void checkPass(const bt2c::ConstBytes result, const char * const sourceEncoding)
+{
+    const bool sizeIsExpected = ok(result.size() == sizeof(refUtf8String),
+                                   "%s to UTF-8: length is expected", sourceEncoding);
+
+    /* Only compare the common prefix so as to never read out of bounds */
+    const auto cmpSize = std::min(result.size(), sizeof(refUtf8String));
+    const bool contentIsExpected = ok(std::memcmp(result.data(), refUtf8String, cmpSize) == 0,
+                                      "%s to UTF-8: content is expected", sourceEncoding);
+
+    if (sizeIsExpected && contentIsExpected) {
+        return;
+    }
+
+    diag("Expected: %s\n", dump(refUtf8String).c_str());
+    diag("Actual:   %s\n", dump(result).c_str());
+}
+
+/*
+ * Checks that calling `f()` throws `bt2::Error` and appends a cause
+ * having the message `expectedCauseMsg` to the error of the current
+ * thread.
+ *
+ * Always emits exactly two TAP results named after `testName`, so that
+ * the test plan holds even when `f()` doesn't fail as expected.
+ */
+template <typename FuncT>
+void checkFail(FuncT&& f, const char * const testName, const bt2c::CStringView expectedCauseMsg)
+{
+    const auto gotError = bt2c::call([&f] {
+        try {
+            f();
+        } catch (const bt2::Error&) {
+            return true;
+        }
+
+        return false;
+    });
+
+    ok(gotError, "%s - got error", testName);
+
+    const auto error = bt_current_thread_take_error();
+
+    if (!error) {
+        /*
+         * No error on the current thread (`f()` didn't fail): there's
+         * no cause to borrow, so report the failure instead of
+         * dereferencing a null error.
+         */
+        ok(false, "%s - error cause message is expected", testName);
+        diag("Expecting `%s`, but the current thread has no error", expectedCauseMsg.data());
+        return;
+    }
+
+    const auto msg = bt_error_cause_get_message(bt_error_borrow_cause_by_index(error, 0));
+
+    if (!ok(expectedCauseMsg == msg, "%s - error cause message is expected", testName)) {
+        diag("Expecting `%s`, got `%s`", expectedCauseMsg.data(), msg);
+    }
+
+    bt_error_release(error);
+}
+
+} /* namespace */
+
+/*
+ * Runs the `bt2c::UnicodeConv` tests: four successful conversions
+ * (two TAP results each) followed by three expected failures (two TAP
+ * results each), for a total of 14 results.
+ */
+int main()
+{
+    plan_tests(14);
+
+    const bt2c::Logger logger {"test-module", "test-tag", bt2c::Logger::Level::None};
+    bt2c::UnicodeConv conv {logger};
+
+    checkPass(conv.utf8FromUtf16Be(utf16BeString), "UTF-16BE");
+    checkPass(conv.utf8FromUtf16Le(utf16LeString), "UTF-16LE");
+    checkPass(conv.utf8FromUtf32Be(utf32BeString), "UTF-32BE");
+    checkPass(conv.utf8FromUtf32Le(utf32LeString), "UTF-32LE");
+
+    /* Each failure test has a distinct name to keep the TAP output unambiguous */
+    checkFail(
+        [&conv] {
+            conv.utf8FromUtf16Be(utf16BeTruncCodePoint);
+        },
+        "UTF-16BE: truncated code point",
+        "g_iconv() failed: Invalid argument: input-byte-offset=14, from-encoding=UTF-16BE, to-encoding=UTF-8");
+
+    checkFail(
+        [&conv] {
+            conv.utf8FromUtf16Be(utf16BeTruncCodeUnit);
+        },
+        "UTF-16BE: truncated code unit",
+        "g_iconv() failed: Invalid argument: input-byte-offset=12, from-encoding=UTF-16BE, to-encoding=UTF-8");
+
+    checkFail(
+        [&conv] {
+            conv.utf8FromUtf32Be(utf32BeTruncCodeUnit);
+        },
+        "UTF-32BE: truncated code unit",
+        "g_iconv() failed: Invalid argument: input-byte-offset=32, from-encoding=UTF-32BE, to-encoding=UTF-8");
+
+    return exit_status();
+}
author	Simon Marchi <simon.marchi@efficios.com>
	Fri, 3 May 2024 16:54:10 +0000 (12:54 -0400)
committer	Simon Marchi <simon.marchi@efficios.com>
	Wed, 4 Sep 2024 19:05:14 +0000 (15:05 -0400)
src/Makefile.am		patch \| blob \| blame \| history
src/cpp-common/bt2c/unicode-conv.cpp	[new file with mode: 0644]	patch \| blob
src/cpp-common/bt2c/unicode-conv.hpp	[new file with mode: 0644]	patch \| blob
tests/Makefile.am		patch \| blob \| blame \| history
tests/cpp-common/test-unicode-conv.cpp	[new file with mode: 0644]	patch \| blob