Version in base suite: 1.1.9+ds1-1 Base version: opencc_1.1.9+ds1-1 Target version: opencc_1.1.9+ds1-1+deb13u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/o/opencc/opencc_1.1.9+ds1-1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/o/opencc/opencc_1.1.9+ds1-1+deb13u1.dsc changelog | 7 patches/0001-Fix-two-out-of-bounds-read-issues-when-handling-trun.patch | 214 ++++++++++ patches/series | 1 3 files changed, 222 insertions(+) dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmpfzxasrvl/opencc_1.1.9+ds1-1.dsc: no acceptable signature found dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmpfzxasrvl/opencc_1.1.9+ds1-1+deb13u1.dsc: no acceptable signature found diff -Nru opencc-1.1.9+ds1/debian/changelog opencc-1.1.9+ds1/debian/changelog --- opencc-1.1.9+ds1/debian/changelog 2024-08-14 18:05:52.000000000 +0000 +++ opencc-1.1.9+ds1/debian/changelog 2026-06-20 16:41:39.000000000 +0000 @@ -1,3 +1,10 @@ +opencc (1.1.9+ds1-1+deb13u1) trixie; urgency=medium + + * Non-maintainer upload. + * CVE-2025-15536: Out-of-bounds read (Closes: #1126286) + + -- Adrian Bunk Sat, 20 Jun 2026 19:41:39 +0300 + opencc (1.1.9+ds1-1) unstable; urgency=medium * Upload to unstable. diff -Nru opencc-1.1.9+ds1/debian/patches/0001-Fix-two-out-of-bounds-read-issues-when-handling-trun.patch opencc-1.1.9+ds1/debian/patches/0001-Fix-two-out-of-bounds-read-issues-when-handling-trun.patch --- opencc-1.1.9+ds1/debian/patches/0001-Fix-two-out-of-bounds-read-issues-when-handling-trun.patch 1970-01-01 00:00:00.000000000 +0000 +++ opencc-1.1.9+ds1/debian/patches/0001-Fix-two-out-of-bounds-read-issues-when-handling-trun.patch 2026-06-20 16:41:14.000000000 +0000 @@ -0,0 +1,214 @@ +From 44b9c01d1875c23c894cf4095350a6c0216ea505 Mon Sep 17 00:00:00 2001 +From: frankslin +Date: Mon, 12 Jan 2026 16:51:38 -0800 +Subject: Fix two out-of-bounds read issues when handling truncated UTF-8 input + (#1005) + +Two independent out-of-bounds read issues were identified in OpenCC's UTF-8 +processing logic when handling malformed or truncated UTF-8 sequences. + +1) MaxMatchSegmentation: + NextCharLength() could return a value larger than the remaining input size. + The previous logic subtracted this value from a size_t length counter, + potentially causing underflow and subsequent out-of-bounds reads. + +2) Conversion: + Similar length handling could allow reads past the end of the input buffer + during dictionary matching, potentially propagating unintended bytes to the + conversion output. + +This patch fixes both issues by: +- Explicitly tracking the end of the input buffer +- Recomputing remaining length on each iteration +- Clamping matched character and key lengths to the remaining buffer size +- Preventing reads past the null terminator + +The changes preserve existing behavior for valid UTF-8 input and add test +coverage for truncated UTF-8 sequences. + +These issues may have security implications when processing untrusted input +and are classified as heap out-of-bounds reads (CWE-125). + +Co-authored-by: Claude +--- + src/Conversion.cpp | 18 ++++++++++++- + src/ConversionTest.cpp | 40 +++++++++++++++++++++++++++ + src/MaxMatchSegmentation.cpp | 10 ++++--- + src/MaxMatchSegmentationTest.cpp | 46 ++++++++++++++++++++++++++++++++ + 4 files changed, 110 insertions(+), 4 deletions(-) + +diff --git a/src/Conversion.cpp b/src/Conversion.cpp +index 87a5135..d5737a1 100644 +--- a/src/Conversion.cpp ++++ b/src/Conversion.cpp +@@ -23,14 +23,30 @@ using namespace opencc; + + std::string Conversion::Convert(const char* phrase) const { + std::ostringstream buffer; ++ // Calculate string end to prevent reading beyond null terminator ++ const char* phraseEnd = phrase; ++ while (*phraseEnd != '\0') { ++ phraseEnd++; ++ } ++ + for (const char* pstr = phrase; *pstr != '\0';) { +- Optional matched = dict->MatchPrefix(pstr); ++ size_t remainingLength = phraseEnd - pstr; ++ Optional matched = dict->MatchPrefix(pstr, remainingLength); + size_t matchedLength; + if (matched.IsNull()) { + matchedLength = UTF8Util::NextCharLength(pstr); ++ // Ensure we don't read beyond the null terminator ++ if (matchedLength > remainingLength) { ++ matchedLength = remainingLength; ++ } + buffer << UTF8Util::FromSubstr(pstr, matchedLength); + } else { + matchedLength = matched.Get()->KeyLength(); ++ // Defensive: ensure dictionary key length does not exceed remaining input ++ // (MatchPrefix should already guarantee this, but defense in depth) ++ if (matchedLength > remainingLength) { ++ matchedLength = remainingLength; ++ } + buffer << matched.Get()->GetDefault(); + } + pstr += matchedLength; +diff --git a/src/ConversionTest.cpp b/src/ConversionTest.cpp +index 04a80a7..fb7731c 100644 +--- a/src/ConversionTest.cpp ++++ b/src/ConversionTest.cpp +@@ -47,4 +47,44 @@ TEST_F(ConversionTest, ConvertCString) { + EXPECT_EQ(expected, converted); + } + ++TEST_F(ConversionTest, TruncatedUtf8Sequence) { ++ // This test specifically triggers the information disclosure vulnerability ++ // in the old code. The bug occurs when a string ends with an incomplete ++ // UTF-8 sequence. ++ // ++ // Background: UTF8Util::NextCharLength() examines only the first byte to ++ // determine the expected character length (1-6 bytes), but doesn't verify ++ // that enough bytes actually remain before the null terminator. ++ // ++ // Trigger condition: When the expected UTF-8 character length exceeds ++ // the actual remaining bytes before null, the old code would: ++ // 1. Call FromSubstr with a length crossing the null terminator ++ // 2. Advance pstr beyond the null terminator ++ // 3. Continue reading heap memory on next iteration ++ // 4. Output leaked heap data to conversion result (INFORMATION DISCLOSURE) ++ ++ // Construct a string ending with a truncated 3-byte UTF-8 sequence: ++ // - Normal text: "干" (valid 3-byte UTF-8: 0xE5 0xB9 0xB2) ++ // - Followed by: 0xE5 0xB9 (incomplete 3-byte sequence - missing last byte) ++ std::string malformed; ++ malformed += utf8("干"); // Valid character ++ malformed += '\xE5'; // Start of 3-byte UTF-8 (NextCharLength returns 3) ++ malformed += '\xB9'; // Second byte ++ // Missing third byte - only 2 bytes remain but NextCharLength expects 3 ++ // Old code would jump over null, read heap memory, and leak it in output ++ ++ // The fixed code should handle this gracefully without information disclosure ++ EXPECT_NO_THROW({ ++ const std::string converted = conversion->Convert(malformed); ++ // Should convert "干" to "幹" (first candidate in dict) and preserve incomplete sequence ++ std::string expected; ++ expected += utf8("幹"); // Converted from "干" (dict has ["幹", "乾", "干"]) ++ expected += '\xE5'; // Incomplete sequence preserved as-is ++ expected += '\xB9'; ++ EXPECT_EQ(expected, converted); ++ // Should NOT contain garbage heap data beyond the input ++ // (ASan would catch any out-of-bounds reads during conversion) ++ }); ++} ++ + } // namespace opencc +diff --git a/src/MaxMatchSegmentation.cpp b/src/MaxMatchSegmentation.cpp +index 5cdd79f..ff24e0a 100644 +--- a/src/MaxMatchSegmentation.cpp ++++ b/src/MaxMatchSegmentation.cpp +@@ -30,12 +30,17 @@ SegmentsPtr MaxMatchSegmentation::Segment(const std::string& text) const { + segLength = 0; + } + }; +- size_t length = text.length(); ++ const char* textEnd = text.c_str() + text.length(); + for (const char* pstr = text.c_str(); *pstr != '\0';) { +- const Optional& matched = dict->MatchPrefix(pstr, length); ++ size_t remainingLength = textEnd - pstr; ++ const Optional& matched = dict->MatchPrefix(pstr, remainingLength); + size_t matchedLength; + if (matched.IsNull()) { + matchedLength = UTF8Util::NextCharLength(pstr); ++ // Ensure we don't advance beyond the string boundary ++ if (matchedLength > remainingLength) { ++ matchedLength = remainingLength; ++ } + segLength += matchedLength; + } else { + clearBuffer(); +@@ -44,7 +49,6 @@ SegmentsPtr MaxMatchSegmentation::Segment(const std::string& text) const { + segStart = pstr + matchedLength; + } + pstr += matchedLength; +- length -= matchedLength; + } + clearBuffer(); + return segments; +diff --git a/src/MaxMatchSegmentationTest.cpp b/src/MaxMatchSegmentationTest.cpp +index 775c7ef..c1c9e35 100644 +--- a/src/MaxMatchSegmentationTest.cpp ++++ b/src/MaxMatchSegmentationTest.cpp +@@ -43,4 +43,50 @@ TEST_F(MaxMatchSegmentationTest, Segment) { + EXPECT_EQ(utf8("干燥"), std::string(segments->At(3))); + } + ++TEST_F(MaxMatchSegmentationTest, EmptyString) { ++ const auto& segments = segmenter->Segment(""); ++ EXPECT_EQ(0, segments->Length()); ++} ++ ++TEST_F(MaxMatchSegmentationTest, SingleCharacter) { ++ const auto& segments = segmenter->Segment(utf8("一")); ++ EXPECT_EQ(1, segments->Length()); ++ EXPECT_EQ(utf8("一"), std::string(segments->At(0))); ++} ++ ++TEST_F(MaxMatchSegmentationTest, TruncatedUtf8Sequence) { ++ // This test specifically triggers the buffer overflow bug in the old code. ++ // The bug occurs when a string ends with an incomplete UTF-8 sequence. ++ // ++ // Background: UTF8Util::NextCharLength() examines only the first byte to ++ // determine the expected character length (1-6 bytes), but doesn't verify ++ // that enough bytes actually remain in the buffer. ++ // ++ // Trigger condition: When the expected UTF-8 character length exceeds ++ // the actual remaining bytes, the old code's "length -= matchedLength" ++ // causes integer underflow (size_t wraps around to a huge value), leading ++ // to out-of-bounds reads in the next MatchPrefix() call. ++ ++ // Construct a string ending with a truncated 3-byte UTF-8 sequence: ++ // - Normal text: "一" (valid 3-byte UTF-8: 0xE4 0xB8 0x80) ++ // - Followed by: 0xE4 0xB8 (incomplete 3-byte sequence - missing last byte) ++ std::string malformed; ++ malformed += utf8("一"); // Valid character ++ malformed += '\xE4'; // Start of 3-byte UTF-8 (NextCharLength returns 3) ++ malformed += '\xB8'; // Second byte ++ // Missing third byte - only 2 bytes remain but NextCharLength expects 3 ++ // Old code: length=2, matchedLength=3 → length = 2-3 = SIZE_MAX (underflow) ++ ++ // The fixed code should handle this gracefully without buffer overflow ++ EXPECT_NO_THROW({ ++ const auto& segments = segmenter->Segment(malformed); ++ // The valid character "一" plus the incomplete sequence form a single segment ++ // (incomplete sequence doesn't match dictionary, gets accumulated with previous) ++ EXPECT_EQ(1, segments->Length()); ++ // Output should preserve all input bytes (including incomplete sequence) ++ // This is correct behavior - we don't discard data, we preserve it ++ EXPECT_EQ(malformed, std::string(segments->At(0))); ++ }); ++} ++ + } // namespace opencc +-- +2.47.3 + diff -Nru opencc-1.1.9+ds1/debian/patches/series opencc-1.1.9+ds1/debian/patches/series --- opencc-1.1.9+ds1/debian/patches/series 2024-08-13 16:52:02.000000000 +0000 +++ opencc-1.1.9+ds1/debian/patches/series 2026-06-20 16:41:34.000000000 +0000 @@ -2,3 +2,4 @@ 0003-no-remote-images-when-reading-docs-on-disk.patch 0004-Use-system-googletest.patch 0005-Disable-build-in-setup.py.patch +0001-Fix-two-out-of-bounds-read-issues-when-handling-trun.patch