Version in base suite: 2.0207+dfsg+really+2.0134-1 Base version: libxml-libxml-perl_2.0207+dfsg+really+2.0134-1 Target version: libxml-libxml-perl_2.0207+dfsg+really+2.0134-1+deb12u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/libx/libxml-libxml-perl/libxml-libxml-perl_2.0207+dfsg+really+2.0134-1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/libx/libxml-libxml-perl/libxml-libxml-perl_2.0207+dfsg+really+2.0134-1+deb12u1.dsc changelog | 8 patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch | 327 ++++++++++ patches/series | 1 3 files changed, 336 insertions(+) dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp_9qj7irs/libxml-libxml-perl_2.0207+dfsg+really+2.0134-1.dsc: no acceptable signature found dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp_9qj7irs/libxml-libxml-perl_2.0207+dfsg+really+2.0134-1+deb12u1.dsc: no acceptable signature found diff -Nru libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog --- libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog 2022-02-11 19:29:49.000000000 +0000 +++ libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog 2026-06-27 11:41:54.000000000 +0000 @@ -1,3 +1,11 @@ +libxml-libxml-perl (2.0207+dfsg+really+2.0134-1+deb12u1) bookworm; urgency=medium + + * Team upload. + * fix: replace domParseChar with xmlValidateName to prevent OOB UTF-8 read + (CVE-2026-8177) (Closes: #1136300) + + -- Salvatore Bonaccorso Sat, 27 Jun 2026 13:41:54 +0200 + libxml-libxml-perl (2.0207+dfsg+really+2.0134-1) unstable; urgency=medium * Team upload diff -Nru libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch --- libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch 1970-01-01 00:00:00.000000000 +0000 +++ libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch 2026-06-27 11:41:54.000000000 +0000 @@ -0,0 +1,327 @@ +From: Toddr Bot +Date: Tue, 19 May 2026 19:32:08 +0000 +Subject: fix: replace domParseChar with xmlValidateName to prevent OOB UTF-8 + read +Origin: https://github.com/cpan-authors/XML-LibXML/commit/059abf5f9336e2213794b5b545c707394cca3ac7 +Bug-Debian: https://bugs.debian.org/1136300 +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2026-8177 + +domParseChar() read continuation bytes for multi-byte UTF-8 sequences without +validating they exist or have required 10xxxxxx form. Truncated sequences like +"a\xF0" caused heap reads past NUL terminator. Replace with libxml2's +xmlValidateName(), which correctly handles all UTF-8 edge cases. + +Adds 65 regression tests covering truncation points, invalid continuations, and +5 affected DOM entry points (createElement, createAttribute, setNodeName, etc). + +Fixes https://github.com/cpan-authors/XML-LibXML/issues/146 +--- + .gitignore | 1 + + LibXML.xs | 33 +------------ + MANIFEST | 1 + + dom.c | 88 --------------------------------- + dom.h | 32 ------------ + t/48_security_oob_utf8_gh146.t | 90 ++++++++++++++++++++++++++++++++++ + 6 files changed, 94 insertions(+), 151 deletions(-) + create mode 100644 t/48_security_oob_utf8_gh146.t + +diff --git a/LibXML.xs b/LibXML.xs +index df23bc7d292e..6f18e89c83e8 100644 +--- a/LibXML.xs ++++ b/LibXML.xs +@@ -1025,40 +1025,11 @@ LibXML_cleanup_parser() { + int + LibXML_test_node_name( xmlChar * name ) + { +- xmlChar * cur = name; +- int tc = 0; +- int len = 0; +- +- if ( cur == NULL || *cur == 0 ) { +- /* warn("name is empty" ); */ +- return(0); +- } +- +- tc = domParseChar( cur, &len ); +- +- if ( !( IS_LETTER( tc ) || (tc == '_') || (tc == ':')) ) { +- /* warn( "is not a letter\n" ); */ ++ if ( name == NULL || *name == 0 ) { + return(0); + } + +- tc = 0; +- cur += len; +- +- while (*cur != 0 ) { +- tc = domParseChar( cur, &len ); +- +- if (!(IS_LETTER(tc) || IS_DIGIT(tc) || (tc == '_') || +- (tc == '-') || (tc == ':') || (tc == '.') || +- IS_COMBINING(tc) || IS_EXTENDER(tc)) ) { +- /* warn( "is not a letter\n" ); */ +- return(0); +- } +- tc = 0; +- cur += len; +- } +- +- /* warn("name is ok"); */ +- return(1); ++ return xmlValidateName( name, 0 ) == 0; + } + + /* Assumes that the node has a proxy. */ +diff --git a/MANIFEST b/MANIFEST +index 55e093298936..9427babae6fe 100644 +--- a/MANIFEST ++++ b/MANIFEST +@@ -170,6 +170,7 @@ t/48_replaceNode_DTD_nodes_rT_80521.t + t/48_rt123379_setNamespace.t + t/48_rt55000.t + t/48_rt93429_recover_2_in_html_parsing.t ++t/48_security_oob_utf8_gh146.t + t/48importing_nodes_IDs_rt_69520.t + t/49_load_html.t + t/49callbacks_returning_undef.t +diff --git a/dom.c b/dom.c +index 94518b0aea29..a93cce30d940 100644 +--- a/dom.c ++++ b/dom.c +@@ -237,94 +237,6 @@ domReconcileNs(xmlNodePtr tree) + xmlFreeNsList(unused); + } + +-/** +- * NAME domParseChar +- * TYPE function +- * SYNOPSIS +- * int utf8char = domParseChar( curchar, &len ); +- * +- * The current char value, if using UTF-8 this may actually span +- * multiple bytes in the given string. This function parses an utf8 +- * character from a string into a UTF8 character (an integer). It uses +- * a slightly modified version of libxml2's character parser. libxml2 +- * itself does not provide any function to parse characters dircetly +- * from a string and test if they are valid utf8 characters. +- * +- * XML::LibXML uses this function rather than perls native UTF8 +- * support for two reasons: +- * 1) perls UTF8 handling functions often lead to encoding errors, +- * which partly comes, that they are badly documented. +- * 2) not all perl versions XML::LibXML intends to run with have native +- * UTF8 support. +- * +- * domParseChar() allows to use the very same code with all versions +- * of perl :) +- * +- * Returns the current char value and its length +- * +- * NOTE: If the character passed to this function is not a UTF +- * character, the return value will be 0 and the length of the +- * character is -1! +- */ +-int +-domParseChar( xmlChar *cur, int *len ) +-{ +- unsigned char c; +- unsigned int val; +- +- /* +- * We are supposed to handle UTF8, check it's valid +- * From rfc2044: encoding of the Unicode values on UTF-8: +- * +- * UCS-4 range (hex.) UTF-8 octet sequence (binary) +- * 0000 0000-0000 007F 0xxxxxxx +- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx +- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx +- * +- * Check for the 0x110000 limit too +- */ +- +- if ( cur == NULL || *cur == 0 ) { +- *len = 0; +- return(0); +- } +- +- c = *cur; +- if ( c & 0x80 ) { +- if ((c & 0xe0) == 0xe0) { +- if ((c & 0xf0) == 0xf0) { +- /* 4-byte code */ +- *len = 4; +- val = (cur[0] & 0x7) << 18; +- val |= (cur[1] & 0x3f) << 12; +- val |= (cur[2] & 0x3f) << 6; +- val |= cur[3] & 0x3f; +- } else { +- /* 3-byte code */ +- *len = 3; +- val = (cur[0] & 0xf) << 12; +- val |= (cur[1] & 0x3f) << 6; +- val |= cur[2] & 0x3f; +- } +- } else { +- /* 2-byte code */ +- *len = 2; +- val = (cur[0] & 0x1f) << 6; +- val |= cur[1] & 0x3f; +- } +- if ( !IS_CHAR(val) ) { +- *len = -1; +- return(0); +- } +- return(val); +- } +- else { +- /* 1-byte code */ +- *len = 1; +- return((int)c); +- } +-} +- + /** + * Name: domReadWellBalancedString + * Synopsis: xmlNodePtr domReadWellBalancedString( xmlDocPtr doc, xmlChar *string ) +diff --git a/dom.h b/dom.h +index 428b685e1e7a..32936ee3ab3a 100644 +--- a/dom.h ++++ b/dom.h +@@ -53,38 +53,6 @@ extern "C" { + void + domReconcileNs(xmlNodePtr tree); + +-/** +- * NAME domParseChar +- * TYPE function +- * SYNOPSIS +- * int utf8char = domParseChar( curchar, &len ); +- * +- * The current char value, if using UTF-8 this may actually span +- * multiple bytes in the given string. This function parses an utf8 +- * character from a string into a UTF8 character (an integer). It uses +- * a slightly modified version of libxml2's character parser. libxml2 +- * itself does not provide any function to parse characters dircetly +- * from a string and test if they are valid utf8 characters. +- * +- * XML::LibXML uses this function rather than perls native UTF8 +- * support for two reasons: +- * 1) perls UTF8 handling functions often lead to encoding errors, +- * which partly comes, that they are badly documented. +- * 2) not all perl versions XML::LibXML intends to run with have native +- * UTF8 support. +- * +- * domParseChar() allows to use the very same code with all versions +- * of perl :) +- * +- * Returns the current char value and its length +- * +- * NOTE: If the character passed to this function is not a UTF +- * character, the return value will be 0 and the length of the +- * character is -1! +- */ +-int +-domParseChar( xmlChar *characters, int *len ); +- + xmlNodePtr + domReadWellBalancedString( xmlDocPtr doc, xmlChar* string, int repair ); + +diff --git a/t/48_security_oob_utf8_gh146.t b/t/48_security_oob_utf8_gh146.t +new file mode 100644 +index 000000000000..74fb6d45697d +--- /dev/null ++++ b/t/48_security_oob_utf8_gh146.t +@@ -0,0 +1,90 @@ ++# Security regression test for GitHub issue #146: ++# Out-of-bounds heap read via hand-rolled UTF-8 walker on truncated sequences. ++# ++# The original domParseChar() read continuation bytes for multi-byte UTF-8 ++# sequences without verifying they exist or are valid. A truncated sequence ++# (e.g., "a\xF0") caused reads past the NUL terminator into uninitialized ++# heap memory. This affects all DOM methods that validate node names via ++# LibXML_test_node_name(): createElement, createAttribute, setNodeName, ++# createElementNS, createAttributeNS, etc. ++# ++# Impact: denial of service (crash on unmapped memory) and potential ++# information disclosure (reading adjacent heap allocations). ++# ++# Fixed by replacing the hand-rolled UTF-8 walker (domParseChar) with ++# libxml2's own xmlValidateName(), which correctly handles all UTF-8 ++# edge cases. ++# ++# NOTE: This test verifies that malformed UTF-8 does not crash the process ++# (the actual security issue). Whether a given sequence is rejected depends ++# on the linked libxml2 version — older 2.9.x builds may accept some ++# sequences that newer versions reject. ++ ++use strict; ++use warnings; ++ ++use Test::More; ++use XML::LibXML; ++ ++# Truncated UTF-8 sequences that previously caused OOB heap reads. ++# Each entry: [ bytes, description ] ++my @truncated_sequences = ( ++ [ "a\xC0", "truncated 2-byte (leader only)" ], ++ [ "a\xC2", "truncated 2-byte (valid leader, missing continuation)" ], ++ [ "a\xE0", "truncated 3-byte (leader only)" ], ++ [ "a\xE0\x80", "truncated 3-byte (leader + 1 continuation)" ], ++ [ "a\xF0", "truncated 4-byte (leader only)" ], ++ [ "a\xF0\x80", "truncated 4-byte (leader + 1 continuation)" ], ++ [ "a\xF0\x80\x80", "truncated 4-byte (leader + 2 continuations)" ], ++); ++ ++# Invalid continuation bytes — the leader is valid but the continuations ++# are not 10xxxxxx. ++my @invalid_continuations = ( ++ [ "a\xC2\x41", "2-byte with ASCII continuation" ], ++ [ "a\xE0\x41\x80", "3-byte with ASCII in first continuation" ], ++ [ "a\xE0\x80\x41", "3-byte with ASCII in second continuation" ], ++ [ "a\xF0\x41\x80\x80", "4-byte with ASCII in first continuation" ], ++ [ "a\xF0\x80\x41\x80", "4-byte with ASCII in second continuation" ], ++ [ "a\xF0\x80\x80\x41", "4-byte with ASCII in third continuation" ], ++); ++ ++my @all_bad = (@truncated_sequences, @invalid_continuations); ++ ++my @methods = qw( createElement setNodeName createElementNS ++ createAttribute createAttributeNS ); ++ ++# TEST:$bad_count=13 ++# TEST:$method_count=5 ++plan tests => scalar(@all_bad) * scalar(@methods); ++ ++my $doc = XML::LibXML::Document->new(); ++my $nsURI = "http://example.com/ns"; ++ ++for my $case (@all_bad) { ++ my ($bytes, $desc) = @$case; ++ ++ for my $method (@methods) { ++ eval { ++ if ($method eq 'createElement') { ++ $doc->createElement($bytes); ++ } ++ elsif ($method eq 'setNodeName') { ++ my $node = $doc->createElement("tmp"); ++ $node->setNodeName($bytes); ++ } ++ elsif ($method eq 'createElementNS') { ++ $doc->createElementNS($nsURI, $bytes); ++ } ++ elsif ($method eq 'createAttribute') { ++ $doc->createAttribute($bytes, "value"); ++ } ++ elsif ($method eq 'createAttributeNS') { ++ $doc->createAttributeNS($nsURI, $bytes, "value"); ++ } ++ }; ++ ++ # TEST*$bad_count*$method_count ++ pass("$method survives $desc without crashing"); ++ } ++} +-- +2.53.0 + diff -Nru libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series --- libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series 2022-02-11 19:08:51.000000000 +0000 +++ libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series 2026-06-27 11:41:54.000000000 +0000 @@ -1,3 +1,4 @@ fail-build-no-libxml2.patch disable_runtime-version_warning.patch test_against_runtime-version.patch +fix-replace-domParseChar-with-xmlValidateName-to-pre.patch