Version in base suite: 3.11.2-6+deb12u2 Version in overlay suite: 3.11.2-6+deb12u3 Base version: python3.11_3.11.2-6+deb12u3 Target version: python3.11_3.11.2-6+deb12u4 Base file: /srv/ftp-master.debian.org/ftp/pool/main/p/python3.11/python3.11_3.11.2-6+deb12u3.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/p/python3.11/python3.11_3.11.2-6+deb12u4.dsc changelog | 10 patches/0001-3.11-gh-123270-Replaced-SanitizedNames-with-a-more-s.patch | 216 ++++++++ patches/CVE-2024-6232.patch | 246 ++++++++++ patches/series | 2 4 files changed, 474 insertions(+) diff -Nru python3.11-3.11.2/debian/changelog python3.11-3.11.2/debian/changelog --- python3.11-3.11.2/debian/changelog 2024-08-26 07:20:54.000000000 +0000 +++ python3.11-3.11.2/debian/changelog 2024-09-14 03:00:30.000000000 +0000 @@ -1,3 +1,13 @@ +python3.11 (3.11.2-6+deb12u4) bookworm; urgency=medium + + * Fix zipfile.Path regression introduced by 3.11.2-6+deb12u3 + (Closes: 1080245) + * Fix CVE-2024-6232: Regular expressions that allowed excessive backtracking + during tarfile.TarFile header parsing are vulnerable to ReDoS via + specifically-crafted tar archives + + -- Santiago Ruano Rincón Sat, 14 Sep 2024 00:00:30 -0300 + python3.11 (3.11.2-6+deb12u3) bookworm-security; urgency=medium * CVE-2024-0397 diff -Nru python3.11-3.11.2/debian/patches/0001-3.11-gh-123270-Replaced-SanitizedNames-with-a-more-s.patch python3.11-3.11.2/debian/patches/0001-3.11-gh-123270-Replaced-SanitizedNames-with-a-more-s.patch --- python3.11-3.11.2/debian/patches/0001-3.11-gh-123270-Replaced-SanitizedNames-with-a-more-s.patch 1970-01-01 00:00:00.000000000 +0000 +++ python3.11-3.11.2/debian/patches/0001-3.11-gh-123270-Replaced-SanitizedNames-with-a-more-s.patch 2024-09-05 15:19:38.000000000 +0000 @@ -0,0 +1,216 @@ +From fc0b8259e693caa8400fa8b6ac1e494e47ea7798 Mon Sep 17 00:00:00 2001 +From: "Jason R. Coombs" +Date: Wed, 4 Sep 2024 11:52:54 -0400 +Subject: [PATCH] [3.11] gh-123270: Replaced SanitizedNames with a more + surgical fix. (GH-123354) (#123425) + +Applies changes from zipp 3.20.1 and jaraco/zippGH-124 +(cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6) + +Co-authored-by: Jason R. Coombs + +* Restore the slash-prefixed paths in the malformed_paths test. + +Origin: upstream, https://github.com/python/cpython/commit/fc0b8259e693caa8400fa8b6ac1e494e47ea7798 +Bug-Debian: https://bugs.debian.org/1080245 +--- + Lib/test/test_zipfile.py | 72 +++++++++++++++++-- + Lib/zipfile.py | 69 +++--------------- + ...-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst | 3 + + 3 files changed, 77 insertions(+), 67 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst + +Index: python3/Lib/test/test_zipfile.py +=================================================================== +--- python3.orig/Lib/test/test_zipfile.py ++++ python3/Lib/test/test_zipfile.py +@@ -3546,7 +3546,11 @@ class EncodedMetadataTests(unittest.Test + + def test_malformed_paths(self): + """ +- Path should handle malformed paths. ++ Path should handle malformed paths gracefully. ++ ++ Paths with leading slashes are not visible. ++ ++ Paths with dots are treated like regular files. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") +@@ -3555,11 +3559,67 @@ class EncodedMetadataTests(unittest.Test + zf.writestr("../parent.txt", b"content") + zf.filename = '' + root = zipfile.Path(zf) +- assert list(map(str, root.iterdir())) == [ +- 'one-slash.txt', +- 'two-slash.txt', +- 'parent.txt', +- ] ++ assert list(map(str, root.iterdir())) == ['../'] ++ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' ++ ++ def test_unsupported_names(self): ++ """ ++ Path segments with special characters are readable. ++ ++ On some platforms or file systems, characters like ++ ``:`` and ``?`` are not allowed, but they are valid ++ in the zip file. ++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr("path?", b"content") ++ zf.writestr("V: NMS.flac", b"fLaC...") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ contents = root.iterdir() ++ assert next(contents).name == 'path?' ++ assert next(contents).name == 'V: NMS.flac' ++ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." ++ ++ def test_backslash_not_separator(self): ++ """ ++ In a zip file, backslashes are not separators. ++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ (first,) = root.iterdir() ++ assert not first.is_dir() ++ assert first.name == 'foo\\bar' ++ ++ ++class DirtyZipInfo(zipfile.ZipInfo): ++ """ ++ Bypass name sanitization. ++ """ ++ ++ def __init__(self, filename, *args, **kwargs): ++ super().__init__(filename, *args, **kwargs) ++ self.filename = filename ++ ++ @classmethod ++ def for_name(cls, name, archive): ++ """ ++ Construct the same way that ZipFile.writestr does. ++ ++ TODO: extract this functionality and re-use ++ """ ++ self = cls(filename=name, date_time=time.localtime(time.time())[:6]) ++ self.compress_type = archive.compression ++ self.compress_level = archive.compresslevel ++ if self.filename.endswith('/'): # pragma: no cover ++ self.external_attr = 0o40775 << 16 # drwxrwxr-x ++ self.external_attr |= 0x10 # MS-DOS directory flag ++ else: ++ self.external_attr = 0o600 << 16 # ?rw------- ++ return self + + + if __name__ == "__main__": +Index: python3/Lib/zipfile.py +=================================================================== +--- python3.orig/Lib/zipfile.py ++++ python3/Lib/zipfile.py +@@ -2201,7 +2201,7 @@ def _parents(path): + def _ancestry(path): + """ + Given a path with elements separated by +- posixpath.sep, generate all elements of that path ++ posixpath.sep, generate all elements of that path. + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] +@@ -2213,9 +2213,14 @@ def _ancestry(path): + ['b'] + >>> list(_ancestry('')) + [] ++ ++ Multiple separators are treated like a single. ++ ++ >>> list(_ancestry('//b//d///f//')) ++ ['//b//d///f', '//b//d', '//b'] + """ + path = path.rstrip(posixpath.sep) +- while path and path != posixpath.sep: ++ while path.rstrip(posixpath.sep): + yield path + path, tail = posixpath.split(path) + +@@ -2232,65 +2237,7 @@ def _difference(minuend, subtrahend): + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + +-class SanitizedNames: +- """ +- ZipFile mix-in to ensure names are sanitized. +- """ +- +- def namelist(self): +- return list(map(self._sanitize, super().namelist())) +- +- @staticmethod +- def _sanitize(name): +- r""" +- Ensure a relative path with posix separators and no dot names. +- Modeled after +- https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 +- but provides consistent cross-platform behavior. +- >>> san = SanitizedNames._sanitize +- >>> san('/foo/bar') +- 'foo/bar' +- >>> san('//foo.txt') +- 'foo.txt' +- >>> san('foo/.././bar.txt') +- 'foo/bar.txt' +- >>> san('foo../.bar.txt') +- 'foo../.bar.txt' +- >>> san('\\foo\\bar.txt') +- 'foo/bar.txt' +- >>> san('D:\\foo.txt') +- 'D/foo.txt' +- >>> san('\\\\server\\share\\file.txt') +- 'server/share/file.txt' +- >>> san('\\\\?\\GLOBALROOT\\Volume3') +- '?/GLOBALROOT/Volume3' +- >>> san('\\\\.\\PhysicalDrive1\\root') +- 'PhysicalDrive1/root' +- Retain any trailing slash. +- >>> san('abc/') +- 'abc/' +- Raises a ValueError if the result is empty. +- >>> san('../..') +- Traceback (most recent call last): +- ... +- ValueError: Empty filename +- """ +- +- def allowed(part): +- return part and part not in {'..', '.'} +- +- # Remove the drive letter. +- # Don't use ntpath.splitdrive, because that also strips UNC paths +- bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) +- clean = bare.replace('\\', '/') +- parts = clean.split('/') +- joined = '/'.join(filter(allowed, parts)) +- if not joined: +- raise ValueError("Empty filename") +- return joined + '/' * name.endswith('/') +- +- +-class CompleteDirs(SanitizedNames, ZipFile): ++class CompleteDirs(ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. +Index: python3/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst +=================================================================== +--- /dev/null ++++ python3/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst +@@ -0,0 +1,3 @@ ++Applied a more surgical fix for malformed payloads in :class:`zipfile.Path` ++causing infinite loops (gh-122905) without breaking contents using ++legitimate characters. diff -Nru python3.11-3.11.2/debian/patches/CVE-2024-6232.patch python3.11-3.11.2/debian/patches/CVE-2024-6232.patch --- python3.11-3.11.2/debian/patches/CVE-2024-6232.patch 1970-01-01 00:00:00.000000000 +0000 +++ python3.11-3.11.2/debian/patches/CVE-2024-6232.patch 2024-09-14 03:00:30.000000000 +0000 @@ -0,0 +1,246 @@ +From d449caf8a179e3b954268b3a88eb9170be3c8fbf Mon Sep 17 00:00:00 2001 +From: Seth Michael Larson +Date: Tue, 3 Sep 2024 10:07:13 -0500 +Subject: [PATCH] [3.11] gh-121285: Remove backtracking when parsing tarfile + headers (GH-121286) (#123639) + +* Remove backtracking when parsing tarfile headers +* Rewrite PAX header parsing to be stricter +* Optimize parsing of GNU extended sparse headers v0.0 + +(cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4) + +Co-authored-by: Kirill Podoprigora +Co-authored-by: Gregory P. Smith + +Origin: backport, https://github.com/python/cpython/commit/d449caf8a179e3b954268b3a88eb9170be3c8fbf +--- + Lib/tarfile.py | 105 +++++++++++------- + Lib/test/test_tarfile.py | 42 +++++++ + ...-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | 2 + + 3 files changed, 111 insertions(+), 38 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst + +Index: python3/Lib/tarfile.py +=================================================================== +--- python3.orig/Lib/tarfile.py ++++ python3/Lib/tarfile.py +@@ -712,6 +712,10 @@ class ExFileObject(io.BufferedReader): + #------------------ + # Exported Classes + #------------------ ++ ++# Header length is digits followed by a space. ++_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") ++ + class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. +@@ -1240,41 +1244,59 @@ class TarInfo(object): + else: + pax_headers = tarfile.pax_headers.copy() + +- # Check if the pax header contains a hdrcharset field. This tells us +- # the encoding of the path, linkpath, uname and gname fields. Normally, +- # these fields are UTF-8 encoded but since POSIX.1-2008 tar +- # implementations are allowed to store them as raw binary strings if +- # the translation to UTF-8 fails. +- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) +- if match is not None: +- pax_headers["hdrcharset"] = match.group(1).decode("utf-8") +- +- # For the time being, we don't care about anything other than "BINARY". +- # The only other value that is currently allowed by the standard is +- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. +- hdrcharset = pax_headers.get("hdrcharset") +- if hdrcharset == "BINARY": +- encoding = tarfile.encoding +- else: +- encoding = "utf-8" +- + # Parse pax header information. A record looks like that: + # "%d %s=%s\n" % (length, keyword, value). length is the size + # of the complete record including the length field itself and +- # the newline. keyword and value are both UTF-8 encoded strings. +- regex = re.compile(br"(\d+) ([^=]+)=") ++ # the newline. + pos = 0 +- while True: +- match = regex.match(buf, pos) +- if not match: +- break ++ encoding = None ++ raw_headers = [] ++ while len(buf) > pos and buf[pos] != 0x00: ++ if not (match := _header_length_prefix_re.match(buf, pos)): ++ raise InvalidHeaderError("invalid header") ++ try: ++ length = int(match.group(1)) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ # Headers must be at least 5 bytes, shortest being '5 x=\n'. ++ # Value is allowed to be empty. ++ if length < 5: ++ raise InvalidHeaderError("invalid header") ++ if pos + length > len(buf): ++ raise InvalidHeaderError("invalid header") ++ ++ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header ++ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] ++ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") + +- length, keyword = match.groups() +- length = int(length) +- if length == 0: ++ # Check the framing of the header. The last character must be '\n' (0x0A) ++ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: + raise InvalidHeaderError("invalid header") +- value = buf[match.end(2) + 1:match.start(1) + length - 1] ++ raw_headers.append((length, raw_keyword, raw_value)) + ++ # Check if the pax header contains a hdrcharset field. This tells us ++ # the encoding of the path, linkpath, uname and gname fields. Normally, ++ # these fields are UTF-8 encoded but since POSIX.1-2008 tar ++ # implementations are allowed to store them as raw binary strings if ++ # the translation to UTF-8 fails. For the time being, we don't care about ++ # anything other than "BINARY". The only other value that is currently ++ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. ++ # Note that we only follow the initial 'hdrcharset' setting to preserve ++ # the initial behavior of the 'tarfile' module. ++ if raw_keyword == b"hdrcharset" and encoding is None: ++ if raw_value == b"BINARY": ++ encoding = tarfile.encoding ++ else: # This branch ensures only the first 'hdrcharset' header is used. ++ encoding = "utf-8" ++ ++ pos += length ++ ++ # If no explicit hdrcharset is set, we use UTF-8 as a default. ++ if encoding is None: ++ encoding = "utf-8" ++ ++ # After parsing the raw headers we can decode them to text. ++ for length, raw_keyword, raw_value in raw_headers: + # Normally, we could just use "utf-8" as the encoding and "strict" + # as the error handler, but we better not take the risk. For + # example, GNU tar <= 1.23 is known to store filenames it cannot +@@ -1282,17 +1304,16 @@ class TarInfo(object): + # hdrcharset=BINARY header). + # We first try the strict standard encoding, and if that fails we + # fall back on the user's encoding and error handler. +- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", ++ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", + tarfile.errors) + if keyword in PAX_NAME_FIELDS: +- value = self._decode_pax_field(value, encoding, tarfile.encoding, ++ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, + tarfile.errors) + else: +- value = self._decode_pax_field(value, "utf-8", "utf-8", ++ value = self._decode_pax_field(raw_value, "utf-8", "utf-8", + tarfile.errors) + + pax_headers[keyword] = value +- pos += length + + # Fetch the next header. + try: +@@ -1307,7 +1328,7 @@ class TarInfo(object): + + elif "GNU.sparse.size" in pax_headers: + # GNU extended sparse format version 0.0. +- self._proc_gnusparse_00(next, pax_headers, buf) ++ self._proc_gnusparse_00(next, raw_headers) + + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": + # GNU extended sparse format version 1.0. +@@ -1329,15 +1350,24 @@ class TarInfo(object): + + return next + +- def _proc_gnusparse_00(self, next, pax_headers, buf): ++ def _proc_gnusparse_00(self, next, raw_headers): + """Process a GNU tar extended sparse header, version 0.0. + """ + offsets = [] +- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): +- offsets.append(int(match.group(1))) + numbytes = [] +- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): +- numbytes.append(int(match.group(1))) ++ for _, keyword, value in raw_headers: ++ if keyword == b"GNU.sparse.offset": ++ try: ++ offsets.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ ++ elif keyword == b"GNU.sparse.numbytes": ++ try: ++ numbytes.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ + next.sparse = list(zip(offsets, numbytes)) + + def _proc_gnusparse_01(self, next, pax_headers): +Index: python3/Lib/test/test_tarfile.py +=================================================================== +--- python3.orig/Lib/test/test_tarfile.py ++++ python3/Lib/test/test_tarfile.py +@@ -1157,6 +1157,48 @@ class PaxReadTest(LongnameTest, ReadTest + finally: + tar.close() + ++ def test_pax_header_bad_formats(self): ++ # The fields from the pax header have priority over the ++ # TarInfo. ++ pax_header_replacements = ( ++ b" foo=bar\n", ++ b"0 \n", ++ b"1 \n", ++ b"2 \n", ++ b"3 =\n", ++ b"4 =a\n", ++ b"1000000 foo=bar\n", ++ b"0 foo=bar\n", ++ b"-12 foo=bar\n", ++ b"000000000000000000000000036 foo=bar\n", ++ ) ++ pax_headers = {"foo": "bar"} ++ ++ for replacement in pax_header_replacements: ++ with self.subTest(header=replacement): ++ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, ++ encoding="iso8859-1") ++ try: ++ t = tarfile.TarInfo() ++ t.name = "pax" # non-ASCII ++ t.uid = 1 ++ t.pax_headers = pax_headers ++ tar.addfile(t) ++ finally: ++ tar.close() ++ ++ with open(tmpname, "rb") as f: ++ data = f.read() ++ self.assertIn(b"11 foo=bar\n", data) ++ data = data.replace(b"11 foo=bar\n", replacement) ++ ++ with open(tmpname, "wb") as f: ++ f.truncate() ++ f.write(data) ++ ++ with self.assertRaisesRegex(tarfile.ReadError, r"method tar: ReadError\('invalid header'\)"): ++ tarfile.open(tmpname, encoding="iso8859-1") ++ + + class WriteTestBase(TarTest): + # Put all write tests in here that are supposed to be tested +Index: python3/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst +=================================================================== +--- /dev/null ++++ python3/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst +@@ -0,0 +1,2 @@ ++Remove backtracking from tarfile header parsing for ``hdrcharset``, PAX, and ++GNU sparse headers. diff -Nru python3.11-3.11.2/debian/patches/series python3.11-3.11.2/debian/patches/series --- python3.11-3.11.2/debian/patches/series 2024-08-26 07:20:17.000000000 +0000 +++ python3.11-3.11.2/debian/patches/series 2024-09-14 02:57:20.000000000 +0000 @@ -51,3 +51,5 @@ CVE-2024-0397.diff CVE-2024-4032.diff CVE-2024-8088.diff +0001-3.11-gh-123270-Replaced-SanitizedNames-with-a-more-s.patch +CVE-2024-6232.patch