Version in base suite: 7.3.11+dfsg-2+deb12u2

Base version: pypy3_7.3.11+dfsg-2+deb12u2
Target version: pypy3_7.3.11+dfsg-2+deb12u3
Base file: /srv/ftp-master.debian.org/ftp/pool/main/p/pypy3/pypy3_7.3.11+dfsg-2+deb12u2.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/p/pypy3/pypy3_7.3.11+dfsg-2+deb12u3.dsc

 changelog                                              |   17
 clean                                                  |   10
 patches/CVE-2023-27043-email-parseaddr                 |  434 +++++++++++++++++
 patches/CVE-2024-11168-urllib-parse-bracketed-names    |   99 +++
 patches/CVE-2024-4032-private-ip-ranges                |  282 +++++++++++
 patches/CVE-2024-6232-tarfile-redos                    |  236 +++++++++
 patches/CVE-2024-6923-encode-email-header-newlines     |  275 ++++++++++
 patches/CVE-2024-7592-http-cookie-quadratic-complexity |  125 ++++
 patches/CVE-2024-8088-zipfile-loop-dos                 |  134 +++++
 patches/CVE-2024-9287-venv-activation-templates        |  289 +++++++++++
 patches/series                                         |    8
 rules                                                  |    4
 12 files changed, 1913 insertions(+)

diff -Nru pypy3-7.3.11+dfsg/debian/changelog pypy3-7.3.11+dfsg/debian/changelog
--- pypy3-7.3.11+dfsg/debian/changelog 2024-05-02 00:39:38.000000000 +0000
+++ pypy3-7.3.11+dfsg/debian/changelog 2024-12-23 20:22:45.000000000 +0000
@@ -1,3 +1,20 @@
+pypy3 (7.3.11+dfsg-2+deb12u3) bookworm; urgency=medium
+
+  * Security patches to the standard library:
+    - CVE-2023-27043: Parse email addresses with special characters,
+      correctly.
+    - CVE-2024-9287: Quote path names in venv activation scripts.
+    - CVE-2024-4032: Fix private IP address ranges.
+    - CVE-2024-6232: Fix ReDoS when parsing tarfile headers.
+    - CVE-2024-8088: Avoid infinite loop in zip file parsing.
+    - CVE-2024-6923: Encode newlines in headers in the email module.
+    - CVE-2024-7592: Quadratic complexity parsing cookies with backslashes.
+    - CVE-2024-11168: Ensure addresses in brackets are valid IPv6 addresses.
+  * Clean the python 2.7 source tree.
+  * Clean cffi modules C source, lex and yacc tabs.
+
+ -- Stefano Rivera  Mon, 23 Dec 2024 16:22:45 -0400
+
 pypy3 (7.3.11+dfsg-2+deb12u2) bookworm; urgency=medium
 
   * Security patches to the standard library:
diff -Nru pypy3-7.3.11+dfsg/debian/clean pypy3-7.3.11+dfsg/debian/clean
--- pypy3-7.3.11+dfsg/debian/clean 2024-05-02 00:39:38.000000000 +0000
+++ pypy3-7.3.11+dfsg/debian/clean 2024-12-23 20:22:45.000000000 +0000
@@ -24,6 +24,16 @@
   -a ! -path 'lib_pypy/_cffi_ssl/_cffi_src/*' \
   -a ! -path 'lib_pypy/_libmpdec/*' \
   -a ! -path 'lib_pypy/_sha3/*'
+echo lib_pypy/_blake2/_blake2b_cffi.c
+echo lib_pypy/_blake2/_blake2s_cffi.c
+echo lib_pypy/_sha3/_sha3_cffi.c
+echo pypy/goal/lextab.py
+echo pypy/goal/yacctab.py
+echo pypy/lextab.py
+echo pypy/yacctab.py
+
+# Python 2.7
+echo cpython27/python
 
 # Tests
 echo pypy/test.db
diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2023-27043-email-parseaddr pypy3-7.3.11+dfsg/debian/patches/CVE-2023-27043-email-parseaddr
--- pypy3-7.3.11+dfsg/debian/patches/CVE-2023-27043-email-parseaddr 1970-01-01 00:00:00.000000000 +0000
+++ pypy3-7.3.11+dfsg/debian/patches/CVE-2023-27043-email-parseaddr 2024-12-23 20:22:45.000000000 +0000
@@ -0,0 +1,434 @@
+From: Petr Viktorin
+Date: Fri, 6 Sep 2024 13:14:22 +0200
+Subject: gh-102988: Reject malformed addresses in email.parseaddr()
+ (GH-111116) (#123768)
+
+Detect email address parsing errors and return empty tuple to
+indicate the parsing error (old API). Add an optional 'strict'
+parameter to getaddresses() and parseaddr() functions. Patch by
+Thomas Dwyer.
+ +(cherry picked from commit 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19) + +Co-authored-by: Victor Stinner +Co-Authored-By: Thomas Dwyer + +Origin: upstream, https://github.com/python/cpython/commit/2a9273a0e4466e2f057f9ce6fe98cd8ce570331b +--- + lib-python/3/email/utils.py | 151 +++++++++++++++++++-- + lib-python/3/test/test_email/test_email.py | 204 +++++++++++++++++++++++++++-- + 2 files changed, 338 insertions(+), 17 deletions(-) + +diff --git a/lib-python/3/email/utils.py b/lib-python/3/email/utils.py +index 48d3016..7ca7a7c 100644 +--- a/lib-python/3/email/utils.py ++++ b/lib-python/3/email/utils.py +@@ -48,6 +48,7 @@ TICK = "'" + specialsre = re.compile(r'[][\\()<>@,:;".]') + escapesre = re.compile(r'[\\"]') + ++ + def _has_surrogates(s): + """Return True if s contains surrogate-escaped binary data.""" + # This check is based on the fact that unless there are surrogates, utf8 +@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): + return address + + ++def _iter_escaped_chars(addr): ++ pos = 0 ++ escape = False ++ for pos, ch in enumerate(addr): ++ if escape: ++ yield (pos, '\\' + ch) ++ escape = False ++ elif ch == '\\': ++ escape = True ++ else: ++ yield (pos, ch) ++ if escape: ++ yield (pos, '\\') ++ ++ ++def _strip_quoted_realnames(addr): ++ """Strip real names between quotes.""" ++ if '"' not in addr: ++ # Fast path ++ return addr ++ ++ start = 0 ++ open_pos = None ++ result = [] ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '"': ++ if open_pos is None: ++ open_pos = pos ++ else: ++ if start != open_pos: ++ result.append(addr[start:open_pos]) ++ start = pos + 1 ++ open_pos = None ++ ++ if start < len(addr): ++ result.append(addr[start:]) ++ ++ return ''.join(result) + +-def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) +- a = _AddressList(all) +- return a.addresslist ++ ++supports_strict_parsing = True ++ ++def getaddresses(fieldvalues, *, strict=True): ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If strict is true, use a strict parser which rejects malformed inputs. ++ """ ++ ++ # If strict is true, if the resulting list of parsed addresses is greater ++ # than the number of fieldvalues in the input list, a parsing error has ++ # occurred and consequently a list containing a single empty 2-tuple [('', ++ # '')] is returned in its place. This is done to avoid invalid output. ++ # ++ # Malformed input: getaddresses(['alice@example.com ']) ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] ++ # Safe output: [('', '')] ++ ++ if not strict: ++ all = COMMASPACE.join(str(v) for v in fieldvalues) ++ a = _AddressList(all) ++ return a.addresslist ++ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ addr = COMMASPACE.join(fieldvalues) ++ a = _AddressList(addr) ++ result = _post_parse_validation(a.addresslist) ++ ++ # Treat output as invalid if the number of addresses is not equal to the ++ # expected number of addresses. ++ n = 0 ++ for v in fieldvalues: ++ # When a comma is used in the Real Name part it is not a deliminator. ++ # So strip those out before counting the commas. 
++ v = _strip_quoted_realnames(v) ++ # Expected number of addresses: 1 + number of commas ++ n += 1 + v.count(',') ++ if len(result) != n: ++ return [('', '')] ++ ++ return result ++ ++ ++def _check_parenthesis(addr): ++ # Ignore parenthesis in quoted real names. ++ addr = _strip_quoted_realnames(addr) ++ ++ opens = 0 ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '(': ++ opens += 1 ++ elif ch == ')': ++ opens -= 1 ++ if opens < 0: ++ return False ++ return (opens == 0) ++ ++ ++def _pre_parse_validation(email_header_fields): ++ accepted_values = [] ++ for v in email_header_fields: ++ if not _check_parenthesis(v): ++ v = "('', '')" ++ accepted_values.append(v) ++ ++ return accepted_values ++ ++ ++def _post_parse_validation(parsed_email_header_tuples): ++ accepted_values = [] ++ # The parser would have parsed a correctly formatted domain-literal ++ # The existence of an [ after parsing indicates a parsing failure ++ for v in parsed_email_header_tuples: ++ if '[' in v[1]: ++ v = ('', '') ++ accepted_values.append(v) ++ ++ return accepted_values + + + def _format_timetuple_and_zone(timetuple, zone): +@@ -202,16 +318,33 @@ def parsedate_to_datetime(data): + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +-def parseaddr(addr): ++def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). ++ ++ If strict is True, use a strict parser which rejects malformed inputs. + """ +- addrs = _AddressList(addr).addresslist +- if not addrs: +- return '', '' ++ if not strict: ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return ('', '') ++ return addrs[0] ++ ++ if isinstance(addr, list): ++ addr = addr[0] ++ ++ if not isinstance(addr, str): ++ return ('', '') ++ ++ addr = _pre_parse_validation([addr])[0] ++ addrs = _post_parse_validation(_AddressList(addr).addresslist) ++ ++ if not addrs or len(addrs) > 1: ++ return ('', '') ++ + return addrs[0] + + +diff --git a/lib-python/3/test/test_email/test_email.py b/lib-python/3/test/test_email/test_email.py +index 761ea90..0c68964 100644 +--- a/lib-python/3/test/test_email/test_email.py ++++ b/lib-python/3/test/test_email/test_email.py +@@ -16,6 +16,7 @@ from unittest.mock import patch + + import email + import email.policy ++import email.utils + + from email.charset import Charset + from email.header import Header, decode_header, make_header +@@ -3263,15 +3264,154 @@ Foo + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + ++ def test_getaddresses_comma_in_name(self): ++ """GH-106669 regression test.""" ++ self.assertEqual( ++ utils.getaddresses( ++ [ ++ '"Bud, Person" ', ++ 'aperson@dom.ain (Al Person)', ++ '"Mariusz Felisiak" ', ++ ] ++ ), ++ [ ++ ('Bud, Person', 'bperson@dom.ain'), ++ ('Al Person', 'aperson@dom.ain'), ++ ('Mariusz Felisiak', 'to@example.com'), ++ ], ++ ) ++ ++ def test_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" ++ alice = 'alice@example.org' ++ bob = 'bob@example.com' ++ empty = ('', '') ++ ++ # Test utils.getaddresses() and utils.parseaddr() on malformed email ++ # addresses: default behavior (strict=True) rejects malformed address, ++ # and strict=False which tolerates malformed address. 
++ for invalid_separator, expected_non_strict in ( ++ ('(', [(f'<{bob}>', alice)]), ++ (')', [('', alice), empty, ('', bob)]), ++ ('<', [('', alice), empty, ('', bob), empty]), ++ ('>', [('', alice), empty, ('', bob)]), ++ ('[', [('', f'{alice}[<{bob}>]')]), ++ (']', [('', alice), empty, ('', bob)]), ++ ('@', [empty, empty, ('', bob)]), ++ (';', [('', alice), empty, ('', bob)]), ++ (':', [('', alice), ('', bob)]), ++ ('.', [('', alice + '.'), ('', bob)]), ++ ('"', [('', alice), ('', f'<{bob}>')]), ++ ): ++ address = f'{alice}{invalid_separator}<{bob}>' ++ with self.subTest(address=address): ++ self.assertEqual(utils.getaddresses([address]), ++ [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ expected_non_strict) ++ ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Comma (',') is treated differently depending on strict parameter. ++ # Comma without quotes. ++ address = f'{alice},<{bob}>' ++ self.assertEqual(utils.getaddresses([address]), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Real name between quotes containing comma. ++ address = '"Alice, alice@example.org" ' ++ expected_strict = ('Alice, alice@example.org', 'bob@example.com') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Valid parenthesis in comments. ++ address = 'alice@example.org (Alice)' ++ expected_strict = ('Alice', 'alice@example.org') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Invalid parenthesis in comments. ++ address = 'alice@example.org )Alice(' ++ self.assertEqual(utils.getaddresses([address]), [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Two addresses with quotes separated by comma. ++ address = '"Jane Doe" , "John Doe" ' ++ self.assertEqual(utils.getaddresses([address]), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Test email.utils.supports_strict_parsing attribute ++ self.assertEqual(email.utils.supports_strict_parsing, True) ++ + def test_getaddresses_nasty(self): +- eq = self.assertEqual +- eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq(utils.getaddresses( +- ['[]*-- =~$']), +- [('', ''), ('', ''), ('', '*--')]) +- eq(utils.getaddresses( +- ['foo: ;', '"Jason R. Mastaler" ']), +- [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]) ++ for addresses, expected in ( ++ (['"Sürname, Firstname" '], ++ [('Sürname, Firstname', 'to@example.com')]), ++ ++ (['foo: ;'], ++ [('', '')]), ++ ++ (['foo: ;', '"Jason R. Mastaler" '], ++ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), ++ ++ ([r'Pete(A nice \) chap) '], ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), ++ ++ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], ++ [('', '')]), ++ ++ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), ++ ++ (['John Doe '], ++ [('John Doe (comment)', 'jdoe@machine.example')]), ++ ++ (['"Mary Smith: Personal Account" '], ++ [('Mary Smith: Personal Account', 'smith@home.example')]), ++ ++ (['Undisclosed recipients:;'], ++ [('', '')]), ++ ++ ([r', "Giant; \"Big\" Box" '], ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), ++ ): ++ with self.subTest(addresses=addresses): ++ self.assertEqual(utils.getaddresses(addresses), ++ expected) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ expected) ++ ++ addresses = ['[]*-- =~$'] ++ self.assertEqual(utils.getaddresses(addresses), ++ [('', '')]) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ [('', ''), ('', ''), ('', '*--')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" +@@ -3460,6 +3600,54 @@ multipart/report + m = cls(*constructor, policy=email.policy.default) + self.assertIs(m.policy, email.policy.default) + ++ def test_iter_escaped_chars(self): ++ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), ++ [(0, 'a'), ++ (2, '\\\\'), ++ (3, 'b'), ++ (5, '\\"'), ++ (6, 'c'), ++ (8, '\\\\'), ++ (9, '"'), ++ (10, 'd')]) ++ self.assertEqual(list(utils._iter_escaped_chars('a\\')), ++ [(0, 'a'), (1, '\\')]) ++ ++ def test_strip_quoted_realnames(self): ++ def check(addr, expected): ++ self.assertEqual(utils._strip_quoted_realnames(addr), expected) ++ ++ check('"Jane Doe" , "John Doe" ', ++ ' , ') ++ check(r'"Jane \"Doe\"." 
', ++ ' ') ++ ++ # special cases ++ check(r'before"name"after', 'beforeafter') ++ check(r'before"name"', 'before') ++ check(r'b"name"', 'b') # single char ++ check(r'"name"after', 'after') ++ check(r'"name"a', 'a') # single char ++ check(r'"name"', '') ++ ++ # no change ++ for addr in ( ++ 'Jane Doe , John Doe ', ++ 'lone " quote', ++ ): ++ self.assertEqual(utils._strip_quoted_realnames(addr), addr) ++ ++ ++ def test_check_parenthesis(self): ++ addr = 'alice@example.net' ++ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) ++ ++ # Ignore real name between quotes ++ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) ++ + + # Test the iterator/generators + class TestIterators(TestEmailBase): diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2024-11168-urllib-parse-bracketed-names pypy3-7.3.11+dfsg/debian/patches/CVE-2024-11168-urllib-parse-bracketed-names --- pypy3-7.3.11+dfsg/debian/patches/CVE-2024-11168-urllib-parse-bracketed-names 1970-01-01 00:00:00.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/CVE-2024-11168-urllib-parse-bracketed-names 2024-12-23 20:22:45.000000000 +0000 @@ -0,0 +1,99 @@ +From: Victor Stinner +Date: Mon, 2 Dec 2024 13:36:46 +0100 +Subject: gh-103848: Adds checks to ensure that bracketed hosts found by + urlsplit are of IPv6 or IPvFuture format (#103849) (#126976) + +Co-authored-by: Gregory P. Smith +(cherry picked from commit 29f348e232e82938ba2165843c448c2b291504c5) + +Co-authored-by: JohnJamesUtley <81572567+JohnJamesUtley@users.noreply.github.com> + +Origin: cpython, https://github.com/python/cpython/commit/ddca2953191c67a12b1f19d6bca41016c6ae7132 +--- + lib-python/3/test/test_urlparse.py | 26 ++++++++++++++++++++++++++ + lib-python/3/urllib/parse.py | 16 +++++++++++++++- + 2 files changed, 41 insertions(+), 1 deletion(-) + +diff --git a/lib-python/3/test/test_urlparse.py b/lib-python/3/test/test_urlparse.py +index 574da5b..c84df23 100644 +--- a/lib-python/3/test/test_urlparse.py ++++ b/lib-python/3/test/test_urlparse.py +@@ -1071,6 +1071,32 @@ class UrlParseTestCase(unittest.TestCase): + self.assertEqual(p2.scheme, 'tel') + self.assertEqual(p2.path, '+31641044153') + ++ def test_invalid_bracketed_hosts(self): ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') ++ ++ def test_splitting_bracketed_hosts(self): ++ p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') ++ self.assertEqual(p1.hostname, 'v6a.ip') ++ 
self.assertEqual(p1.username, 'user') ++ self.assertEqual(p1.path, '/path') ++ p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query') ++ self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test') ++ self.assertEqual(p2.username, 'user') ++ self.assertEqual(p2.path, '/path') ++ p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query') ++ self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test') ++ self.assertEqual(p3.username, 'user') ++ self.assertEqual(p3.path, '/path') ++ + def test_port_casting_failure_message(self): + message = "Port could not be cast to integer value as 'oracle'" + p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle') +diff --git a/lib-python/3/urllib/parse.py b/lib-python/3/urllib/parse.py +index f5d3662..39b0249 100644 +--- a/lib-python/3/urllib/parse.py ++++ b/lib-python/3/urllib/parse.py +@@ -36,6 +36,7 @@ import sys + import types + import collections + import warnings ++import ipaddress + + __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", + "urlsplit", "urlunsplit", "urlencode", "parse_qs", +@@ -442,6 +443,17 @@ def _checknetloc(netloc): + raise ValueError("netloc '" + netloc + "' contains invalid " + + "characters under NFKC normalization") + ++# Valid bracketed hosts are defined in ++# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ ++def _check_bracketed_host(hostname): ++ if hostname.startswith('v'): ++ if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname): ++ raise ValueError(f"IPvFuture address is invalid") ++ else: ++ ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4 ++ if isinstance(ip, ipaddress.IPv4Address): ++ raise ValueError(f"An IPv4 address cannot be in brackets") ++ + def urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + :///?# +@@ -488,12 +500,14 @@ def urlsplit(url, scheme='', allow_fragments=True): + break + else: + scheme, url = url[:i].lower(), url[i+1:] +- + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") ++ if '[' in netloc and ']' in netloc: ++ bracketed_host = netloc.partition('[')[2].partition(']')[0] ++ _check_bracketed_host(bracketed_host) + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2024-4032-private-ip-ranges pypy3-7.3.11+dfsg/debian/patches/CVE-2024-4032-private-ip-ranges --- pypy3-7.3.11+dfsg/debian/patches/CVE-2024-4032-private-ip-ranges 1970-01-01 00:00:00.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/CVE-2024-4032-private-ip-ranges 2024-12-23 20:22:45.000000000 +0000 @@ -0,0 +1,282 @@ +From: Petr Viktorin +Date: Tue, 7 May 2024 11:57:58 +0200 +Subject: gh-113171: gh-65056: Fix "private" (non-global) IP address ranges + (GH-113179) (GH-113186) (GH-118177) (GH-118472) + +The _private_networks variables, used by various is_private +implementations, were missing some ranges and at the same time had +overly strict ranges (where there are more specific ranges considered +globally reachable by the IANA registries). + +This patch updates the ranges with what was missing or otherwise +incorrect. + +100.64.0.0/10 is left alone, for now, as it's been made special in [1]. + +The _address_exclude_many() call returns 8 networks for IPv4, 121 +networks for IPv6. 
+ +[1] https://github.com/python/cpython/issues/61602 + +In 3.10 and below, is_private checks whether the network and broadcast +address are both private. +In later versions (where the test wss backported from), it checks +whether they both are in the same private network. + +For 0.0.0.0/0, both 0.0.0.0 and 255.225.255.255 are private, +but one is in 0.0.0.0/8 ("This network") and the other in +255.255.255.255/32 ("Limited broadcast"). + +--------- + +Co-authored-by: Jakub Stasiak + +Origin: cpython, https://github.com/python/cpython/commit/22adf29da8d99933ffed8647d3e0726edd16f7f8 +--- + lib-python/3/ipaddress.py | 95 ++++++++++++++++++++++++++++++------- + lib-python/3/test/test_ipaddress.py | 52 ++++++++++++++++++++ + 2 files changed, 130 insertions(+), 17 deletions(-) + +diff --git a/lib-python/3/ipaddress.py b/lib-python/3/ipaddress.py +index 25f373a..9b35340 100644 +--- a/lib-python/3/ipaddress.py ++++ b/lib-python/3/ipaddress.py +@@ -1322,18 +1322,41 @@ class IPv4Address(_BaseV4, _BaseAddress): + @property + @functools.lru_cache() + def is_private(self): +- """Test if this address is allocated for private networks. ++ """``True`` if the address is defined as not globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exceptions: + +- Returns: +- A boolean, True if the address is reserved per +- iana-ipv4-special-registry. ++ * ``is_private`` is ``False`` for ``100.64.0.0/10`` ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_private == address.ipv4_mapped.is_private + ++ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. + """ +- return any(self in net for net in self._constants._private_networks) ++ return ( ++ any(self in net for net in self._constants._private_networks) ++ and all(self not in net for net in self._constants._private_networks_exceptions) ++ ) + + @property + @functools.lru_cache() + def is_global(self): ++ """``True`` if the address is defined as globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exception: ++ ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_global == address.ipv4_mapped.is_global ++ ++ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. 
++ """ + return self not in self._constants._public_network and not self.is_private + + @property +@@ -1537,13 +1560,15 @@ class _IPv4Constants: + + _public_network = IPv4Network('100.64.0.0/10') + ++ # Not globally reachable address blocks listed on ++ # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml + _private_networks = [ + IPv4Network('0.0.0.0/8'), + IPv4Network('10.0.0.0/8'), + IPv4Network('127.0.0.0/8'), + IPv4Network('169.254.0.0/16'), + IPv4Network('172.16.0.0/12'), +- IPv4Network('192.0.0.0/29'), ++ IPv4Network('192.0.0.0/24'), + IPv4Network('192.0.0.170/31'), + IPv4Network('192.0.2.0/24'), + IPv4Network('192.168.0.0/16'), +@@ -1554,6 +1579,11 @@ class _IPv4Constants: + IPv4Network('255.255.255.255/32'), + ] + ++ _private_networks_exceptions = [ ++ IPv4Network('192.0.0.9/32'), ++ IPv4Network('192.0.0.10/32'), ++ ] ++ + _reserved_network = IPv4Network('240.0.0.0/4') + + _unspecified_address = IPv4Address('0.0.0.0') +@@ -1995,23 +2025,42 @@ class IPv6Address(_BaseV6, _BaseAddress): + @property + @functools.lru_cache() + def is_private(self): +- """Test if this address is allocated for private networks. ++ """``True`` if the address is defined as not globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exceptions: + +- Returns: +- A boolean, True if the address is reserved per +- iana-ipv6-special-registry. ++ * ``is_private`` is ``False`` for ``100.64.0.0/10`` ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_private == address.ipv4_mapped.is_private + ++ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. + """ +- return any(self in net for net in self._constants._private_networks) ++ ipv4_mapped = self.ipv4_mapped ++ if ipv4_mapped is not None: ++ return ipv4_mapped.is_private ++ return ( ++ any(self in net for net in self._constants._private_networks) ++ and all(self not in net for net in self._constants._private_networks_exceptions) ++ ) + + @property + def is_global(self): +- """Test if this address is allocated for public networks. ++ """``True`` if the address is defined as globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exception: + +- Returns: +- A boolean, true if the address is not reserved per +- iana-ipv6-special-registry. ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_global == address.ipv4_mapped.is_global + ++ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. 
+ """ + return not self.is_private + +@@ -2252,19 +2301,31 @@ class _IPv6Constants: + + _multicast_network = IPv6Network('ff00::/8') + ++ # Not globally reachable address blocks listed on ++ # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml + _private_networks = [ + IPv6Network('::1/128'), + IPv6Network('::/128'), + IPv6Network('::ffff:0:0/96'), ++ IPv6Network('64:ff9b:1::/48'), + IPv6Network('100::/64'), + IPv6Network('2001::/23'), +- IPv6Network('2001:2::/48'), + IPv6Network('2001:db8::/32'), +- IPv6Network('2001:10::/28'), ++ # IANA says N/A, let's consider it not globally reachable to be safe ++ IPv6Network('2002::/16'), + IPv6Network('fc00::/7'), + IPv6Network('fe80::/10'), + ] + ++ _private_networks_exceptions = [ ++ IPv6Network('2001:1::1/128'), ++ IPv6Network('2001:1::2/128'), ++ IPv6Network('2001:3::/32'), ++ IPv6Network('2001:4:112::/48'), ++ IPv6Network('2001:20::/28'), ++ IPv6Network('2001:30::/28'), ++ ] ++ + _reserved_networks = [ + IPv6Network('::/8'), IPv6Network('100::/8'), + IPv6Network('200::/7'), IPv6Network('400::/6'), +diff --git a/lib-python/3/test/test_ipaddress.py b/lib-python/3/test/test_ipaddress.py +index 90897f6..bd14f04 100644 +--- a/lib-python/3/test/test_ipaddress.py ++++ b/lib-python/3/test/test_ipaddress.py +@@ -2263,6 +2263,10 @@ class IpaddrUnitTest(unittest.TestCase): + self.assertEqual(True, ipaddress.ip_address( + '172.31.255.255').is_private) + self.assertEqual(False, ipaddress.ip_address('172.32.0.0').is_private) ++ self.assertFalse(ipaddress.ip_address('192.0.0.0').is_global) ++ self.assertTrue(ipaddress.ip_address('192.0.0.9').is_global) ++ self.assertTrue(ipaddress.ip_address('192.0.0.10').is_global) ++ self.assertFalse(ipaddress.ip_address('192.0.0.255').is_global) + + self.assertEqual(True, + ipaddress.ip_address('169.254.100.200').is_link_local) +@@ -2278,6 +2282,40 @@ class IpaddrUnitTest(unittest.TestCase): + self.assertEqual(False, ipaddress.ip_address('128.0.0.0').is_loopback) + self.assertEqual(True, ipaddress.ip_network('0.0.0.0').is_unspecified) + ++ def testPrivateNetworks(self): ++ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/0").is_private) ++ self.assertEqual(False, ipaddress.ip_network("1.0.0.0/8").is_private) ++ ++ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/8").is_private) ++ self.assertEqual(True, ipaddress.ip_network("10.0.0.0/8").is_private) ++ self.assertEqual(True, ipaddress.ip_network("127.0.0.0/8").is_private) ++ self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private) ++ self.assertEqual(True, ipaddress.ip_network("172.16.0.0/12").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private) ++ self.assertEqual(False, ipaddress.ip_network("192.0.0.9/32").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private) ++ self.assertEqual(True, ipaddress.ip_network("198.18.0.0/15").is_private) ++ self.assertEqual(True, ipaddress.ip_network("198.51.100.0/24").is_private) ++ self.assertEqual(True, ipaddress.ip_network("203.0.113.0/24").is_private) ++ self.assertEqual(True, ipaddress.ip_network("240.0.0.0/4").is_private) ++ self.assertEqual(True, ipaddress.ip_network("255.255.255.255/32").is_private) ++ ++ self.assertEqual(False, ipaddress.ip_network("::/0").is_private) ++ self.assertEqual(False, ipaddress.ip_network("::ff/128").is_private) ++ ++ 
self.assertEqual(True, ipaddress.ip_network("::1/128").is_private) ++ self.assertEqual(True, ipaddress.ip_network("::/128").is_private) ++ self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private) ++ self.assertEqual(True, ipaddress.ip_network("100::/64").is_private) ++ self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private) ++ self.assertEqual(False, ipaddress.ip_network("2001:3::/48").is_private) ++ self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private) ++ self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private) ++ self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private) ++ self.assertEqual(True, ipaddress.ip_network("fe80::/10").is_private) ++ + def testReservedIpv6(self): + + self.assertEqual(True, ipaddress.ip_network('ffff::').is_multicast) +@@ -2351,6 +2389,20 @@ class IpaddrUnitTest(unittest.TestCase): + self.assertEqual(True, ipaddress.ip_address('0::0').is_unspecified) + self.assertEqual(False, ipaddress.ip_address('::1').is_unspecified) + ++ self.assertFalse(ipaddress.ip_address('64:ff9b:1::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:1::1').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:1::2').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:2::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:3::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:4::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:4:112::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:10::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:20::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:30::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:40::').is_global) ++ self.assertFalse(ipaddress.ip_address('2002::').is_global) ++ + # some generic IETF reserved addresses + self.assertEqual(True, ipaddress.ip_address('100::').is_reserved) + self.assertEqual(True, ipaddress.ip_network('4000::1/128').is_reserved) diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6232-tarfile-redos pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6232-tarfile-redos --- pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6232-tarfile-redos 1970-01-01 00:00:00.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6232-tarfile-redos 2024-12-23 20:22:45.000000000 +0000 @@ -0,0 +1,236 @@ +From: Seth Michael Larson +Date: Wed, 4 Sep 2024 10:46:01 -0500 +Subject: gh-121285: Remove backtracking when parsing tarfile headers + (GH-121286) (#123641) + +* Remove backtracking when parsing tarfile headers +* Rewrite PAX header parsing to be stricter +* Optimize parsing of GNU extended sparse headers v0.0 + +(cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4) + +Co-authored-by: Seth Michael Larson +Co-authored-by: Kirill Podoprigora +Co-authored-by: Gregory P. Smith + +Origin: cpython, https://github.com/python/cpython/commit/b4225ca91547aa97ed3aca391614afbb255bc877 +--- + lib-python/3/tarfile.py | 105 ++++++++++++++++++++++++-------------- + lib-python/3/test/test_tarfile.py | 42 +++++++++++++++ + 2 files changed, 109 insertions(+), 38 deletions(-) + +diff --git a/lib-python/3/tarfile.py b/lib-python/3/tarfile.py +index 9438b08..a0300e7 100644 +--- a/lib-python/3/tarfile.py ++++ b/lib-python/3/tarfile.py +@@ -708,6 +708,9 @@ class ExFileObject(io.BufferedReader): + super().__init__(fileobj) + #class ExFileObject + ++# Header length is digits followed by a space. 
++_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") ++ + #------------------ + # Exported Classes + #------------------ +@@ -1229,41 +1232,59 @@ class TarInfo(object): + else: + pax_headers = tarfile.pax_headers.copy() + +- # Check if the pax header contains a hdrcharset field. This tells us +- # the encoding of the path, linkpath, uname and gname fields. Normally, +- # these fields are UTF-8 encoded but since POSIX.1-2008 tar +- # implementations are allowed to store them as raw binary strings if +- # the translation to UTF-8 fails. +- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) +- if match is not None: +- pax_headers["hdrcharset"] = match.group(1).decode("utf-8") +- +- # For the time being, we don't care about anything other than "BINARY". +- # The only other value that is currently allowed by the standard is +- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. +- hdrcharset = pax_headers.get("hdrcharset") +- if hdrcharset == "BINARY": +- encoding = tarfile.encoding +- else: +- encoding = "utf-8" +- + # Parse pax header information. A record looks like that: + # "%d %s=%s\n" % (length, keyword, value). length is the size + # of the complete record including the length field itself and +- # the newline. keyword and value are both UTF-8 encoded strings. +- regex = re.compile(br"(\d+) ([^=]+)=") ++ # the newline. + pos = 0 +- while True: +- match = regex.match(buf, pos) +- if not match: +- break ++ encoding = None ++ raw_headers = [] ++ while len(buf) > pos and buf[pos] != 0x00: ++ if not (match := _header_length_prefix_re.match(buf, pos)): ++ raise InvalidHeaderError("invalid header") ++ try: ++ length = int(match.group(1)) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ # Headers must be at least 5 bytes, shortest being '5 x=\n'. ++ # Value is allowed to be empty. ++ if length < 5: ++ raise InvalidHeaderError("invalid header") ++ if pos + length > len(buf): ++ raise InvalidHeaderError("invalid header") ++ ++ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header ++ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] ++ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") + +- length, keyword = match.groups() +- length = int(length) +- if length == 0: ++ # Check the framing of the header. The last character must be '\n' (0x0A) ++ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: + raise InvalidHeaderError("invalid header") +- value = buf[match.end(2) + 1:match.start(1) + length - 1] ++ raw_headers.append((length, raw_keyword, raw_value)) ++ ++ # Check if the pax header contains a hdrcharset field. This tells us ++ # the encoding of the path, linkpath, uname and gname fields. Normally, ++ # these fields are UTF-8 encoded but since POSIX.1-2008 tar ++ # implementations are allowed to store them as raw binary strings if ++ # the translation to UTF-8 fails. For the time being, we don't care about ++ # anything other than "BINARY". The only other value that is currently ++ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. ++ # Note that we only follow the initial 'hdrcharset' setting to preserve ++ # the initial behavior of the 'tarfile' module. ++ if raw_keyword == b"hdrcharset" and encoding is None: ++ if raw_value == b"BINARY": ++ encoding = tarfile.encoding ++ else: # This branch ensures only the first 'hdrcharset' header is used. ++ encoding = "utf-8" + ++ pos += length ++ ++ # If no explicit hdrcharset is set, we use UTF-8 as a default. 
++ if encoding is None: ++ encoding = "utf-8" ++ ++ # After parsing the raw headers we can decode them to text. ++ for length, raw_keyword, raw_value in raw_headers: + # Normally, we could just use "utf-8" as the encoding and "strict" + # as the error handler, but we better not take the risk. For + # example, GNU tar <= 1.23 is known to store filenames it cannot +@@ -1271,17 +1292,16 @@ class TarInfo(object): + # hdrcharset=BINARY header). + # We first try the strict standard encoding, and if that fails we + # fall back on the user's encoding and error handler. +- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", ++ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", + tarfile.errors) + if keyword in PAX_NAME_FIELDS: +- value = self._decode_pax_field(value, encoding, tarfile.encoding, ++ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, + tarfile.errors) + else: +- value = self._decode_pax_field(value, "utf-8", "utf-8", ++ value = self._decode_pax_field(raw_value, "utf-8", "utf-8", + tarfile.errors) + + pax_headers[keyword] = value +- pos += length + + # Fetch the next header. + try: +@@ -1296,7 +1316,7 @@ class TarInfo(object): + + elif "GNU.sparse.size" in pax_headers: + # GNU extended sparse format version 0.0. +- self._proc_gnusparse_00(next, pax_headers, buf) ++ self._proc_gnusparse_00(next, raw_headers) + + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": + # GNU extended sparse format version 1.0. +@@ -1318,15 +1338,24 @@ class TarInfo(object): + + return next + +- def _proc_gnusparse_00(self, next, pax_headers, buf): ++ def _proc_gnusparse_00(self, next, raw_headers): + """Process a GNU tar extended sparse header, version 0.0. + """ + offsets = [] +- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): +- offsets.append(int(match.group(1))) + numbytes = [] +- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): +- numbytes.append(int(match.group(1))) ++ for _, keyword, value in raw_headers: ++ if keyword == b"GNU.sparse.offset": ++ try: ++ offsets.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ ++ elif keyword == b"GNU.sparse.numbytes": ++ try: ++ numbytes.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ + next.sparse = list(zip(offsets, numbytes)) + + def _proc_gnusparse_01(self, next, pax_headers): +diff --git a/lib-python/3/test/test_tarfile.py b/lib-python/3/test/test_tarfile.py +index df634bc..28b909d 100644 +--- a/lib-python/3/test/test_tarfile.py ++++ b/lib-python/3/test/test_tarfile.py +@@ -1109,6 +1109,48 @@ class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase): + finally: + tar.close() + ++ def test_pax_header_bad_formats(self): ++ # The fields from the pax header have priority over the ++ # TarInfo. 
++ pax_header_replacements = ( ++ b" foo=bar\n", ++ b"0 \n", ++ b"1 \n", ++ b"2 \n", ++ b"3 =\n", ++ b"4 =a\n", ++ b"1000000 foo=bar\n", ++ b"0 foo=bar\n", ++ b"-12 foo=bar\n", ++ b"000000000000000000000000036 foo=bar\n", ++ ) ++ pax_headers = {"foo": "bar"} ++ ++ for replacement in pax_header_replacements: ++ with self.subTest(header=replacement): ++ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, ++ encoding="iso8859-1") ++ try: ++ t = tarfile.TarInfo() ++ t.name = "pax" # non-ASCII ++ t.uid = 1 ++ t.pax_headers = pax_headers ++ tar.addfile(t) ++ finally: ++ tar.close() ++ ++ with open(tmpname, "rb") as f: ++ data = f.read() ++ self.assertIn(b"11 foo=bar\n", data) ++ data = data.replace(b"11 foo=bar\n", replacement) ++ ++ with open(tmpname, "wb") as f: ++ f.truncate() ++ f.write(data) ++ ++ with self.assertRaisesRegex(tarfile.ReadError, r"file could not be opened successfully"): ++ tarfile.open(tmpname, encoding="iso8859-1") ++ + + class WriteTestBase(TarTest): + # Put all write tests in here that are supposed to be tested diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6923-encode-email-header-newlines pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6923-encode-email-header-newlines --- pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6923-encode-email-header-newlines 1970-01-01 00:00:00.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/CVE-2024-6923-encode-email-header-newlines 2024-12-23 20:22:45.000000000 +0000 @@ -0,0 +1,275 @@ +From: =?utf-8?q?=C5=81ukasz_Langa?= +Date: Wed, 4 Sep 2024 17:39:02 +0200 +Subject: gh-121650: Encode newlines in headers, + and verify headers are sound (GH-122233) (#122610) + +Per RFC 2047: + +> [...] these encoding schemes allow the +> encoding of arbitrary octet values, mail readers that implement this +> decoding should also ensure that display of the decoded data on the +> recipient's terminal will not cause unwanted side-effects + +It seems that the "quoted-word" scheme is a valid way to include +a newline character in a header value, just like we already allow +undecodable bytes or control characters. +They do need to be properly quoted when serialized to text, though. + +This should fail for custom fold() implementations that aren't careful +about newlines. 
+ +(cherry picked from commit 097633981879b3c9de9a1dd120d3aa585ecc2384) + +Co-authored-by: Petr Viktorin +Co-authored-by: Bas Bloemsaat +Co-authored-by: Serhiy Storchaka + +Origin: cpython, https://github.com/python/cpython/commit/f7be505d137a22528cb0fc004422c0081d5d90e6 +--- + lib-python/3/email/_header_value_parser.py | 12 +++-- + lib-python/3/email/_policybase.py | 8 ++++ + lib-python/3/email/errors.py | 4 ++ + lib-python/3/email/generator.py | 13 +++++- + lib-python/3/test/test_email/test_generator.py | 62 ++++++++++++++++++++++++++ + lib-python/3/test/test_email/test_policy.py | 26 +++++++++++ + 6 files changed, 121 insertions(+), 4 deletions(-) + +diff --git a/lib-python/3/email/_header_value_parser.py b/lib-python/3/email/_header_value_parser.py +index 8a8fb8b..e394cfd 100644 +--- a/lib-python/3/email/_header_value_parser.py ++++ b/lib-python/3/email/_header_value_parser.py +@@ -92,6 +92,8 @@ TOKEN_ENDS = TSPECIALS | WSP + ASPECIALS = TSPECIALS | set("*'%") + ATTRIBUTE_ENDS = ASPECIALS | WSP + EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') ++NLSET = {'\n', '\r'} ++SPECIALSNL = SPECIALS | NLSET + + def quote_string(value): + return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' +@@ -2778,9 +2780,13 @@ def _refold_parse_tree(parse_tree, *, policy): + wrap_as_ew_blocked -= 1 + continue + tstr = str(part) +- if part.token_type == 'ptext' and set(tstr) & SPECIALS: +- # Encode if tstr contains special characters. +- want_encoding = True ++ if not want_encoding: ++ if part.token_type == 'ptext': ++ # Encode if tstr contains special characters. ++ want_encoding = not SPECIALSNL.isdisjoint(tstr) ++ else: ++ # Encode if tstr contains newlines. ++ want_encoding = not NLSET.isdisjoint(tstr) + try: + tstr.encode(encoding) + charset = encoding +diff --git a/lib-python/3/email/_policybase.py b/lib-python/3/email/_policybase.py +index c9cbadd..d1f4821 100644 +--- a/lib-python/3/email/_policybase.py ++++ b/lib-python/3/email/_policybase.py +@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): + message_factory -- the class to use to create new message objects. + If the value is None, the default is Message. + ++ verify_generated_headers ++ -- if true, the generator verifies that each header ++ they are properly folded, so that a parser won't ++ treat it as multiple headers, start-of-body, or ++ part of another header. ++ This is a check against custom Header & fold() ++ implementations. + """ + + raise_on_defect = False +@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): + max_line_length = 78 + mangle_from_ = False + message_factory = None ++ verify_generated_headers = True + + def handle_defect(self, obj, defect): + """Based on policy, either raise defect or call register_defect. +diff --git a/lib-python/3/email/errors.py b/lib-python/3/email/errors.py +index d28a680..1a0d5c6 100644 +--- a/lib-python/3/email/errors.py ++++ b/lib-python/3/email/errors.py +@@ -29,6 +29,10 @@ class CharsetError(MessageError): + """An illegal charset was given.""" + + ++class HeaderWriteError(MessageError): ++ """Error while writing headers.""" ++ ++ + # These are parsing defects which the parser was able to work around. 
+ class MessageDefect(ValueError): + """Base class for a message defect.""" +diff --git a/lib-python/3/email/generator.py b/lib-python/3/email/generator.py +index c9b1216..89224ae 100644 +--- a/lib-python/3/email/generator.py ++++ b/lib-python/3/email/generator.py +@@ -14,12 +14,14 @@ import random + from copy import deepcopy + from io import StringIO, BytesIO + from email.utils import _has_surrogates ++from email.errors import HeaderWriteError + + UNDERSCORE = '_' + NL = '\n' # XXX: no longer used by the code below. + + NLCRE = re.compile(r'\r\n|\r|\n') + fcre = re.compile(r'^From ', re.MULTILINE) ++NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') + + + +@@ -223,7 +225,16 @@ class Generator: + + def _write_headers(self, msg): + for h, v in msg.raw_items(): +- self.write(self.policy.fold(h, v)) ++ folded = self.policy.fold(h, v) ++ if self.policy.verify_generated_headers: ++ linesep = self.policy.linesep ++ if not folded.endswith(self.policy.linesep): ++ raise HeaderWriteError( ++ f'folded header does not end with {linesep!r}: {folded!r}') ++ if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)): ++ raise HeaderWriteError( ++ f'folded header contains newline: {folded!r}') ++ self.write(folded) + # A blank line always separates headers from body + self.write(self._NL) + +diff --git a/lib-python/3/test/test_email/test_generator.py b/lib-python/3/test/test_email/test_generator.py +index 89e7ede..d29400f 100644 +--- a/lib-python/3/test/test_email/test_generator.py ++++ b/lib-python/3/test/test_email/test_generator.py +@@ -6,6 +6,7 @@ from email.message import EmailMessage + from email.generator import Generator, BytesGenerator + from email.headerregistry import Address + from email import policy ++import email.errors + from test.test_email import TestEmailBase, parameterize + + +@@ -216,6 +217,44 @@ class TestGeneratorBase: + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + ++ def test_keep_encoded_newlines(self): ++ msg = self.msgmaker(self.typ(textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """))) ++ expected = textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """) ++ s = self.ioclass() ++ g = self.genclass(s, policy=self.policy.clone(max_line_length=80)) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), self.typ(expected)) ++ ++ def test_keep_long_encoded_newlines(self): ++ msg = self.msgmaker(self.typ(textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """))) ++ expected = textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject ++ =?utf-8?q?=0A?=Bcc: ++ injection@example.com ++ ++ None ++ """) ++ s = self.ioclass() ++ g = self.genclass(s, policy=self.policy.clone(max_line_length=30)) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), self.typ(expected)) ++ + + class TestGenerator(TestGeneratorBase, TestEmailBase): + +@@ -224,6 +263,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase): + ioclass = io.StringIO + typ = str + ++ def test_verify_generated_headers(self): ++ """gh-121650: by default the generator prevents header injection""" ++ class LiteralHeader(str): ++ name = 'Header' ++ def fold(self, **kwargs): ++ return self ++ ++ for text in ( ++ 'Value\r\nBad Injection\r\n', ++ 'NoNewLine' ++ ): ++ with self.subTest(text=text): ++ message = message_from_string( ++ "Header: Value\r\n\r\nBody", ++ policy=self.policy, ++ ) ++ ++ del 
message['Header'] ++ message['Header'] = LiteralHeader(text) ++ ++ with self.assertRaises(email.errors.HeaderWriteError): ++ message.as_string() ++ + + class TestBytesGenerator(TestGeneratorBase, TestEmailBase): + +diff --git a/lib-python/3/test/test_email/test_policy.py b/lib-python/3/test/test_email/test_policy.py +index e87c275..ff1ddf7 100644 +--- a/lib-python/3/test/test_email/test_policy.py ++++ b/lib-python/3/test/test_email/test_policy.py +@@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase): + 'raise_on_defect': False, + 'mangle_from_': True, + 'message_factory': None, ++ 'verify_generated_headers': True, + } + # These default values are the ones set on email.policy.default. + # If any of these defaults change, the docs must be updated. +@@ -277,6 +278,31 @@ class PolicyAPITests(unittest.TestCase): + with self.assertRaises(email.errors.HeaderParseError): + policy.fold("Subject", subject) + ++ def test_verify_generated_headers(self): ++ """Turning protection off allows header injection""" ++ policy = email.policy.default.clone(verify_generated_headers=False) ++ for text in ( ++ 'Header: Value\r\nBad: Injection\r\n', ++ 'Header: NoNewLine' ++ ): ++ with self.subTest(text=text): ++ message = email.message_from_string( ++ "Header: Value\r\n\r\nBody", ++ policy=policy, ++ ) ++ class LiteralHeader(str): ++ name = 'Header' ++ def fold(self, **kwargs): ++ return self ++ ++ del message['Header'] ++ message['Header'] = LiteralHeader(text) ++ ++ self.assertEqual( ++ message.as_string(), ++ f"{text}\nBody", ++ ) ++ + # XXX: Need subclassing tests. + # For adding subclassed objects, make sure the usual rules apply (subclass + # wins), but that the order still works (right overrides left). diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2024-7592-http-cookie-quadratic-complexity pypy3-7.3.11+dfsg/debian/patches/CVE-2024-7592-http-cookie-quadratic-complexity --- pypy3-7.3.11+dfsg/debian/patches/CVE-2024-7592-http-cookie-quadratic-complexity 1970-01-01 00:00:00.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/CVE-2024-7592-http-cookie-quadratic-complexity 2024-12-23 20:22:45.000000000 +0000 @@ -0,0 +1,125 @@ +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Wed, 4 Sep 2024 17:49:40 +0200 +Subject: gh-123067: Fix quadratic complexity in parsing "-quoted cookie + values with backslashes (GH-123075) (#123107) + +This fixes CVE-2024-7592. 
+(cherry picked from commit 44e458357fca05ca0ae2658d62c8c595b048b5ef) + +Co-authored-by: Serhiy Storchaka + +Origin: cpython, https://github.com/python/cpython/commit/d662e2db2605515a767f88ad48096b8ac623c774 +--- + lib-python/3/http/cookies.py | 34 +++++++----------------------- + lib-python/3/test/test_http_cookies.py | 38 ++++++++++++++++++++++++++++++++++ + 2 files changed, 46 insertions(+), 26 deletions(-) + +diff --git a/lib-python/3/http/cookies.py b/lib-python/3/http/cookies.py +index 35ac2dc..2c1f021 100644 +--- a/lib-python/3/http/cookies.py ++++ b/lib-python/3/http/cookies.py +@@ -184,8 +184,13 @@ def _quote(str): + return '"' + str.translate(_Translator) + '"' + + +-_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +-_QuotePatt = re.compile(r"[\\].") ++_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub ++ ++def _unquote_replace(m): ++ if m[1]: ++ return chr(int(m[1], 8)) ++ else: ++ return m[2] + + def _unquote(str): + # If there aren't any doublequotes, +@@ -205,30 +210,7 @@ def _unquote(str): + # \012 --> \n + # \" --> " + # +- i = 0 +- n = len(str) +- res = [] +- while 0 <= i < n: +- o_match = _OctalPatt.search(str, i) +- q_match = _QuotePatt.search(str, i) +- if not o_match and not q_match: # Neither matched +- res.append(str[i:]) +- break +- # else: +- j = k = -1 +- if o_match: +- j = o_match.start(0) +- if q_match: +- k = q_match.start(0) +- if q_match and (not o_match or k < j): # QuotePatt matched +- res.append(str[i:k]) +- res.append(str[k+1]) +- i = k + 2 +- else: # OctalPatt matched +- res.append(str[i:j]) +- res.append(chr(int(str[j+1:j+4], 8))) +- i = j + 4 +- return _nulljoin(res) ++ return _unquote_sub(_unquote_replace, str) + + # The _getdate() routine is used to set the expiration time in the cookie's HTTP + # header. 
By default, _getdate() returns the current time in the appropriate +diff --git a/lib-python/3/test/test_http_cookies.py b/lib-python/3/test/test_http_cookies.py +index 6072c7e..644e75c 100644 +--- a/lib-python/3/test/test_http_cookies.py ++++ b/lib-python/3/test/test_http_cookies.py +@@ -5,6 +5,7 @@ from test.support import run_unittest, run_doctest + import unittest + from http import cookies + import pickle ++from test import support + + + class CookieTests(unittest.TestCase): +@@ -58,6 +59,43 @@ class CookieTests(unittest.TestCase): + for k, v in sorted(case['dict'].items()): + self.assertEqual(C[k].value, v) + ++ def test_unquote(self): ++ cases = [ ++ (r'a="b=\""', 'b="'), ++ (r'a="b=\\"', 'b=\\'), ++ (r'a="b=\="', 'b=='), ++ (r'a="b=\n"', 'b=n'), ++ (r'a="b=\042"', 'b="'), ++ (r'a="b=\134"', 'b=\\'), ++ (r'a="b=\377"', 'b=\xff'), ++ (r'a="b=\400"', 'b=400'), ++ (r'a="b=\42"', 'b=42'), ++ (r'a="b=\\042"', 'b=\\042'), ++ (r'a="b=\\134"', 'b=\\134'), ++ (r'a="b=\\\""', 'b=\\"'), ++ (r'a="b=\\\042"', 'b=\\"'), ++ (r'a="b=\134\""', 'b=\\"'), ++ (r'a="b=\134\042"', 'b=\\"'), ++ ] ++ for encoded, decoded in cases: ++ with self.subTest(encoded): ++ C = cookies.SimpleCookie() ++ C.load(encoded) ++ self.assertEqual(C['a'].value, decoded) ++ ++ @support.requires_resource('cpu') ++ def test_unquote_large(self): ++ n = 10**6 ++ for encoded in r'\\', r'\134': ++ with self.subTest(encoded): ++ data = 'a="b=' + encoded*n + ';"' ++ C = cookies.SimpleCookie() ++ C.load(data) ++ value = C['a'].value ++ self.assertEqual(value[:3], 'b=\\') ++ self.assertEqual(value[-2:], '\\;') ++ self.assertEqual(len(value), n + 3) ++ + def test_load(self): + C = cookies.SimpleCookie() + C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme') diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2024-8088-zipfile-loop-dos pypy3-7.3.11+dfsg/debian/patches/CVE-2024-8088-zipfile-loop-dos --- pypy3-7.3.11+dfsg/debian/patches/CVE-2024-8088-zipfile-loop-dos 1970-01-01 00:00:00.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/CVE-2024-8088-zipfile-loop-dos 2024-12-23 20:22:45.000000000 +0000 @@ -0,0 +1,134 @@ +From: "Jason R. Coombs" +Date: Wed, 4 Sep 2024 11:46:48 -0400 +Subject: gh-123270: Replaced SanitizedNames with a more surgical fix. + (GH-123354) (#123432) + +Applies changes from zipp 3.20.1 and jaraco/zippGH-124 +(cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6) +(cherry picked from commit 17b77bb41409259bad1cd6c74761c18b6ab1e860) + +Co-authored-by: Jason R. Coombs + +Origin: cpython, https://github.com/python/cpython/commit/962055268ed4f2ca1d717bfc8b6385de50a23ab7 +--- + lib-python/3/test/test_zipfile.py | 77 +++++++++++++++++++++++++++++++++++++++ + lib-python/3/zipfile.py | 9 ++++- + 2 files changed, 84 insertions(+), 2 deletions(-) + +diff --git a/lib-python/3/test/test_zipfile.py b/lib-python/3/test/test_zipfile.py +index f09c82c..26cf5fa 100644 +--- a/lib-python/3/test/test_zipfile.py ++++ b/lib-python/3/test/test_zipfile.py +@@ -3056,6 +3056,83 @@ class TestPath(unittest.TestCase): + data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)] + zipfile.CompleteDirs._implied_dirs(data) + ++ def test_malformed_paths(self): ++ """ ++ Path should handle malformed paths gracefully. ++ ++ Paths with leading slashes are not visible. ++ ++ Paths with dots are treated like regular files. 
++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr("/one-slash.txt", b"content") ++ zf.writestr("//two-slash.txt", b"content") ++ zf.writestr("../parent.txt", b"content") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ assert list(map(str, root.iterdir())) == ['../'] ++ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' ++ ++ def test_unsupported_names(self): ++ """ ++ Path segments with special characters are readable. ++ ++ On some platforms or file systems, characters like ++ ``:`` and ``?`` are not allowed, but they are valid ++ in the zip file. ++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr("path?", b"content") ++ zf.writestr("V: NMS.flac", b"fLaC...") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ contents = root.iterdir() ++ assert next(contents).name == 'path?' ++ assert next(contents).name == 'V: NMS.flac' ++ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." ++ ++ def test_backslash_not_separator(self): ++ """ ++ In a zip file, backslashes are not separators. ++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ (first,) = root.iterdir() ++ assert not first.is_dir() ++ assert first.name == 'foo\\bar' ++ ++ ++class DirtyZipInfo(zipfile.ZipInfo): ++ """ ++ Bypass name sanitization. ++ """ ++ ++ def __init__(self, filename, *args, **kwargs): ++ super().__init__(filename, *args, **kwargs) ++ self.filename = filename ++ ++ @classmethod ++ def for_name(cls, name, archive): ++ """ ++ Construct the same way that ZipFile.writestr does. ++ ++ TODO: extract this functionality and re-use ++ """ ++ self = cls(filename=name, date_time=time.localtime(time.time())[:6]) ++ self.compress_type = archive.compression ++ self.compress_level = archive.compresslevel ++ if self.filename.endswith('/'): # pragma: no cover ++ self.external_attr = 0o40775 << 16 # drwxrwxr-x ++ self.external_attr |= 0x10 # MS-DOS directory flag ++ else: ++ self.external_attr = 0o600 << 16 # ?rw------- ++ return self ++ + + if __name__ == "__main__": + unittest.main() +diff --git a/lib-python/3/zipfile.py b/lib-python/3/zipfile.py +index 95f95ee..68d643d 100644 +--- a/lib-python/3/zipfile.py ++++ b/lib-python/3/zipfile.py +@@ -2146,7 +2146,7 @@ def _parents(path): + def _ancestry(path): + """ + Given a path with elements separated by +- posixpath.sep, generate all elements of that path ++ posixpath.sep, generate all elements of that path. + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] +@@ -2158,9 +2158,14 @@ def _ancestry(path): + ['b'] + >>> list(_ancestry('')) + [] ++ ++ Multiple separators are treated like a single. 
++ ++ >>> list(_ancestry('//b//d///f//')) ++ ['//b//d///f', '//b//d', '//b'] + """ + path = path.rstrip(posixpath.sep) +- while path and path != posixpath.sep: ++ while path.rstrip(posixpath.sep): + yield path + path, tail = posixpath.split(path) + diff -Nru pypy3-7.3.11+dfsg/debian/patches/CVE-2024-9287-venv-activation-templates pypy3-7.3.11+dfsg/debian/patches/CVE-2024-9287-venv-activation-templates --- pypy3-7.3.11+dfsg/debian/patches/CVE-2024-9287-venv-activation-templates 1970-01-01 00:00:00.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/CVE-2024-9287-venv-activation-templates 2024-12-23 20:22:45.000000000 +0000 @@ -0,0 +1,289 @@ +From: Victor Stinner +Date: Mon, 4 Nov 2024 16:16:17 +0100 +Subject: gh-124651: Quote template strings in `venv` activation scripts + (GH-124712) (GH-126185) (GH-126269) (GH-126300) + +(cherry picked from commit ae961ae94bf19c8f8c7fbea3d1c25cc55ce8ae97) + +Origin: https://github.com/python/cpython/pull/126300 +--- + lib-python/3/test/test_venv.py | 81 +++++++++++++++++++++++++++ + lib-python/3/venv/__init__.py | 42 ++++++++++++-- + lib-python/3/venv/scripts/common/activate | 6 +- + lib-python/3/venv/scripts/nt/activate.bat | 4 +- + lib-python/3/venv/scripts/posix/activate.csh | 6 +- + lib-python/3/venv/scripts/posix/activate.fish | 6 +- + 6 files changed, 129 insertions(+), 16 deletions(-) + +diff --git a/lib-python/3/test/test_venv.py b/lib-python/3/test/test_venv.py +index 30c608e..5f7f2b2 100644 +--- a/lib-python/3/test/test_venv.py ++++ b/lib-python/3/test/test_venv.py +@@ -14,6 +14,7 @@ import struct + import subprocess + import sys + import tempfile ++import shlex + from test.support import (captured_stdout, captured_stderr, requires_zlib, + can_symlink, EnvironmentVarGuard, rmtree, + import_module, +@@ -85,6 +86,10 @@ class BaseTest(unittest.TestCase): + result = f.read() + return result + ++ def assertEndsWith(self, string, tail): ++ if not string.endswith(tail): ++ self.fail(f"String {string!r} does not end with {tail!r}") ++ + class BasicTest(BaseTest): + """Test venv module functionality.""" + +@@ -342,6 +347,82 @@ class BasicTest(BaseTest): + 'import sys; print(sys.executable)']) + self.assertEqual(out.strip(), envpy.encode()) + ++ # gh-124651: test quoted strings ++ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') ++ def test_special_chars_bash(self): ++ """ ++ Test that the template strings are quoted properly (bash) ++ """ ++ rmtree(self.env_dir) ++ bash = shutil.which('bash') ++ if bash is None: ++ self.skipTest('bash required for this test') ++ env_name = '"\';&&$e|\'"' ++ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) ++ builder = venv.EnvBuilder(clear=True) ++ builder.create(env_dir) ++ activate = os.path.join(env_dir, self.bindir, 'activate') ++ test_script = os.path.join(self.env_dir, 'test_special_chars.sh') ++ with open(test_script, "w") as f: ++ f.write(f'source {shlex.quote(activate)}\n' ++ 'python -c \'import sys; print(sys.executable)\'\n' ++ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' ++ 'deactivate\n') ++ out, err = check_output([bash, test_script]) ++ lines = out.splitlines() ++ self.assertTrue(env_name.encode() in lines[0]) ++ self.assertEndsWith(lines[1], env_name.encode()) ++ ++ # gh-124651: test quoted strings ++ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') ++ def test_special_chars_csh(self): ++ """ ++ Test that the template strings are quoted properly (csh) ++ """ ++ rmtree(self.env_dir) ++ csh = shutil.which('tcsh') or 
shutil.which('csh') ++ if csh is None: ++ self.skipTest('csh required for this test') ++ env_name = '"\';&&$e|\'"' ++ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) ++ builder = venv.EnvBuilder(clear=True) ++ builder.create(env_dir) ++ activate = os.path.join(env_dir, self.bindir, 'activate.csh') ++ test_script = os.path.join(self.env_dir, 'test_special_chars.csh') ++ with open(test_script, "w") as f: ++ f.write(f'source {shlex.quote(activate)}\n' ++ 'python -c \'import sys; print(sys.executable)\'\n' ++ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' ++ 'deactivate\n') ++ out, err = check_output([csh, test_script]) ++ lines = out.splitlines() ++ self.assertTrue(env_name.encode() in lines[0]) ++ self.assertEndsWith(lines[1], env_name.encode()) ++ ++ # gh-124651: test quoted strings on Windows ++ @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') ++ def test_special_chars_windows(self): ++ """ ++ Test that the template strings are quoted properly on Windows ++ """ ++ rmtree(self.env_dir) ++ env_name = "'&&^$e" ++ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) ++ builder = venv.EnvBuilder(clear=True) ++ builder.create(env_dir) ++ activate = os.path.join(env_dir, self.bindir, 'activate.bat') ++ test_batch = os.path.join(self.env_dir, 'test_special_chars.bat') ++ with open(test_batch, "w") as f: ++ f.write('@echo off\n' ++ f'"{activate}" & ' ++ f'{self.exe} -c "import sys; print(sys.executable)" & ' ++ f'{self.exe} -c "import os; print(os.environ[\'VIRTUAL_ENV\'])" & ' ++ 'deactivate') ++ out, err = check_output([test_batch]) ++ lines = out.splitlines() ++ self.assertTrue(env_name.encode() in lines[0]) ++ self.assertEndsWith(lines[1], env_name.encode()) ++ + @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') + def test_unicode_in_batch_file(self): + """ +diff --git a/lib-python/3/venv/__init__.py b/lib-python/3/venv/__init__.py +index 83cbf73..7128da3 100644 +--- a/lib-python/3/venv/__init__.py ++++ b/lib-python/3/venv/__init__.py +@@ -11,6 +11,7 @@ import subprocess + import sys + import sysconfig + import types ++import shlex + + + CORE_VENV_DEPS = ('pip', 'setuptools') +@@ -405,11 +406,41 @@ Failing command: {} + :param context: The information for the environment creation request + being processed. + """ +- text = text.replace('__VENV_DIR__', context.env_dir) +- text = text.replace('__VENV_NAME__', context.env_name) +- text = text.replace('__VENV_PROMPT__', context.prompt) +- text = text.replace('__VENV_BIN_NAME__', context.bin_name) +- text = text.replace('__VENV_PYTHON__', context.env_exe) ++ replacements = { ++ '__VENV_DIR__': context.env_dir, ++ '__VENV_NAME__': context.env_name, ++ '__VENV_PROMPT__': context.prompt, ++ '__VENV_BIN_NAME__': context.bin_name, ++ '__VENV_PYTHON__': context.env_exe, ++ } ++ ++ def quote_ps1(s): ++ """ ++ This should satisfy PowerShell quoting rules [1], unless the quoted ++ string is passed directly to Windows native commands [2]. 
++ [1]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_quoting_rules ++ [2]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_parsing#passing-arguments-that-contain-quote-characters ++ """ ++ s = s.replace("'", "''") ++ return f"'{s}'" ++ ++ def quote_bat(s): ++ return s ++ ++ # gh-124651: need to quote the template strings properly ++ quote = shlex.quote ++ script_path = context.script_path ++ if script_path.endswith('.ps1'): ++ quote = quote_ps1 ++ elif script_path.endswith('.bat'): ++ quote = quote_bat ++ else: ++ # fallbacks to POSIX shell compliant quote ++ quote = shlex.quote ++ ++ replacements = {key: quote(s) for key, s in replacements.items()} ++ for key, quoted in replacements.items(): ++ text = text.replace(key, quoted) + return text + + def install_scripts(self, context, path): +@@ -449,6 +480,7 @@ Failing command: {} + with open(srcfile, 'rb') as f: + data = f.read() + if not srcfile.endswith(('.exe', '.pdb')): ++ context.script_path = srcfile + try: + data = data.decode('utf-8') + data = self.replace_variables(data, context) +diff --git a/lib-python/3/venv/scripts/common/activate b/lib-python/3/venv/scripts/common/activate +index 45af353..1d116ca 100644 +--- a/lib-python/3/venv/scripts/common/activate ++++ b/lib-python/3/venv/scripts/common/activate +@@ -37,11 +37,11 @@ deactivate () { + # unset irrelevant variables + deactivate nondestructive + +-VIRTUAL_ENV="__VENV_DIR__" ++VIRTUAL_ENV=__VENV_DIR__ + export VIRTUAL_ENV + + _OLD_VIRTUAL_PATH="$PATH" +-PATH="$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" ++PATH="$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" + export PATH + + # unset PYTHONHOME if set +@@ -54,7 +54,7 @@ fi + + if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" +- PS1="__VENV_PROMPT__${PS1:-}" ++ PS1=__VENV_PROMPT__"${PS1:-}" + export PS1 + fi + +diff --git a/lib-python/3/venv/scripts/nt/activate.bat b/lib-python/3/venv/scripts/nt/activate.bat +index f61413e..11210c7 100644 +--- a/lib-python/3/venv/scripts/nt/activate.bat ++++ b/lib-python/3/venv/scripts/nt/activate.bat +@@ -8,7 +8,7 @@ if defined _OLD_CODEPAGE ( + "%SystemRoot%\System32\chcp.com" 65001 > nul + ) + +-set VIRTUAL_ENV=__VENV_DIR__ ++set "VIRTUAL_ENV=__VENV_DIR__" + + if not defined PROMPT set PROMPT=$P$G + +@@ -24,7 +24,7 @@ set PYTHONHOME= + if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH% + if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH% + +-set PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH% ++set "PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%" + + :END + if defined _OLD_CODEPAGE ( +diff --git a/lib-python/3/venv/scripts/posix/activate.csh b/lib-python/3/venv/scripts/posix/activate.csh +index 68a0dc7..5130113 100644 +--- a/lib-python/3/venv/scripts/posix/activate.csh ++++ b/lib-python/3/venv/scripts/posix/activate.csh +@@ -8,16 +8,16 @@ alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PA + # Unset irrelevant variables. + deactivate nondestructive + +-setenv VIRTUAL_ENV "__VENV_DIR__" ++setenv VIRTUAL_ENV __VENV_DIR__ + + set _OLD_VIRTUAL_PATH="$PATH" +-setenv PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" ++setenv PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" + + + set _OLD_VIRTUAL_PROMPT="$prompt" + + if (! 
"$?VIRTUAL_ENV_DISABLE_PROMPT") then +- set prompt = "__VENV_PROMPT__$prompt" ++ set prompt = __VENV_PROMPT__"$prompt" + endif + + alias pydoc python -m pydoc +diff --git a/lib-python/3/venv/scripts/posix/activate.fish b/lib-python/3/venv/scripts/posix/activate.fish +index 54b9ea5..62ab531 100644 +--- a/lib-python/3/venv/scripts/posix/activate.fish ++++ b/lib-python/3/venv/scripts/posix/activate.fish +@@ -29,10 +29,10 @@ end + # Unset irrelevant variables. + deactivate nondestructive + +-set -gx VIRTUAL_ENV "__VENV_DIR__" ++set -gx VIRTUAL_ENV __VENV_DIR__ + + set -gx _OLD_VIRTUAL_PATH $PATH +-set -gx PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__" $PATH ++set -gx PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__ $PATH + + # Unset PYTHONHOME if set. + if set -q PYTHONHOME +@@ -52,7 +52,7 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + set -l old_status $status + + # Output the venv prompt; color taken from the blue of the Python logo. +- printf "%s%s%s" (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal) ++ printf "%s%s%s" (set_color 4B8BBE) __VENV_PROMPT__ (set_color normal) + + # Restore the return status of the previous command. + echo "exit $old_status" | . diff -Nru pypy3-7.3.11+dfsg/debian/patches/series pypy3-7.3.11+dfsg/debian/patches/series --- pypy3-7.3.11+dfsg/debian/patches/series 2024-05-02 00:39:38.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/patches/series 2024-12-23 20:22:45.000000000 +0000 @@ -26,3 +26,11 @@ CVE-2023-40217-test-reliability.patch CVE-2023-6597-tempfile-symlink.patch CVE-2024-0450-zipfile-quoted-overlap.patch +CVE-2023-27043-email-parseaddr +CVE-2024-9287-venv-activation-templates +CVE-2024-4032-private-ip-ranges +CVE-2024-6232-tarfile-redos +CVE-2024-8088-zipfile-loop-dos +CVE-2024-6923-encode-email-header-newlines +CVE-2024-7592-http-cookie-quadratic-complexity +CVE-2024-11168-urllib-parse-bracketed-names diff -Nru pypy3-7.3.11+dfsg/debian/rules pypy3-7.3.11+dfsg/debian/rules --- pypy3-7.3.11+dfsg/debian/rules 2024-05-02 00:39:38.000000000 +0000 +++ pypy3-7.3.11+dfsg/debian/rules 2024-12-23 20:22:45.000000000 +0000 @@ -61,6 +61,10 @@ override_dh_auto_install: debian/scripts/gen-backend-versions.py +override_dh_auto_clean: + sed 's/^@/#/' cpython27/Makefile.pre.in | $(MAKE) -C cpython27 -f - srcdir=. distclean + dh_auto_clean + override_dh_fixperms-arch: debian/scripts/cleanup-lib.sh pypy3-lib find debian/pypy3-tk \( -name '*.pyc' -o -name '__pycache__' \) -delete