Version in base suite: 1.26.0-4 Base version: pypdf2_1.26.0-4 Target version: pypdf2_1.26.0-4+deb11u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/p/pypdf2/pypdf2_1.26.0-4.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/p/pypdf2/pypdf2_1.26.0-4+deb11u1.dsc changelog | 11 + patches/0001-MAINT-Quadratic-runtime-while-parsing-reduced-to-lin.patch | 50 +++++++ patches/CVE-2022-24859.patch | 63 ++++++++++ patches/series | 2 4 files changed, 126 insertions(+) diff -Nru pypdf2-1.26.0/debian/changelog pypdf2-1.26.0/debian/changelog --- pypdf2-1.26.0/debian/changelog 2020-01-19 08:08:58.000000000 +0000 +++ pypdf2-1.26.0/debian/changelog 2024-02-11 18:50:22.000000000 +0000 @@ -1,3 +1,14 @@ +pypdf2 (1.26.0-4+deb11u1) bullseye; urgency=medium + + * Forward-port CVE fixes by LTS team + - CVE-2023-36810: Quadratic runtime with malformed PDF missing xref marker. + - Fix CVE-2022-24859: + Sebastian Krause discovered that manipulated inline images can force + PyPDF2, a pure Python PDF library, into an infinite loop, if a + maliciously crafted PDF file is processed. + + -- Scott Kitterman Sun, 11 Feb 2024 13:50:22 -0500 + pypdf2 (1.26.0-4) unstable; urgency=medium * Remove Python 2 from build dependencies (closes: #937505). 
diff -Nru pypdf2-1.26.0/debian/patches/0001-MAINT-Quadratic-runtime-while-parsing-reduced-to-lin.patch pypdf2-1.26.0/debian/patches/0001-MAINT-Quadratic-runtime-while-parsing-reduced-to-lin.patch --- pypdf2-1.26.0/debian/patches/0001-MAINT-Quadratic-runtime-while-parsing-reduced-to-lin.patch 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.26.0/debian/patches/0001-MAINT-Quadratic-runtime-while-parsing-reduced-to-lin.patch 2024-02-11 18:49:50.000000000 +0000 @@ -0,0 +1,50 @@ +From 82ee233ea82a40c626e95a191fe2d52c745db870 Mon Sep 17 00:00:00 2001 +From: dsk7 +Date: Sat, 23 Apr 2022 19:12:13 +0200 +Subject: MAINT: Quadratic runtime while parsing reduced to linear (#808) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When the PdfFileReader tries to find the xref marker, the readNextEndLine method builds a so-called line by reading byte-for-byte. Every time a new byte is read, it is concatenated with the currently read line. This leads to quadratic runtime O(n²), where n is the size of the file, because Python strings (also byte-strings) are immutable and have to be copied on every concatenation. +For files where the xref marker cannot be found at the end, this takes an enormous amount of time: + +* 1mb of zeros at the end: 45.54 seconds +* 2mb of zeros at the end: 357.04 seconds +(measured on a laptop made in 2015) + +This pull request changes the relevant section of the code to become linear runtime O(n), leading to a run time of less than a second for both cases mentioned above. Furthermore, this PR adds a regression test. 
+--- + PyPDF2/pdf.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py +index 9979414..8b355e0 100644 +--- a/PyPDF2/pdf.py ++++ b/PyPDF2/pdf.py +@@ -1930,7 +1930,7 @@ class PdfFileReader(object): + def readNextEndLine(self, stream): + debug = False + if debug: print(">>readNextEndLine") +- line = b_("") ++ line_parts = [] + while True: + # Prevent infinite loops in malformed PDFs + if stream.tell() == 0: +@@ -1957,10 +1957,10 @@ class PdfFileReader(object): + break + else: + if debug: print(" x is neither") +- line = x + line +- if debug: print((" RNEL line:", line)) ++ line_parts.append(x) + if debug: print("leaving RNEL") +- return line ++ line_parts.reverse() ++ return b"".join(line_parts) + + def decrypt(self, password): + """ +-- +2.30.2 + diff -Nru pypdf2-1.26.0/debian/patches/CVE-2022-24859.patch pypdf2-1.26.0/debian/patches/CVE-2022-24859.patch --- pypdf2-1.26.0/debian/patches/CVE-2022-24859.patch 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.26.0/debian/patches/CVE-2022-24859.patch 2024-02-11 18:49:50.000000000 +0000 @@ -0,0 +1,63 @@ +From: Markus Koschany +Date: Fri, 3 Jun 2022 08:12:01 +0200 +Subject: CVE-2022-24859 + +Bug-Debian: https://bugs.debian.org/1009879 +Origin: https://github.com/py-pdf/PyPDF2/pull/740 +--- + PyPDF2/pdf.py | 32 ++++++++++++++++++++++---------- + 1 file changed, 22 insertions(+), 10 deletions(-) + +diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py +index 9979414..b55dfba 100644 +--- a/PyPDF2/pdf.py ++++ b/PyPDF2/pdf.py +@@ -2723,11 +2723,25 @@ class ContentStream(DecodedStreamObject): + # left at beginning of ID + tmp = stream.read(3) + assert tmp[:2] == b_("ID") +- data = b_("") ++ data = BytesIO() ++ # Read the inline image, while checking for EI (End Image) operator. + while True: +- # Read the inline image, while checking for EI (End Image) operator. +- tok = stream.read(1) +- if tok == b_("E"): ++ # Read 8 kB at a time and check if the chunk contains the E operator. 
++ buf = stream.read(8192) ++ # We have reached the end of the stream, but haven't found the EI operator. ++ if not buf: ++ raise utils.PdfReadError("Unexpected end of stream") ++ loc = buf.find(b_("E")) ++ ++ if loc == -1: ++ data.write(buf) ++ else: ++ # Write out everything before the E. ++ data.write(buf[0:loc]) ++ ++ # Seek back in the stream to read the E next. ++ stream.seek(loc - len(buf), 1) ++ tok = stream.read(1) + # Check for End Image + tok2 = stream.read(1) + if tok2 == b_("I"): +@@ -2744,14 +2758,12 @@ class ContentStream(DecodedStreamObject): + stream.seek(-1, 1) + break + else: +- stream.seek(-1,1) +- data += info ++ stream.seek(-1, 1) ++ data.write(info) + else: + stream.seek(-1, 1) +- data += tok +- else: +- data += tok +- return {"settings": settings, "data": data} ++ data.write(tok) ++ return {"settings": settings, "data": data.getvalue()} + + def _getData(self): + newdata = BytesIO() diff -Nru pypdf2-1.26.0/debian/patches/series pypdf2-1.26.0/debian/patches/series --- pypdf2-1.26.0/debian/patches/series 2016-09-05 17:14:14.000000000 +0000 +++ pypdf2-1.26.0/debian/patches/series 2024-02-11 18:49:50.000000000 +0000 @@ -1 +1,3 @@ Prevent_infinite_loop_in_readObject.patch +CVE-2022-24859.patch +0001-MAINT-Quadratic-runtime-while-parsing-reduced-to-lin.patch