Version in base suite: 5.4.0-1 Base version: python-internetarchive_5.4.0-1 Target version: python-internetarchive_5.4.0-2~deb13u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/p/python-internetarchive/python-internetarchive_5.4.0-1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/p/python-internetarchive/python-internetarchive_5.4.0-2~deb13u1.dsc changelog | 8 patches/CVE-2025-58438/0001-Add-robust-cross-platform-filename-sanitization-and-.patch | 190 ++++++++++ patches/CVE-2025-58438/0002-Encode-in-sanitize_filename_windows-to-ensure-the-en.patch | 36 + patches/CVE-2025-58438/0003-Added-directory-traversal-attack-check-to-download.patch | 55 ++ patches/CVE-2025-58438/0004-fixed-typo.patch | 25 + patches/CVE-2025-58438/0005-Added-tests-for-file-sanitization.patch | 140 +++++++ patches/CVE-2025-58438/0006-Added-tests-for-file-sanitization.patch | 86 ++++ patches/series | 6 8 files changed, 546 insertions(+) gpgv: Signature made Tue Oct 21 18:48:59 2025 UTC gpgv: using RSA key B6E62F3D12AC38495C0DA90510C293B6C37C4E36 gpgv: Note: signatures using the SHA1 algorithm are rejected gpgv: Can't check signature: Bad public key dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmpsza0n8lu/python-internetarchive_5.4.0-2~deb13u1.dsc: no acceptable signature found diff -Nru python-internetarchive-5.4.0/debian/changelog python-internetarchive-5.4.0/debian/changelog --- python-internetarchive-5.4.0/debian/changelog 2025-05-06 19:56:02.000000000 +0000 +++ python-internetarchive-5.4.0/debian/changelog 2025-09-09 17:26:36.000000000 +0000 @@ -1,3 +1,11 @@ +python-internetarchive (5.4.0-2~deb13u1) trixie-security; urgency=high + + * Non-maintainer upload by the Security Team. 
+ * backport fix for directory traversal issue (Closes: #1114635, + CVE-2025-58438) + + -- Antoine Beaupré Tue, 09 Sep 2025 13:26:36 -0400 + python-internetarchive (5.4.0-1) unstable; urgency=medium * new upstream release diff -Nru python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0001-Add-robust-cross-platform-filename-sanitization-and-.patch python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0001-Add-robust-cross-platform-filename-sanitization-and-.patch --- python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0001-Add-robust-cross-platform-filename-sanitization-and-.patch 1970-01-01 00:00:00.000000000 +0000 +++ python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0001-Add-robust-cross-platform-filename-sanitization-and-.patch 2025-09-09 17:26:36.000000000 +0000 @@ -0,0 +1,190 @@ +From d324f30f0d307a4054912f77f9df635d0091248e Mon Sep 17 00:00:00 2001 +From: jake +Date: Tue, 19 Aug 2025 10:15:05 -0700 +Subject: [PATCH 1/6] Add robust cross-platform filename sanitization and + unsanitization utilities + +- Sanitize filenames for Windows and POSIX systems with percent-encoding +- Handle invalid characters and trailing spaces/dots on Windows +- Optionally avoid colon encoding for macOS compatibility +- Provide unsanitize function to decode percent-encoded sequences +- Add helper to sanitize only filename part of a full file path +- Include warnings when sanitization or decoding modifies input +--- + internetarchive/files.py | 8 +++ + internetarchive/utils.py | 134 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 142 insertions(+) + +diff --git a/internetarchive/files.py b/internetarchive/files.py +index 84a8294..8ac216b 100644 +--- a/internetarchive/files.py ++++ b/internetarchive/files.py +@@ -233,6 +233,14 @@ def download( # noqa: C901,PLR0911,PLR0912,PLR0915 + self.item.session.mount_http_adapter(max_retries=retries) + file_path = file_path or self.name + ++ # Sanitize only the filename portion of file_path to prevent 
invalid characters ++ # and potential directory traversal issues. ++ # We use `utils.sanitize_filepath` instead of `utils.sanitize_filename` because: ++ # - `sanitize_filepath` preserves the directory path intact (does not encode path separators), ++ # - allowing `os.makedirs` to create intermediate directories correctly, ++ # - while still sanitizing just the filename to ensure it is safe for filesystem use. ++ file_path = utils.sanitize_filepath(file_path) ++ + if destdir: + if return_responses is not True: + try: +diff --git a/internetarchive/utils.py b/internetarchive/utils.py +index 3f22711..b2c07cf 100644 +--- a/internetarchive/utils.py ++++ b/internetarchive/utils.py +@@ -29,8 +29,10 @@ + + import hashlib + import os ++import platform + import re + import sys ++import warnings + from collections.abc import Mapping + from typing import Iterable + from xml.dom.minidom import parseString +@@ -464,3 +466,135 @@ def is_valid_email(email): + # Ensures the TLD has at least 2 characters + pattern = r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z]{2,}$' + return re.match(pattern, email) is not None ++ ++ ++def is_windows() -> bool: ++ return ( ++ platform.system().lower() == "windows" ++ or sys.platform.startswith("win") ++ ) ++ ++ ++def sanitize_filepath(filepath: str, avoid_colon: bool = False) -> str: ++ """ ++ Sanitizes only the filename part of a full file path, leaving the directory path intact. ++ ++ This is useful when you need to ensure the filename is safe for filesystem use ++ without modifying the directory structure. Typically used before creating files ++ or directories to prevent invalid filename characters. ++ ++ Args: ++ filepath (str): The full file path to sanitize. ++ avoid_colon (bool): If True, colon ':' in the filename will be percent-encoded ++ for macOS compatibility. Defaults to False. ++ ++ Returns: ++ str: The sanitized file path with the filename portion percent-encoded as needed. 
++ """ ++ parent_dir = os.path.dirname(filepath) ++ filename = os.path.basename(filepath) ++ sanitized_filename = sanitize_filename(filename, avoid_colon) ++ return os.path.join(parent_dir, sanitized_filename) ++ ++ ++def sanitize_filename(name: str, avoid_colon: bool = False) -> str: ++ """ ++ Sanitizes a filename by replacing invalid characters with percent-encoded values. ++ This function is designed to be compatible with both Windows and POSIX systems. ++ ++ Args: ++ name (str): The original string to sanitize. ++ avoid_colon (bool): If True, colon ':' will be percent-encoded. ++ ++ Returns: ++ str: A sanitized version of the filename. ++ """ ++ original = name ++ if is_windows(): ++ sanitized = sanitize_filename_windows(name) ++ else: ++ sanitized = sanitize_filename_posix(name, avoid_colon) ++ ++ if sanitized != original: ++ warnings.warn( ++ f"Filename sanitized: original='{original}' sanitized='{sanitized}'", ++ UserWarning, ++ stacklevel=2 ++ ) ++ ++ return sanitized ++ ++ ++def unsanitize_filename(name: str) -> str: ++ """ ++ Reverses percent-encoding of the form %XX back to original characters. ++ Works for filenames sanitized by sanitize_filename (Windows or POSIX). ++ ++ Args: ++ name (str): Sanitized filename string with %XX encodings. ++ ++ Returns: ++ str: Original filename with all %XX sequences decoded. ++ """ ++ if '%' in name: ++ if re.search(r'%[0-9A-Fa-f]{2}', name): ++ warnings.warn( ++ "Filename contains percent-encoded sequences that will be decoded.", ++ UserWarning, ++ stacklevel=2 ++ ) ++ def decode_match(match): ++ hex_value = match.group(1) ++ return chr(int(hex_value, 16)) ++ ++ return re.sub(r'%([0-9A-Fa-f]{2})', decode_match, name) ++ ++ ++def sanitize_filename_windows(name: str) -> str: ++ r""" ++ Replaces Windows-invalid filename characters with percent-encoded values. ++ Characters replaced: < > : " / \ | ? * ++ ++ Args: ++ name (str): The original string. ++ ++ Returns: ++ str: A sanitized version safe for filesystem use. 
++ """ ++ invalid_chars = r'[<>:"/\\|?*\x00-\x1F]' ++ ++ def encode(char): ++ return f'%{ord(char.group()):02X}' ++ ++ # Replace invalid characters ++ name = re.sub(invalid_chars, encode, name) ++ ++ # Remove trailing dots or spaces (not allowed in Windows filenames) ++ return name.rstrip(' .') ++ ++ ++def sanitize_filename_posix(name: str, avoid_colon: bool = False) -> str: ++ """ ++ Sanitizes filenames for Linux, BSD, and Unix-like systems. ++ ++ - Percent-encodes forward slash '/' (always) ++ - Optionally percent-encodes colon ':' for macOS compatibility ++ ++ Args: ++ name (str): Original filename string. ++ avoid_colon (bool): If True, colon ':' will be encoded. ++ ++ Returns: ++ str: Sanitized filename safe for POSIX systems. ++ """ ++ # Build regex pattern dynamically ++ chars_to_encode = r'/' ++ if avoid_colon: ++ chars_to_encode += ':' ++ ++ pattern = f'[{re.escape(chars_to_encode)}]' ++ ++ def encode_char(match): ++ return f'%{ord(match.group()):02X}' ++ ++ return re.sub(pattern, encode_char, name) +-- +2.47.2 + diff -Nru python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0002-Encode-in-sanitize_filename_windows-to-ensure-the-en.patch python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0002-Encode-in-sanitize_filename_windows-to-ensure-the-en.patch --- python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0002-Encode-in-sanitize_filename_windows-to-ensure-the-en.patch 1970-01-01 00:00:00.000000000 +0000 +++ python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0002-Encode-in-sanitize_filename_windows-to-ensure-the-en.patch 2025-09-09 17:26:36.000000000 +0000 @@ -0,0 +1,36 @@ +From eceef89d001fbf0daec330f3fcf396a05b58337e Mon Sep 17 00:00:00 2001 +From: jake +Date: Thu, 4 Sep 2025 10:55:32 -0700 +Subject: [PATCH 2/6] Encode % in sanitize_filename_windows to ensure the + encoding is reliably reversible + +--- + internetarchive/utils.py | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git 
a/internetarchive/utils.py b/internetarchive/utils.py +index b2c07cf..58677a4 100644 +--- a/internetarchive/utils.py ++++ b/internetarchive/utils.py +@@ -553,7 +553,7 @@ def decode_match(match): + def sanitize_filename_windows(name: str) -> str: + r""" + Replaces Windows-invalid filename characters with percent-encoded values. +- Characters replaced: < > : " / \ | ? * ++ Characters replaced: < > : " / \ | ? * % + + Args: + name (str): The original string. +@@ -561,7 +561,8 @@ def sanitize_filename_windows(name: str) -> str: + Returns: + str: A sanitized version safe for filesystem use. + """ +- invalid_chars = r'[<>:"/\\|?*\x00-\x1F]' ++ # Encode `%` so that it's possible to round-trip (i.e. via `unsanitize_filename`) ++ invalid_chars = r'[<>:"/\\|?*\x00-\x1F]%' + + def encode(char): + return f'%{ord(char.group()):02X}' +-- +2.47.2 + diff -Nru python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0003-Added-directory-traversal-attack-check-to-download.patch python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0003-Added-directory-traversal-attack-check-to-download.patch --- python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0003-Added-directory-traversal-attack-check-to-download.patch 1970-01-01 00:00:00.000000000 +0000 +++ python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0003-Added-directory-traversal-attack-check-to-download.patch 2025-09-09 17:26:36.000000000 +0000 @@ -0,0 +1,55 @@ +From d583bd558f155c9494c50033cd6365b10840b014 Mon Sep 17 00:00:00 2001 +From: jake +Date: Thu, 4 Sep 2025 11:07:29 -0700 +Subject: [PATCH 3/6] Added directory traversal attack check to download + +--- + internetarchive/files.py | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/internetarchive/files.py b/internetarchive/files.py +index 8ac216b..55eebca 100644 +--- a/internetarchive/files.py ++++ b/internetarchive/files.py +@@ -29,6 +29,7 @@ + import sys + from contextlib import nullcontext, suppress + from 
email.utils import parsedate_to_datetime ++from pathlib import Path + from time import sleep + from urllib.parse import quote + +@@ -233,8 +234,8 @@ def download( # noqa: C901,PLR0911,PLR0912,PLR0915 + self.item.session.mount_http_adapter(max_retries=retries) + file_path = file_path or self.name + +- # Sanitize only the filename portion of file_path to prevent invalid characters +- # and potential directory traversal issues. ++ # Critical security check: Sanitize only the filename portion of file_path to ++ # prevent invalid characters and potential directory traversal issues. + # We use `utils.sanitize_filepath` instead of `utils.sanitize_filename` because: + # - `sanitize_filepath` preserves the directory path intact (does not encode path separators), + # - allowing `os.makedirs` to create intermediate directories correctly, +@@ -251,6 +252,19 @@ def download( # noqa: C901,PLR0911,PLR0912,PLR0915 + raise OSError(f'{destdir} is not a directory!') + file_path = os.path.join(destdir, file_path) + ++ # Critical security check: Prevent directory traversal attacks by ensuring ++ # the download path doesn't escape the target directory using path resolution ++ # and relative path validation. This protects against malicious filenames ++ # containing ../ sequences or other path manipulation attempts. ++ try: ++ # Resolve both paths to handle symlinks and absolute paths ++ target_path = Path(file_path).resolve() ++ base_dir = Path(destdir).resolve() if destdir else Path.cwd().resolve() ++ # Ensure the target path is relative to base directory ++ target_path.relative_to(base_dir) ++ except ValueError: ++ raise ValueError(f"Download path {file_path} is outside target directory {base_dir}") ++ + parent_dir = os.path.dirname(file_path) + + # Check if we should skip... 
+-- +2.47.2 + diff -Nru python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0004-fixed-typo.patch python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0004-fixed-typo.patch --- python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0004-fixed-typo.patch 1970-01-01 00:00:00.000000000 +0000 +++ python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0004-fixed-typo.patch 2025-09-09 17:26:36.000000000 +0000 @@ -0,0 +1,25 @@ +From d05d2bb9706557eaa7d80e08215ffff7dd4229b2 Mon Sep 17 00:00:00 2001 +From: jake +Date: Thu, 4 Sep 2025 11:18:08 -0700 +Subject: [PATCH 4/6] fixed typo + +--- + internetarchive/utils.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/internetarchive/utils.py b/internetarchive/utils.py +index 58677a4..1a1ef40 100644 +--- a/internetarchive/utils.py ++++ b/internetarchive/utils.py +@@ -562,7 +562,7 @@ def sanitize_filename_windows(name: str) -> str: + str: A sanitized version safe for filesystem use. + """ + # Encode `%` so that it's possible to round-trip (i.e. 
via `unsanitize_filename`) +- invalid_chars = r'[<>:"/\\|?*\x00-\x1F]%' ++ invalid_chars = r'[<>:"/\\|?*\x00-\x1F%]' + + def encode(char): + return f'%{ord(char.group()):02X}' +-- +2.47.2 + diff -Nru python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0005-Added-tests-for-file-sanitization.patch python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0005-Added-tests-for-file-sanitization.patch --- python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0005-Added-tests-for-file-sanitization.patch 1970-01-01 00:00:00.000000000 +0000 +++ python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0005-Added-tests-for-file-sanitization.patch 2025-09-09 17:26:36.000000000 +0000 @@ -0,0 +1,140 @@ +From e676fc510cc8ff7f3ab3ccb79de795e8c474874a Mon Sep 17 00:00:00 2001 +From: jake +Date: Thu, 4 Sep 2025 12:14:31 -0700 +Subject: [PATCH 5/6] Added tests for file sanitization + +--- + tests/test_item.py | 13 ------- + tests/test_utils.py | 90 +++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 90 insertions(+), 13 deletions(-) + +diff --git a/tests/test_item.py b/tests/test_item.py +index ddb076a..939bfdd 100644 +--- a/tests/test_item.py ++++ b/tests/test_item.py +@@ -234,19 +234,6 @@ def test_download_checksum(tmpdir, caplog): + assert 'nasa_meta.xml, file already exists based on checksum.' 
in caplog.text + + +-def test_download_destdir(tmpdir, nasa_item): +- tmpdir.chdir() +- with IaRequestsMock() as rsps: +- rsps.add(responses.GET, DOWNLOAD_URL_RE, +- body='new destdir', +- adding_headers=EXPECTED_LAST_MOD_HEADER) +- dest = os.path.join(str(tmpdir), 'new destdir') +- nasa_item.download(files='nasa_meta.xml', destdir=dest) +- assert 'nasa' in os.listdir(dest) +- with open(os.path.join(dest, 'nasa/nasa_meta.xml')) as fh: +- assert fh.read() == 'new destdir' +- +- + def test_download_no_directory(tmpdir, nasa_item): + url_re = re.compile(f'{PROTOCOL}//archive.org/download/.*') + tmpdir.chdir() +diff --git a/tests/test_utils.py b/tests/test_utils.py +index 298305f..5867cfd 100644 +--- a/tests/test_utils.py ++++ b/tests/test_utils.py +@@ -1,4 +1,8 @@ + import string ++import warnings ++from unittest.mock import patch ++ ++import pytest + + import internetarchive.utils + from tests.conftest import NASA_METADATA_PATH, IaRequestsMock +@@ -95,3 +99,89 @@ def test_is_valid_metadata_key(): + + for metadata_key in invalid: + assert not internetarchive.utils.is_valid_metadata_key(metadata_key) ++ ++ ++def test_is_windows(): ++ with patch('platform.system', return_value='Windows'), \ ++ patch('sys.platform', 'win32'): ++ assert internetarchive.utils.is_windows() is True ++ ++ with patch('platform.system', return_value='Linux'), \ ++ patch('sys.platform', 'linux'): ++ assert internetarchive.utils.is_windows() is False ++ ++def test_sanitize_filename_windows(): ++ test_cases = [ ++ ('file:name.txt', 'file%3Aname.txt'), ++ ('file%name.txt', 'file%25name.txt'), ++ ('con.txt', 'con.txt'), # Reserved name, but no invalid chars so unchanged ++ ('file .txt', 'file .txt'), # Internal space preserved (not trailing) ++ ('file ', 'file'), # Trailing spaces removed ++ ('file..', 'file'), # Trailing dots removed ++ ('file . 
', 'file'), # Trailing space and dot removed ++ ] ++ ++ for input_name, expected in test_cases: ++ result = internetarchive.utils.sanitize_filename_windows(input_name) ++ assert result == expected ++ ++ ++def test_sanitize_filename_posix(): ++ # Test without colon encoding ++ result = internetarchive.utils.sanitize_filename_posix('file/name.txt', False) ++ assert result == 'file%2Fname.txt' ++ ++ # Test with colon encoding ++ result = internetarchive.utils.sanitize_filename_posix('file:name.txt', True) ++ assert result == 'file%3Aname.txt' ++ ++ # Test mixed encoding ++ result = internetarchive.utils.sanitize_filename_posix('file/:name.txt', True) ++ assert result == 'file%2F%3Aname.txt' ++ ++ ++def test_unsanitize_filename(): ++ test_cases = [ ++ ('file%3Aname.txt', 'file:name.txt'), ++ ('file%2Fname.txt', 'file/name.txt'), ++ ('file%25name.txt', 'file%name.txt'), # Percent sign ++ ('normal.txt', 'normal.txt'), # No encoding ++ ] ++ ++ for input_name, expected in test_cases: ++ with warnings.catch_warnings(record=True) as w: ++ result = internetarchive.utils.unsanitize_filename(input_name) ++ assert result == expected ++ if '%' in input_name: ++ assert len(w) == 1 ++ assert issubclass(w[0].category, UserWarning) ++ ++ ++def test_sanitize_filename(): ++ # Test Windows path ++ with patch('internetarchive.utils.is_windows', return_value=True): ++ with warnings.catch_warnings(record=True) as w: ++ result = internetarchive.utils.sanitize_filename('file:name.txt') ++ assert result == 'file%3Aname.txt' ++ assert len(w) == 1 ++ assert "sanitized" in str(w[0].message) ++ ++ # Test POSIX path ++ with patch('internetarchive.utils.is_windows', return_value=False): ++ result = internetarchive.utils.sanitize_filename('file/name.txt', False) ++ assert result == 'file%2Fname.txt' ++ ++ ++def test_sanitize_filepath(): ++ # Test with colon encoding ++ result = internetarchive.utils.sanitize_filepath('/path/to/file:name.txt', True) ++ assert result == '/path/to/file%3Aname.txt' ++ 
++ # Test without colon encoding ++ result = internetarchive.utils.sanitize_filepath('/path/to/file:name.txt', False) ++ assert result == '/path/to/file:name.txt' # Colon not encoded on POSIX by default ++ ++ # Test Windows path (mocked) ++ with patch('internetarchive.utils.is_windows', return_value=True): ++ result = internetarchive.utils.sanitize_filepath('/path/to/con.txt') ++ assert result == '/path/to/con.txt' # Reserved name left unchanged (no invalid chars) +-- +2.47.2 + diff -Nru python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0006-Added-tests-for-file-sanitization.patch python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0006-Added-tests-for-file-sanitization.patch --- python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0006-Added-tests-for-file-sanitization.patch 1970-01-01 00:00:00.000000000 +0000 +++ python-internetarchive-5.4.0/debian/patches/CVE-2025-58438/0006-Added-tests-for-file-sanitization.patch 2025-09-09 17:26:36.000000000 +0000 @@ -0,0 +1,86 @@ +From ccf95b00e380cc97f5fd51d1623eec1de8e6a220 Mon Sep 17 00:00:00 2001 +From: jake +Date: Thu, 4 Sep 2025 12:16:57 -0700 +Subject: [PATCH 6/6] Added tests for file sanitization + +--- + tests/test_files.py | 42 ++++++++++++++++++++++++++++++++++++++++++ + tests/test_item.py | 13 +++++++++++++ + 2 files changed, 55 insertions(+) + create mode 100644 tests/test_files.py + +diff --git a/tests/test_files.py b/tests/test_files.py +new file mode 100644 +index 0000000..23b6eef +--- /dev/null ++++ b/tests/test_files.py +@@ -0,0 +1,42 @@ ++import os ++import re ++from unittest.mock import patch ++ ++import pytest ++import responses ++ ++from tests.conftest import PROTOCOL, IaRequestsMock ++ ++DOWNLOAD_URL_RE = re.compile(f'{PROTOCOL}//archive.org/download/.*') ++EXPECTED_LAST_MOD_HEADER = {"Last-Modified": "Tue, 14 Nov 2023 20:25:48 GMT"} ++ ++ ++def test_file_download_sanitizes_filename(tmpdir, nasa_item): ++ tmpdir.chdir() ++ ++ # Mock is_windows to return True to test Windows-style sanitization ++ with 
patch('internetarchive.utils.is_windows', return_value=True): ++ with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: ++ rsps.add(responses.GET, DOWNLOAD_URL_RE, ++ body='test content', ++ adding_headers=EXPECTED_LAST_MOD_HEADER) ++ # Test filename with Windows-invalid characters ++ file_obj = nasa_item.get_file('nasa_meta.xml') ++ problematic_name = 'file:with<illegal>chars.xml' ++ file_obj.download(file_path=problematic_name, destdir=str(tmpdir)) ++ ++ # Should create sanitized filename with percent encoding ++ expected_name = 'file%3Awith%3Cillegal%3Echars.xml' ++ expected_path = os.path.join(str(tmpdir), expected_name) ++ assert os.path.exists(expected_path) ++ ++ ++def test_file_download_prevents_directory_traversal(tmpdir, nasa_item): ++ tmpdir.chdir() ++ # Don't mock the request since it won't be made due to the security check ++ with IaRequestsMock(assert_all_requests_are_fired=False): ++ # Test directory traversal attempt by getting the file and calling download directly ++ file_obj = nasa_item.get_file('nasa_meta.xml') ++ malicious_path = os.path.join('..', 'nasa_meta.xml') ++ with pytest.raises(ValueError, match="outside target directory"): ++ file_obj.download(file_path=malicious_path, destdir=str(tmpdir)) +diff --git a/tests/test_item.py b/tests/test_item.py +index 939bfdd..ddb076a 100644 +--- a/tests/test_item.py ++++ b/tests/test_item.py +@@ -234,6 +234,19 @@ def test_download_checksum(tmpdir, caplog): + assert 'nasa_meta.xml, file already exists based on checksum.' 
in caplog.text + + ++def test_download_destdir(tmpdir, nasa_item): ++ tmpdir.chdir() ++ with IaRequestsMock() as rsps: ++ rsps.add(responses.GET, DOWNLOAD_URL_RE, ++ body='new destdir', ++ adding_headers=EXPECTED_LAST_MOD_HEADER) ++ dest = os.path.join(str(tmpdir), 'new destdir') ++ nasa_item.download(files='nasa_meta.xml', destdir=dest) ++ assert 'nasa' in os.listdir(dest) ++ with open(os.path.join(dest, 'nasa/nasa_meta.xml')) as fh: ++ assert fh.read() == 'new destdir' ++ ++ + def test_download_no_directory(tmpdir, nasa_item): + url_re = re.compile(f'{PROTOCOL}//archive.org/download/.*') + tmpdir.chdir() +-- +2.47.2 + diff -Nru python-internetarchive-5.4.0/debian/patches/series python-internetarchive-5.4.0/debian/patches/series --- python-internetarchive-5.4.0/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 +++ python-internetarchive-5.4.0/debian/patches/series 2025-09-09 17:26:36.000000000 +0000 @@ -0,0 +1,6 @@ +CVE-2025-58438/0001-Add-robust-cross-platform-filename-sanitization-and-.patch +CVE-2025-58438/0002-Encode-in-sanitize_filename_windows-to-ensure-the-en.patch +CVE-2025-58438/0003-Added-directory-traversal-attack-check-to-download.patch +CVE-2025-58438/0004-fixed-typo.patch +CVE-2025-58438/0005-Added-tests-for-file-sanitization.patch +CVE-2025-58438/0006-Added-tests-for-file-sanitization.patch