Version in base suite: 2.5.0-1+deb12u1
Base version: expat_2.5.0-1+deb12u1
Target version: expat_2.5.0-1+deb12u2
Base file: /srv/ftp-master.debian.org/ftp/pool/main/e/expat/expat_2.5.0-1+deb12u1.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/e/expat/expat_2.5.0-1+deb12u2.dsc
changelog | 16
libexpat1.symbols | 2
patches/expat-2.5.0-CVE-2023-52425.patch | 1466 +++++++++++++++++++++++++++++
patches/expat-2.5.0-CVE-2024-50602.patch | 108 ++
patches/expat-2.5.0-CVE-2024-8176.patch | 1535 +++++++++++++++++++++++++++++++
patches/series | 3
6 files changed, 3130 insertions(+)
diff -Nru expat-2.5.0/debian/changelog expat-2.5.0/debian/changelog
--- expat-2.5.0/debian/changelog 2024-09-08 06:44:19.000000000 +0000
+++ expat-2.5.0/debian/changelog 2025-04-05 05:36:55.000000000 +0000
@@ -1,3 +1,19 @@
+expat (2.5.0-1+deb12u2) bookworm; urgency=medium
+
+ [ Tomas Korbar
The functions in this section configure the built-in +- protection against various forms of +- billion laughs attacks.
+++@@ -2188,6 +2185,27 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p, + + + ++XML_SetReparseDeferralEnabled
++++/* Added in Expat 2.6.0. */ ++XML_Bool XMLCALL ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); ++++++++ +++ Large tokens may require many parse calls before enough data is available for Expat to parse it in full. ++ If Expat retried parsing the token on every parse call, parsing could take quadratic time. ++ To avoid this, Expat only retries once a significant amount of new data is available. ++ This function allows disabling this behavior. ++
++++ The
++enabled
argument should be XML_TRUE
or XML_FALSE
. ++++ Returns
++XML_TRUE
on success, and XML_FALSE
on error. ++Miscellaneous functions
+ +The functions in this section either obtain state information from +diff --git a/expat/doc/xmlwf.xml b/expat/doc/xmlwf.xml +index 9603abf..3d35393 100644 +--- a/expat/doc/xmlwf.xml ++++ b/expat/doc/xmlwf.xml +@@ -313,6 +313,16 @@ supports both. + + + ++
++ ++ +++ ++ ++++ Disable reparse deferral, and allow quadratic parse runtime ++ on large tokens (default: reparse deferral enabled). ++ +++ + +diff --git a/expat/lib/expat.h b/expat/lib/expat.h +index 1c83563..842dd70 100644 +--- a/expat/lib/expat.h ++++ b/expat/lib/expat.h +@@ -16,6 +16,7 @@ + Copyright (c) 2016 Thomas Beutlich + Copyright (c) 2017 Rhodri James + Copyright (c) 2022 Thijs Schreijer ++ Copyright (c) 2023 Sony Corporation / Snild Dolkow + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining +@@ -1050,6 +1051,10 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); + #endif + ++/* Added in Expat 2.6.0. */ ++XMLPARSEAPI(XML_Bool) ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); ++ + /* Expat follows the semantic versioning convention. + See http://semver.org. + */ +diff --git a/expat/lib/internal.h b/expat/lib/internal.h +index e09f533..e2709c8 100644 +--- a/expat/lib/internal.h ++++ b/expat/lib/internal.h +@@ -31,6 +31,7 @@ + Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2018 Yury Gribov + Copyright (c) 2019 David Loffredo ++ Copyright (c) 2023 Sony Corporation / Snild Dolkow + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining +@@ -160,6 +161,9 @@ unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); + const char *unsignedCharToPrintable(unsigned char c); + #endif + ++extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c ++extern unsigned int g_parseAttempts; // used for testing only ++ + #ifdef __cplusplus + } + #endif +diff --git a/expat/lib/libexpat.def.cmake b/expat/lib/libexpat.def.cmake +index cf434a2..3ff4d55 100644 +--- a/expat/lib/libexpat.def.cmake ++++ b/expat/lib/libexpat.def.cmake +@@ -77,3 +77,4 @@ EXPORTS + ; added with version 2.4.0 + @_EXPAT_COMMENT_DTD@ 
XML_SetBillionLaughsAttackProtectionActivationThreshold @69 + @_EXPAT_COMMENT_DTD@ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 ++XML_SetReparseDeferralEnabled @71 +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index b6c2eca..2ae64e9 100644 +--- a/expat/lib/xmlparse.c ++++ b/expat/lib/xmlparse.c +@@ -73,6 +73,7 @@ + # endif + #endif + ++#include + #include + #include /* memset(), memcpy() */ + #include +@@ -196,6 +197,8 @@ typedef char ICHAR; + /* Do safe (NULL-aware) pointer arithmetic */ + #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) + ++#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b)) ++ + #include "internal.h" + #include "xmltok.h" + #include "xmlrole.h" +@@ -602,6 +605,9 @@ static unsigned long getDebugLevel(const char *variableName, + ? 0 \ + : ((*((pool)->ptr)++ = c), 1)) + ++XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c ++unsigned int g_parseAttempts = 0; // used for testing only ++ + struct XML_ParserStruct { + /* The first member must be m_userData so that the XML_GetUserData + macro works. */ +@@ -617,6 +623,9 @@ struct XML_ParserStruct { + const char *m_bufferLim; + XML_Index m_parseEndByteIndex; + const char *m_parseEndPtr; ++ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ ++ XML_Bool m_reparseDeferralEnabled; ++ int m_lastBufferRequestSize; + XML_Char *m_dataBuf; + XML_Char *m_dataBufEnd; + XML_StartElementHandler m_startElementHandler; +@@ -948,6 +957,47 @@ get_hash_secret_salt(XML_Parser parser) { + return parser->m_hash_secret_salt; + } + ++static enum XML_Error ++callProcessor(XML_Parser parser, const char *start, const char *end, ++ const char **endPtr) { ++ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); ++ ++ if (parser->m_reparseDeferralEnabled ++ && ! parser->m_parsingStatus.finalBuffer) { ++ // Heuristic: don't try to parse a partial token again until the amount of ++ // available data has increased significantly. 
++ const size_t had_before = parser->m_partialTokenBytesBefore; ++ // ...but *do* try anyway if we're close to causing a reallocation. ++ size_t available_buffer ++ = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); ++#if XML_CONTEXT_BYTES > 0 ++ available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); ++#endif ++ available_buffer ++ += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); ++ // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok ++ const bool enough ++ = (have_now >= 2 * had_before) ++ || ((size_t)parser->m_lastBufferRequestSize > available_buffer); ++ ++ if (! enough) { ++ *endPtr = start; // callers may expect this to be set ++ return XML_ERROR_NONE; ++ } ++ } ++ g_parseAttempts += 1; ++ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); ++ if (ret == XML_ERROR_NONE) { ++ // if we consumed nothing, remember what we had on this parse attempt. ++ if (*endPtr == start) { ++ parser->m_partialTokenBytesBefore = have_now; ++ } else { ++ parser->m_partialTokenBytesBefore = 0; ++ } ++ } ++ return ret; ++} ++ + static XML_Bool /* only valid for root parser */ + startParsing(XML_Parser parser) { + /* hash functions must be initialized before setContext() is called */ +@@ -1129,6 +1179,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_bufferEnd = parser->m_buffer; + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; ++ parser->m_partialTokenBytesBefore = 0; ++ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; ++ parser->m_lastBufferRequestSize = 0; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; +@@ -1298,6 +1351,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + to worry which hash secrets each table has. 
+ */ + unsigned long oldhash_secret_salt; ++ XML_Bool oldReparseDeferralEnabled; + + /* Validate the oldParser parameter before we pull everything out of it */ + if (oldParser == NULL) +@@ -1342,6 +1396,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + to worry which hash secrets each table has. + */ + oldhash_secret_salt = parser->m_hash_secret_salt; ++ oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; + + #ifdef XML_DTD + if (! context) +@@ -1394,6 +1449,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; + parser->m_ns_triplets = oldns_triplets; + parser->m_hash_secret_salt = oldhash_secret_salt; ++ parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; + parser->m_parentParser = oldParser; + #ifdef XML_DTD + parser->m_paramEntityParsing = oldParamEntityParsing; +@@ -1848,55 +1904,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + parser->m_parsingStatus.parsing = XML_PARSING; + } + +- if (len == 0) { +- parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; +- if (! isFinal) +- return XML_STATUS_OK; +- parser->m_positionPtr = parser->m_bufferPtr; +- parser->m_parseEndPtr = parser->m_bufferEnd; +- +- /* If data are left over from last buffer, and we now know that these +- data are the final chunk of input, then we have to check them again +- to detect errors based on that fact. +- */ +- parser->m_errorCode +- = parser->m_processor(parser, parser->m_bufferPtr, +- parser->m_parseEndPtr, &parser->m_bufferPtr); +- +- if (parser->m_errorCode == XML_ERROR_NONE) { +- switch (parser->m_parsingStatus.parsing) { +- case XML_SUSPENDED: +- /* It is hard to be certain, but it seems that this case +- * cannot occur. This code is cleaning up a previous parse +- * with no new data (since len == 0). 
Changing the parsing +- * state requires getting to execute a handler function, and +- * there doesn't seem to be an opportunity for that while in +- * this circumstance. +- * +- * Given the uncertainty, we retain the code but exclude it +- * from coverage tests. +- * +- * LCOV_EXCL_START +- */ +- XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, +- parser->m_bufferPtr, &parser->m_position); +- parser->m_positionPtr = parser->m_bufferPtr; +- return XML_STATUS_SUSPENDED; +- /* LCOV_EXCL_STOP */ +- case XML_INITIALIZED: +- case XML_PARSING: +- parser->m_parsingStatus.parsing = XML_FINISHED; +- /* fall through */ +- default: +- return XML_STATUS_OK; +- } +- } +- parser->m_eventEndPtr = parser->m_eventPtr; +- parser->m_processor = errorProcessor; +- return XML_STATUS_ERROR; +- } + #ifndef XML_CONTEXT_BYTES +- else if (parser->m_bufferPtr == parser->m_bufferEnd) { ++ if (parser->m_bufferPtr == parser->m_bufferEnd) { + const char *end; + int nLeftOver; + enum XML_Status result; +@@ -1907,12 +1916,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; + } ++ // though this isn't a buffer request, we assume that `len` is the app's ++ // preferred buffer fill size, and therefore save it here. 
++ parser->m_lastBufferRequestSize = len; + parser->m_parseEndByteIndex += len; + parser->m_positionPtr = s; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + + parser->m_errorCode +- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); ++ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -1939,23 +1951,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + &parser->m_position); + nLeftOver = s + len - end; + if (nLeftOver) { +- if (parser->m_buffer == NULL +- || nLeftOver > parser->m_bufferLim - parser->m_buffer) { +- /* avoid _signed_ integer overflow */ +- char *temp = NULL; +- const int bytesToAllocate = (int)((unsigned)len * 2U); +- if (bytesToAllocate > 0) { +- temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); +- } +- if (temp == NULL) { +- parser->m_errorCode = XML_ERROR_NO_MEMORY; +- parser->m_eventPtr = parser->m_eventEndPtr = NULL; +- parser->m_processor = errorProcessor; +- return XML_STATUS_ERROR; +- } +- parser->m_buffer = temp; +- parser->m_bufferLim = parser->m_buffer + bytesToAllocate; ++ // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED ++ // (and XML_ERROR_FINISHED) from XML_GetBuffer. ++ const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; ++ parser->m_parsingStatus.parsing = XML_PARSING; ++ void *const temp = XML_GetBuffer(parser, nLeftOver); ++ parser->m_parsingStatus.parsing = originalStatus; ++ // GetBuffer may have overwritten this, but we want to remember what the ++ // app requested, not how many bytes were left over after parsing. ++ parser->m_lastBufferRequestSize = len; ++ if (temp == NULL) { ++ // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
++ parser->m_eventPtr = parser->m_eventEndPtr = NULL; ++ parser->m_processor = errorProcessor; ++ return XML_STATUS_ERROR; + } ++ // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we ++ // don't have any data to preserve, and can copy straight into the start ++ // of the buffer rather than the GetBuffer return pointer (which may be ++ // pointing further into the allocated buffer). + memcpy(parser->m_buffer, end, nLeftOver); + } + parser->m_bufferPtr = parser->m_buffer; +@@ -1967,15 +1981,14 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + return result; + } + #endif /* not defined XML_CONTEXT_BYTES */ +- else { +- void *buff = XML_GetBuffer(parser, len); +- if (buff == NULL) +- return XML_STATUS_ERROR; +- else { +- memcpy(buff, s, len); +- return XML_ParseBuffer(parser, len, isFinal); +- } ++ void *buff = XML_GetBuffer(parser, len); ++ if (buff == NULL) ++ return XML_STATUS_ERROR; ++ if (len > 0) { ++ assert(s != NULL); // make sure s==NULL && len!=0 was rejected above ++ memcpy(buff, s, len); + } ++ return XML_ParseBuffer(parser, len, isFinal); + } + + enum XML_Status XMLCALL +@@ -2021,8 +2034,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { + parser->m_parseEndByteIndex += len; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + +- parser->m_errorCode = parser->m_processor( +- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); ++ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, ++ &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -2067,10 +2080,14 @@ XML_GetBuffer(XML_Parser parser, int len) { + default:; + } + +- if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { +-#ifdef XML_CONTEXT_BYTES ++ // whether or not the request succeeds, `len` seems to be the app's preferred ++ // buffer fill size; remember it. 
++ parser->m_lastBufferRequestSize = len; ++ if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) ++ || parser->m_buffer == NULL) { ++#if XML_CONTEXT_BYTES > 0 + int keep; +-#endif /* defined XML_CONTEXT_BYTES */ ++#endif /* XML_CONTEXT_BYTES > 0 */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int)((unsigned)len + + (unsigned)EXPAT_SAFE_PTR_DIFF( +@@ -2079,7 +2096,7 @@ XML_GetBuffer(XML_Parser parser, int len) { + parser->m_errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +-#ifdef XML_CONTEXT_BYTES ++#if XML_CONTEXT_BYTES > 0 + keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; +@@ -2089,10 +2106,11 @@ XML_GetBuffer(XML_Parser parser, int len) { + return NULL; + } + neededSize += keep; +-#endif /* defined XML_CONTEXT_BYTES */ +- if (neededSize +- <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { +-#ifdef XML_CONTEXT_BYTES ++#endif /* XML_CONTEXT_BYTES > 0 */ ++ if (parser->m_buffer && parser->m_bufferPtr ++ && neededSize ++ <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { ++#if XML_CONTEXT_BYTES > 0 + if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { + int offset + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) +@@ -2105,19 +2123,17 @@ XML_GetBuffer(XML_Parser parser, int len) { + parser->m_bufferPtr -= offset; + } + #else +- if (parser->m_buffer && parser->m_bufferPtr) { +- memmove(parser->m_buffer, parser->m_bufferPtr, +- EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); +- parser->m_bufferEnd +- = parser->m_buffer +- + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); +- parser->m_bufferPtr = parser->m_buffer; +- } +-#endif /* not defined XML_CONTEXT_BYTES */ ++ memmove(parser->m_buffer, parser->m_bufferPtr, ++ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); ++ parser->m_bufferEnd ++ = parser->m_buffer ++ + 
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); ++ parser->m_bufferPtr = parser->m_buffer; ++#endif /* XML_CONTEXT_BYTES > 0 */ + } else { + char *newBuf; + int bufferSize +- = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); ++ = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); + if (bufferSize == 0) + bufferSize = INIT_BUFFER_SIZE; + do { +@@ -2134,7 +2150,7 @@ XML_GetBuffer(XML_Parser parser, int len) { + return NULL; + } + parser->m_bufferLim = newBuf + bufferSize; +-#ifdef XML_CONTEXT_BYTES ++#if XML_CONTEXT_BYTES > 0 + if (parser->m_bufferPtr) { + memcpy(newBuf, &parser->m_bufferPtr[-keep], + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) +@@ -2164,7 +2180,7 @@ XML_GetBuffer(XML_Parser parser, int len) { + parser->m_bufferEnd = newBuf; + } + parser->m_bufferPtr = parser->m_buffer = newBuf; +-#endif /* not defined XML_CONTEXT_BYTES */ ++#endif /* XML_CONTEXT_BYTES > 0 */ + } + parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; +@@ -2214,7 +2230,7 @@ XML_ResumeParser(XML_Parser parser) { + } + parser->m_parsingStatus.parsing = XML_PARSING; + +- parser->m_errorCode = parser->m_processor( ++ parser->m_errorCode = callProcessor( + parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { +@@ -2567,6 +2583,15 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( + } + #endif /* XML_DTD */ + ++XML_Bool XMLCALL ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { ++ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { ++ parser->m_reparseDeferralEnabled = enabled; ++ return XML_TRUE; ++ } ++ return XML_FALSE; ++} ++ + /* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more +@@ -4488,15 +4513,15 @@ 
entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + parser->m_processor = entityValueProcessor; + return entityValueProcessor(parser, next, end, nextPtr); + } +- /* If we are at the end of the buffer, this would cause XmlPrologTok to +- return XML_TOK_NONE on the next call, which would then cause the +- function to exit with *nextPtr set to s - that is what we want for other +- tokens, but not for the BOM - we would rather like to skip it; +- then, when this routine is entered the next time, XmlPrologTok will +- return XML_TOK_INVALID, since the BOM is still in the buffer ++ /* XmlPrologTok has now set the encoding based on the BOM it found, and we ++ must move s and nextPtr forward to consume the BOM. ++ ++ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we ++ would leave the BOM in the buffer and return. On the next call to this ++ function, our XmlPrologTok call would return XML_TOK_INVALID, since it ++ is not valid to have multiple BOMs. + */ +- else if (tok == XML_TOK_BOM && next == end +- && ! parser->m_parsingStatus.finalBuffer) { ++ else if (tok == XML_TOK_BOM) { + # ifdef XML_DTD + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { +@@ -4506,7 +4531,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + # endif + + *nextPtr = next; +- return XML_ERROR_NONE; ++ s = next; + } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with +diff --git a/expat/tests/minicheck.c b/expat/tests/minicheck.c +index 1c65748..f383380 100644 +--- a/expat/tests/minicheck.c ++++ b/expat/tests/minicheck.c +@@ -208,6 +208,21 @@ srunner_run_all(SRunner *runner, int verbosity) { + } + } + ++void ++_fail(const char *file, int line, const char *msg) { ++ /* Always print the error message so it isn't lost. In this case, ++ we have a failure, so there's no reason to be quiet about what ++ it is. 
++ */ ++ _check_current_filename = file; ++ _check_current_lineno = line; ++ if (msg != NULL) { ++ const int has_newline = (msg[strlen(msg) - 1] == '\n'); ++ fprintf(stderr, "ERROR: %s%s", msg, has_newline ? "" : "\n"); ++ } ++ longjmp(env, 1); ++} ++ + void + _fail_unless(int condition, const char *file, int line, const char *msg) { + /* Always print the error message so it isn't lost. In this case, +diff --git a/expat/tests/minicheck.h b/expat/tests/minicheck.h +index cc1f835..032b54e 100644 +--- a/expat/tests/minicheck.h ++++ b/expat/tests/minicheck.h +@@ -64,7 +64,14 @@ extern "C" { + } \ + } + +-#define fail(msg) _fail_unless(0, __FILE__, __LINE__, msg) ++ ++# define fail(msg) _fail(__FILE__, __LINE__, msg) ++# define assert_true(cond) \ ++ do { \ ++ if (! (cond)) { \ ++ _fail(__FILE__, __LINE__, "check failed: " #cond); \ ++ } \ ++ } while (0) + + typedef void (*tcase_setup_function)(void); + typedef void (*tcase_teardown_function)(void); +@@ -103,6 +110,11 @@ void _check_set_test_info(char const *function, char const *filename, + * Prototypes for the actual implementation. + */ + ++# if defined(__GNUC__) ++__attribute__((noreturn)) ++# endif ++void ++_fail(const char *file, int line, const char *msg); + void _fail_unless(int condition, const char *file, int line, const char *msg); + Suite *suite_create(const char *name); + TCase *tcase_create(const char *name); +diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c +index 915fa52..941f61d 100644 +--- a/expat/tests/runtests.c ++++ b/expat/tests/runtests.c +@@ -54,6 +54,7 @@ + #include + #include + #include /* intptr_t uint64_t */ ++#include + + #if ! 
defined(__cplusplus) + # include +@@ -1071,7 +1072,7 @@ START_TEST(test_column_number_after_parse) { + const char *text = " "; + XML_Size colno; + +- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); + colno = XML_GetCurrentColumnNumber(g_parser); +@@ -2582,7 +2583,7 @@ START_TEST(test_default_current) { + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD")); ++ CharData_CheckXMLChars(&storage, XCS("DCDCDCDD")); + + /* Again, without the defaulting */ + XML_ParserReset(g_parser, NULL); +@@ -2593,7 +2594,7 @@ START_TEST(test_default_current) { + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DcccccD")); ++ CharData_CheckXMLChars(&storage, XCS("DcccD")); + + /* Now with an internal entity to complicate matters */ + XML_ParserReset(g_parser, NULL); +@@ -3946,6 +3947,19 @@ START_TEST(test_get_buffer_3_overflow) { + END_TEST + #endif // defined(XML_CONTEXT_BYTES) + ++START_TEST(test_getbuffer_allocates_on_zero_len) { ++ for (int first_len = 1; first_len >= 0; first_len--) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_GetBuffer(parser, first_len) != NULL); ++ assert_true(XML_GetBuffer(parser, 0) != NULL); ++ if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) ++ xml_failure(parser); ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ + /* Test position information macros */ + START_TEST(test_byte_info_at_end) { + const char *text = " "; +@@ -6205,6 +6219,12 @@ START_TEST(test_utf8_in_start_tags) { + char doc[1024]; + size_t failCount = 0; + ++ // we need all the bytes to be parsed, but we don't want the errors that can ++ // 
trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. ++ if (g_reparseDeferralEnabledDefault) { ++ return; ++ } ++ + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + size_t j = 0; + for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { +@@ -6830,6 +6850,613 @@ START_TEST(test_nested_entity_suspend) { + } + END_TEST + ++/* Regression test for quadratic parsing on large tokens */ ++START_TEST(test_big_tokens_take_linear_time) { ++ const char *const too_slow_failure_message ++ = "Compared to the baseline runtime of the first test, this test has a " ++ "slowdown of more than . " ++ "Please keep increasing the value by 1 until it reliably passes the " ++ "test on your hardware and open a bug sharing that number with us. " ++ "Thanks in advance!"; ++ const struct { ++ const char *pre; ++ const char *post; ++ } text[] = { ++ {"", ""}, // assumed good, used as baseline ++ {""}, // CDATA, performed OK before patch ++ {" "}, // big attribute, used to be O(N²) ++ {" "}, // long comment, used to be O(N²) ++ {" <", "/> "}, // big elem name, used to be O(N²) ++ }; ++ const int num_cases = sizeof(text) / sizeof(text[0]); ++ // For the test we need avalue that is: ++ // (1) big enough that the test passes reliably (avoiding flaky tests), and ++ // (2) small enough that the test actually catches regressions. ++ const int max_slowdown = 15; ++ char aaaaaa[4096]; ++ const int fillsize = (int)sizeof(aaaaaa); ++ const int fillcount = 100; ++ ++ memset(aaaaaa, 'a', fillsize); ++ ++ if (! g_reparseDeferralEnabledDefault) { ++ return; // heuristic is disabled; we would get O(n^2) and fail. ++ } ++#if defined(_WIN32) ++ if (CLOCKS_PER_SEC < 100000) { ++ // Skip this test if clock() doesn't have reasonably good resolution. ++ // This workaround is only applied to Windows targets, since XSI requires ++ // the value to be 1 000 000 (10x the condition here), and we want to be ++ // very sure that at least one platform in CI can catch regressions. 
++ return; ++ } ++#endif ++ ++ clock_t baseline = 0; ++ for (int i = 0; i < num_cases; ++i) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ enum XML_Status status; ++ const clock_t start = clock(); ++ ++ // parse the start text ++ status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, ++ (int)strlen(text[i].pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ // parse lots of 'a', failing the test early if it takes too long ++ for (int f = 0; f < fillcount; ++f) { ++ status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ // i == 0 means we're still calculating the baseline value ++ if (i > 0) { ++ const clock_t now = clock(); ++ const clock_t clocks_so_far = now - start; ++ const int slowdown = clocks_so_far / baseline; ++ if (slowdown >= max_slowdown) { ++ fprintf( ++ stderr, ++ "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n", ++ f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown); ++ fail(too_slow_failure_message); ++ } ++ } ++ } ++ // parse the end text ++ status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, ++ (int)strlen(text[i].post), XML_TRUE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ // how long did it take in total? 
++ const clock_t end = clock(); ++ const clock_t taken = end - start; ++ if (i == 0) { ++ assert_true(taken > 0); // just to make sure we don't div-by-0 later ++ baseline = taken; ++ } ++ const int slowdown = taken / baseline; ++ if (slowdown >= max_slowdown) { ++ fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n", ++ (int)taken, (int)baseline, slowdown, max_slowdown); ++ fail(too_slow_failure_message); ++ } ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral) { ++ const char *const pre = " "; ++ const char *const start = " "; ++ char eeeeee[100]; ++ const int fillsize = (int)sizeof(eeeeee); ++ memset(eeeeee, 'e', fillsize); ++ ++ for (int enabled = 0; enabled <= 1; enabled += 1) { ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); ++ // pre-grow the buffer to avoid reparsing due to almost-fullness ++ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ enum XML_Status status; ++ // parse the start text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done ++ ++ // ..and the start of the token ++ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one ++ ++ // try to parse lots of 'e', but the token isn't finished ++ for (int c = 0; c < 100; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 
++ ++ // end the token. ++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ if (enabled) { ++ // In general, we may need to push more data to trigger a reparse attempt, ++ // but in this test, the data is constructed to always require it. ++ CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect ++ // 2x the token length should suffice; the +1 covers the start and end. ++ for (int c = 0; c < 101; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ } ++ CharData_CheckXMLChars(&storage, XCS("dx")); // the should be done ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++struct element_decl_data { ++ XML_Parser parser; ++ int count; ++}; ++ ++static void ++element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { ++ UNUSED_P(name); ++ struct element_decl_data *testdata = (struct element_decl_data *)userData; ++ testdata->count += 1; ++ XML_FreeContentModel(testdata->parser, model); ++} ++ ++static int ++external_inherited_parser(XML_Parser p, const XML_Char *context, ++ const XML_Char *base, const XML_Char *systemId, ++ const XML_Char *publicId) { ++ UNUSED_P(base); ++ UNUSED_P(systemId); ++ UNUSED_P(publicId); ++ const char *const pre = "\n"; ++ const char *const start = "\n"; ++ const char *const post = "\n"; ++ const int enabled = *(int *)XML_GetUserData(p); ++ char eeeeee[100]; ++ char spaces[100]; ++ const int fillsize = (int)sizeof(eeeeee); ++ assert_true(fillsize == (int)sizeof(spaces)); ++ memset(eeeeee, 'e', fillsize); ++ memset(spaces, ' ', fillsize); ++ ++ XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); ++ assert_true(parser != NULL); ++ // pre-grow the buffer to avoid reparsing due to almost-fullness ++ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); ++ ++ struct element_decl_data testdata; ++ testdata.parser = 
parser; ++ testdata.count = 0; ++ XML_SetUserData(parser, &testdata); ++ XML_SetElementDeclHandler(parser, element_decl_counter); ++ ++ enum XML_Status status; ++ // parse the initial text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 1); // first element should be done ++ ++ // ..and the start of the big token ++ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 1); // still just the first one ++ ++ // try to parse lots of 'e', but the token isn't finished ++ for (int c = 0; c < 100; ++c) { ++ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ assert_true(testdata.count == 1); // *still* just the first one ++ ++ // end the big token. ++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ ++ if (enabled) { ++ // In general, we may need to push more data to trigger a reparse attempt, ++ // but in this test, the data is constructed to always require it. ++ assert_true(testdata.count == 1); // or the test is incorrect ++ // 2x the token length should suffice; the +1 covers the start and end. 
++ for (int c = 0; c < 101; ++c) { ++ status = XML_Parse(parser, spaces, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ } ++ } ++ assert_true(testdata.count == 2); // the big token should be done ++ ++ // parse the final text ++ status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done ++ ++ XML_ParserFree(parser); ++ return XML_STATUS_OK; ++} ++ ++START_TEST(test_reparse_deferral_is_inherited) { ++ const char *const text ++ = " "; ++ for (int enabled = 0; enabled <= 1; ++enabled) { ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ XML_SetUserData(parser, (void *)&enabled); ++ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); ++ // this handler creates a sub-parser and checks that its deferral behavior ++ // is what we expected, based on the value of `enabled` (in userdata). 
++ XML_SetExternalEntityRefHandler(parser, external_inherited_parser); ++ assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); ++ if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) ++ xml_failure(parser); ++ ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral_on_null_parser) { ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) ++ == XML_FALSE); ++ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) ++ == XML_FALSE); ++} ++END_TEST ++ ++START_TEST(test_set_reparse_deferral_on_the_fly) { ++ const char *const pre = " "; ++ char iiiiii[100]; ++ const int fillsize = (int)sizeof(iiiiii); ++ memset(iiiiii, 'i', fillsize); ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ enum XML_Status status; ++ // parse the start text ++ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done ++ ++ // try to parse some 'i', but the token isn't finished ++ status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one ++ ++ // end the token. 
++ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("d")); // not yet. ++ ++ // now change the heuristic setting and add *no* data ++ assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); ++ // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. ++ status = XML_Parse(parser, "", 0, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ CharData_CheckXMLChars(&storage, XCS("dx")); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_set_bad_reparse_option) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); ++ assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); ++ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); ++ assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++static size_t g_totalAlloc = 0; ++static size_t g_biggestAlloc = 0; ++ ++static void * ++counting_realloc(void *ptr, size_t size) { ++ g_totalAlloc += size; ++ if (size > g_biggestAlloc) { ++ g_biggestAlloc = size; ++ } ++ return realloc(ptr, size); ++} ++ ++static void * ++counting_malloc(size_t size) { ++ return counting_realloc(NULL, size); ++} ++ ++START_TEST(test_bypass_heuristic_when_close_to_bufsize) { ++ if (! g_reparseDeferralEnabledDefault) { ++ return; // this test is irrelevant when the deferral heuristic is disabled. 
++ } ++ ++ const int document_length = 65536; ++ char *const document = (char *)malloc(document_length); ++ ++ const XML_Memory_Handling_Suite memfuncs = { ++ counting_malloc, ++ counting_realloc, ++ free, ++ }; ++ ++ const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; ++ const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; ++ const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; ++ ++ for (const int *leading = leading_list; *leading >= 0; leading++) { ++ for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { ++ for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { ++ // start by checking that the test looks reasonably valid ++ assert_true(*leading + *bigtoken <= document_length); ++ ++ // put 'x' everywhere; some will be overwritten by elements. ++ memset(document, 'x', document_length); ++ // maybe add an initial tag ++ if (*leading) { ++ assert_true(*leading >= 3); // or the test case is invalid ++ memcpy(document, "", 3); ++ } ++ // add the large token ++ document[*leading + 0] = '<'; ++ document[*leading + 1] = 'b'; ++ memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token ++ document[*leading + *bigtoken - 1] = '>'; ++ ++ // 1 for 'b', plus 1 or 0 depending on the presence of 'a' ++ const int expected_elem_total = 1 + (*leading ? 
1 : 0); ++ ++ XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); ++ assert_true(parser != NULL); ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ g_biggestAlloc = 0; ++ g_totalAlloc = 0; ++ int offset = 0; ++ // fill data until the big token is covered (but not necessarily parsed) ++ while (offset < *leading + *bigtoken) { ++ assert_true(offset + *fillsize <= document_length); ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ } ++ // Now, check that we've had a buffer allocation that could fit the ++ // context bytes and our big token. In order to detect a special case, ++ // we need to know how many bytes of our big token were included in the ++ // first push that contained _any_ bytes of the big token: ++ const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); ++ if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { ++ // Special case: we aren't saving any context, and the whole big token ++ // was covered by a single fill, so Expat may have parsed directly ++ // from our input pointer, without allocating an internal buffer. 
++ } else if (*leading < XML_CONTEXT_BYTES) { ++ assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); ++ } else { ++ assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); ++ } ++ // fill data until the big token is actually parsed ++ while (storage.count < expected_elem_total) { ++ const size_t alloc_before = g_totalAlloc; ++ assert_true(offset + *fillsize <= document_length); ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ // since all the bytes of the big token are already in the buffer, ++ // the bufsize ceiling should make us finish its parsing without any ++ // further buffer allocations. We assume that there will be no other ++ // large allocations in this test. ++ assert_true(g_totalAlloc - alloc_before < 4096); ++ } ++ // test-the-test: was our alloc even called? ++ assert_true(g_totalAlloc > 0); ++ // test-the-test: there shouldn't be any extra start elements ++ assert_true(storage.count == expected_elem_total); ++ ++ XML_ParserFree(parser); ++ } ++ } ++ } ++ free(document); ++} ++END_TEST ++ ++START_TEST(test_varying_buffer_fills) { ++ const int KiB = 1024; ++ const int MiB = 1024 * KiB; ++ const int document_length = 16 * MiB; ++ const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB ++ ++ char *const document = (char *)malloc(document_length); ++ assert_true(document != NULL); ++ memset(document, 'x', document_length); ++ document[0] = '<'; ++ document[1] = 't'; ++ memset(&document[2], ' ', big - 2); // a very spacy token ++ document[big - 1] = '>'; ++ ++ // Each testcase is a list of buffer fill sizes, terminated by a value < 0. ++ // When reparse deferral is enabled, the final (negated) value is the expected ++ // maximum number of bytes scanned in parse attempts. 
++ const int testcases[][30] = { ++ {8 * MiB, -8 * MiB}, ++ {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total ++ // zero-size fills shouldn't trigger the bypass ++ {4 * MiB, 0, 4 * MiB, -12 * MiB}, ++ {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, ++ {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, ++ // try to hit the buffer ceiling only once (at the end) ++ {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, ++ // try to hit the same buffer ceiling multiple times ++ {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, ++ ++ // try to hit every ceiling, by always landing 1K shy of the buffer size ++ {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, ++ 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, ++ ++ // try to avoid every ceiling, by always landing 1B past the buffer size ++ // the normal 2x heuristic threshold still forces parse attempts. ++ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 ++ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 ++ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 ++ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 ++ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 ++ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 ++ 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 ++ -(10 * MiB + 682 * KiB + 7)}, ++ // try to avoid every ceiling again, except on our last fill. 
++ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 ++ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 ++ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 ++ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 ++ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 ++ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 ++ 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 ++ -(10 * MiB + 682 * KiB + 6)}, ++ ++ // try to hit ceilings on the way multiple times ++ {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer ++ 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer ++ 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer ++ 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer ++ // we'll make a parse attempt at every parse call ++ -(45 * MiB + 12)}, ++ }; ++ const int testcount = sizeof(testcases) / sizeof(testcases[0]); ++ for (int test_i = 0; test_i < testcount; test_i++) { ++ const int *fillsize = testcases[test_i]; ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ g_parseAttempts = 0; ++ ++ CharData storage; ++ CharData_Init(&storage); ++ XML_SetUserData(parser, &storage); ++ XML_SetStartElementHandler(parser, start_element_event_handler); ++ ++ int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) ++ int scanned_bytes = 0; // sum of (buffered bytes at each actual parse) ++ int offset = 0; ++ while (*fillsize >= 0) { ++ assert_true(offset + *fillsize <= document_length); // or test is invalid ++ const unsigned attempts_before = g_parseAttempts; ++ const enum XML_Status status ++ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); ++ if (status != XML_STATUS_OK) { ++ xml_failure(parser); ++ } ++ offset += *fillsize; ++ fillsize++; ++ assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow ++ worstcase_bytes += offset; // we might've tried to 
parse all pending bytes ++ if (g_parseAttempts != attempts_before) { ++ assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse ++ assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow ++ scanned_bytes += offset; // we *did* try to parse all pending bytes ++ } ++ } ++ assert_true(storage.count == 1); // the big token should've been parsed ++ assert_true(scanned_bytes > 0); // test-the-test: does our counter work? ++ if (g_reparseDeferralEnabledDefault) { ++ // heuristic is enabled; some XML_Parse calls may have deferred reparsing ++ const int max_bytes_scanned = -*fillsize; ++ if (scanned_bytes > max_bytes_scanned) { ++ fprintf(stderr, ++ "bytes scanned in parse attempts: actual=%d limit=%d \n", ++ scanned_bytes, max_bytes_scanned); ++ fail("too many bytes scanned in parse attempts"); ++ } ++ assert_true(scanned_bytes <= worstcase_bytes); ++ } else { ++ // heuristic is disabled; every XML_Parse() will have reparsed ++ assert_true(scanned_bytes == worstcase_bytes); ++ } ++ ++ XML_ParserFree(parser); ++ } ++ free(document); ++} ++END_TEST ++ ++ + /* + * Namespaces tests. + */ +@@ -6902,13 +7529,13 @@ START_TEST(test_return_ns_triplet) { + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +- if (! triplet_start_flag) +- fail("triplet_start_checker not invoked"); + /* Check that unsetting "return triplets" fails while still parsing */ + XML_SetReturnNSTriplet(g_parser, XML_FALSE); + if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); ++ if (! triplet_start_flag) ++ fail("triplet_start_checker not invoked"); + if (! 
triplet_end_flag) + fail("triplet_end_checker not invoked"); + if (dummy_handler_flags +@@ -12219,6 +12846,7 @@ make_suite(void) { + #if defined(XML_CONTEXT_BYTES) + tcase_add_test(tc_basic, test_get_buffer_3_overflow); + #endif ++ tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len); + tcase_add_test(tc_basic, test_byte_info_at_end); + tcase_add_test(tc_basic, test_byte_info_at_error); + tcase_add_test(tc_basic, test_byte_info_at_cdata); +@@ -12337,7 +12965,14 @@ make_suite(void) { + tcase_add_test__ifdef_xml_dtd(tc_basic, + test_pool_integrity_with_unfinished_attr); + tcase_add_test(tc_basic, test_nested_entity_suspend); +- ++ tcase_add_test(tc_basic, test_big_tokens_take_linear_time); ++ tcase_add_test(tc_basic, test_set_reparse_deferral); ++ tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); ++ tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); ++ tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); ++ tcase_add_test(tc_basic, test_set_bad_reparse_option); ++ tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize); ++ tcase_add_test(tc_basic, test_varying_buffer_fills); + suite_add_tcase(s, tc_namespace); + tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown); + tcase_add_test(tc_namespace, test_return_ns_triplet); +diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c +index 471f2a2..7c62919 100644 +--- a/expat/xmlwf/xmlwf.c ++++ b/expat/xmlwf/xmlwf.c +@@ -914,6 +914,9 @@ usage(const XML_Char *prog, int rc) { + T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") + T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n") + T("\n") ++ T("reparse deferral:\n") ++ T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n") ++ T("\n") + T("info arguments:\n") + T(" -h show this [h]elp message and exit\n") + T(" -v show program's [v]ersion number and exit\n") +@@ -967,6 +970,8 @@ tmain(int argc, 
XML_Char **argv) { + unsigned long long attackThresholdBytes; + XML_Bool attackThresholdGiven = XML_FALSE; + ++ XML_Bool disableDeferral = XML_FALSE; ++ + int exitCode = XMLWF_EXIT_SUCCESS; + enum XML_ParamEntityParsing paramEntityParsing + = XML_PARAM_ENTITY_PARSING_NEVER; +@@ -1089,6 +1094,11 @@ tmain(int argc, XML_Char **argv) { + #endif + break; + } ++ case T('q'): { ++ disableDeferral = XML_TRUE; ++ j++; ++ break; ++ } + case T('\0'): + if (j > 1) { + i++; +@@ -1134,6 +1144,16 @@ tmain(int argc, XML_Char **argv) { + #endif + } + ++ if (disableDeferral) { ++ const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE); ++ if (! success) { ++ // This prevents tperror(..) from reporting misleading "[..]: Success" ++ errno = EINVAL; ++ tperror(T("Failed to disable reparse deferral")); ++ exit(XMLWF_EXIT_INTERNAL_ERROR); ++ } ++ } ++ + if (requireStandalone) + XML_SetNotStandaloneHandler(parser, notStandalone); + XML_SetParamEntityParsing(parser, paramEntityParsing); +diff --git a/expat/xmlwf/xmlwf_helpgen.py b/expat/xmlwf/xmlwf_helpgen.py +index c2a527f..1bd0a0a 100755 +--- a/expat/xmlwf/xmlwf_helpgen.py ++++ b/expat/xmlwf/xmlwf_helpgen.py +@@ -81,6 +81,10 @@ billion_laughs.add_argument('-a', metavar='FACTOR', + help='set maximum tolerated [a]mplification factor (default: 100.0)') + billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)') + ++reparse_deferral = parser.add_argument_group('reparse deferral') ++reparse_deferral.add_argument('-q', metavar='FACTOR', ++ help='disable reparse deferral, and allow [q]uadratic parse runtime with large tokens') ++ + parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)') + + info = parser.add_argument_group('info arguments') +diff --git a/testdata/largefiles/aaaaaa_attr.xml b/testdata/largefiles/aaaaaa_attr.xml +new file mode 100644 +index 0000000..66e3d25 +--- /dev/null ++++ 
b/testdata/largefiles/aaaaaa_attr.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_cdata.xml b/testdata/largefiles/aaaaaa_cdata.xml +new file mode 100644 +index 0000000..66f64bd +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_cdata.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_comment.xml b/testdata/largefiles/aaaaaa_comment.xml +new file mode 100644 +index 0000000..bb9af13 +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_comment.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_tag.xml b/testdata/largefiles/aaaaaa_tag.xml +new file mode 100644 +index 0000000..946f701 +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_tag.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file +diff --git a/testdata/largefiles/aaaaaa_text.xml b/testdata/largefiles/aaaaaa_text.xml +new file mode 100644 +index 0000000..e266acb +--- /dev/null ++++ b/testdata/largefiles/aaaaaa_text.xml +@@ -0,0 +1 @@ ++ +\ No newline at end of file diff -Nru expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-50602.patch expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-50602.patch --- expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-50602.patch 1970-01-01 00:00:00.000000000 +0000 +++ expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-50602.patch 2025-04-05 05:36:55.000000000 +0000 @@ -0,0 +1,108 @@ +commit 38905b99bb78a6a691ed8358f30030116783656c +Author: Tomas Korbar ACHARS +Date: Thu Nov 7 15:00:46 2024 +0100 + + Fix CVE-2024-50602 + + See https://github.com/libexpat/libexpat/pull/915 + +diff --git a/expat/lib/expat.h b/expat/lib/expat.h +index 842dd70..69b0ba1 100644 +--- a/expat/lib/expat.h ++++ b/expat/lib/expat.h +@@ -128,7 +128,9 @@ enum XML_Error { + /* Added in 2.3.0. */ + XML_ERROR_NO_BUFFER, + /* Added in 2.4.0. */ +- XML_ERROR_AMPLIFICATION_LIMIT_BREACH ++ XML_ERROR_AMPLIFICATION_LIMIT_BREACH, ++ /* Added in 2.6.4. 
*/ ++ XML_ERROR_NOT_STARTED, + }; + + enum XML_Content_Type { +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index e0c2873..8b2af91 100644 +--- a/expat/lib/xmlparse.c ++++ b/expat/lib/xmlparse.c +@@ -2193,6 +2193,9 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) { + if (parser == NULL) + return XML_STATUS_ERROR; + switch (parser->m_parsingStatus.parsing) { ++ case XML_INITIALIZED: ++ parser->m_errorCode = XML_ERROR_NOT_STARTED; ++ return XML_STATUS_ERROR; + case XML_SUSPENDED: + if (resumable) { + parser->m_errorCode = XML_ERROR_SUSPENDED; +@@ -2203,7 +2206,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) { + case XML_FINISHED: + parser->m_errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; +- default: ++ case XML_PARSING: + if (resumable) { + #ifdef XML_DTD + if (parser->m_isParamEntity) { +@@ -2214,6 +2217,9 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) { + parser->m_parsingStatus.parsing = XML_SUSPENDED; + } else + parser->m_parsingStatus.parsing = XML_FINISHED; ++ break; ++ default: ++ assert(0); + } + return XML_STATUS_OK; + } +@@ -2478,6 +2484,9 @@ XML_ErrorString(enum XML_Error code) { + case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: + return XML_L( + "limit on input amplification factor (from DTD and entities) breached"); ++ /* Added in 2.6.4. 
*/ ++ case XML_ERROR_NOT_STARTED: ++ return XML_L("parser not started"); + } + return NULL; + } +diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c +index ed88f9f..5769aa0 100644 +--- a/expat/tests/runtests.c ++++ b/expat/tests/runtests.c +@@ -8618,6 +8618,28 @@ START_TEST(test_misc_tag_mismatch_reset_leak) { + } + END_TEST + ++START_TEST(test_misc_resumeparser_not_crashing) { ++ XML_Parser parser = XML_ParserCreate(NULL); ++ XML_GetBuffer(parser, 1); ++ XML_StopParser(parser, /*resumable=*/XML_TRUE); ++ XML_ResumeParser(parser); // could crash here, previously ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++START_TEST(test_misc_stopparser_rejects_unstarted_parser) { ++ const XML_Bool cases[] = {XML_TRUE, XML_FALSE}; ++ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ const XML_Bool resumable = cases[i]; ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE); ++ assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR); ++ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NOT_STARTED); ++ XML_ParserFree(parser); ++ } ++} ++END_TEST ++ + static void + alloc_setup(void) { + XML_Memory_Handling_Suite memsuite = {duff_allocator, duff_reallocator, free}; +@@ -13025,6 +13047,8 @@ make_suite(void) { + tcase_add_test__ifdef_xml_dtd( + tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317); + tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak); ++ tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing); ++ tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser); + + suite_add_tcase(s, tc_alloc); + tcase_add_checked_fixture(tc_alloc, alloc_setup, alloc_teardown); diff -Nru expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-8176.patch expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-8176.patch --- expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-8176.patch 1970-01-01 00:00:00.000000000 +0000 +++ expat-2.5.0/debian/patches/expat-2.5.0-CVE-2024-8176.patch 
2025-04-05 05:36:55.000000000 +0000 @@ -0,0 +1,1535 @@ +commit c0de4903900004dd3ca91f246e5f6489a49a132b +Author: Tomas Korbar +Date: Mon Mar 24 10:04:33 2025 +0100 + + Fix CVE-2024-8176 + +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index 8b2af91..d68d2c8 100644 +--- a/expat/lib/xmlparse.c ++++ b/expat/lib/xmlparse.c +@@ -305,6 +305,10 @@ typedef struct { + const XML_Char *publicId; + const XML_Char *notation; + XML_Bool open; ++ XML_Bool hasMore; /* true if entity has not been completely processed */ ++ /* An entity can be open while being already completely processed (hasMore == ++ XML_FALSE). The reason is the delayed closing of entities until their inner ++ entities are processed and closed */ + XML_Bool is_param; + XML_Bool is_internal; /* true if declared in internal subset outside PE */ + } ENTITY; +@@ -395,6 +399,12 @@ typedef struct { + int *scaffIndex; + } DTD; + ++enum EntityType { ++ ENTITY_INTERNAL, ++ ENTITY_ATTRIBUTE, ++ ENTITY_VALUE, ++}; ++ + typedef struct open_internal_entity { + const char *internalEventPtr; + const char *internalEventEndPtr; +@@ -402,6 +412,7 @@ typedef struct open_internal_entity { + ENTITY *entity; + int startTagLevel; + XML_Bool betweenDecl; /* WFC: PE Between Declarations */ ++ enum EntityType type; + } OPEN_INTERNAL_ENTITY; + + enum XML_Account { +@@ -461,8 +472,8 @@ static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, + const char *next, const char **nextPtr, + XML_Bool haveMore, XML_Bool allowClosingDoctype, + enum XML_Account account); +-static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, +- XML_Bool betweenDecl); ++static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity, ++ XML_Bool betweenDecl, enum EntityType type); + static enum XML_Error doContent(XML_Parser parser, int startTagLevel, + const ENCODING *enc, const char *start, + const char *end, const char **endPtr, +@@ -492,16 +503,21 @@ static enum XML_Error storeAttributeValue(XML_Parser 
parser, const ENCODING *, + XML_Bool isCdata, const char *, + const char *, STRING_POOL *, + enum XML_Account account); +-static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, +- XML_Bool isCdata, const char *, +- const char *, STRING_POOL *, +- enum XML_Account account); ++static enum XML_Error ++appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, ++ const char *ptr, const char *end, STRING_POOL *pool, ++ enum XML_Account account, const char **nextPtr); + static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); + static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); + static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end, +- enum XML_Account account); ++ enum XML_Account account, ++ const char **nextPtr); ++static enum XML_Error callStoreEntityValue(XML_Parser parser, ++ const ENCODING *enc, ++ const char *start, const char *end, ++ enum XML_Account account); + static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); + static int reportComment(XML_Parser parser, const ENCODING *enc, +@@ -669,6 +685,10 @@ struct XML_ParserStruct { + const char *m_positionPtr; + OPEN_INTERNAL_ENTITY *m_openInternalEntities; + OPEN_INTERNAL_ENTITY *m_freeInternalEntities; ++ OPEN_INTERNAL_ENTITY *m_openAttributeEntities; ++ OPEN_INTERNAL_ENTITY *m_freeAttributeEntities; ++ OPEN_INTERNAL_ENTITY *m_openValueEntities; ++ OPEN_INTERNAL_ENTITY *m_freeValueEntities; + XML_Bool m_defaultExpandInternalEntities; + int m_tagLevel; + ENTITY *m_declEntity; +@@ -716,6 +736,7 @@ struct XML_ParserStruct { + ACCOUNTING m_accounting; + ENTITY_STATS m_entity_stats; + #endif ++ XML_Bool m_reenter; + }; + + #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +@@ -986,7 +1007,29 @@ callProcessor(XML_Parser parser, const char *start, const char *end, + } + 
} + g_parseAttempts += 1; +- const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); ++ // Run in a loop to eliminate dangerous recursion depths ++ enum XML_Error ret; ++ *endPtr = start; ++ while (1) { ++ // Use endPtr as the new start in each iteration, since it will ++ // be set to the next start point by m_processor. ++ ret = parser->m_processor(parser, *endPtr, end, endPtr); ++ ++ // Make parsing status (and in particular XML_SUSPENDED) take ++ // precedence over re-enter flag when they disagree ++ if (parser->m_parsingStatus.parsing != XML_PARSING) { ++ parser->m_reenter = XML_FALSE; ++ } ++ ++ if (! parser->m_reenter) { ++ break; ++ } ++ ++ parser->m_reenter = XML_FALSE; ++ if (ret != XML_ERROR_NONE) ++ return ret; ++ } ++ + if (ret == XML_ERROR_NONE) { + // if we consumed nothing, remember what we had on this parse attempt. + if (*endPtr == start) { +@@ -1097,6 +1140,8 @@ parserCreate(const XML_Char *encodingName, + parser->m_freeBindingList = NULL; + parser->m_freeTagList = NULL; + parser->m_freeInternalEntities = NULL; ++ parser->m_freeAttributeEntities = NULL; ++ parser->m_freeValueEntities = NULL; + + parser->m_groupSize = 0; + parser->m_groupConnector = NULL; +@@ -1199,6 +1244,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_eventEndPtr = NULL; + parser->m_positionPtr = NULL; + parser->m_openInternalEntities = NULL; ++ parser->m_openAttributeEntities = NULL; ++ parser->m_openValueEntities = NULL; + parser->m_defaultExpandInternalEntities = XML_TRUE; + parser->m_tagLevel = 0; + parser->m_tagStack = NULL; +@@ -1209,6 +1256,8 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_unknownEncodingData = NULL; + parser->m_parentParser = NULL; + parser->m_parsingStatus.parsing = XML_INITIALIZED; ++ // Reentry can only be triggered inside m_processor calls ++ parser->m_reenter = XML_FALSE; + #ifdef XML_DTD + parser->m_isParamEntity = XML_FALSE; + parser->m_useForeignDTD = XML_FALSE; +@@ 
-1268,6 +1317,24 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + } ++ /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but ++ * for attributes) */ ++ openEntityList = parser->m_openAttributeEntities; ++ while (openEntityList) { ++ OPEN_INTERNAL_ENTITY *openEntity = openEntityList; ++ openEntityList = openEntity->next; ++ openEntity->next = parser->m_freeAttributeEntities; ++ parser->m_freeAttributeEntities = openEntity; ++ } ++ /* move m_openValueEntities to m_freeValueEntities (i.e. same task but ++ * for value entities) */ ++ openEntityList = parser->m_openValueEntities; ++ while (openEntityList) { ++ OPEN_INTERNAL_ENTITY *openEntity = openEntityList; ++ openEntityList = openEntity->next; ++ openEntity->next = parser->m_freeValueEntities; ++ parser->m_freeValueEntities = openEntity; ++ } + moveToFreeBindingList(parser, parser->m_inheritedBindings); + FREE(parser, parser->m_unknownEncodingMem); + if (parser->m_unknownEncodingRelease) +@@ -1281,6 +1348,19 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { + return XML_TRUE; + } + ++static XML_Bool ++parserBusy(XML_Parser parser) { ++ switch (parser->m_parsingStatus.parsing) { ++ case XML_PARSING: ++ case XML_SUSPENDED: ++ return XML_TRUE; ++ case XML_INITIALIZED: ++ case XML_FINISHED: ++ default: ++ return XML_FALSE; ++ } ++} ++ + enum XML_Status XMLCALL + XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser == NULL) +@@ -1289,8 +1369,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { + XXX There's no way for the caller to determine which of the + XXX possible error cases caused the XML_STATUS_ERROR return. 
+ */ +- if (parser->m_parsingStatus.parsing == XML_PARSING +- || parser->m_parsingStatus.parsing == XML_SUSPENDED) ++ if (parserBusy(parser)) + return XML_STATUS_ERROR; + + /* Get rid of any previous encoding name */ +@@ -1527,7 +1606,34 @@ XML_ParserFree(XML_Parser parser) { + entityList = entityList->next; + FREE(parser, openEntity); + } +- ++ /* free m_openAttributeEntities and m_freeAttributeEntities */ ++ entityList = parser->m_openAttributeEntities; ++ for (;;) { ++ OPEN_INTERNAL_ENTITY *openEntity; ++ if (entityList == NULL) { ++ if (parser->m_freeAttributeEntities == NULL) ++ break; ++ entityList = parser->m_freeAttributeEntities; ++ parser->m_freeAttributeEntities = NULL; ++ } ++ openEntity = entityList; ++ entityList = entityList->next; ++ FREE(parser, openEntity); ++ } ++ /* free m_openValueEntities and m_freeValueEntities */ ++ entityList = parser->m_openValueEntities; ++ for (;;) { ++ OPEN_INTERNAL_ENTITY *openEntity; ++ if (entityList == NULL) { ++ if (parser->m_freeValueEntities == NULL) ++ break; ++ entityList = parser->m_freeValueEntities; ++ parser->m_freeValueEntities = NULL; ++ } ++ openEntity = entityList; ++ entityList = entityList->next; ++ FREE(parser, openEntity); ++ } + destroyBindings(parser->m_freeBindingList, parser); + destroyBindings(parser->m_inheritedBindings, parser); + poolDestroy(&parser->m_tempPool); +@@ -1569,8 +1675,7 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { + return XML_ERROR_INVALID_ARGUMENT; + #ifdef XML_DTD + /* block after XML_Parse()/XML_ParseBuffer() has been called */ +- if (parser->m_parsingStatus.parsing == XML_PARSING +- || parser->m_parsingStatus.parsing == XML_SUSPENDED) ++ if (parserBusy(parser)) + return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; + parser->m_useForeignDTD = useDTD; + return XML_ERROR_NONE; +@@ -1585,8 +1690,7 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { + if (parser == NULL) + return; + /* block after XML_Parse()/XML_ParseBuffer() has been called */ +- if 
(parser->m_parsingStatus.parsing == XML_PARSING +- || parser->m_parsingStatus.parsing == XML_SUSPENDED) ++ if (parserBusy(parser)) + return; + parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; + } +@@ -1855,8 +1959,7 @@ XML_SetParamEntityParsing(XML_Parser parser, + if (parser == NULL) + return 0; + /* block after XML_Parse()/XML_ParseBuffer() has been called */ +- if (parser->m_parsingStatus.parsing == XML_PARSING +- || parser->m_parsingStatus.parsing == XML_SUSPENDED) ++ if (parserBusy(parser)) + return 0; + #ifdef XML_DTD + parser->m_paramEntityParsing = peParsing; +@@ -1873,8 +1976,7 @@ XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { + if (parser->m_parentParser) + return XML_SetHashSalt(parser->m_parentParser, hash_salt); + /* block after XML_Parse()/XML_ParseBuffer() has been called */ +- if (parser->m_parsingStatus.parsing == XML_PARSING +- || parser->m_parsingStatus.parsing == XML_SUSPENDED) ++ if (parserBusy(parser)) + return 0; + parser->m_hash_secret_salt = hash_salt; + return 1; +@@ -2188,6 +2290,11 @@ XML_GetBuffer(XML_Parser parser, int len) { + return parser->m_bufferEnd; + } + ++static void ++triggerReenter(XML_Parser parser) { ++ parser->m_reenter = XML_TRUE; ++} ++ + enum XML_Status XMLCALL + XML_StopParser(XML_Parser parser, XML_Bool resumable) { + if (parser == NULL) +@@ -2659,8 +2766,9 @@ static enum XML_Error PTRCALL + contentProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + enum XML_Error result = doContent( +- parser, 0, parser->m_encoding, start, end, endPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); ++ parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end, ++ endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, ++ XML_ACCOUNT_DIRECT); + if (result == XML_ERROR_NONE) { + if (! 
storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; +@@ -2748,6 +2856,11 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start, + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; ++ case XML_PARSING: ++ if (parser->m_reenter) { ++ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE ++ } ++ /* Fall through */ + default: + start = next; + } +@@ -2921,7 +3034,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + reportDefault(parser, enc, s, next); + break; + } +- result = processInternalEntity(parser, entity, XML_FALSE); ++ result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL); + if (result != XML_ERROR_NONE) + return result; + } else if (parser->m_externalEntityRefHandler) { +@@ -3047,7 +3160,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + } + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { +- if (parser->m_parsingStatus.parsing == XML_SUSPENDED) ++ if (parser->m_parsingStatus.parsing == XML_SUSPENDED ++ || (parser->m_parsingStatus.parsing == XML_PARSING ++ && parser->m_reenter)) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); +@@ -3108,7 +3223,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + } + if ((parser->m_tagLevel == 0) + && (parser->m_parsingStatus.parsing != XML_FINISHED)) { +- if (parser->m_parsingStatus.parsing == XML_SUSPENDED) ++ if (parser->m_parsingStatus.parsing == XML_SUSPENDED ++ || (parser->m_parsingStatus.parsing == XML_PARSING ++ && parser->m_reenter)) + parser->m_processor = epilogProcessor; + else + return epilogProcessor(parser, next, end, nextPtr); +@@ -3241,14 +3358,22 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + break; + /* LCOV_EXCL_STOP */ + } +- *eventPP = s = next; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: ++ *eventPP = next; + *nextPtr = next; + return 
XML_ERROR_NONE; + case XML_FINISHED: ++ *eventPP = next; + return XML_ERROR_ABORTED; ++ case XML_PARSING: ++ if (parser->m_reenter) { ++ *nextPtr = next; ++ return XML_ERROR_NONE; ++ } ++ /* Fall through */ + default:; ++ *eventPP = s = next; + } + } + /* not reached */ +@@ -4165,14 +4290,21 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, + /* LCOV_EXCL_STOP */ + } + +- *eventPP = s = next; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: ++ *eventPP = next; + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: ++ *eventPP = next; + return XML_ERROR_ABORTED; ++ case XML_PARSING: ++ if (parser->m_reenter) { ++ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE ++ } ++ /* Fall through */ + default:; ++ *eventPP = s = next; + } + } + /* not reached */ +@@ -4504,7 +4636,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + } + /* found end of entity value - can store it now */ + return storeEntityValue(parser, parser->m_encoding, s, end, +- XML_ACCOUNT_DIRECT); ++ XML_ACCOUNT_DIRECT, NULL); + } else if (tok == XML_TOK_XML_DECL) { + enum XML_Error result; + result = processXmlDecl(parser, 0, start, next); +@@ -4631,7 +4763,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end, + break; + } + /* found end of entity value - can store it now */ +- return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); ++ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL); + } + start = next; + } +@@ -5069,9 +5201,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + break; + case XML_ROLE_ENTITY_VALUE: + if (dtd->keepProcessing) { +- enum XML_Error result +- = storeEntityValue(parser, enc, s + enc->minBytesPerChar, +- next - enc->minBytesPerChar, XML_ACCOUNT_NONE); ++ enum XML_Error result = callStoreEntityValue( ++ parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, ++ XML_ACCOUNT_NONE); + if 
(parser->m_declEntity) { + parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); + parser->m_declEntity->textLen +@@ -5467,7 +5599,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + enum XML_Error result; + XML_Bool betweenDecl + = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); +- result = processInternalEntity(parser, entity, betweenDecl); ++ result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL); + if (result != XML_ERROR_NONE) + return result; + handleDefault = XML_FALSE; +@@ -5672,6 +5804,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; ++ case XML_PARSING: ++ if (parser->m_reenter) { ++ *nextPtr = next; ++ return XML_ERROR_NONE; ++ } ++ /* Fall through */ + default: + s = next; + tok = XmlPrologTok(enc, s, end, &next); +@@ -5739,28 +5877,58 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, + default: + return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; + } +- parser->m_eventPtr = s = next; + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: ++ parser->m_eventPtr = next; + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: ++ parser->m_eventPtr = next; + return XML_ERROR_ABORTED; ++ case XML_PARSING: ++ if (parser->m_reenter) { ++ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE ++ } ++ /* Fall through */ + default:; ++ parser->m_eventPtr = s = next; + } + } + } + + static enum XML_Error +-processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { +- const char *textStart, *textEnd; +- const char *next; +- enum XML_Error result; +- OPEN_INTERNAL_ENTITY *openEntity; ++processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl, ++ enum EntityType type) { ++ OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList; ++ switch (type) { ++ case ENTITY_INTERNAL: ++ parser->m_processor = 
internalEntityProcessor; ++ openEntityList = &parser->m_openInternalEntities; ++ freeEntityList = &parser->m_freeInternalEntities; ++ break; ++ case ENTITY_ATTRIBUTE: ++ openEntityList = &parser->m_openAttributeEntities; ++ freeEntityList = &parser->m_freeAttributeEntities; ++ break; ++ case ENTITY_VALUE: ++ openEntityList = &parser->m_openValueEntities; ++ freeEntityList = &parser->m_freeValueEntities; ++ break; ++ /* default case serves merely as a safety net in case of a ++ * wrong entityType. Therefore we exclude the following lines ++ * from the test coverage. ++ * ++ * LCOV_EXCL_START ++ */ ++ default: ++ // Should not reach here ++ assert(0); ++ /* LCOV_EXCL_STOP */ ++ } + +- if (parser->m_freeInternalEntities) { +- openEntity = parser->m_freeInternalEntities; +- parser->m_freeInternalEntities = openEntity->next; ++ if (*freeEntityList) { ++ openEntity = *freeEntityList; ++ *freeEntityList = openEntity->next; + } else { + openEntity + = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); +@@ -5768,56 +5936,33 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { + return XML_ERROR_NO_MEMORY; + } + entity->open = XML_TRUE; ++ entity->hasMore = XML_TRUE; + #ifdef XML_DTD + entityTrackingOnOpen(parser, entity, __LINE__); + #endif + entity->processed = 0; +- openEntity->next = parser->m_openInternalEntities; +- parser->m_openInternalEntities = openEntity; ++ openEntity->next = *openEntityList; ++ *openEntityList = openEntity; + openEntity->entity = entity; ++ openEntity->type = type; + openEntity->startTagLevel = parser->m_tagLevel; + openEntity->betweenDecl = betweenDecl; + openEntity->internalEventPtr = NULL; + openEntity->internalEventEndPtr = NULL; +- textStart = (const char *)entity->textPtr; +- textEnd = (const char *)(entity->textPtr + entity->textLen); +- /* Set a safe default value in case 'next' does not get set */ +- next = textStart; +- +-#ifdef XML_DTD +- if (entity->is_param) { +- int tok +- = 
XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); +- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, +- tok, next, &next, XML_FALSE, XML_FALSE, +- XML_ACCOUNT_ENTITY_EXPANSION); +- } else +-#endif /* XML_DTD */ +- result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, +- textStart, textEnd, &next, XML_FALSE, +- XML_ACCOUNT_ENTITY_EXPANSION); +- +- if (result == XML_ERROR_NONE) { +- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { +- entity->processed = (int)(next - textStart); +- parser->m_processor = internalEntityProcessor; +- } else { +-#ifdef XML_DTD +- entityTrackingOnClose(parser, entity, __LINE__); +-#endif /* XML_DTD */ +- entity->open = XML_FALSE; +- parser->m_openInternalEntities = openEntity->next; +- /* put openEntity back in list of free instances */ +- openEntity->next = parser->m_freeInternalEntities; +- parser->m_freeInternalEntities = openEntity; +- } ++ // Only internal entities make use of the reenter flag ++ // therefore no need to set it for other entity types ++ if (type == ENTITY_INTERNAL) { ++ triggerReenter(parser); + } +- return result; ++ return XML_ERROR_NONE; + } + + static enum XML_Error PTRCALL + internalEntityProcessor(XML_Parser parser, const char *s, const char *end, + const char **nextPtr) { ++ UNUSED_P(s); ++ UNUSED_P(end); ++ UNUSED_P(nextPtr); + ENTITY *entity; + const char *textStart, *textEnd; + const char *next; +@@ -5827,72 +5972,63 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, + return XML_ERROR_UNEXPECTED_STATE; + + entity = openEntity->entity; +- textStart = ((const char *)entity->textPtr) + entity->processed; +- textEnd = (const char *)(entity->textPtr + entity->textLen); +- /* Set a safe default value in case 'next' does not get set */ +- next = textStart; +- +-#ifdef XML_DTD +- if (entity->is_param) { +- int tok +- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); +- result = 
doProlog(parser, parser->m_internalEncoding, textStart, textEnd, +- tok, next, &next, XML_FALSE, XML_TRUE, +- XML_ACCOUNT_ENTITY_EXPANSION); +- } else +-#endif /* XML_DTD */ +- result = doContent(parser, openEntity->startTagLevel, +- parser->m_internalEncoding, textStart, textEnd, &next, +- XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); +- +- if (result != XML_ERROR_NONE) +- return result; +- +- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { +- entity->processed = (int)(next - (const char *)entity->textPtr); ++ // This will return early ++ if (entity->hasMore) { ++ textStart = ((const char *)entity->textPtr) + entity->processed; ++ textEnd = (const char *)(entity->textPtr + entity->textLen); ++ /* Set a safe default value in case 'next' does not get set */ ++ next = textStart; ++ ++ if (entity->is_param) { ++ int tok ++ = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); ++ result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, ++ tok, next, &next, XML_FALSE, XML_FALSE, ++ XML_ACCOUNT_ENTITY_EXPANSION); ++ } else { ++ result = doContent(parser, openEntity->startTagLevel, ++ parser->m_internalEncoding, textStart, textEnd, &next, ++ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); ++ } ++ ++ if (result != XML_ERROR_NONE) ++ return result; ++ // Check if entity is complete, if not, mark down how much of it is ++ // processed ++ if (textEnd != next ++ && (parser->m_parsingStatus.parsing == XML_SUSPENDED ++ || (parser->m_parsingStatus.parsing == XML_PARSING ++ && parser->m_reenter))) { ++ entity->processed = (int)(next - (const char *)entity->textPtr); ++ return result; ++ } ++ // Entity is complete. 
We cannot close it here since we need to first ++ // process its possible inner entities (which are added to the ++ // m_openInternalEntities during doProlog or doContent calls above) ++ entity->hasMore = XML_FALSE; ++ triggerReenter(parser); + return result; +- } +- ++ } // End of entity processing, "if" block will return here ++ // Remove fully processed openEntity from open entity list. + #ifdef XML_DTD + entityTrackingOnClose(parser, entity, __LINE__); + #endif ++ // openEntity is m_openInternalEntities' head, as we set it at the start of ++ // this function and we skipped doProlog and doContent calls with hasMore set ++ // to false. This means we can directly remove the head of ++ // m_openInternalEntities ++ assert(parser->m_openInternalEntities == openEntity); + entity->open = XML_FALSE; +- parser->m_openInternalEntities = openEntity->next; ++ parser->m_openInternalEntities = parser->m_openInternalEntities->next; + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + +- // If there are more open entities we want to stop right here and have the +- // upcoming call to XML_ResumeParser continue with entity content, or it would +- // be ignored altogether. +- if (parser->m_openInternalEntities != NULL +- && parser->m_parsingStatus.parsing == XML_SUSPENDED) { +- return XML_ERROR_NONE; +- } +- +-#ifdef XML_DTD +- if (entity->is_param) { +- int tok; +- parser->m_processor = prologProcessor; +- tok = XmlPrologTok(parser->m_encoding, s, end, &next); +- return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, +- XML_ACCOUNT_DIRECT); +- } else +-#endif /* XML_DTD */ +- { +- parser->m_processor = contentProcessor; +- /* see externalEntityContentProcessor vs contentProcessor */ +- result = doContent(parser, parser->m_parentParser ? 1 : 0, +- parser->m_encoding, s, end, nextPtr, +- (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, +- XML_ACCOUNT_DIRECT); +- if (result == XML_ERROR_NONE) { +- if (! storeRawNames(parser)) +- return XML_ERROR_NO_MEMORY; +- } +- return result; ++ if (parser->m_openInternalEntities == NULL) { ++ parser->m_processor = entity->is_param ? prologProcessor : contentProcessor; + } ++ triggerReenter(parser); ++ return XML_ERROR_NONE; + } + + static enum XML_Error PTRCALL +@@ -5908,8 +6044,70 @@ static enum XML_Error + storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, STRING_POOL *pool, + enum XML_Account account) { +- enum XML_Error result +- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); ++ const char *next = ptr; ++ enum XML_Error result = XML_ERROR_NONE; ++ ++ while (1) { ++ if (! parser->m_openAttributeEntities) { ++ result = appendAttributeValue(parser, enc, isCdata, next, end, pool, ++ account, &next); ++ } else { ++ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities; ++ if (! openEntity) ++ return XML_ERROR_UNEXPECTED_STATE; ++ ++ ENTITY *const entity = openEntity->entity; ++ const char *const textStart ++ = ((const char *)entity->textPtr) + entity->processed; ++ const char *const textEnd ++ = (const char *)(entity->textPtr + entity->textLen); ++ /* Set a safe default value in case 'next' does not get set */ ++ const char *nextInEntity = textStart; ++ if (entity->hasMore) { ++ result = appendAttributeValue( ++ parser, parser->m_internalEncoding, isCdata, textStart, textEnd, ++ pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity); ++ if (result != XML_ERROR_NONE) ++ break; ++ // Check if entity is complete, if not, mark down how much of it is ++ // processed. A XML_SUSPENDED check here is not required as ++ // appendAttributeValue will never suspend the parser. ++ if (textEnd != nextInEntity) { ++ entity->processed ++ = (int)(nextInEntity - (const char *)entity->textPtr); ++ continue; ++ } ++ ++ // Entity is complete. 
We cannot close it here since we need to first ++ // process its possible inner entities (which are added to the ++ // m_openAttributeEntities during appendAttributeValue) ++ entity->hasMore = XML_FALSE; ++ continue; ++ } // End of entity processing, "if" block skips the rest ++ ++ // Remove fully processed openEntity from open entity list. ++#if XML_DTD == 1 ++ entityTrackingOnClose(parser, entity, __LINE__); ++#endif ++ // openEntity is m_openAttributeEntities' head, since we set it at the ++ // start of this function and because we skipped appendAttributeValue call ++ // with hasMore set to false. This means we can directly remove the head ++ // of m_openAttributeEntities ++ assert(parser->m_openAttributeEntities == openEntity); ++ entity->open = XML_FALSE; ++ parser->m_openAttributeEntities = parser->m_openAttributeEntities->next; ++ ++ /* put openEntity back in list of free instances */ ++ openEntity->next = parser->m_freeAttributeEntities; ++ parser->m_freeAttributeEntities = openEntity; ++ } ++ ++ // Break if an error occurred or there is nothing left to process ++ if (result || (parser->m_openAttributeEntities == NULL && end == next)) { ++ break; ++ } ++ } ++ + if (result) + return result; + if (! 
isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) +@@ -5922,7 +6120,7 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + static enum XML_Error + appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, STRING_POOL *pool, +- enum XML_Account account) { ++ enum XML_Account account, const char **nextPtr) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + #ifndef XML_DTD + UNUSED_P(account); +@@ -5940,6 +6138,9 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + #endif + switch (tok) { + case XML_TOK_NONE: ++ if (nextPtr) { ++ *nextPtr = next; ++ } + return XML_ERROR_NONE; + case XML_TOK_INVALID: + if (enc == parser->m_encoding) +@@ -6080,21 +6281,11 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + } else { + enum XML_Error result; +- const XML_Char *textEnd = entity->textPtr + entity->textLen; +- entity->open = XML_TRUE; +-#ifdef XML_DTD +- entityTrackingOnOpen(parser, entity, __LINE__); +-#endif +- result = appendAttributeValue(parser, parser->m_internalEncoding, +- isCdata, (const char *)entity->textPtr, +- (const char *)textEnd, pool, +- XML_ACCOUNT_ENTITY_EXPANSION); +-#ifdef XML_DTD +- entityTrackingOnClose(parser, entity, __LINE__); +-#endif +- entity->open = XML_FALSE; +- if (result) +- return result; ++ result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE); ++ if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) { ++ *nextPtr = next; ++ } ++ return result; + } + } break; + default: +@@ -6122,7 +6313,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + static enum XML_Error + storeEntityValue(XML_Parser parser, const ENCODING *enc, + const char *entityTextPtr, const char *entityTextEnd, +- enum XML_Account account) { ++ enum XML_Account account, const char **nextPtr) { + DTD 
*const dtd = parser->m_dtd; /* save one level of indirection */ + STRING_POOL *pool = &(dtd->entityValuePool); + enum XML_Error result = XML_ERROR_NONE; +@@ -6140,8 +6331,9 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, + return XML_ERROR_NO_MEMORY; + } + ++ const char *next; + for (;;) { +- const char *next ++ next + = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ + int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); + +@@ -6205,16 +6397,8 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, + } else + dtd->keepProcessing = dtd->standalone; + } else { +- entity->open = XML_TRUE; +- entityTrackingOnOpen(parser, entity, __LINE__); +- result = storeEntityValue( +- parser, parser->m_internalEncoding, (const char *)entity->textPtr, +- (const char *)(entity->textPtr + entity->textLen), +- XML_ACCOUNT_ENTITY_EXPANSION); +- entityTrackingOnClose(parser, entity, __LINE__); +- entity->open = XML_FALSE; +- if (result) +- goto endEntityValue; ++ result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE); ++ goto endEntityValue; + } + break; + } +@@ -6302,9 +6486,84 @@ endEntityValue: + #ifdef XML_DTD + parser->m_prologState.inEntityValue = oldInEntityValue; + #endif /* XML_DTD */ ++ // If 'nextPtr' is given, it should be updated during the processing ++ if (nextPtr != NULL) { ++ *nextPtr = next; ++ } + return result; + } + ++static enum XML_Error ++callStoreEntityValue(XML_Parser parser, const ENCODING *enc, ++ const char *entityTextPtr, const char *entityTextEnd, ++ enum XML_Account account) { ++ const char *next = entityTextPtr; ++ enum XML_Error result = XML_ERROR_NONE; ++ while (1) { ++ if (! parser->m_openValueEntities) { ++ result ++ = storeEntityValue(parser, enc, next, entityTextEnd, account, &next); ++ } else { ++ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities; ++ if (! 
openEntity) ++ return XML_ERROR_UNEXPECTED_STATE; ++ ++ ENTITY *const entity = openEntity->entity; ++ const char *const textStart ++ = ((const char *)entity->textPtr) + entity->processed; ++ const char *const textEnd ++ = (const char *)(entity->textPtr + entity->textLen); ++ /* Set a safe default value in case 'next' does not get set */ ++ const char *nextInEntity = textStart; ++ if (entity->hasMore) { ++ result = storeEntityValue(parser, parser->m_internalEncoding, textStart, ++ textEnd, XML_ACCOUNT_ENTITY_EXPANSION, ++ &nextInEntity); ++ if (result != XML_ERROR_NONE) ++ break; ++ // Check if entity is complete, if not, mark down how much of it is ++ // processed. A XML_SUSPENDED check here is not required as ++ // storeEntityValue will never suspend the parser. ++ if (textEnd != nextInEntity) { ++ entity->processed ++ = (int)(nextInEntity - (const char *)entity->textPtr); ++ continue; ++ } ++ ++ // Entity is complete. We cannot close it here since we need to first ++ // process its possible inner entities (which are added to the ++ // m_openValueEntities during storeEntityValue) ++ entity->hasMore = XML_FALSE; ++ continue; ++ } // End of entity processing, "if" block skips the rest ++ ++ // Remove fully processed openEntity from open entity list. ++# if XML_DTD == 1 ++ entityTrackingOnClose(parser, entity, __LINE__); ++# endif ++ // openEntity is m_openValueEntities' head, since we set it at the ++ // start of this function and because we skipped storeEntityValue call ++ // with hasMore set to false. 
This means we can directly remove the head ++ // of m_openValueEntities ++ assert(parser->m_openValueEntities == openEntity); ++ entity->open = XML_FALSE; ++ parser->m_openValueEntities = parser->m_openValueEntities->next; ++ ++ /* put openEntity back in list of free instances */ ++ openEntity->next = parser->m_freeValueEntities; ++ parser->m_freeValueEntities = openEntity; ++ } ++ ++ // Break if an error occurred or there is nothing left to process ++ if (result ++ || (parser->m_openValueEntities == NULL && entityTextEnd == next)) { ++ break; ++ } ++ } ++ ++ return result; ++} ++ + static void FASTCALL + normalizeLines(XML_Char *s) { + XML_Char *p; +diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c +index 5769aa0..5db384d 100644 +--- a/expat/tests/runtests.c ++++ b/expat/tests/runtests.c +@@ -47,6 +47,8 @@ + #endif + + #include ++#include ++#include // for SIZE_MAX + #include + #include + #include +@@ -1788,6 +1788,89 @@ START_TEST(test_wfc_no_recursive_entity_ + } + END_TEST + ++START_TEST(test_no_indirectly_recursive_entity_refs) { ++ struct TestCase { ++ const char *doc; ++ bool usesParameterEntities; ++ }; ++ ++ const struct TestCase cases[] = { ++ // general entity + character data ++ {"\n" ++ " \n" ++ "]>&e2;\n", ++ false}, ++ ++ // general entity + attribute value ++ {"\n" ++ " \n" ++ "]>\n", ++ false}, ++ ++ // parameter entity ++ {"\n" ++ " \n" ++ " \">\n" ++ " %define_g;\n" ++ "]>\n" ++ " \n", ++ true}, ++ }; ++ const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE}; ++ ++ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]); ++ j++) { ++ const XML_Bool reset_wanted = reset_or_not[j]; ++ const char *const doc = cases[i].doc; ++ const bool usesParameterEntities = cases[i].usesParameterEntities; ++ ++#ifdef XML_DTD // both GE and DTD ++ const bool rejection_expected = true; ++#else // neither DTD nor GE ++ const bool rejection_expected = false; ++#endif ++ ++ 
XML_Parser parser = XML_ParserCreate(NULL); ++ ++#ifdef XML_DTD ++ if (usesParameterEntities) { ++ assert_true( ++ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS) ++ == 1); ++ } ++#else ++ UNUSED_P(usesParameterEntities); ++#endif // XML_DTD ++ ++ const enum XML_Status status ++ = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), ++ /*isFinal*/ XML_TRUE); ++ ++ if (rejection_expected) { ++ assert_true(status == XML_STATUS_ERROR); ++ assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF); ++ } else { ++ assert_true(status == XML_STATUS_OK); ++ } ++ ++ if (reset_wanted) { ++ // This covers free'ing of (eventually) all three open entity lists by ++ // XML_ParserReset. ++ XML_ParserReset(parser, NULL); ++ } ++ ++ // This covers free'ing of (eventually) all three open entity lists by ++ // XML_ParserFree (unless XML_ParserReset has already done that above). ++ XML_ParserFree(parser); ++ } ++ } ++} ++END_TEST ++ + /* Test incomplete external entities are faulted */ + START_TEST(test_ext_entity_invalid_parse) { + const char *text = "\n" + "\n" + "%pe2;\n", +- external_entity_null_loader}; ++ external_entity_null_loader, NULL}; + + XML_SetUserData(g_parser, &test_data); + XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); +@@ -5265,7 +5357,7 @@ START_TEST(test_skipped_unloaded_ext_entity) { + = {"\n" + "\n" + "%pe2;\n", +- NULL}; ++ NULL, NULL}; + + XML_SetUserData(g_parser, &test_data); + XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); +@@ -6355,6 +6447,25 @@ accumulate_entity_decl(void *userData, const XML_Char *entityName, + CharData_AppendXMLChars(storage, XCS("\n"), 1); + } + ++typedef struct { ++ XML_Parser parser; ++ CharData *storage; ++} ParserPlusStorage; ++ ++static void XMLCALL ++accumulate_char_data_and_suspend(void *userData, const XML_Char *s, int len) { ++ ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData; ++ ++ 
CharData_AppendXMLChars(parserPlusStorage->storage, s, len); ++ ++ for (int i = 0; i < len; i++) { ++ if (s[i] == 'Z') { ++ XML_StopParser(parserPlusStorage->parser, /*resumable=*/XML_TRUE); ++ break; ++ } ++ } ++} ++ + START_TEST(test_utf16_pe) { + /* '> +@@ -6808,11 +6919,6 @@ START_TEST(test_pool_integrity_with_unfinished_attr) { + } + END_TEST + +-typedef struct { +- XML_Parser parser; +- CharData *storage; +-} ParserPlusStorage; +- + static void XMLCALL + accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) { + ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData; +@@ -6820,6 +6926,147 @@ accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) { + XML_StopParser(parserPlusStorage->parser, XML_TRUE); + } + ++/* Test a possible early return location in internalEntityProcessor */ ++START_TEST(test_entity_ref_no_elements) { ++ const char *const text = "\n" ++ "]> &e1;"; // intentionally missing newline ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) ++ == XML_STATUS_ERROR); ++ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS); ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++/* Tests if chained entity references lead to unbounded recursion */ ++START_TEST(test_deep_nested_entity) { ++ const size_t N_LINES = 60000; ++ const size_t SIZE_PER_LINE = 50; ++ ++ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); ++ if (text == NULL) { ++ fail("malloc failed"); ++ } ++ ++ char *textPtr = text; ++ ++ // Create the XML ++ textPtr += snprintf(textPtr, SIZE_PER_LINE, ++ "\n"); ++ ++ for (size_t i = 1; i < N_LINES; ++i) { ++ textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n", ++ (long unsigned)i, (long unsigned)(i - 1)); ++ } ++ ++ snprintf(textPtr, SIZE_PER_LINE, "]> &s%lu; \n", ++ (long unsigned)(N_LINES - 1)); ++ ++ const XML_Char *const expected = XCS("deepText"); ++ ++ CharData storage; ++ 
CharData_Init(&storage); ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ XML_SetCharacterDataHandler(parser, accumulate_characters); ++ XML_SetUserData(parser, &storage); ++ ++ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(parser); ++ ++ CharData_CheckXMLChars(&storage, expected); ++ XML_ParserFree(parser); ++ free(text); ++} ++END_TEST ++ ++/* Tests if chained entity references in attributes ++lead to unbounded recursion */ ++START_TEST(test_deep_nested_attribute_entity) { ++ const size_t N_LINES = 60000; ++ const size_t SIZE_PER_LINE = 100; ++ ++ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); ++ if (text == NULL) { ++ fail("malloc failed"); ++ } ++ ++ char *textPtr = text; ++ ++ // Create the XML ++ textPtr += snprintf(textPtr, SIZE_PER_LINE, ++ "\n"); ++ ++ for (size_t i = 1; i < N_LINES; ++i) { ++ textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n", ++ (long unsigned)i, (long unsigned)(i - 1)); ++ } ++ ++ snprintf(textPtr, SIZE_PER_LINE, "]>mainText \n", ++ (long unsigned)(N_LINES - 1)); ++ ++ AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}}; ++ ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}}; ++ info[0].attributes = doc_info; ++ ++ XML_SetStartElementHandler(g_parser, counting_start_element_handler); ++ XML_SetUserData(g_parser, &info); ++ ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(g_parser); ++ ++ free(text); ++} ++END_TEST ++ ++START_TEST(test_deep_nested_entity_delayed_interpretation) { ++ const size_t N_LINES = 70000; ++ const size_t SIZE_PER_LINE = 100; ++ ++ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE); ++ if (text == NULL) { ++ fail("malloc failed"); ++ } ++ ++ char *textPtr = text; ++ ++ // Create the XML ++ textPtr += snprintf(textPtr, SIZE_PER_LINE, ++ "\n"); ++ ++ for (size_t i = 1; i < N_LINES; ++i) { ++ textPtr += 
snprintf(textPtr, SIZE_PER_LINE, ++ " \n", (long unsigned)i, ++ (long unsigned)(i - 1)); ++ } ++ ++ snprintf(textPtr, SIZE_PER_LINE, ++ " \">\n" ++ " %%define_g;\n" ++ "]>\n" ++ "\n", ++ (long unsigned)(N_LINES - 1)); ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); ++ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(parser); ++ ++ XML_ParserFree(parser); ++ free(text); ++} ++END_TEST ++ + START_TEST(test_nested_entity_suspend) { + const char *const text = "'>\n" +@@ -6850,6 +7097,35 @@ START_TEST(test_nested_entity_suspend) { + } + END_TEST + ++START_TEST(test_nested_entity_suspend_2) { ++ const char *const text = "\n" ++ " \n" ++ " \n" ++ "]>\n" ++ " &ge3; "; ++ const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1") ++ XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3"); ++ CharData storage; ++ CharData_Init(&storage); ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ParserPlusStorage parserPlusStorage = {parser, &storage}; ++ ++ XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend); ++ XML_SetUserData(parser, &parserPlusStorage); ++ ++ enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); ++ while (status == XML_STATUS_SUSPENDED) { ++ status = XML_ResumeParser(parser); ++ } ++ if (status != XML_STATUS_OK) ++ xml_failure(parser); ++ ++ CharData_CheckXMLChars(&storage, expected); ++ XML_ParserFree(parser); ++} ++END_TEST ++ + /* Regression test for quadratic parsing on large tokens */ + START_TEST(test_big_tokens_take_linear_time) { + const char *const too_slow_failure_message +@@ -8256,6 +8532,29 @@ duff_reallocator(void *ptr, size_t size) { + return realloc(ptr, size); + } + ++// Portable remake of strndup(3) for C99; does not care about space efficiency ++static char * ++portable_strndup(const char *s, size_t n) { ++ if ((s == NULL) || (n == SIZE_MAX)) { ++ errno = EINVAL; 
++ return NULL; ++ } ++ ++ char *const buffer = (char *)malloc(n + 1); ++ if (buffer == NULL) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ ++ errno = 0; ++ ++ memcpy(buffer, s, n); ++ ++ buffer[n] = '\0'; ++ ++ return buffer; ++} ++ + /* Test that a failure to allocate the parser structure fails gracefully */ + START_TEST(test_misc_alloc_create_parser) { + XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free}; +@@ -8630,7 +8929,7 @@ END_TEST + START_TEST(test_misc_stopparser_rejects_unstarted_parser) { + const XML_Bool cases[] = {XML_TRUE, XML_FALSE}; + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { +- const XML_Bool resumable = cases[i]; ++ resumable = cases[i]; + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE); + assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR); +@@ -8640,6 +8939,105 @@ START_TEST(test_misc_stopparser_rejects_unstarted_parser) { + } + END_TEST + ++/* Adaptation of accumulate_characters that takes ExtHdlrData input to work with ++ * test_renter_loop_finite_content below */ ++static void XMLCALL ++accumulate_characters_ext_handler(void *userData, const XML_Char *s, int len) { ++ ExtHdlrData *const test_data = (ExtHdlrData *)userData; ++ CharData_AppendXMLChars(test_data->storage, s, len); ++} ++ ++/* Test that internalEntityProcessor does not re-enter forever; ++ * based on files tests/xmlconf/xmltest/valid/ext-sa/012.{xml,ent} */ ++START_TEST(test_renter_loop_finite_content) { ++ CharData storage; ++ CharData_Init(&storage); ++ const char *const text = "\n" ++ "\n" ++ "\n" ++ "\n" ++ "\n" ++ "\n" ++ "]>\n" ++ "&e1; \n"; ++ ExtHdlrData test_data = {"&e4;\n", external_entity_null_loader, &storage}; ++ const XML_Char *const expected = XCS("(e5)\n"); ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ assert_true(parser != NULL); ++ XML_SetUserData(parser, &test_data); ++ XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader); ++ 
XML_SetCharacterDataHandler(parser, accumulate_characters_ext_handler); ++ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(parser); ++ ++ CharData_CheckXMLChars(&storage, expected); ++ XML_ParserFree(parser); ++} ++END_TEST ++ ++// Inspired by function XML_OriginalString of Perl's XML::Parser ++static char * ++dup_original_string(XML_Parser parser) { ++ const int byte_count = XML_GetCurrentByteCount(parser); ++ ++ assert_true(byte_count >= 0); ++ ++ int offset = -1; ++ int size = -1; ++ ++ const char *const context = XML_GetInputContext(parser, &offset, &size); ++ ++#if XML_CONTEXT_BYTES > 0 ++ assert_true(context != NULL); ++ assert_true(offset >= 0); ++ assert_true(size >= 0); ++ return portable_strndup(context + offset, byte_count); ++#else ++ assert_true(context == NULL); ++ return NULL; ++#endif ++} ++ ++static void ++on_characters_issue_980(void *userData, const XML_Char *s, int len) { ++ (void)s; ++ (void)len; ++ XML_Parser parser = (XML_Parser)userData; ++ ++ char *const original_string = dup_original_string(parser); ++ ++#if XML_CONTEXT_BYTES > 0 ++ assert_true(original_string != NULL); ++ assert_true(strcmp(original_string, "&draft.day;") == 0); ++ free(original_string); ++#else ++ assert_true(original_string == NULL); ++#endif ++} ++ ++START_TEST(test_misc_expected_event_ptr_issue_980) { ++ // NOTE: This is a tiny subset of sample "REC-xml-19980210.xml" ++ // from Perl's XML::Parser ++ const char *const doc = "\n" ++ "]>\n" ++ "&draft.day; \n"; ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ XML_SetUserData(parser, parser); ++ XML_SetCharacterDataHandler(parser, on_characters_issue_980); ++ ++ assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), ++ /*isFinal=*/XML_TRUE) ++ == XML_STATUS_OK); ++ ++ XML_ParserFree(parser); ++} ++END_TEST ++ + static void + alloc_setup(void) { + XML_Memory_Handling_Suite memsuite = {duff_allocator, duff_reallocator, free}; +@@ -9218,6 +9616,31 
@@ START_TEST(test_alloc_internal_entity) { + } + END_TEST + ++START_TEST(test_alloc_parameter_entity) { ++ const char *text = "\">" ++ "%param1;" ++ "]>&internal;content "; ++ int i; ++ const int alloc_test_max_repeats = 30; ++ ++ for (i = 0; i < alloc_test_max_repeats; i++) { ++ allocation_count = i; ++ XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) ++ != XML_STATUS_ERROR) ++ break; ++ alloc_teardown(); ++ alloc_setup(); ++ } ++ allocation_count = -1; ++ if (i == 0) ++ fail("Parameter entity processed despite duff allocator"); ++ if (i == alloc_test_max_repeats) ++ fail("Parameter entity not processed at max allocation count"); ++} ++END_TEST ++ + /* Test the robustness against allocation failure of element handling + * Based on test_dtd_default_handling(). + */ +@@ -12808,6 +13231,7 @@ make_suite(void) { + test_wfc_undeclared_entity_with_external_subset_standalone); + tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); + tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs); ++ tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); +@@ -12986,7 +13410,13 @@ make_suite(void) { + tcase_add_test(tc_basic, test_empty_element_abort); + tcase_add_test__ifdef_xml_dtd(tc_basic, + test_pool_integrity_with_unfinished_attr); ++ tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_ref_no_elements); ++ tcase_add_test__ifdef_xml_dtd(tc_basic, test_deep_nested_entity); ++ tcase_add_test__ifdef_xml_dtd(tc_basic, test_deep_nested_attribute_entity); ++ tcase_add_test__ifdef_xml_dtd(tc_basic, ++ test_deep_nested_entity_delayed_interpretation); + tcase_add_test(tc_basic, test_nested_entity_suspend); ++ 
tcase_add_test__ifdef_xml_dtd(tc_basic, test_nested_entity_suspend_2); + tcase_add_test(tc_basic, test_big_tokens_take_linear_time); + tcase_add_test(tc_basic, test_set_reparse_deferral); + tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); +@@ -13049,6 +13479,8 @@ make_suite(void) { + tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak); + tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing); + tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser); ++ tcase_add_test__ifdef_xml_dtd(tc_misc, test_renter_loop_finite_content); ++ tcase_add_test(tc_misc, test_misc_expected_event_ptr_issue_980); + + suite_add_tcase(s, tc_alloc); + tcase_add_checked_fixture(tc_alloc, alloc_setup, alloc_teardown); +@@ -13065,6 +13497,7 @@ make_suite(void) { + tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_external_entity); + tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_ext_entity_set_encoding); + tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_internal_entity); ++ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_parameter_entity); + tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_dtd_default_handling); + tcase_add_test(tc_alloc, test_alloc_explicit_encoding); + tcase_add_test(tc_alloc, test_alloc_set_base); diff -Nru expat-2.5.0/debian/patches/series expat-2.5.0/debian/patches/series --- expat-2.5.0/debian/patches/series 2024-09-08 06:44:19.000000000 +0000 +++ expat-2.5.0/debian/patches/series 2025-04-05 05:36:55.000000000 +0000 @@ -3,3 +3,6 @@ CVE-2024-45490.patch CVE-2024-45491.patch CVE-2024-45492.patch +expat-2.5.0-CVE-2023-52425.patch +expat-2.5.0-CVE-2024-50602.patch +expat-2.5.0-CVE-2024-8176.patch