untrusted comment: verify with openbsd-69-base.pub RWQQsAemppS46H0Sj070OqmhEOmtbq6yaVzeWlkytjFiZh9uWNVQMAJHh5XP13jT2kwkLjOcZeYOWi9gNfywbIunz8U+ZF4hIAA= OpenBSD 6.9 errata 031, February 24, 2022: Fix five security issues in libexpat related to encoding, stack exhaustion, and integer overflow. Apply by doing: signify -Vep /etc/signify/openbsd-69-base.pub -x 031_expat.patch.sig \ -m - | (cd /usr/src && patch -p0) And then rebuild and install libexpat: cd /usr/src/lib/libexpat make obj make make install Index: lib/libexpat/Changes =================================================================== RCS file: /cvs/src/lib/libexpat/Changes,v retrieving revision 1.15.2.2 diff -u -p -r1.15.2.2 Changes --- lib/libexpat/Changes 1 Feb 2022 10:53:25 -0000 1.15.2.2 +++ lib/libexpat/Changes 21 Feb 2022 18:59:06 -0000 @@ -2,6 +2,59 @@ NOTE: We are looking for help with a few https://github.com/libexpat/libexpat/labels/help%20wanted If you can help, please get in touch. Thanks! + Bug fixes: + #566 Fix a regression introduced by the fix for CVE-2022-25313 + in release 2.4.5 that affects applications that (1) + call function XML_SetElementDeclHandler and (2) are + parsing XML that contains nested element declarations + (e.g. ""). + + Special thanks to: + Matt Sergeant + Samanta Navarro + Sergei Trofimovich + and + NixOS + Perl XML::Parser + + Security fixes: + #562 CVE-2022-25235 -- Passing malformed 2- and 3-byte UTF-8 + sequences (e.g. from start tag names) to the XML + processing application on top of Expat can cause + arbitrary damage (e.g. code execution) depending + on how invalid UTF-8 is handled inside the XML + processor; validation was not their job but Expat's. + Exploits with code execution are known to exist. + #561 CVE-2022-25236 -- Passing (one or more) namespace separator + characters in "xmlns[:prefix]" attribute values + made Expat send malformed tag names to the XML + processor on top of Expat which can cause + arbitrary damage (e.g. code execution) depending + on such unexpectable cases are handled inside the XML + processor; validation was not their job but Expat's. + Exploits with code execution are known to exist. + #558 CVE-2022-25313 -- Fix stack exhaustion in doctype parsing + that could be triggered by e.g. a 2 megabytes + file with a large number of opening braces. + Expected impact is denial of service or potentially + arbitrary code execution. + #560 CVE-2022-25314 -- Fix integer overflow in function copyString; + only affects the encoding name parameter at parser creation + time which is often hardcoded (rather than user input), + takes a value in the gigabytes to trigger, and a 64-bit + machine. Expected impact is denial of service. + #559 CVE-2022-25315 -- Fix integer overflow in function storeRawNames; + needs input in the gigabytes and a 64-bit machine. + Expected impact is denial of service or potentially + arbitrary code execution. + + Special thanks to: + Ivan Fratric + Samanta Navarro + and + Google Project Zero + JetBrains + Security fixes: #550 CVE-2022-23852 -- Fix signed integer overflow (undefined behavior) in function XML_GetBuffer Index: lib/libexpat/lib/xmlparse.c =================================================================== RCS file: /cvs/src/lib/libexpat/lib/xmlparse.c,v retrieving revision 1.26.2.2 diff -u -p -r1.26.2.2 xmlparse.c --- lib/libexpat/lib/xmlparse.c 1 Feb 2022 10:53:25 -0000 1.26.2.2 +++ lib/libexpat/lib/xmlparse.c 21 Feb 2022 18:59:07 -0000 @@ -640,8 +640,7 @@ XML_ParserCreate(const XML_Char *encodin XML_Parser XMLCALL XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { - XML_Char tmp[2]; - *tmp = nsSep; + XML_Char tmp[2] = {nsSep, 0}; return XML_ParserCreate_MM(encodingName, NULL, tmp); } @@ -1059,8 +1058,7 @@ XML_ExternalEntityParserCreate(XML_Parse would be otherwise. */ if (parser->m_ns) { - XML_Char tmp[2]; - *tmp = parser->m_namespaceSeparator; + XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); } else { parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); @@ -2230,6 +2228,7 @@ storeRawNames(XML_Parser parser) { while (tag) { int bufSize; int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + size_t rawNameLen; char *rawNameBuf = tag->buf + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything @@ -2241,7 +2240,11 @@ storeRawNames(XML_Parser parser) { /* For re-use purposes we need to ensure that the size of tag->buf is a multiple of sizeof(XML_Char). */ - bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + /* Detect and prevent integer overflow. */ + if (rawNameLen > (size_t)INT_MAX - nameLen) + return XML_FALSE; + bufSize = nameLen + (int)rawNameLen; if (bufSize > tag->bufEnd - tag->buf) { char *temp = (char *)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) @@ -3387,6 +3390,17 @@ addBinding(XML_Parser parser, PREFIX *pr if (! mustBeXML && isXMLNS && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; + + // NOTE: While Expat does not validate namespace URIs against RFC 3986, + // we have to at least make sure that the XML processor on top of + // Expat (that is splitting tag names by namespace separator into + // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused + // by an attacker putting additional namespace separator characters + // into namespace declarations. That would be ambiguous and not to + // be expected. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { + return XML_ERROR_SYNTAX; + } } isXML = isXML && len == xmlLen; isXMLNS = isXMLNS && len == xmlnsLen; @@ -6807,44 +6821,15 @@ nextScaffoldPart(XML_Parser parser) { return next; } -static void -build_node(XML_Parser parser, int src_node, XML_Content *dest, - XML_Content **contpos, XML_Char **strpos) { - DTD *const dtd = parser->m_dtd; /* save one level of indirection */ - dest->type = dtd->scaffold[src_node].type; - dest->quant = dtd->scaffold[src_node].quant; - if (dest->type == XML_CTYPE_NAME) { - const XML_Char *src; - dest->name = *strpos; - src = dtd->scaffold[src_node].name; - for (;;) { - *(*strpos)++ = *src; - if (! *src) - break; - src++; - } - dest->numchildren = 0; - dest->children = NULL; - } else { - unsigned int i; - int cn; - dest->numchildren = dtd->scaffold[src_node].childcnt; - dest->children = *contpos; - *contpos += dest->numchildren; - for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren; - i++, cn = dtd->scaffold[cn].nextsib) { - build_node(parser, cn, &(dest->children[i]), contpos, strpos); - } - dest->name = NULL; - } -} - static XML_Content * build_model(XML_Parser parser) { + /* Function build_model transforms the existing parser->m_dtd->scaffold + * array of CONTENT_SCAFFOLD tree nodes into a new array of + * XML_Content tree nodes followed by a gapless list of zero-terminated + * strings. */ DTD *const dtd = parser->m_dtd; /* save one level of indirection */ XML_Content *ret; - XML_Content *cpos; - XML_Char *str; + XML_Char *str; /* the current string writing location */ /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning @@ -6870,10 +6855,96 @@ build_model(XML_Parser parser) { if (! ret) return NULL; - str = (XML_Char *)(&ret[dtd->scaffCount]); - cpos = &ret[1]; + /* What follows is an iterative implementation (of what was previously done + * recursively in a dedicated function called "build_node". The old recursive + * build_node could be forced into stack exhaustion from input as small as a + * few megabyte, and so that was a security issue. Hence, a function call + * stack is avoided now by resolving recursion.) + * + * The iterative approach works as follows: + * + * - We have two writing pointers, both walking up the result array; one does + * the work, the other creates "jobs" for its colleague to do, and leads + * the way: + * + * - The faster one, pointer jobDest, always leads and writes "what job + * to do" by the other, once they reach that place in the + * array: leader "jobDest" stores the source node array index (relative + * to array dtd->scaffold) in field "numchildren". + * + * - The slower one, pointer dest, looks at the value stored in the + * "numchildren" field (which actually holds a source node array index + * at that time) and puts the real data from dtd->scaffold in. + * + * - Before the loop starts, jobDest writes source array index 0 + * (where the root node is located) so that dest will have something to do + * when it starts operation. + * + * - Whenever nodes with children are encountered, jobDest appends + * them as new jobs, in order. As a result, tree node siblings are + * adjacent in the resulting array, for example: + * + * [0] root, has two children + * [1] first child of 0, has three children + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * [2] second child of 0, does not have children + * + * Or (the same data) presented in flat array view: + * + * [0] root, has two children + * + * [1] first child of 0, has three children + * [2] second child of 0, does not have children + * + * [3] first child of 1, does not have children + * [4] second child of 1, does not have children + * [5] third child of 1, does not have children + * + * - The algorithm repeats until all target array indices have been processed. + */ + XML_Content *dest = ret; /* tree node writing location, moves upwards */ + XML_Content *const destLimit = &ret[dtd->scaffCount]; + XML_Content *jobDest = ret; /* next free writing location in target array */ + str = (XML_Char *)&ret[dtd->scaffCount]; + + /* Add the starting job, the root node (index 0) of the source tree */ + (jobDest++)->numchildren = 0; + + for (; dest < destLimit; dest++) { + /* Retrieve source tree array index from job storage */ + const int src_node = (int)dest->numchildren; + + /* Convert item */ + dest->type = dtd->scaffold[src_node].type; + dest->quant = dtd->scaffold[src_node].quant; + if (dest->type == XML_CTYPE_NAME) { + const XML_Char *src; + dest->name = str; + src = dtd->scaffold[src_node].name; + for (;;) { + *str++ = *src; + if (! *src) + break; + src++; + } + dest->numchildren = 0; + dest->children = NULL; + } else { + unsigned int i; + int cn; + dest->name = NULL; + dest->numchildren = dtd->scaffold[src_node].childcnt; + dest->children = jobDest; + + /* Append scaffold indices of children to array */ + for (i = 0, cn = dtd->scaffold[src_node].firstchild; + i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) + (jobDest++)->numchildren = (unsigned int)cn; + } + } - build_node(parser, 0, ret, &cpos, &str); return ret; } @@ -6902,7 +6973,7 @@ getElementType(XML_Parser parser, const static XML_Char * copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { - int charsRequired = 0; + size_t charsRequired = 0; XML_Char *result; /* First determine how long the string is */ Index: lib/libexpat/lib/xmltok.c =================================================================== RCS file: /cvs/src/lib/libexpat/lib/xmltok.c,v retrieving revision 1.11 diff -u -p -r1.11 xmltok.c --- lib/libexpat/lib/xmltok.c 29 Dec 2020 16:59:42 -0000 1.11 +++ lib/libexpat/lib/xmltok.c 21 Feb 2022 18:59:07 -0000 @@ -8,6 +8,7 @@ Copyright (c) 1997-2000 Thai Open Source Software Center Ltd Copyright (c) 2000-2017 Expat development team + Copyright (c) 2016-2022 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -94,11 +95,6 @@ << 3) \ + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \ & (1u << (((byte)[2]) & 0x1F))) - -#define UTF8_GET_NAMING(pages, p, n) \ - ((n) == 2 \ - ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ - : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0)) /* Detection of invalid UTF-8 sequences is based on Table 3.1B of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ Index: lib/libexpat/lib/xmltok_impl.c =================================================================== RCS file: /cvs/src/lib/libexpat/lib/xmltok_impl.c,v retrieving revision 1.14 diff -u -p -r1.14 xmltok_impl.c --- lib/libexpat/lib/xmltok_impl.c 29 Dec 2020 16:59:42 -0000 1.14 +++ lib/libexpat/lib/xmltok_impl.c 21 Feb 2022 18:59:07 -0000 @@ -8,6 +8,7 @@ Copyright (c) 1997-2000 Thai Open Source Software Center Ltd Copyright (c) 2000-2017 Expat development team + Copyright (c) 2016-2022 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -61,7 +62,7 @@ case BT_LEAD##n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ - if (! IS_NAME_CHAR(enc, ptr, n)) { \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ @@ -90,7 +91,7 @@ case BT_LEAD##n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ - if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \ + if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ @@ -1134,6 +1135,10 @@ PREFIX(prologTok)(const ENCODING *enc, c case BT_LEAD##n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NAME; \ @@ -1262,7 +1267,7 @@ PREFIX(attributeValueTok)(const ENCODING switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ break; LEAD_CASE(2) LEAD_CASE(3) @@ -1331,7 +1336,7 @@ PREFIX(entityValueTok)(const ENCODING *e switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ break; LEAD_CASE(2) LEAD_CASE(3) @@ -1510,7 +1515,7 @@ PREFIX(getAtts)(const ENCODING *enc, con state = inName; \ } # define LEAD_CASE(n) \ - case BT_LEAD##n: \ + case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ START_NAME ptr += (n - MINBPC(enc)); \ break; LEAD_CASE(2) @@ -1722,7 +1727,7 @@ PREFIX(nameLength)(const ENCODING *enc, switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ break; LEAD_CASE(2) LEAD_CASE(3) @@ -1767,7 +1772,7 @@ PREFIX(updatePosition)(const ENCODING *e switch (BYTE_TYPE(enc, ptr)) { # define LEAD_CASE(n) \ case BT_LEAD##n: \ - ptr += n; \ + ptr += n; /* NOTE: The encoding has already been validated. */ \ pos->columnNumber++; \ break; LEAD_CASE(2) Index: lib/libexpat/tests/runtests.c =================================================================== RCS file: /cvs/src/lib/libexpat/tests/runtests.c,v retrieving revision 1.10.2.1 diff -u -p -r1.10.2.1 runtests.c --- lib/libexpat/tests/runtests.c 1 Feb 2022 10:53:25 -0000 1.10.2.1 +++ lib/libexpat/tests/runtests.c 21 Feb 2022 18:59:08 -0000 @@ -2677,6 +2677,82 @@ START_TEST(test_dtd_elements) { } END_TEST +static void XMLCALL +element_decl_check_model(void *userData, const XML_Char *name, + XML_Content *model) { + UNUSED_P(userData); + uint32_t errorFlags = 0; + + /* Expected model array structure is this: + * [0] (type 6, quant 0) + * [1] (type 5, quant 0) + * [3] (type 4, quant 0, name "bar") + * [4] (type 4, quant 0, name "foo") + * [5] (type 4, quant 3, name "xyz") + * [2] (type 4, quant 2, name "zebra") + */ + errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); + errorFlags |= ((model != NULL) ? 0 : (1u << 1)); + + errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); + errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); + errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); + errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); + errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); + + errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); + errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); + errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); + errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); + errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); + + errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); + errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); + errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14)); + errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); + errorFlags |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); + + errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); + errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); + errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); + errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); + errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); + + errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); + errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); + errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); + errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); + errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); + + errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); + errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); + errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); + errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); + errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31)); + + XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); + XML_FreeContentModel(g_parser, model); +} + +START_TEST(test_dtd_elements_nesting) { + // Payload inspired by a test in Perl's XML::Parser + const char *text = "\n" + "]>\n" + ""; + + XML_SetUserData(g_parser, (void *)(uintptr_t)-1); + + XML_SetElementDeclHandler(g_parser, element_decl_check_model); + if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); + + if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) + fail("Element declaration model regression detected"); +} +END_TEST + /* Test foreign DTD handling */ START_TEST(test_set_foreign_dtd) { const char *text1 = "\n"; @@ -6011,6 +6087,105 @@ START_TEST(test_utf8_in_cdata_section_2) } END_TEST +START_TEST(test_utf8_in_start_tags) { + struct test_case { + bool goodName; + bool goodNameStart; + const char *tagName; + }; + + // The idea with the tests below is this: + // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences + // go to isNever and are hence not a concern. + // + // We start with a character that is a valid name character + // (or even name-start character, see XML 1.0r4 spec) and then we flip + // single bits at places where (1) the result leaves the UTF-8 encoding space + // and (2) we stay in the same n-byte sequence family. + // + // The flipped bits are highlighted in angle brackets in comments, + // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped + // the most significant bit to 1 to leave UTF-8 encoding space. + struct test_case cases[] = { + // 1-byte UTF-8: [0xxx xxxx] + {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' + {false, false, "\xBA"}, // [<1>011 1010] + {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' + {false, false, "\xB9"}, // [<1>011 1001] + + // 2-byte UTF-8: [110x xxxx] [10xx xxxx] + {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = + // Arabic small waw U+06E5 + {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] + {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] + {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] + {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = + // combining char U+0301 + {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] + {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] + {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] + + // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] + {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = + // Devanagari Letter A U+0905 + {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] + {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] + {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] + {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] + {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] + {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = + // combining char U+0901 + {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] + {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] + {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] + {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] + {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] + }; + const bool atNameStart[] = {true, false}; + + size_t i = 0; + char doc[1024]; + size_t failCount = 0; + + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + size_t j = 0; + for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { + const bool expectedSuccess + = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; + sprintf(doc, "<%s%s>