diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst
index a055113dec0494c..40f0e62d10501af 100644
--- a/Doc/whatsnew/3.16.rst
+++ b/Doc/whatsnew/3.16.rst
@@ -141,6 +141,14 @@ module_name
* TODO
+html
+----
+
+* :func:`html.escape` and :func:`html.unescape` are now implemented in C, with
+ the pure-Python versions kept as a fallback. This also speeds up
+ :class:`html.parser.HTMLParser`, which relies on :func:`html.unescape`.
+ (Contributed by Bernát Gábor in :gh:`151024`.)
+
Removed
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 99a1ffb8ad5229b..67d81fee54e027f 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -2012,6 +2012,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(qualname));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(query));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(queuetype));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(quote));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(quotetabs));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(raw));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(read));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index d5818402a508cb9..7830c55675cb1f5 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -735,6 +735,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(qualname)
STRUCT_FOR_ID(query)
STRUCT_FOR_ID(queuetype)
+ STRUCT_FOR_ID(quote)
STRUCT_FOR_ID(quotetabs)
STRUCT_FOR_ID(raw)
STRUCT_FOR_ID(read)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 8227f3fa9eedcf5..33acde2ed195c67 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -2010,6 +2010,7 @@ extern "C" {
INIT_ID(qualname), \
INIT_ID(query), \
INIT_ID(queuetype), \
+ INIT_ID(quote), \
INIT_ID(quotetabs), \
INIT_ID(raw), \
INIT_ID(read), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index cb731e9a6888781..3a7a3c316d9085c 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -2720,6 +2720,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(quote);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(quotetabs);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/html/__init__.py b/Lib/html/__init__.py
index 1543460ca33b0ae..c83ec8059037cac 100644
--- a/Lib/html/__init__.py
+++ b/Lib/html/__init__.py
@@ -130,3 +130,11 @@ def unescape(s):
if '&' not in s:
return s
return _charref.sub(_replace_charref, s)
+
+
+try:
+ # Prefer the C accelerator; fall back to the pure-Python versions above on
+ # implementations that do not ship it (see PEP 399).
+ from _html import escape, unescape
+except ImportError:
+ pass
diff --git a/Lib/test/test_html.py b/Lib/test/test_html.py
index 839e0a47a8499cc..1a7147579c57090 100644
--- a/Lib/test/test_html.py
+++ b/Lib/test/test_html.py
@@ -2,29 +2,81 @@
Tests for the html module functions.
"""
-import html
import unittest
+from test.support import import_helper
+py_html = import_helper.import_fresh_module('html', blocked=['_html'])
+c_html = import_helper.import_fresh_module('html', fresh=['_html'])
+
+
+class HtmlTestsMixin:
+ # Subclasses set ``html`` to the pure-Python or C-accelerated module.
+ html = None
-class HtmlTests(unittest.TestCase):
def test_escape(self):
+ escape = self.html.escape
self.assertEqual(
- html.escape('\'\''),
+ escape('\'\''),
''<script>"&foo;"</script>'')
self.assertEqual(
- html.escape('\'\'', False),
+ escape('\'\'', False),
'\'<script>"&foo;"</script>\'')
+    def test_escape_quote_flag(self):
+        # quote defaults to True and may be passed by keyword or position;
+        # only then are '"' and "'" replaced by &quot; and &#x27;.
+        escape = self.html.escape
+        self.assertEqual(escape('"\'', quote=True), '&quot;&#x27;')
+        self.assertEqual(escape('"\''), '&quot;&#x27;')
+        self.assertEqual(escape('"\'', quote=False), '"\'')
+        self.assertEqual(escape('"\'', False), '"\'')
+
+    def test_escape_no_specials_returned_unchanged(self):
+        # Inputs without any escapable character, across ASCII, Latin-1,
+        # BMP and astral text, must compare equal to the escaped result.
+        escape = self.html.escape
+        samples = ('', 'a', 'plain text', 'x' * 100, 'caf\xe9 r\xe9sum\xe9',
+                   '☃ snowman', '\U0001F600 emoji')
+        for text in samples:
+            self.assertEqual(escape(text), text)
+
+    def test_escape_specials_at_every_offset(self):
+        # Exercise the word-at-a-time (SWAR) scan boundaries and tail loop by
+        # placing each special at every offset of a run crossing 8-byte words.
+        escape = self.html.escape
+        specials = {'&': '&amp;', '<': '&lt;', '>': '&gt;',
+                    '"': '&quot;', "'": '&#x27;'}
+        for ch, rep in specials.items():
+            for pad in range(0, 20):
+                s = 'a' * pad + ch + 'b' * pad
+                self.assertEqual(escape(s), 'a' * pad + rep + 'b' * pad)
+
+    def test_escape_adjacent_specials(self):
+        # Back-to-back specials: every character of the input is replaced.
+        self.assertEqual(self.html.escape('&<>"\'' * 5),
+                         '&amp;&lt;&gt;&quot;&#x27;' * 5)
+
+    def test_escape_multiple_kinds(self):
+        escape = self.html.escape
+        # 2-byte (UCS-2) and 4-byte (UCS-4) strings still escape ASCII specials.
+        self.assertEqual(escape('☃ <b> & </b>'),
+                         '☃ &lt;b&gt; &amp; &lt;/b&gt;')
+        self.assertEqual(escape('\U0001F600<&>"\''),
+                         '\U0001F600&lt;&amp;&gt;&quot;&#x27;')
+        # Latin-1 high bytes must not be matched by the byte-wise scan.
+        self.assertEqual(escape('\xe9\xff & \xe9'), '\xe9\xff &amp; \xe9')
+
+    def test_escape_str_subclass_returns_true_str(self):
+        # Whether or not anything is escaped, a str subclass argument must
+        # yield an exact str equal to escaping the plain text.
+        class S(str):
+            pass
+        escape = self.html.escape
+        for text in ('no specials', 'a & b'):
+            escaped = escape(S(text))
+            self.assertEqual(escaped, escape(text))
+            self.assertIs(type(escaped), str)
+
def test_unescape(self):
numeric_formats = ['%d', '%d;', '%x', '%x;']
errmsg = 'unescape(%r) should have returned %r'
def check(text, expected):
- self.assertEqual(html.unescape(text), expected,
+ self.assertEqual(self.html.unescape(text), expected,
msg=errmsg % (text, expected))
def check_num(num, expected):
for format in numeric_formats:
text = format % num
- self.assertEqual(html.unescape(text), expected,
+ self.assertEqual(self.html.unescape(text), expected,
msg=errmsg % (text, expected))
# check text with no character references
check('no character references', 'no character references')
@@ -42,25 +94,25 @@ def check_num(num, expected):
'%x', '%06x', '%x;', '%06x;',
'%X', '%06X', '%x;', '%06x;']
for num, char in zip([65, 97, 34, 38, 0x2603, 0x101234],
- ['A', 'a', '"', '&', '\u2603', '\U00101234']):
+ ['A', 'a', '"', '&', '☃', '\U00101234']):
for s in formats:
check(s % num, char)
for end in [' ', 'X']:
check((s+end) % num, char+end)
# check invalid code points
for cp in [0xD800, 0xDB00, 0xDC00, 0xDFFF, 0x110000]:
- check_num(cp, '\uFFFD')
+ check_num(cp, '�')
# check more invalid code points
for cp in [0x1, 0xb, 0xe, 0x7f, 0xfffe, 0xffff, 0x10fffe, 0x10ffff]:
check_num(cp, '')
# check invalid numbers
- for num, ch in zip([0x0d, 0x80, 0x95, 0x9d], '\r\u20ac\u2022\x9d'):
+ for num, ch in zip([0x0d, 0x80, 0x95, 0x9d], '\rۥ\x9d'):
check_num(num, ch)
# check small numbers
- check_num(0, '\uFFFD')
+ check_num(0, '�')
check_num(9, '\t')
# check a big number
- check_num(1000000000000000000, '\uFFFD')
+ check_num(1000000000000000000, '�')
# check that multiple trailing semicolons are handled correctly
for e in ['";', '";', '";', '";']:
check(e, '";')
@@ -89,7 +141,7 @@ def check_num(num, expected):
# longest valid name
check('∳', '∳')
# check a charref that maps to two unicode chars
- check('∾̳', '\u223E\u0333')
+ check('∾̳', '∾̳')
check('&acE', '&acE')
# see #12888
check('{ ' * 1050, '{ ' * 1050)
@@ -98,6 +150,38 @@ def check_num(num, expected):
'ÉricÉric&alphacentauriαcentauri')
check('&co;', '&co;')
+    def test_unescape_multiple_kinds(self):
+        unescape = self.html.unescape
+        # references embedded in 2-byte and 4-byte strings
+        self.assertEqual(unescape('☃ &amp; &gt; &copy; x'),
+                         '☃ & > \xa9 x')
+        # named, numeric and named-to-BMP references after an astral character
+        self.assertEqual(unescape('\U0001F600&amp;&#65;&notin;'),
+                         '\U0001F600&A∉')
+
+    def test_unescape_long_text_with_sparse_refs(self):
+        # exercise the bulk substring copy between references
+        unescape = self.html.unescape
+        s = 'x' * 5000 + '&amp;' + 'y' * 5000
+        self.assertEqual(unescape(s), 'x' * 5000 + '&' + 'y' * 5000)
+        # no '&' at all: the text must come back unchanged
+        self.assertEqual(unescape('a' * 5000), 'a' * 5000)
+
+    def test_unescape_str_subclass(self):
+        # str subclasses are accepted both on the "no '&'" fast path and when
+        # a reference actually has to be replaced.
+        class S(str):
+            pass
+        self.assertEqual(self.html.unescape(S('no refs')), 'no refs')
+        self.assertEqual(self.html.unescape(S('a &amp; b')), 'a & b')
+
+
+# Runs every HtmlTestsMixin test against the copy of ``html`` that was
+# imported with ``_html`` blocked (``py_html``).
+class PyHtmlTests(HtmlTestsMixin, unittest.TestCase):
+    html = py_html
+
+
+# Runs the same tests against ``c_html``.  The class is skipped when that
+# import failed or when ``c_html.escape`` does not report ``_html`` as its
+# ``__module__``, i.e. when the accelerated function is not the one in use.
+@unittest.skipUnless(
+    c_html is not None and getattr(c_html.escape, '__module__', None) == '_html',
+    'requires the _html C accelerator')
+class CHtmlTests(HtmlTestsMixin, unittest.TestCase):
+    html = c_html
+
if __name__ == '__main__':
unittest.main()
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 2b34b009fd745a0..d7a709644d6e0d6 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1848,6 +1848,13 @@ regen-limited-abi: all
regen-unicodedata:
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/unicode/makeunicodedata.py
+.PHONY: regen-html
+regen-html:
+ # Regenerate Modules/html_entities.h from the html module data
+ # using Tools/build/generate_html_entities.py
+ $(PYTHON_FOR_REGEN) $(srcdir)/Tools/build/generate_html_entities.py \
+ $(srcdir)/Modules/html_entities.h
+
############################################################################
# Regenerate all generated files
@@ -1860,7 +1867,7 @@ regen-all: regen-cases regen-slots \
regen-test-levenshtein regen-global-objects
@echo
@echo "Note: make regen-stdlib-module-names, make regen-limited-abi, "
- @echo "make regen-configure, make regen-sbom, and make regen-unicodedata should be run manually"
+ @echo "make regen-configure, make regen-sbom, make regen-html, and make regen-unicodedata should be run manually"
############################################################################
# Special rules for object files
@@ -3422,6 +3429,7 @@ MODULE_DEPS_SHARED=@MODULE_DEPS_SHARED@
MODULE__CURSES_DEPS=$(srcdir)/Include/py_curses.h
MODULE__CURSES_PANEL_DEPS=$(srcdir)/Include/py_curses.h
MODULE__DATETIME_DEPS=$(srcdir)/Include/datetime.h
+MODULE__HTML_DEPS=$(srcdir)/Modules/html_entities.h
MODULE_CMATH_DEPS=$(srcdir)/Modules/_math.h
MODULE_MATH_DEPS=$(srcdir)/Modules/_math.h
MODULE_PYEXPAT_DEPS=@LIBEXPAT_INTERNAL@
diff --git a/Misc/NEWS.d/next/Library/2026-06-06-15-31-29.gh-issue-151024.pzZAnr.rst b/Misc/NEWS.d/next/Library/2026-06-06-15-31-29.gh-issue-151024.pzZAnr.rst
new file mode 100644
index 000000000000000..c122c46c09c9c8f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-06-15-31-29.gh-issue-151024.pzZAnr.rst
@@ -0,0 +1,3 @@
+Add a C accelerator for :func:`html.escape` and :func:`html.unescape`, with the
+pure-Python implementations kept as a fallback. This also speeds up
+:class:`html.parser.HTMLParser`, which uses :func:`html.unescape`.
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index c3dd47a5e40a675..73d7040e221d985 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -35,6 +35,7 @@
@MODULE__BISECT_TRUE@_bisect _bisectmodule.c
@MODULE__CSV_TRUE@_csv _csv.c
@MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c
+@MODULE__HTML_TRUE@_html _htmlmodule.c
@MODULE__JSON_TRUE@_json _json.c
@MODULE__LSPROF_TRUE@_lsprof _lsprof.c rotatingtree.c
@MODULE__MATH_INTEGER_TRUE@_math_integer mathintegermodule.c
diff --git a/Modules/_htmlmodule.c b/Modules/_htmlmodule.c
new file mode 100644
index 000000000000000..e771e5e3dc7eade
--- /dev/null
+++ b/Modules/_htmlmodule.c
@@ -0,0 +1,522 @@
+/* C accelerator for the html module (html.escape and html.unescape).
+
+ escape() scans 1-byte strings word-at-a-time (SWAR) to skip runs with no
+ special character eight bytes at a time, using the same broadcast/haszero
+ masks as Objects/unicodeobject.c, and returns the input unchanged when there
+ is nothing to escape. unescape() replaces the regex + Python callback with a
+ single C pass: it bulk-copies the text between references and binary-searches
+ the generated HTML5 named-reference and numeric-charref tables.
+
+ The module has no mutable state: inputs are immutable str objects and the
+ lookup tables are read-only, so it supports free-threading and a
+ per-interpreter GIL. */
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "Python.h"
+#include <stdint.h>               // uint8_t, uint64_t
+#include <string.h>               // memcmp(), memcpy()
+
+#include "html_entities.h"
+
+#include "clinic/_htmlmodule.c.h"
+
+/*[clinic input]
+module _html
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=95e66f9a73b6c8ba]*/
+
+#define SWAR_ONES 0x0101010101010101ULL
+#define SWAR_HIGHS 0x8080808080808080ULL
+
+/* Return a non-zero mask if and only if at least one of the eight bytes
+   packed in v is 0x00.  Only "zero / non-zero" is meaningful to callers. */
+static inline uint64_t
+swar_haszero(uint64_t v)
+{
+    const uint64_t borrowed = v - SWAR_ONES;     /* a 0x00 byte borrows into bit 7 */
+    const uint64_t high_clear = ~v & SWAR_HIGHS; /* bytes whose own bit 7 was clear */
+    return borrowed & high_clear;
+}
+
+/* Return non-zero iff some byte of w equals c: XOR with c broadcast to all
+   eight lanes turns matching bytes into 0x00, which swar_haszero() detects. */
+static inline uint64_t
+swar_hasbyte(uint64_t w, uint8_t c)
+{
+    const uint64_t broadcast = SWAR_ONES * c;
+    return swar_haszero(w ^ broadcast);
+}
+
+/* Return non-zero iff the 8-byte word w contains '&', '<' or '>', or, when
+   quote is true, a double or single quote. */
+static inline uint64_t
+swar_specials(uint64_t w, int quote)
+{
+    uint64_t hits = swar_hasbyte(w, '&');
+    hits |= swar_hasbyte(w, '<');
+    hits |= swar_hasbyte(w, '>');
+    if (!quote) {
+        return hits;
+    }
+    hits |= swar_hasbyte(w, '"');
+    hits |= swar_hasbyte(w, '\'');
+    return hits;
+}
+
+/* Return how many code points escaping ch adds to the output, i.e. the
+   length of its replacement minus one; 0 when ch is copied unchanged.
+   The quote characters only count when quote is true. */
+static inline Py_ssize_t
+escape_extra(Py_UCS4 ch, int quote)
+{
+    if (ch == '&') {
+        return 4;                       /* "&amp;" */
+    }
+    if (ch == '<' || ch == '>') {
+        return 3;                       /* "&lt;" / "&gt;" */
+    }
+    if (quote && (ch == '"' || ch == '\'')) {
+        return 5;                       /* "&quot;" / "&#x27;" */
+    }
+    return 0;
+}
+
+/* Write ch, or its HTML replacement, into the output buffer `data` of the
+   given kind starting at index o, and return the number of code points
+   written.  The quote characters are only replaced when quote is true.
+
+   The written length is taken from the replacement literal itself so the
+   two can never disagree; it must equal 1 + escape_extra(ch, quote), which
+   is what the caller used to size the buffer. */
+static inline Py_ssize_t
+write_escaped(int kind, void *data, Py_ssize_t o, Py_UCS4 ch, int quote)
+{
+    const char *rep = NULL;
+    switch (ch) {
+    case '&':  rep = "&amp;"; break;
+    case '<':  rep = "&lt;"; break;
+    case '>':  rep = "&gt;"; break;
+    case '"':  if (quote) { rep = "&quot;"; } break;
+    case '\'': if (quote) { rep = "&#x27;"; } break;
+    default:   break;
+    }
+    if (rep == NULL) {
+        /* Not special (or a quote with quote == 0): copy through. */
+        PyUnicode_WRITE(kind, data, o, ch);
+        return 1;
+    }
+    Py_ssize_t written = 0;
+    for (; rep[written] != '\0'; written++) {
+        PyUnicode_WRITE(kind, data, o + written, (Py_UCS4)rep[written]);
+    }
+    return written;
+}
+
+/*[clinic input]
+_html.escape
+
+ s: unicode
+ quote: bool = True
+
+Replace special characters "&", "<" and ">" to HTML-safe sequences.
+
+If the optional flag quote is true (the default), the quotation mark
+characters, both double quote (") and single quote ('), are also
+translated.
+[clinic start generated code]*/
+
+static PyObject *
+_html_escape_impl(PyObject *module, PyObject *s, int quote)
+/*[clinic end generated code: output=7e6916b020ab13bd input=04fd630fd061e3c5]*/
+{
+    /* Two passes over s: first count how many code points the escapes add,
+       then allocate the result once and fill it.  Returns a new reference,
+       or NULL with an exception set. */
+    int kind = PyUnicode_KIND(s);
+    Py_ssize_t n = PyUnicode_GET_LENGTH(s);
+    const void *data = PyUnicode_DATA(s);
+
+    /* Pass 1.  The total is accumulated in an unsigned 64-bit counter: each
+       character adds at most 5, and a signed Py_ssize_t sum could overflow
+       (undefined behaviour) on 32-bit builds for very large inputs. */
+    uint64_t extra = 0;
+    if (kind == PyUnicode_1BYTE_KIND) {
+        const uint8_t *p = (const uint8_t *)data;
+        Py_ssize_t i = 0;
+        while (i + 8 <= n) {
+            uint64_t w;
+            memcpy(&w, p + i, 8);
+            if (swar_specials(w, quote) == 0) {
+                /* No special in these eight bytes: skip the whole word. */
+                i += 8;
+                continue;
+            }
+            for (int j = 0; j < 8; j++) {
+                extra += (uint64_t)escape_extra(p[i + j], quote);
+            }
+            i += 8;
+        }
+        /* Fewer than eight bytes left. */
+        for (; i < n; i++) {
+            extra += (uint64_t)escape_extra(p[i], quote);
+        }
+    }
+    else {
+        for (Py_ssize_t i = 0; i < n; i++) {
+            extra += (uint64_t)escape_extra(PyUnicode_READ(kind, data, i), quote);
+        }
+    }
+
+    if (extra == 0) {
+        /* Nothing to escape.  Match the pure-Python escape(), which returns a
+           true str (str.replace() normalises subclasses); for an exact str this
+           just returns a new reference to the input. */
+        return PyUnicode_FromObject(s);
+    }
+
+    /* Refuse results longer than PY_SSIZE_T_MAX instead of overflowing,
+       like str.replace() in the pure-Python implementation. */
+    if (extra > (uint64_t)(PY_SSIZE_T_MAX - n)) {
+        PyErr_SetString(PyExc_OverflowError, "escaped string is too long");
+        return NULL;
+    }
+
+    /* All replacements are ASCII, so the input's maximum character also
+       bounds the output. */
+    Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(s);
+    PyObject *out = PyUnicode_New(n + (Py_ssize_t)extra, maxchar);
+    if (out == NULL) {
+        return NULL;
+    }
+    int okind = PyUnicode_KIND(out);
+    void *odata = PyUnicode_DATA(out);
+    Py_ssize_t o = 0;
+
+    /* Pass 2: copy, expanding specials. */
+    if (kind == PyUnicode_1BYTE_KIND && okind == PyUnicode_1BYTE_KIND) {
+        const uint8_t *p = (const uint8_t *)data;
+        uint8_t *q = (uint8_t *)odata;
+        Py_ssize_t i = 0;
+        while (i + 8 <= n) {
+            uint64_t w;
+            memcpy(&w, p + i, 8);
+            if (swar_specials(w, quote) == 0) {
+                /* Clean word: copy eight bytes in one go. */
+                memcpy(q + o, p + i, 8);
+                o += 8;
+                i += 8;
+                continue;
+            }
+            for (int j = 0; j < 8; j++) {
+                o += write_escaped(okind, odata, o, p[i + j], quote);
+            }
+            i += 8;
+        }
+        for (; i < n; i++) {
+            o += write_escaped(okind, odata, o, p[i], quote);
+        }
+    }
+    else {
+        for (Py_ssize_t i = 0; i < n; i++) {
+            o += write_escaped(okind, odata, o, PyUnicode_READ(kind, data, i), quote);
+        }
+    }
+    return out;
+}
+
+/* Three-way comparison of two byte strings given as (pointer, length):
+   bytewise over the common prefix, then the shorter one sorts first.
+   Returns -1, 0 or 1. */
+static int
+cmp_name(const char *a, Py_ssize_t alen, const char *b, unsigned blen)
+{
+    const Py_ssize_t rlen = (Py_ssize_t)blen;
+    Py_ssize_t common = rlen;
+    if (alen < rlen) {
+        common = alen;
+    }
+    const int order = memcmp(a, b, (size_t)common);
+    if (order < 0) {
+        return -1;
+    }
+    if (order > 0) {
+        return 1;
+    }
+    /* Identical common prefix: decide on length alone. */
+    return (alen > rlen) - (alen < rlen);
+}
+
+/* Binary-search html5_entities for the name given as (name, len) bytes.
+   Returns the matching entry, or NULL when there is none.  Relies on the
+   table being sorted in cmp_name() order. */
+static const html5_entity *
+find_entity(const char *name, Py_ssize_t len)
+{
+    int first = 0;
+    int limit = html5_count;            /* candidates live in [first, limit) */
+    while (first < limit) {
+        const int probe = first + (limit - first) / 2;
+        const html5_entity *cand = &html5_entities[probe];
+        const int order = cmp_name(name, len, cand->name, cand->name_len);
+        if (order < 0) {
+            limit = probe;
+        }
+        else if (order > 0) {
+            first = probe + 1;
+        }
+        else {
+            return cand;
+        }
+    }
+    return NULL;
+}
+
+/* Binary-search invalid_charrefs for the numeric reference num.  On a hit,
+   store the substitute code point in *cp and return 1; otherwise return 0
+   and leave *cp untouched.  Relies on the table being sorted by .num. */
+static int
+find_invalid_charref(Py_UCS4 num, Py_UCS4 *cp)
+{
+    int first = 0;
+    int limit = invalid_charref_count;  /* candidates live in [first, limit) */
+    while (first < limit) {
+        const int probe = first + (limit - first) / 2;
+        const Py_UCS4 key = invalid_charrefs[probe].num;
+        if (num < key) {
+            limit = probe;
+        }
+        else if (num > key) {
+            first = probe + 1;
+        }
+        else {
+            *cp = invalid_charrefs[probe].cp;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Return 1 if num is listed in invalid_codepoints, else 0 (binary search;
+   relies on the table being sorted in ascending order). */
+static int
+is_invalid_codepoint(Py_UCS4 num)
+{
+    int first = 0;
+    int limit = invalid_codepoint_count;    /* candidates live in [first, limit) */
+    while (first < limit) {
+        const int probe = first + (limit - first) / 2;
+        const Py_UCS4 key = invalid_codepoints[probe];
+        if (key == num) {
+            return 1;
+        }
+        if (key < num) {
+            first = probe + 1;
+        }
+        else {
+            limit = probe;
+        }
+    }
+    return 0;
+}
+
+/* Return 1 if c may appear in a named character reference, 0 otherwise.
+   Excluded are tab, newline, form feed, space, '<', '&', '#' and ';',
+   i.e. the character class [^\t\n\f <&#;]; presumably this mirrors the
+   _charref regex in Lib/html/__init__.py (verify against that file). */
+static inline int
+is_name_char(Py_UCS4 c)
+{
+    const int excluded = (c == '\t' || c == '\n' || c == '\x0c' || c == ' '
+                          || c == '<' || c == '&' || c == '#' || c == ';');
+    return !excluded;
+}
+
+/* Return the value (0-15) of the ASCII hexadecimal digit c, or -1 if c is
+   not one of 0-9, a-f, A-F. */
+static inline int
+hex_value(Py_UCS4 c)
+{
+    if (c >= '0' && c <= '9') {
+        return (int)(c - '0');
+    }
+    /* Setting bit 5 maps 'A'-'F' onto 'a'-'f'; no other code point lands
+       in that range, so one range test covers both cases. */
+    const Py_UCS4 folded = c | 0x20;
+    if (folded >= 'a' && folded <= 'f') {
+        return (int)(folded - 'a') + 10;
+    }
+    return -1;
+}
+
+/* Parse a character reference that starts with '&' at index i.  On a match,
+   write the replacement to the writer, set *consumed to the number of input
+   characters used (including '&'), and return 1.  Return 0 when no reference
+   matches (the caller emits '&' literally) and -1 on a writer error.
+
+   kind/data/n describe the input string's buffer and length.  A name-shaped
+   token that is not a known entity still counts as a match: it is written
+   back unchanged and 1 is returned, so the caller skips over it. */
+static int
+parse_charref(int kind, const void *data, Py_ssize_t n, Py_ssize_t i,
+              PyUnicodeWriter *writer, Py_ssize_t *consumed)
+{
+    Py_ssize_t p = i + 1;
+    if (p >= n) {
+        return 0;       /* '&' is the last character */
+    }
+    Py_UCS4 c = PyUnicode_READ(kind, data, p);
+
+    if (c == '#') {
+        /* Numeric reference: "&#" digits or "&#x"/"&#X" hex digits. */
+        Py_ssize_t d = p + 1;
+        int hex = 0;
+        if (d < n) {
+            Py_UCS4 x = PyUnicode_READ(kind, data, d);
+            if (x == 'x' || x == 'X') {
+                hex = 1;
+                d++;
+            }
+        }
+        Py_UCS4 num = 0;
+        int overflow = 0;
+        Py_ssize_t start = d;       /* index of the first expected digit */
+        while (d < n) {
+            Py_UCS4 x = PyUnicode_READ(kind, data, d);
+            if (hex) {
+                int v = hex_value(x);
+                if (v < 0) {
+                    break;
+                }
+                num = num * 16 + (Py_UCS4)v;
+            }
+            else {
+                if (x < '0' || x > '9') {
+                    break;
+                }
+                num = num * 10 + (x - '0');
+            }
+            /* Clamping after every digit keeps num small enough that the
+               next multiplication cannot wrap, however many digits follow. */
+            if (num > 0x110000) {
+                num = 0x110000;  /* cap to trigger the > 0x10FFFF branch below */
+                overflow = 1;
+            }
+            d++;
+        }
+        if (d == start) {
+            return 0;  /* no digits: the regex does not match */
+        }
+        if (d < n && PyUnicode_READ(kind, data, d) == ';') {
+            d++;  /* optional trailing ';' */
+        }
+
+        /* Order of the checks: correction table first, then surrogates and
+           out-of-range values, then the "drop entirely" table, and finally
+           the code point itself. */
+        Py_UCS4 repl;
+        if (!overflow && find_invalid_charref(num, &repl)) {
+            if (PyUnicodeWriter_WriteChar(writer, repl) < 0) {
+                return -1;
+            }
+        }
+        else if ((num >= 0xD800 && num <= 0xDFFF) || num > 0x10FFFF) {
+            /* U+FFFD REPLACEMENT CHARACTER */
+            if (PyUnicodeWriter_WriteChar(writer, 0xFFFD) < 0) {
+                return -1;
+            }
+        }
+        else if (is_invalid_codepoint(num)) {
+            /* maps to the empty string */
+        }
+        else if (PyUnicodeWriter_WriteChar(writer, num) < 0) {
+            return -1;
+        }
+        *consumed = d - i;
+        return 1;
+    }
+
+    if (!is_name_char(c)) {
+        return 0;  /* e.g. "&;", "& ", "&&": the regex does not match */
+    }
+
+    /* Named reference: read up to 32 name characters, then an optional ';'. */
+    Py_UCS4 ucs[HTML5_MAX_NAME_LEN];        /* token as read, for re-emitting */
+    char ascii[HTML5_MAX_NAME_LEN + 1];     /* lookup key; +1 leaves room for ';' */
+    int nlen = 0;
+    Py_ssize_t d = p;
+    while (d < n && nlen < HTML5_MAX_NAME_LEN) {
+        Py_UCS4 x = PyUnicode_READ(kind, data, d);
+        if (!is_name_char(x)) {
+            break;
+        }
+        ucs[nlen] = x;
+        ascii[nlen] = (x < 128) ? (char)x : (char)0x01;  /* 0x01 never matches */
+        nlen++;
+        d++;
+    }
+    int semi = 0;
+    if (d < n && PyUnicode_READ(kind, data, d) == ';') {
+        ascii[nlen] = ';';
+        semi = 1;
+        d++;
+    }
+    int toklen = nlen + semi;       /* length of the lookup key in ascii[] */
+
+    /* Whole token first, then shorter prefixes (HTML5 longest match). */
+    const html5_entity *e = find_entity(ascii, toklen);
+    int matchlen = toklen;
+    if (e == NULL) {
+        /* Prefixes shorter than two characters are never tried. */
+        for (int x = toklen - 1; x >= 2; x--) {
+            e = find_entity(ascii, x);
+            if (e != NULL) {
+                matchlen = x;
+                break;
+            }
+        }
+    }
+
+    if (e == NULL) {
+        /* No match: the callback returns '&' + group, i.e. unchanged. */
+        if (PyUnicodeWriter_WriteChar(writer, '&') < 0) {
+            return -1;
+        }
+        for (int k = 0; k < nlen; k++) {
+            if (PyUnicodeWriter_WriteChar(writer, ucs[k]) < 0) {
+                return -1;
+            }
+        }
+        if (semi && PyUnicodeWriter_WriteChar(writer, ';') < 0) {
+            return -1;
+        }
+        *consumed = d - i;
+        return 1;
+    }
+
+    /* Matched: an entity expands to one code point, or two when cp1 != 0. */
+    if (PyUnicodeWriter_WriteChar(writer, e->cp0) < 0) {
+        return -1;
+    }
+    if (e->cp1 && PyUnicodeWriter_WriteChar(writer, e->cp1) < 0) {
+        return -1;
+    }
+    /* Emit the unmatched tail of the token verbatim. */
+    for (int k = matchlen; k < toklen; k++) {
+        /* Index nlen, if present, is the ';' that was not stored in ucs[]. */
+        Py_UCS4 ch = (k < nlen) ? ucs[k] : (Py_UCS4)';';
+        if (PyUnicodeWriter_WriteChar(writer, ch) < 0) {
+            return -1;
+        }
+    }
+    *consumed = d - i;
+    return 1;
+}
+
+/*[clinic input]
+_html.unescape
+
+ s: unicode
+ /
+
+Convert named and numeric character references to Unicode characters.
+
+This function uses the rules defined by the HTML 5 standard for both
+valid and invalid character references, and the list of HTML 5 named
+character references defined in html.entities.html5.
+[clinic start generated code]*/
+
+static PyObject *
+_html_unescape_impl(PyObject *module, PyObject *s)
+/*[clinic end generated code: output=36781d63ddc15dd9 input=8a45dd7fcf275d12]*/
+{
+    /* Returns a new reference, or NULL with an exception set. */
+    Py_ssize_t n = PyUnicode_GET_LENGTH(s);
+    int kind = PyUnicode_KIND(s);
+    const void *data = PyUnicode_DATA(s);
+
+    /* PyUnicode_FindChar() returns -1 for "not found" and -2 for an error
+       with an exception set; the two must not be confused. */
+    Py_ssize_t j = PyUnicode_FindChar(s, '&', 0, n, 1);
+    if (j == -1) {
+        /* No '&' at all: nothing to do, hand back the argument itself. */
+        return Py_NewRef(s);
+    }
+    if (j < 0) {
+        return NULL;
+    }
+
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(n);
+    if (writer == NULL) {
+        return NULL;
+    }
+
+    /* Invariant at the top of the loop: everything before i has been
+       written, and j is the index of the first '&' at or after i. */
+    Py_ssize_t i = 0;
+    for (;;) {
+        /* Bulk-copy the run of non-'&' text, then handle the reference. */
+        if (j > i && PyUnicodeWriter_WriteSubstring(writer, s, i, j) < 0) {
+            goto error;
+        }
+        Py_ssize_t consumed;
+        int r = parse_charref(kind, data, n, j, writer, &consumed);
+        if (r < 0) {
+            goto error;
+        }
+        if (r == 0) {
+            /* Not a reference: keep the '&' and resume right after it. */
+            if (PyUnicodeWriter_WriteChar(writer, '&') < 0) {
+                goto error;
+            }
+            i = j + 1;
+        }
+        else {
+            i = j + consumed;
+        }
+        if (i >= n) {
+            break;
+        }
+
+        j = PyUnicode_FindChar(s, '&', i, n, 1);
+        if (j == -1) {
+            /* No further '&': copy the remaining text and stop. */
+            if (PyUnicodeWriter_WriteSubstring(writer, s, i, n) < 0) {
+                goto error;
+            }
+            break;
+        }
+        if (j < 0) {
+            goto error;
+        }
+    }
+    return PyUnicodeWriter_Finish(writer);
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+/* Method table: the two clinic-generated entries, then the sentinel. */
+static PyMethodDef html_methods[] = {
+    _HTML_ESCAPE_METHODDEF
+    _HTML_UNESCAPE_METHODDEF
+    {NULL, NULL}
+};
+
+/* Multi-phase init slots.  The module declares support for a
+   per-interpreter GIL and for running without the GIL; the file header
+   justifies this by the module having no mutable state. */
+static struct PyModuleDef_Slot html_slots[] = {
+    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+    {0, NULL},
+};
+
+/* Module definition for "_html"; m_size is 0, i.e. no per-module state
+   is allocated. */
+static struct PyModuleDef htmlmodule = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "_html",
+    .m_doc = "C accelerator for the html module.",
+    .m_size = 0,
+    .m_methods = html_methods,
+    .m_slots = html_slots,
+};
+
+/* Module entry point: hands the definition to PyModuleDef_Init(), so the
+   module object is created through the slots in html_slots. */
+PyMODINIT_FUNC
+PyInit__html(void)
+{
+    return PyModuleDef_Init(&htmlmodule);
+}
diff --git a/Modules/clinic/_htmlmodule.c.h b/Modules/clinic/_htmlmodule.c.h
new file mode 100644
index 000000000000000..10768c690d2181e
--- /dev/null
+++ b/Modules/clinic/_htmlmodule.c.h
@@ -0,0 +1,119 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_html_escape__doc__,
+"escape($module, /, s, quote=True)\n"
+"--\n"
+"\n"
+"Replace special characters \"&\", \"<\" and \">\" to HTML-safe sequences.\n"
+"\n"
+"If the optional flag quote is true (the default), the quotation mark\n"
+"characters, both double quote (\") and single quote (\'), are also\n"
+"translated.");
+
+#define _HTML_ESCAPE_METHODDEF \
+ {"escape", _PyCFunction_CAST(_html_escape), METH_FASTCALL|METH_KEYWORDS, _html_escape__doc__},
+
+static PyObject *
+_html_escape_impl(PyObject *module, PyObject *s, int quote);
+
+static PyObject *
+_html_escape(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { _Py_LATIN1_CHR('s'), &_Py_ID(quote), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"s", "quote", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "escape",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ PyObject *s;
+ int quote = 1;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("escape", "argument 's'", "str", args[0]);
+ goto exit;
+ }
+ s = args[0];
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ quote = PyObject_IsTrue(args[1]);
+ if (quote < 0) {
+ goto exit;
+ }
+skip_optional_pos:
+ return_value = _html_escape_impl(module, s, quote);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_html_unescape__doc__,
+"unescape($module, s, /)\n"
+"--\n"
+"\n"
+"Convert named and numeric character references to Unicode characters.\n"
+"\n"
+"This function uses the rules defined by the HTML 5 standard for both\n"
+"valid and invalid character references, and the list of HTML 5 named\n"
+"character references defined in html.entities.html5.");
+
+#define _HTML_UNESCAPE_METHODDEF \
+ {"unescape", (PyCFunction)_html_unescape, METH_O, _html_unescape__doc__},
+
+static PyObject *
+_html_unescape_impl(PyObject *module, PyObject *s);
+
+static PyObject *
+_html_unescape(PyObject *module, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ PyObject *s;
+
+ if (!PyUnicode_Check(arg)) {
+ _PyArg_BadArgument("unescape", "argument", "str", arg);
+ goto exit;
+ }
+ s = arg;
+ return_value = _html_unescape_impl(module, s);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=3173663201cb635a input=a9049054013a1b77]*/
diff --git a/Modules/html_entities.h b/Modules/html_entities.h
new file mode 100644
index 000000000000000..0900a47b0605183
--- /dev/null
+++ b/Modules/html_entities.h
@@ -0,0 +1,2301 @@
+/* Auto-generated by Tools/build/generate_html_entities.py */
+/* HTML5 named references and numeric-charref correction tables. */
+
+typedef struct {
+ const char *name; /* reference name without the leading '&'; may end in ';' */
+ unsigned char name_len; /* length of name in bytes, counting any trailing ';' */
+ Py_UCS4 cp0; /* first (or only) code point of the value */
+ Py_UCS4 cp1; /* second code point, or 0 if the value is one char */
+} html5_entity;
+
+#define HTML5_MAX_NAME_LEN 32 /* NOTE(review): presumably the largest name_len in html5_entities -- confirm */
+static const int html5_count = 2231; /* NOTE(review): presumably the number of html5_entities rows -- confirm */
+static const html5_entity html5_entities[] = {
+ {"AElig", 5u, 198u, 0u},
+ {"AElig;", 6u, 198u, 0u},
+ {"AMP", 3u, 38u, 0u},
+ {"AMP;", 4u, 38u, 0u},
+ {"Aacute", 6u, 193u, 0u},
+ {"Aacute;", 7u, 193u, 0u},
+ {"Abreve;", 7u, 258u, 0u},
+ {"Acirc", 5u, 194u, 0u},
+ {"Acirc;", 6u, 194u, 0u},
+ {"Acy;", 4u, 1040u, 0u},
+ {"Afr;", 4u, 120068u, 0u},
+ {"Agrave", 6u, 192u, 0u},
+ {"Agrave;", 7u, 192u, 0u},
+ {"Alpha;", 6u, 913u, 0u},
+ {"Amacr;", 6u, 256u, 0u},
+ {"And;", 4u, 10835u, 0u},
+ {"Aogon;", 6u, 260u, 0u},
+ {"Aopf;", 5u, 120120u, 0u},
+ {"ApplyFunction;", 14u, 8289u, 0u},
+ {"Aring", 5u, 197u, 0u},
+ {"Aring;", 6u, 197u, 0u},
+ {"Ascr;", 5u, 119964u, 0u},
+ {"Assign;", 7u, 8788u, 0u},
+ {"Atilde", 6u, 195u, 0u},
+ {"Atilde;", 7u, 195u, 0u},
+ {"Auml", 4u, 196u, 0u},
+ {"Auml;", 5u, 196u, 0u},
+ {"Backslash;", 10u, 8726u, 0u},
+ {"Barv;", 5u, 10983u, 0u},
+ {"Barwed;", 7u, 8966u, 0u},
+ {"Bcy;", 4u, 1041u, 0u},
+ {"Because;", 8u, 8757u, 0u},
+ {"Bernoullis;", 11u, 8492u, 0u},
+ {"Beta;", 5u, 914u, 0u},
+ {"Bfr;", 4u, 120069u, 0u},
+ {"Bopf;", 5u, 120121u, 0u},
+ {"Breve;", 6u, 728u, 0u},
+ {"Bscr;", 5u, 8492u, 0u},
+ {"Bumpeq;", 7u, 8782u, 0u},
+ {"CHcy;", 5u, 1063u, 0u},
+ {"COPY", 4u, 169u, 0u},
+ {"COPY;", 5u, 169u, 0u},
+ {"Cacute;", 7u, 262u, 0u},
+ {"Cap;", 4u, 8914u, 0u},
+ {"CapitalDifferentialD;", 21u, 8517u, 0u},
+ {"Cayleys;", 8u, 8493u, 0u},
+ {"Ccaron;", 7u, 268u, 0u},
+ {"Ccedil", 6u, 199u, 0u},
+ {"Ccedil;", 7u, 199u, 0u},
+ {"Ccirc;", 6u, 264u, 0u},
+ {"Cconint;", 8u, 8752u, 0u},
+ {"Cdot;", 5u, 266u, 0u},
+ {"Cedilla;", 8u, 184u, 0u},
+ {"CenterDot;", 10u, 183u, 0u},
+ {"Cfr;", 4u, 8493u, 0u},
+ {"Chi;", 4u, 935u, 0u},
+ {"CircleDot;", 10u, 8857u, 0u},
+ {"CircleMinus;", 12u, 8854u, 0u},
+ {"CirclePlus;", 11u, 8853u, 0u},
+ {"CircleTimes;", 12u, 8855u, 0u},
+ {"ClockwiseContourIntegral;", 25u, 8754u, 0u},
+ {"CloseCurlyDoubleQuote;", 22u, 8221u, 0u},
+ {"CloseCurlyQuote;", 16u, 8217u, 0u},
+ {"Colon;", 6u, 8759u, 0u},
+ {"Colone;", 7u, 10868u, 0u},
+ {"Congruent;", 10u, 8801u, 0u},
+ {"Conint;", 7u, 8751u, 0u},
+ {"ContourIntegral;", 16u, 8750u, 0u},
+ {"Copf;", 5u, 8450u, 0u},
+ {"Coproduct;", 10u, 8720u, 0u},
+ {"CounterClockwiseContourIntegral;", 32u, 8755u, 0u},
+ {"Cross;", 6u, 10799u, 0u},
+ {"Cscr;", 5u, 119966u, 0u},
+ {"Cup;", 4u, 8915u, 0u},
+ {"CupCap;", 7u, 8781u, 0u},
+ {"DD;", 3u, 8517u, 0u},
+ {"DDotrahd;", 9u, 10513u, 0u},
+ {"DJcy;", 5u, 1026u, 0u},
+ {"DScy;", 5u, 1029u, 0u},
+ {"DZcy;", 5u, 1039u, 0u},
+ {"Dagger;", 7u, 8225u, 0u},
+ {"Darr;", 5u, 8609u, 0u},
+ {"Dashv;", 6u, 10980u, 0u},
+ {"Dcaron;", 7u, 270u, 0u},
+ {"Dcy;", 4u, 1044u, 0u},
+ {"Del;", 4u, 8711u, 0u},
+ {"Delta;", 6u, 916u, 0u},
+ {"Dfr;", 4u, 120071u, 0u},
+ {"DiacriticalAcute;", 17u, 180u, 0u},
+ {"DiacriticalDot;", 15u, 729u, 0u},
+ {"DiacriticalDoubleAcute;", 23u, 733u, 0u},
+ {"DiacriticalGrave;", 17u, 96u, 0u},
+ {"DiacriticalTilde;", 17u, 732u, 0u},
+ {"Diamond;", 8u, 8900u, 0u},
+ {"DifferentialD;", 14u, 8518u, 0u},
+ {"Dopf;", 5u, 120123u, 0u},
+ {"Dot;", 4u, 168u, 0u},
+ {"DotDot;", 7u, 8412u, 0u},
+ {"DotEqual;", 9u, 8784u, 0u},
+ {"DoubleContourIntegral;", 22u, 8751u, 0u},
+ {"DoubleDot;", 10u, 168u, 0u},
+ {"DoubleDownArrow;", 16u, 8659u, 0u},
+ {"DoubleLeftArrow;", 16u, 8656u, 0u},
+ {"DoubleLeftRightArrow;", 21u, 8660u, 0u},
+ {"DoubleLeftTee;", 14u, 10980u, 0u},
+ {"DoubleLongLeftArrow;", 20u, 10232u, 0u},
+ {"DoubleLongLeftRightArrow;", 25u, 10234u, 0u},
+ {"DoubleLongRightArrow;", 21u, 10233u, 0u},
+ {"DoubleRightArrow;", 17u, 8658u, 0u},
+ {"DoubleRightTee;", 15u, 8872u, 0u},
+ {"DoubleUpArrow;", 14u, 8657u, 0u},
+ {"DoubleUpDownArrow;", 18u, 8661u, 0u},
+ {"DoubleVerticalBar;", 18u, 8741u, 0u},
+ {"DownArrow;", 10u, 8595u, 0u},
+ {"DownArrowBar;", 13u, 10515u, 0u},
+ {"DownArrowUpArrow;", 17u, 8693u, 0u},
+ {"DownBreve;", 10u, 785u, 0u},
+ {"DownLeftRightVector;", 20u, 10576u, 0u},
+ {"DownLeftTeeVector;", 18u, 10590u, 0u},
+ {"DownLeftVector;", 15u, 8637u, 0u},
+ {"DownLeftVectorBar;", 18u, 10582u, 0u},
+ {"DownRightTeeVector;", 19u, 10591u, 0u},
+ {"DownRightVector;", 16u, 8641u, 0u},
+ {"DownRightVectorBar;", 19u, 10583u, 0u},
+ {"DownTee;", 8u, 8868u, 0u},
+ {"DownTeeArrow;", 13u, 8615u, 0u},
+ {"Downarrow;", 10u, 8659u, 0u},
+ {"Dscr;", 5u, 119967u, 0u},
+ {"Dstrok;", 7u, 272u, 0u},
+ {"ENG;", 4u, 330u, 0u},
+ {"ETH", 3u, 208u, 0u},
+ {"ETH;", 4u, 208u, 0u},
+ {"Eacute", 6u, 201u, 0u},
+ {"Eacute;", 7u, 201u, 0u},
+ {"Ecaron;", 7u, 282u, 0u},
+ {"Ecirc", 5u, 202u, 0u},
+ {"Ecirc;", 6u, 202u, 0u},
+ {"Ecy;", 4u, 1069u, 0u},
+ {"Edot;", 5u, 278u, 0u},
+ {"Efr;", 4u, 120072u, 0u},
+ {"Egrave", 6u, 200u, 0u},
+ {"Egrave;", 7u, 200u, 0u},
+ {"Element;", 8u, 8712u, 0u},
+ {"Emacr;", 6u, 274u, 0u},
+ {"EmptySmallSquare;", 17u, 9723u, 0u},
+ {"EmptyVerySmallSquare;", 21u, 9643u, 0u},
+ {"Eogon;", 6u, 280u, 0u},
+ {"Eopf;", 5u, 120124u, 0u},
+ {"Epsilon;", 8u, 917u, 0u},
+ {"Equal;", 6u, 10869u, 0u},
+ {"EqualTilde;", 11u, 8770u, 0u},
+ {"Equilibrium;", 12u, 8652u, 0u},
+ {"Escr;", 5u, 8496u, 0u},
+ {"Esim;", 5u, 10867u, 0u},
+ {"Eta;", 4u, 919u, 0u},
+ {"Euml", 4u, 203u, 0u},
+ {"Euml;", 5u, 203u, 0u},
+ {"Exists;", 7u, 8707u, 0u},
+ {"ExponentialE;", 13u, 8519u, 0u},
+ {"Fcy;", 4u, 1060u, 0u},
+ {"Ffr;", 4u, 120073u, 0u},
+ {"FilledSmallSquare;", 18u, 9724u, 0u},
+ {"FilledVerySmallSquare;", 22u, 9642u, 0u},
+ {"Fopf;", 5u, 120125u, 0u},
+ {"ForAll;", 7u, 8704u, 0u},
+ {"Fouriertrf;", 11u, 8497u, 0u},
+ {"Fscr;", 5u, 8497u, 0u},
+ {"GJcy;", 5u, 1027u, 0u},
+ {"GT", 2u, 62u, 0u},
+ {"GT;", 3u, 62u, 0u},
+ {"Gamma;", 6u, 915u, 0u},
+ {"Gammad;", 7u, 988u, 0u},
+ {"Gbreve;", 7u, 286u, 0u},
+ {"Gcedil;", 7u, 290u, 0u},
+ {"Gcirc;", 6u, 284u, 0u},
+ {"Gcy;", 4u, 1043u, 0u},
+ {"Gdot;", 5u, 288u, 0u},
+ {"Gfr;", 4u, 120074u, 0u},
+ {"Gg;", 3u, 8921u, 0u},
+ {"Gopf;", 5u, 120126u, 0u},
+ {"GreaterEqual;", 13u, 8805u, 0u},
+ {"GreaterEqualLess;", 17u, 8923u, 0u},
+ {"GreaterFullEqual;", 17u, 8807u, 0u},
+ {"GreaterGreater;", 15u, 10914u, 0u},
+ {"GreaterLess;", 12u, 8823u, 0u},
+ {"GreaterSlantEqual;", 18u, 10878u, 0u},
+ {"GreaterTilde;", 13u, 8819u, 0u},
+ {"Gscr;", 5u, 119970u, 0u},
+ {"Gt;", 3u, 8811u, 0u},
+ {"HARDcy;", 7u, 1066u, 0u},
+ {"Hacek;", 6u, 711u, 0u},
+ {"Hat;", 4u, 94u, 0u},
+ {"Hcirc;", 6u, 292u, 0u},
+ {"Hfr;", 4u, 8460u, 0u},
+ {"HilbertSpace;", 13u, 8459u, 0u},
+ {"Hopf;", 5u, 8461u, 0u},
+ {"HorizontalLine;", 15u, 9472u, 0u},
+ {"Hscr;", 5u, 8459u, 0u},
+ {"Hstrok;", 7u, 294u, 0u},
+ {"HumpDownHump;", 13u, 8782u, 0u},
+ {"HumpEqual;", 10u, 8783u, 0u},
+ {"IEcy;", 5u, 1045u, 0u},
+ {"IJlig;", 6u, 306u, 0u},
+ {"IOcy;", 5u, 1025u, 0u},
+ {"Iacute", 6u, 205u, 0u},
+ {"Iacute;", 7u, 205u, 0u},
+ {"Icirc", 5u, 206u, 0u},
+ {"Icirc;", 6u, 206u, 0u},
+ {"Icy;", 4u, 1048u, 0u},
+ {"Idot;", 5u, 304u, 0u},
+ {"Ifr;", 4u, 8465u, 0u},
+ {"Igrave", 6u, 204u, 0u},
+ {"Igrave;", 7u, 204u, 0u},
+ {"Im;", 3u, 8465u, 0u},
+ {"Imacr;", 6u, 298u, 0u},
+ {"ImaginaryI;", 11u, 8520u, 0u},
+ {"Implies;", 8u, 8658u, 0u},
+ {"Int;", 4u, 8748u, 0u},
+ {"Integral;", 9u, 8747u, 0u},
+ {"Intersection;", 13u, 8898u, 0u},
+ {"InvisibleComma;", 15u, 8291u, 0u},
+ {"InvisibleTimes;", 15u, 8290u, 0u},
+ {"Iogon;", 6u, 302u, 0u},
+ {"Iopf;", 5u, 120128u, 0u},
+ {"Iota;", 5u, 921u, 0u},
+ {"Iscr;", 5u, 8464u, 0u},
+ {"Itilde;", 7u, 296u, 0u},
+ {"Iukcy;", 6u, 1030u, 0u},
+ {"Iuml", 4u, 207u, 0u},
+ {"Iuml;", 5u, 207u, 0u},
+ {"Jcirc;", 6u, 308u, 0u},
+ {"Jcy;", 4u, 1049u, 0u},
+ {"Jfr;", 4u, 120077u, 0u},
+ {"Jopf;", 5u, 120129u, 0u},
+ {"Jscr;", 5u, 119973u, 0u},
+ {"Jsercy;", 7u, 1032u, 0u},
+ {"Jukcy;", 6u, 1028u, 0u},
+ {"KHcy;", 5u, 1061u, 0u},
+ {"KJcy;", 5u, 1036u, 0u},
+ {"Kappa;", 6u, 922u, 0u},
+ {"Kcedil;", 7u, 310u, 0u},
+ {"Kcy;", 4u, 1050u, 0u},
+ {"Kfr;", 4u, 120078u, 0u},
+ {"Kopf;", 5u, 120130u, 0u},
+ {"Kscr;", 5u, 119974u, 0u},
+ {"LJcy;", 5u, 1033u, 0u},
+ {"LT", 2u, 60u, 0u},
+ {"LT;", 3u, 60u, 0u},
+ {"Lacute;", 7u, 313u, 0u},
+ {"Lambda;", 7u, 923u, 0u},
+ {"Lang;", 5u, 10218u, 0u},
+ {"Laplacetrf;", 11u, 8466u, 0u},
+ {"Larr;", 5u, 8606u, 0u},
+ {"Lcaron;", 7u, 317u, 0u},
+ {"Lcedil;", 7u, 315u, 0u},
+ {"Lcy;", 4u, 1051u, 0u},
+ {"LeftAngleBracket;", 17u, 10216u, 0u},
+ {"LeftArrow;", 10u, 8592u, 0u},
+ {"LeftArrowBar;", 13u, 8676u, 0u},
+ {"LeftArrowRightArrow;", 20u, 8646u, 0u},
+ {"LeftCeiling;", 12u, 8968u, 0u},
+ {"LeftDoubleBracket;", 18u, 10214u, 0u},
+ {"LeftDownTeeVector;", 18u, 10593u, 0u},
+ {"LeftDownVector;", 15u, 8643u, 0u},
+ {"LeftDownVectorBar;", 18u, 10585u, 0u},
+ {"LeftFloor;", 10u, 8970u, 0u},
+ {"LeftRightArrow;", 15u, 8596u, 0u},
+ {"LeftRightVector;", 16u, 10574u, 0u},
+ {"LeftTee;", 8u, 8867u, 0u},
+ {"LeftTeeArrow;", 13u, 8612u, 0u},
+ {"LeftTeeVector;", 14u, 10586u, 0u},
+ {"LeftTriangle;", 13u, 8882u, 0u},
+ {"LeftTriangleBar;", 16u, 10703u, 0u},
+ {"LeftTriangleEqual;", 18u, 8884u, 0u},
+ {"LeftUpDownVector;", 17u, 10577u, 0u},
+ {"LeftUpTeeVector;", 16u, 10592u, 0u},
+ {"LeftUpVector;", 13u, 8639u, 0u},
+ {"LeftUpVectorBar;", 16u, 10584u, 0u},
+ {"LeftVector;", 11u, 8636u, 0u},
+ {"LeftVectorBar;", 14u, 10578u, 0u},
+ {"Leftarrow;", 10u, 8656u, 0u},
+ {"Leftrightarrow;", 15u, 8660u, 0u},
+ {"LessEqualGreater;", 17u, 8922u, 0u},
+ {"LessFullEqual;", 14u, 8806u, 0u},
+ {"LessGreater;", 12u, 8822u, 0u},
+ {"LessLess;", 9u, 10913u, 0u},
+ {"LessSlantEqual;", 15u, 10877u, 0u},
+ {"LessTilde;", 10u, 8818u, 0u},
+ {"Lfr;", 4u, 120079u, 0u},
+ {"Ll;", 3u, 8920u, 0u},
+ {"Lleftarrow;", 11u, 8666u, 0u},
+ {"Lmidot;", 7u, 319u, 0u},
+ {"LongLeftArrow;", 14u, 10229u, 0u},
+ {"LongLeftRightArrow;", 19u, 10231u, 0u},
+ {"LongRightArrow;", 15u, 10230u, 0u},
+ {"Longleftarrow;", 14u, 10232u, 0u},
+ {"Longleftrightarrow;", 19u, 10234u, 0u},
+ {"Longrightarrow;", 15u, 10233u, 0u},
+ {"Lopf;", 5u, 120131u, 0u},
+ {"LowerLeftArrow;", 15u, 8601u, 0u},
+ {"LowerRightArrow;", 16u, 8600u, 0u},
+ {"Lscr;", 5u, 8466u, 0u},
+ {"Lsh;", 4u, 8624u, 0u},
+ {"Lstrok;", 7u, 321u, 0u},
+ {"Lt;", 3u, 8810u, 0u},
+ {"Map;", 4u, 10501u, 0u},
+ {"Mcy;", 4u, 1052u, 0u},
+ {"MediumSpace;", 12u, 8287u, 0u},
+ {"Mellintrf;", 10u, 8499u, 0u},
+ {"Mfr;", 4u, 120080u, 0u},
+ {"MinusPlus;", 10u, 8723u, 0u},
+ {"Mopf;", 5u, 120132u, 0u},
+ {"Mscr;", 5u, 8499u, 0u},
+ {"Mu;", 3u, 924u, 0u},
+ {"NJcy;", 5u, 1034u, 0u},
+ {"Nacute;", 7u, 323u, 0u},
+ {"Ncaron;", 7u, 327u, 0u},
+ {"Ncedil;", 7u, 325u, 0u},
+ {"Ncy;", 4u, 1053u, 0u},
+ {"NegativeMediumSpace;", 20u, 8203u, 0u},
+ {"NegativeThickSpace;", 19u, 8203u, 0u},
+ {"NegativeThinSpace;", 18u, 8203u, 0u},
+ {"NegativeVeryThinSpace;", 22u, 8203u, 0u},
+ {"NestedGreaterGreater;", 21u, 8811u, 0u},
+ {"NestedLessLess;", 15u, 8810u, 0u},
+ {"NewLine;", 8u, 10u, 0u},
+ {"Nfr;", 4u, 120081u, 0u},
+ {"NoBreak;", 8u, 8288u, 0u},
+ {"NonBreakingSpace;", 17u, 160u, 0u},
+ {"Nopf;", 5u, 8469u, 0u},
+ {"Not;", 4u, 10988u, 0u},
+ {"NotCongruent;", 13u, 8802u, 0u},
+ {"NotCupCap;", 10u, 8813u, 0u},
+ {"NotDoubleVerticalBar;", 21u, 8742u, 0u},
+ {"NotElement;", 11u, 8713u, 0u},
+ {"NotEqual;", 9u, 8800u, 0u},
+ {"NotEqualTilde;", 14u, 8770u, 824u},
+ {"NotExists;", 10u, 8708u, 0u},
+ {"NotGreater;", 11u, 8815u, 0u},
+ {"NotGreaterEqual;", 16u, 8817u, 0u},
+ {"NotGreaterFullEqual;", 20u, 8807u, 824u},
+ {"NotGreaterGreater;", 18u, 8811u, 824u},
+ {"NotGreaterLess;", 15u, 8825u, 0u},
+ {"NotGreaterSlantEqual;", 21u, 10878u, 824u},
+ {"NotGreaterTilde;", 16u, 8821u, 0u},
+ {"NotHumpDownHump;", 16u, 8782u, 824u},
+ {"NotHumpEqual;", 13u, 8783u, 824u},
+ {"NotLeftTriangle;", 16u, 8938u, 0u},
+ {"NotLeftTriangleBar;", 19u, 10703u, 824u},
+ {"NotLeftTriangleEqual;", 21u, 8940u, 0u},
+ {"NotLess;", 8u, 8814u, 0u},
+ {"NotLessEqual;", 13u, 8816u, 0u},
+ {"NotLessGreater;", 15u, 8824u, 0u},
+ {"NotLessLess;", 12u, 8810u, 824u},
+ {"NotLessSlantEqual;", 18u, 10877u, 824u},
+ {"NotLessTilde;", 13u, 8820u, 0u},
+ {"NotNestedGreaterGreater;", 24u, 10914u, 824u},
+ {"NotNestedLessLess;", 18u, 10913u, 824u},
+ {"NotPrecedes;", 12u, 8832u, 0u},
+ {"NotPrecedesEqual;", 17u, 10927u, 824u},
+ {"NotPrecedesSlantEqual;", 22u, 8928u, 0u},
+ {"NotReverseElement;", 18u, 8716u, 0u},
+ {"NotRightTriangle;", 17u, 8939u, 0u},
+ {"NotRightTriangleBar;", 20u, 10704u, 824u},
+ {"NotRightTriangleEqual;", 22u, 8941u, 0u},
+ {"NotSquareSubset;", 16u, 8847u, 824u},
+ {"NotSquareSubsetEqual;", 21u, 8930u, 0u},
+ {"NotSquareSuperset;", 18u, 8848u, 824u},
+ {"NotSquareSupersetEqual;", 23u, 8931u, 0u},
+ {"NotSubset;", 10u, 8834u, 8402u},
+ {"NotSubsetEqual;", 15u, 8840u, 0u},
+ {"NotSucceeds;", 12u, 8833u, 0u},
+ {"NotSucceedsEqual;", 17u, 10928u, 824u},
+ {"NotSucceedsSlantEqual;", 22u, 8929u, 0u},
+ {"NotSucceedsTilde;", 17u, 8831u, 824u},
+ {"NotSuperset;", 12u, 8835u, 8402u},
+ {"NotSupersetEqual;", 17u, 8841u, 0u},
+ {"NotTilde;", 9u, 8769u, 0u},
+ {"NotTildeEqual;", 14u, 8772u, 0u},
+ {"NotTildeFullEqual;", 18u, 8775u, 0u},
+ {"NotTildeTilde;", 14u, 8777u, 0u},
+ {"NotVerticalBar;", 15u, 8740u, 0u},
+ {"Nscr;", 5u, 119977u, 0u},
+ {"Ntilde", 6u, 209u, 0u},
+ {"Ntilde;", 7u, 209u, 0u},
+ {"Nu;", 3u, 925u, 0u},
+ {"OElig;", 6u, 338u, 0u},
+ {"Oacute", 6u, 211u, 0u},
+ {"Oacute;", 7u, 211u, 0u},
+ {"Ocirc", 5u, 212u, 0u},
+ {"Ocirc;", 6u, 212u, 0u},
+ {"Ocy;", 4u, 1054u, 0u},
+ {"Odblac;", 7u, 336u, 0u},
+ {"Ofr;", 4u, 120082u, 0u},
+ {"Ograve", 6u, 210u, 0u},
+ {"Ograve;", 7u, 210u, 0u},
+ {"Omacr;", 6u, 332u, 0u},
+ {"Omega;", 6u, 937u, 0u},
+ {"Omicron;", 8u, 927u, 0u},
+ {"Oopf;", 5u, 120134u, 0u},
+ {"OpenCurlyDoubleQuote;", 21u, 8220u, 0u},
+ {"OpenCurlyQuote;", 15u, 8216u, 0u},
+ {"Or;", 3u, 10836u, 0u},
+ {"Oscr;", 5u, 119978u, 0u},
+ {"Oslash", 6u, 216u, 0u},
+ {"Oslash;", 7u, 216u, 0u},
+ {"Otilde", 6u, 213u, 0u},
+ {"Otilde;", 7u, 213u, 0u},
+ {"Otimes;", 7u, 10807u, 0u},
+ {"Ouml", 4u, 214u, 0u},
+ {"Ouml;", 5u, 214u, 0u},
+ {"OverBar;", 8u, 8254u, 0u},
+ {"OverBrace;", 10u, 9182u, 0u},
+ {"OverBracket;", 12u, 9140u, 0u},
+ {"OverParenthesis;", 16u, 9180u, 0u},
+ {"PartialD;", 9u, 8706u, 0u},
+ {"Pcy;", 4u, 1055u, 0u},
+ {"Pfr;", 4u, 120083u, 0u},
+ {"Phi;", 4u, 934u, 0u},
+ {"Pi;", 3u, 928u, 0u},
+ {"PlusMinus;", 10u, 177u, 0u},
+ {"Poincareplane;", 14u, 8460u, 0u},
+ {"Popf;", 5u, 8473u, 0u},
+ {"Pr;", 3u, 10939u, 0u},
+ {"Precedes;", 9u, 8826u, 0u},
+ {"PrecedesEqual;", 14u, 10927u, 0u},
+ {"PrecedesSlantEqual;", 19u, 8828u, 0u},
+ {"PrecedesTilde;", 14u, 8830u, 0u},
+ {"Prime;", 6u, 8243u, 0u},
+ {"Product;", 8u, 8719u, 0u},
+ {"Proportion;", 11u, 8759u, 0u},
+ {"Proportional;", 13u, 8733u, 0u},
+ {"Pscr;", 5u, 119979u, 0u},
+ {"Psi;", 4u, 936u, 0u},
+ {"QUOT", 4u, 34u, 0u},
+ {"QUOT;", 5u, 34u, 0u},
+ {"Qfr;", 4u, 120084u, 0u},
+ {"Qopf;", 5u, 8474u, 0u},
+ {"Qscr;", 5u, 119980u, 0u},
+ {"RBarr;", 6u, 10512u, 0u},
+ {"REG", 3u, 174u, 0u},
+ {"REG;", 4u, 174u, 0u},
+ {"Racute;", 7u, 340u, 0u},
+ {"Rang;", 5u, 10219u, 0u},
+ {"Rarr;", 5u, 8608u, 0u},
+ {"Rarrtl;", 7u, 10518u, 0u},
+ {"Rcaron;", 7u, 344u, 0u},
+ {"Rcedil;", 7u, 342u, 0u},
+ {"Rcy;", 4u, 1056u, 0u},
+ {"Re;", 3u, 8476u, 0u},
+ {"ReverseElement;", 15u, 8715u, 0u},
+ {"ReverseEquilibrium;", 19u, 8651u, 0u},
+ {"ReverseUpEquilibrium;", 21u, 10607u, 0u},
+ {"Rfr;", 4u, 8476u, 0u},
+ {"Rho;", 4u, 929u, 0u},
+ {"RightAngleBracket;", 18u, 10217u, 0u},
+ {"RightArrow;", 11u, 8594u, 0u},
+ {"RightArrowBar;", 14u, 8677u, 0u},
+ {"RightArrowLeftArrow;", 20u, 8644u, 0u},
+ {"RightCeiling;", 13u, 8969u, 0u},
+ {"RightDoubleBracket;", 19u, 10215u, 0u},
+ {"RightDownTeeVector;", 19u, 10589u, 0u},
+ {"RightDownVector;", 16u, 8642u, 0u},
+ {"RightDownVectorBar;", 19u, 10581u, 0u},
+ {"RightFloor;", 11u, 8971u, 0u},
+ {"RightTee;", 9u, 8866u, 0u},
+ {"RightTeeArrow;", 14u, 8614u, 0u},
+ {"RightTeeVector;", 15u, 10587u, 0u},
+ {"RightTriangle;", 14u, 8883u, 0u},
+ {"RightTriangleBar;", 17u, 10704u, 0u},
+ {"RightTriangleEqual;", 19u, 8885u, 0u},
+ {"RightUpDownVector;", 18u, 10575u, 0u},
+ {"RightUpTeeVector;", 17u, 10588u, 0u},
+ {"RightUpVector;", 14u, 8638u, 0u},
+ {"RightUpVectorBar;", 17u, 10580u, 0u},
+ {"RightVector;", 12u, 8640u, 0u},
+ {"RightVectorBar;", 15u, 10579u, 0u},
+ {"Rightarrow;", 11u, 8658u, 0u},
+ {"Ropf;", 5u, 8477u, 0u},
+ {"RoundImplies;", 13u, 10608u, 0u},
+ {"Rrightarrow;", 12u, 8667u, 0u},
+ {"Rscr;", 5u, 8475u, 0u},
+ {"Rsh;", 4u, 8625u, 0u},
+ {"RuleDelayed;", 12u, 10740u, 0u},
+ {"SHCHcy;", 7u, 1065u, 0u},
+ {"SHcy;", 5u, 1064u, 0u},
+ {"SOFTcy;", 7u, 1068u, 0u},
+ {"Sacute;", 7u, 346u, 0u},
+ {"Sc;", 3u, 10940u, 0u},
+ {"Scaron;", 7u, 352u, 0u},
+ {"Scedil;", 7u, 350u, 0u},
+ {"Scirc;", 6u, 348u, 0u},
+ {"Scy;", 4u, 1057u, 0u},
+ {"Sfr;", 4u, 120086u, 0u},
+ {"ShortDownArrow;", 15u, 8595u, 0u},
+ {"ShortLeftArrow;", 15u, 8592u, 0u},
+ {"ShortRightArrow;", 16u, 8594u, 0u},
+ {"ShortUpArrow;", 13u, 8593u, 0u},
+ {"Sigma;", 6u, 931u, 0u},
+ {"SmallCircle;", 12u, 8728u, 0u},
+ {"Sopf;", 5u, 120138u, 0u},
+ {"Sqrt;", 5u, 8730u, 0u},
+ {"Square;", 7u, 9633u, 0u},
+ {"SquareIntersection;", 19u, 8851u, 0u},
+ {"SquareSubset;", 13u, 8847u, 0u},
+ {"SquareSubsetEqual;", 18u, 8849u, 0u},
+ {"SquareSuperset;", 15u, 8848u, 0u},
+ {"SquareSupersetEqual;", 20u, 8850u, 0u},
+ {"SquareUnion;", 12u, 8852u, 0u},
+ {"Sscr;", 5u, 119982u, 0u},
+ {"Star;", 5u, 8902u, 0u},
+ {"Sub;", 4u, 8912u, 0u},
+ {"Subset;", 7u, 8912u, 0u},
+ {"SubsetEqual;", 12u, 8838u, 0u},
+ {"Succeeds;", 9u, 8827u, 0u},
+ {"SucceedsEqual;", 14u, 10928u, 0u},
+ {"SucceedsSlantEqual;", 19u, 8829u, 0u},
+ {"SucceedsTilde;", 14u, 8831u, 0u},
+ {"SuchThat;", 9u, 8715u, 0u},
+ {"Sum;", 4u, 8721u, 0u},
+ {"Sup;", 4u, 8913u, 0u},
+ {"Superset;", 9u, 8835u, 0u},
+ {"SupersetEqual;", 14u, 8839u, 0u},
+ {"Supset;", 7u, 8913u, 0u},
+ {"THORN", 5u, 222u, 0u},
+ {"THORN;", 6u, 222u, 0u},
+ {"TRADE;", 6u, 8482u, 0u},
+ {"TSHcy;", 6u, 1035u, 0u},
+ {"TScy;", 5u, 1062u, 0u},
+ {"Tab;", 4u, 9u, 0u},
+ {"Tau;", 4u, 932u, 0u},
+ {"Tcaron;", 7u, 356u, 0u},
+ {"Tcedil;", 7u, 354u, 0u},
+ {"Tcy;", 4u, 1058u, 0u},
+ {"Tfr;", 4u, 120087u, 0u},
+ {"Therefore;", 10u, 8756u, 0u},
+ {"Theta;", 6u, 920u, 0u},
+ {"ThickSpace;", 11u, 8287u, 8202u},
+ {"ThinSpace;", 10u, 8201u, 0u},
+ {"Tilde;", 6u, 8764u, 0u},
+ {"TildeEqual;", 11u, 8771u, 0u},
+ {"TildeFullEqual;", 15u, 8773u, 0u},
+ {"TildeTilde;", 11u, 8776u, 0u},
+ {"Topf;", 5u, 120139u, 0u},
+ {"TripleDot;", 10u, 8411u, 0u},
+ {"Tscr;", 5u, 119983u, 0u},
+ {"Tstrok;", 7u, 358u, 0u},
+ {"Uacute", 6u, 218u, 0u},
+ {"Uacute;", 7u, 218u, 0u},
+ {"Uarr;", 5u, 8607u, 0u},
+ {"Uarrocir;", 9u, 10569u, 0u},
+ {"Ubrcy;", 6u, 1038u, 0u},
+ {"Ubreve;", 7u, 364u, 0u},
+ {"Ucirc", 5u, 219u, 0u},
+ {"Ucirc;", 6u, 219u, 0u},
+ {"Ucy;", 4u, 1059u, 0u},
+ {"Udblac;", 7u, 368u, 0u},
+ {"Ufr;", 4u, 120088u, 0u},
+ {"Ugrave", 6u, 217u, 0u},
+ {"Ugrave;", 7u, 217u, 0u},
+ {"Umacr;", 6u, 362u, 0u},
+ {"UnderBar;", 9u, 95u, 0u},
+ {"UnderBrace;", 11u, 9183u, 0u},
+ {"UnderBracket;", 13u, 9141u, 0u},
+ {"UnderParenthesis;", 17u, 9181u, 0u},
+ {"Union;", 6u, 8899u, 0u},
+ {"UnionPlus;", 10u, 8846u, 0u},
+ {"Uogon;", 6u, 370u, 0u},
+ {"Uopf;", 5u, 120140u, 0u},
+ {"UpArrow;", 8u, 8593u, 0u},
+ {"UpArrowBar;", 11u, 10514u, 0u},
+ {"UpArrowDownArrow;", 17u, 8645u, 0u},
+ {"UpDownArrow;", 12u, 8597u, 0u},
+ {"UpEquilibrium;", 14u, 10606u, 0u},
+ {"UpTee;", 6u, 8869u, 0u},
+ {"UpTeeArrow;", 11u, 8613u, 0u},
+ {"Uparrow;", 8u, 8657u, 0u},
+ {"Updownarrow;", 12u, 8661u, 0u},
+ {"UpperLeftArrow;", 15u, 8598u, 0u},
+ {"UpperRightArrow;", 16u, 8599u, 0u},
+ {"Upsi;", 5u, 978u, 0u},
+ {"Upsilon;", 8u, 933u, 0u},
+ {"Uring;", 6u, 366u, 0u},
+ {"Uscr;", 5u, 119984u, 0u},
+ {"Utilde;", 7u, 360u, 0u},
+ {"Uuml", 4u, 220u, 0u},
+ {"Uuml;", 5u, 220u, 0u},
+ {"VDash;", 6u, 8875u, 0u},
+ {"Vbar;", 5u, 10987u, 0u},
+ {"Vcy;", 4u, 1042u, 0u},
+ {"Vdash;", 6u, 8873u, 0u},
+ {"Vdashl;", 7u, 10982u, 0u},
+ {"Vee;", 4u, 8897u, 0u},
+ {"Verbar;", 7u, 8214u, 0u},
+ {"Vert;", 5u, 8214u, 0u},
+ {"VerticalBar;", 12u, 8739u, 0u},
+ {"VerticalLine;", 13u, 124u, 0u},
+ {"VerticalSeparator;", 18u, 10072u, 0u},
+ {"VerticalTilde;", 14u, 8768u, 0u},
+ {"VeryThinSpace;", 14u, 8202u, 0u},
+ {"Vfr;", 4u, 120089u, 0u},
+ {"Vopf;", 5u, 120141u, 0u},
+ {"Vscr;", 5u, 119985u, 0u},
+ {"Vvdash;", 7u, 8874u, 0u},
+ {"Wcirc;", 6u, 372u, 0u},
+ {"Wedge;", 6u, 8896u, 0u},
+ {"Wfr;", 4u, 120090u, 0u},
+ {"Wopf;", 5u, 120142u, 0u},
+ {"Wscr;", 5u, 119986u, 0u},
+ {"Xfr;", 4u, 120091u, 0u},
+ {"Xi;", 3u, 926u, 0u},
+ {"Xopf;", 5u, 120143u, 0u},
+ {"Xscr;", 5u, 119987u, 0u},
+ {"YAcy;", 5u, 1071u, 0u},
+ {"YIcy;", 5u, 1031u, 0u},
+ {"YUcy;", 5u, 1070u, 0u},
+ {"Yacute", 6u, 221u, 0u},
+ {"Yacute;", 7u, 221u, 0u},
+ {"Ycirc;", 6u, 374u, 0u},
+ {"Ycy;", 4u, 1067u, 0u},
+ {"Yfr;", 4u, 120092u, 0u},
+ {"Yopf;", 5u, 120144u, 0u},
+ {"Yscr;", 5u, 119988u, 0u},
+ {"Yuml;", 5u, 376u, 0u},
+ {"ZHcy;", 5u, 1046u, 0u},
+ {"Zacute;", 7u, 377u, 0u},
+ {"Zcaron;", 7u, 381u, 0u},
+ {"Zcy;", 4u, 1047u, 0u},
+ {"Zdot;", 5u, 379u, 0u},
+ {"ZeroWidthSpace;", 15u, 8203u, 0u},
+ {"Zeta;", 5u, 918u, 0u},
+ {"Zfr;", 4u, 8488u, 0u},
+ {"Zopf;", 5u, 8484u, 0u},
+ {"Zscr;", 5u, 119989u, 0u},
+ {"aacute", 6u, 225u, 0u},
+ {"aacute;", 7u, 225u, 0u},
+ {"abreve;", 7u, 259u, 0u},
+ {"ac;", 3u, 8766u, 0u},
+ {"acE;", 4u, 8766u, 819u},
+ {"acd;", 4u, 8767u, 0u},
+ {"acirc", 5u, 226u, 0u},
+ {"acirc;", 6u, 226u, 0u},
+ {"acute", 5u, 180u, 0u},
+ {"acute;", 6u, 180u, 0u},
+ {"acy;", 4u, 1072u, 0u},
+ {"aelig", 5u, 230u, 0u},
+ {"aelig;", 6u, 230u, 0u},
+ {"af;", 3u, 8289u, 0u},
+ {"afr;", 4u, 120094u, 0u},
+ {"agrave", 6u, 224u, 0u},
+ {"agrave;", 7u, 224u, 0u},
+ {"alefsym;", 8u, 8501u, 0u},
+ {"aleph;", 6u, 8501u, 0u},
+ {"alpha;", 6u, 945u, 0u},
+ {"amacr;", 6u, 257u, 0u},
+ {"amalg;", 6u, 10815u, 0u},
+ {"amp", 3u, 38u, 0u},
+ {"amp;", 4u, 38u, 0u},
+ {"and;", 4u, 8743u, 0u},
+ {"andand;", 7u, 10837u, 0u},
+ {"andd;", 5u, 10844u, 0u},
+ {"andslope;", 9u, 10840u, 0u},
+ {"andv;", 5u, 10842u, 0u},
+ {"ang;", 4u, 8736u, 0u},
+ {"ange;", 5u, 10660u, 0u},
+ {"angle;", 6u, 8736u, 0u},
+ {"angmsd;", 7u, 8737u, 0u},
+ {"angmsdaa;", 9u, 10664u, 0u},
+ {"angmsdab;", 9u, 10665u, 0u},
+ {"angmsdac;", 9u, 10666u, 0u},
+ {"angmsdad;", 9u, 10667u, 0u},
+ {"angmsdae;", 9u, 10668u, 0u},
+ {"angmsdaf;", 9u, 10669u, 0u},
+ {"angmsdag;", 9u, 10670u, 0u},
+ {"angmsdah;", 9u, 10671u, 0u},
+ {"angrt;", 6u, 8735u, 0u},
+ {"angrtvb;", 8u, 8894u, 0u},
+ {"angrtvbd;", 9u, 10653u, 0u},
+ {"angsph;", 7u, 8738u, 0u},
+ {"angst;", 6u, 197u, 0u},
+ {"angzarr;", 8u, 9084u, 0u},
+ {"aogon;", 6u, 261u, 0u},
+ {"aopf;", 5u, 120146u, 0u},
+ {"ap;", 3u, 8776u, 0u},
+ {"apE;", 4u, 10864u, 0u},
+ {"apacir;", 7u, 10863u, 0u},
+ {"ape;", 4u, 8778u, 0u},
+ {"apid;", 5u, 8779u, 0u},
+ {"apos;", 5u, 39u, 0u},
+ {"approx;", 7u, 8776u, 0u},
+ {"approxeq;", 9u, 8778u, 0u},
+ {"aring", 5u, 229u, 0u},
+ {"aring;", 6u, 229u, 0u},
+ {"ascr;", 5u, 119990u, 0u},
+ {"ast;", 4u, 42u, 0u},
+ {"asymp;", 6u, 8776u, 0u},
+ {"asympeq;", 8u, 8781u, 0u},
+ {"atilde", 6u, 227u, 0u},
+ {"atilde;", 7u, 227u, 0u},
+ {"auml", 4u, 228u, 0u},
+ {"auml;", 5u, 228u, 0u},
+ {"awconint;", 9u, 8755u, 0u},
+ {"awint;", 6u, 10769u, 0u},
+ {"bNot;", 5u, 10989u, 0u},
+ {"backcong;", 9u, 8780u, 0u},
+ {"backepsilon;", 12u, 1014u, 0u},
+ {"backprime;", 10u, 8245u, 0u},
+ {"backsim;", 8u, 8765u, 0u},
+ {"backsimeq;", 10u, 8909u, 0u},
+ {"barvee;", 7u, 8893u, 0u},
+ {"barwed;", 7u, 8965u, 0u},
+ {"barwedge;", 9u, 8965u, 0u},
+ {"bbrk;", 5u, 9141u, 0u},
+ {"bbrktbrk;", 9u, 9142u, 0u},
+ {"bcong;", 6u, 8780u, 0u},
+ {"bcy;", 4u, 1073u, 0u},
+ {"bdquo;", 6u, 8222u, 0u},
+ {"becaus;", 7u, 8757u, 0u},
+ {"because;", 8u, 8757u, 0u},
+ {"bemptyv;", 8u, 10672u, 0u},
+ {"bepsi;", 6u, 1014u, 0u},
+ {"bernou;", 7u, 8492u, 0u},
+ {"beta;", 5u, 946u, 0u},
+ {"beth;", 5u, 8502u, 0u},
+ {"between;", 8u, 8812u, 0u},
+ {"bfr;", 4u, 120095u, 0u},
+ {"bigcap;", 7u, 8898u, 0u},
+ {"bigcirc;", 8u, 9711u, 0u},
+ {"bigcup;", 7u, 8899u, 0u},
+ {"bigodot;", 8u, 10752u, 0u},
+ {"bigoplus;", 9u, 10753u, 0u},
+ {"bigotimes;", 10u, 10754u, 0u},
+ {"bigsqcup;", 9u, 10758u, 0u},
+ {"bigstar;", 8u, 9733u, 0u},
+ {"bigtriangledown;", 16u, 9661u, 0u},
+ {"bigtriangleup;", 14u, 9651u, 0u},
+ {"biguplus;", 9u, 10756u, 0u},
+ {"bigvee;", 7u, 8897u, 0u},
+ {"bigwedge;", 9u, 8896u, 0u},
+ {"bkarow;", 7u, 10509u, 0u},
+ {"blacklozenge;", 13u, 10731u, 0u},
+ {"blacksquare;", 12u, 9642u, 0u},
+ {"blacktriangle;", 14u, 9652u, 0u},
+ {"blacktriangledown;", 18u, 9662u, 0u},
+ {"blacktriangleleft;", 18u, 9666u, 0u},
+ {"blacktriangleright;", 19u, 9656u, 0u},
+ {"blank;", 6u, 9251u, 0u},
+ {"blk12;", 6u, 9618u, 0u},
+ {"blk14;", 6u, 9617u, 0u},
+ {"blk34;", 6u, 9619u, 0u},
+ {"block;", 6u, 9608u, 0u},
+ {"bne;", 4u, 61u, 8421u},
+ {"bnequiv;", 8u, 8801u, 8421u},
+ {"bnot;", 5u, 8976u, 0u},
+ {"bopf;", 5u, 120147u, 0u},
+ {"bot;", 4u, 8869u, 0u},
+ {"bottom;", 7u, 8869u, 0u},
+ {"bowtie;", 7u, 8904u, 0u},
+ {"boxDL;", 6u, 9559u, 0u},
+ {"boxDR;", 6u, 9556u, 0u},
+ {"boxDl;", 6u, 9558u, 0u},
+ {"boxDr;", 6u, 9555u, 0u},
+ {"boxH;", 5u, 9552u, 0u},
+ {"boxHD;", 6u, 9574u, 0u},
+ {"boxHU;", 6u, 9577u, 0u},
+ {"boxHd;", 6u, 9572u, 0u},
+ {"boxHu;", 6u, 9575u, 0u},
+ {"boxUL;", 6u, 9565u, 0u},
+ {"boxUR;", 6u, 9562u, 0u},
+ {"boxUl;", 6u, 9564u, 0u},
+ {"boxUr;", 6u, 9561u, 0u},
+ {"boxV;", 5u, 9553u, 0u},
+ {"boxVH;", 6u, 9580u, 0u},
+ {"boxVL;", 6u, 9571u, 0u},
+ {"boxVR;", 6u, 9568u, 0u},
+ {"boxVh;", 6u, 9579u, 0u},
+ {"boxVl;", 6u, 9570u, 0u},
+ {"boxVr;", 6u, 9567u, 0u},
+ {"boxbox;", 7u, 10697u, 0u},
+ {"boxdL;", 6u, 9557u, 0u},
+ {"boxdR;", 6u, 9554u, 0u},
+ {"boxdl;", 6u, 9488u, 0u},
+ {"boxdr;", 6u, 9484u, 0u},
+ {"boxh;", 5u, 9472u, 0u},
+ {"boxhD;", 6u, 9573u, 0u},
+ {"boxhU;", 6u, 9576u, 0u},
+ {"boxhd;", 6u, 9516u, 0u},
+ {"boxhu;", 6u, 9524u, 0u},
+ {"boxminus;", 9u, 8863u, 0u},
+ {"boxplus;", 8u, 8862u, 0u},
+ {"boxtimes;", 9u, 8864u, 0u},
+ {"boxuL;", 6u, 9563u, 0u},
+ {"boxuR;", 6u, 9560u, 0u},
+ {"boxul;", 6u, 9496u, 0u},
+ {"boxur;", 6u, 9492u, 0u},
+ {"boxv;", 5u, 9474u, 0u},
+ {"boxvH;", 6u, 9578u, 0u},
+ {"boxvL;", 6u, 9569u, 0u},
+ {"boxvR;", 6u, 9566u, 0u},
+ {"boxvh;", 6u, 9532u, 0u},
+ {"boxvl;", 6u, 9508u, 0u},
+ {"boxvr;", 6u, 9500u, 0u},
+ {"bprime;", 7u, 8245u, 0u},
+ {"breve;", 6u, 728u, 0u},
+ {"brvbar", 6u, 166u, 0u},
+ {"brvbar;", 7u, 166u, 0u},
+ {"bscr;", 5u, 119991u, 0u},
+ {"bsemi;", 6u, 8271u, 0u},
+ {"bsim;", 5u, 8765u, 0u},
+ {"bsime;", 6u, 8909u, 0u},
+ {"bsol;", 5u, 92u, 0u},
+ {"bsolb;", 6u, 10693u, 0u},
+ {"bsolhsub;", 9u, 10184u, 0u},
+ {"bull;", 5u, 8226u, 0u},
+ {"bullet;", 7u, 8226u, 0u},
+ {"bump;", 5u, 8782u, 0u},
+ {"bumpE;", 6u, 10926u, 0u},
+ {"bumpe;", 6u, 8783u, 0u},
+ {"bumpeq;", 7u, 8783u, 0u},
+ {"cacute;", 7u, 263u, 0u},
+ {"cap;", 4u, 8745u, 0u},
+ {"capand;", 7u, 10820u, 0u},
+ {"capbrcup;", 9u, 10825u, 0u},
+ {"capcap;", 7u, 10827u, 0u},
+ {"capcup;", 7u, 10823u, 0u},
+ {"capdot;", 7u, 10816u, 0u},
+ {"caps;", 5u, 8745u, 65024u},
+ {"caret;", 6u, 8257u, 0u},
+ {"caron;", 6u, 711u, 0u},
+ {"ccaps;", 6u, 10829u, 0u},
+ {"ccaron;", 7u, 269u, 0u},
+ {"ccedil", 6u, 231u, 0u},
+ {"ccedil;", 7u, 231u, 0u},
+ {"ccirc;", 6u, 265u, 0u},
+ {"ccups;", 6u, 10828u, 0u},
+ {"ccupssm;", 8u, 10832u, 0u},
+ {"cdot;", 5u, 267u, 0u},
+ {"cedil", 5u, 184u, 0u},
+ {"cedil;", 6u, 184u, 0u},
+ {"cemptyv;", 8u, 10674u, 0u},
+ {"cent", 4u, 162u, 0u},
+ {"cent;", 5u, 162u, 0u},
+ {"centerdot;", 10u, 183u, 0u},
+ {"cfr;", 4u, 120096u, 0u},
+ {"chcy;", 5u, 1095u, 0u},
+ {"check;", 6u, 10003u, 0u},
+ {"checkmark;", 10u, 10003u, 0u},
+ {"chi;", 4u, 967u, 0u},
+ {"cir;", 4u, 9675u, 0u},
+ {"cirE;", 5u, 10691u, 0u},
+ {"circ;", 5u, 710u, 0u},
+ {"circeq;", 7u, 8791u, 0u},
+ {"circlearrowleft;", 16u, 8634u, 0u},
+ {"circlearrowright;", 17u, 8635u, 0u},
+ {"circledR;", 9u, 174u, 0u},
+ {"circledS;", 9u, 9416u, 0u},
+ {"circledast;", 11u, 8859u, 0u},
+ {"circledcirc;", 12u, 8858u, 0u},
+ {"circleddash;", 12u, 8861u, 0u},
+ {"cire;", 5u, 8791u, 0u},
+ {"cirfnint;", 9u, 10768u, 0u},
+ {"cirmid;", 7u, 10991u, 0u},
+ {"cirscir;", 8u, 10690u, 0u},
+ {"clubs;", 6u, 9827u, 0u},
+ {"clubsuit;", 9u, 9827u, 0u},
+ {"colon;", 6u, 58u, 0u},
+ {"colone;", 7u, 8788u, 0u},
+ {"coloneq;", 8u, 8788u, 0u},
+ {"comma;", 6u, 44u, 0u},
+ {"commat;", 7u, 64u, 0u},
+ {"comp;", 5u, 8705u, 0u},
+ {"compfn;", 7u, 8728u, 0u},
+ {"complement;", 11u, 8705u, 0u},
+ {"complexes;", 10u, 8450u, 0u},
+ {"cong;", 5u, 8773u, 0u},
+ {"congdot;", 8u, 10861u, 0u},
+ {"conint;", 7u, 8750u, 0u},
+ {"copf;", 5u, 120148u, 0u},
+ {"coprod;", 7u, 8720u, 0u},
+ {"copy", 4u, 169u, 0u},
+ {"copy;", 5u, 169u, 0u},
+ {"copysr;", 7u, 8471u, 0u},
+ {"crarr;", 6u, 8629u, 0u},
+ {"cross;", 6u, 10007u, 0u},
+ {"cscr;", 5u, 119992u, 0u},
+ {"csub;", 5u, 10959u, 0u},
+ {"csube;", 6u, 10961u, 0u},
+ {"csup;", 5u, 10960u, 0u},
+ {"csupe;", 6u, 10962u, 0u},
+ {"ctdot;", 6u, 8943u, 0u},
+ {"cudarrl;", 8u, 10552u, 0u},
+ {"cudarrr;", 8u, 10549u, 0u},
+ {"cuepr;", 6u, 8926u, 0u},
+ {"cuesc;", 6u, 8927u, 0u},
+ {"cularr;", 7u, 8630u, 0u},
+ {"cularrp;", 8u, 10557u, 0u},
+ {"cup;", 4u, 8746u, 0u},
+ {"cupbrcap;", 9u, 10824u, 0u},
+ {"cupcap;", 7u, 10822u, 0u},
+ {"cupcup;", 7u, 10826u, 0u},
+ {"cupdot;", 7u, 8845u, 0u},
+ {"cupor;", 6u, 10821u, 0u},
+ {"cups;", 5u, 8746u, 65024u},
+ {"curarr;", 7u, 8631u, 0u},
+ {"curarrm;", 8u, 10556u, 0u},
+ {"curlyeqprec;", 12u, 8926u, 0u},
+ {"curlyeqsucc;", 12u, 8927u, 0u},
+ {"curlyvee;", 9u, 8910u, 0u},
+ {"curlywedge;", 11u, 8911u, 0u},
+ {"curren", 6u, 164u, 0u},
+ {"curren;", 7u, 164u, 0u},
+ {"curvearrowleft;", 15u, 8630u, 0u},
+ {"curvearrowright;", 16u, 8631u, 0u},
+ {"cuvee;", 6u, 8910u, 0u},
+ {"cuwed;", 6u, 8911u, 0u},
+ {"cwconint;", 9u, 8754u, 0u},
+ {"cwint;", 6u, 8753u, 0u},
+ {"cylcty;", 7u, 9005u, 0u},
+ {"dArr;", 5u, 8659u, 0u},
+ {"dHar;", 5u, 10597u, 0u},
+ {"dagger;", 7u, 8224u, 0u},
+ {"daleth;", 7u, 8504u, 0u},
+ {"darr;", 5u, 8595u, 0u},
+ {"dash;", 5u, 8208u, 0u},
+ {"dashv;", 6u, 8867u, 0u},
+ {"dbkarow;", 8u, 10511u, 0u},
+ {"dblac;", 6u, 733u, 0u},
+ {"dcaron;", 7u, 271u, 0u},
+ {"dcy;", 4u, 1076u, 0u},
+ {"dd;", 3u, 8518u, 0u},
+ {"ddagger;", 8u, 8225u, 0u},
+ {"ddarr;", 6u, 8650u, 0u},
+ {"ddotseq;", 8u, 10871u, 0u},
+ {"deg", 3u, 176u, 0u},
+ {"deg;", 4u, 176u, 0u},
+ {"delta;", 6u, 948u, 0u},
+ {"demptyv;", 8u, 10673u, 0u},
+ {"dfisht;", 7u, 10623u, 0u},
+ {"dfr;", 4u, 120097u, 0u},
+ {"dharl;", 6u, 8643u, 0u},
+ {"dharr;", 6u, 8642u, 0u},
+ {"diam;", 5u, 8900u, 0u},
+ {"diamond;", 8u, 8900u, 0u},
+ {"diamondsuit;", 12u, 9830u, 0u},
+ {"diams;", 6u, 9830u, 0u},
+ {"die;", 4u, 168u, 0u},
+ {"digamma;", 8u, 989u, 0u},
+ {"disin;", 6u, 8946u, 0u},
+ {"div;", 4u, 247u, 0u},
+ {"divide", 6u, 247u, 0u},
+ {"divide;", 7u, 247u, 0u},
+ {"divideontimes;", 14u, 8903u, 0u},
+ {"divonx;", 7u, 8903u, 0u},
+ {"djcy;", 5u, 1106u, 0u},
+ {"dlcorn;", 7u, 8990u, 0u},
+ {"dlcrop;", 7u, 8973u, 0u},
+ {"dollar;", 7u, 36u, 0u},
+ {"dopf;", 5u, 120149u, 0u},
+ {"dot;", 4u, 729u, 0u},
+ {"doteq;", 6u, 8784u, 0u},
+ {"doteqdot;", 9u, 8785u, 0u},
+ {"dotminus;", 9u, 8760u, 0u},
+ {"dotplus;", 8u, 8724u, 0u},
+ {"dotsquare;", 10u, 8865u, 0u},
+ {"doublebarwedge;", 15u, 8966u, 0u},
+ {"downarrow;", 10u, 8595u, 0u},
+ {"downdownarrows;", 15u, 8650u, 0u},
+ {"downharpoonleft;", 16u, 8643u, 0u},
+ {"downharpoonright;", 17u, 8642u, 0u},
+ {"drbkarow;", 9u, 10512u, 0u},
+ {"drcorn;", 7u, 8991u, 0u},
+ {"drcrop;", 7u, 8972u, 0u},
+ {"dscr;", 5u, 119993u, 0u},
+ {"dscy;", 5u, 1109u, 0u},
+ {"dsol;", 5u, 10742u, 0u},
+ {"dstrok;", 7u, 273u, 0u},
+ {"dtdot;", 6u, 8945u, 0u},
+ {"dtri;", 5u, 9663u, 0u},
+ {"dtrif;", 6u, 9662u, 0u},
+ {"duarr;", 6u, 8693u, 0u},
+ {"duhar;", 6u, 10607u, 0u},
+ {"dwangle;", 8u, 10662u, 0u},
+ {"dzcy;", 5u, 1119u, 0u},
+ {"dzigrarr;", 9u, 10239u, 0u},
+ {"eDDot;", 6u, 10871u, 0u},
+ {"eDot;", 5u, 8785u, 0u},
+ {"eacute", 6u, 233u, 0u},
+ {"eacute;", 7u, 233u, 0u},
+ {"easter;", 7u, 10862u, 0u},
+ {"ecaron;", 7u, 283u, 0u},
+ {"ecir;", 5u, 8790u, 0u},
+ {"ecirc", 5u, 234u, 0u},
+ {"ecirc;", 6u, 234u, 0u},
+ {"ecolon;", 7u, 8789u, 0u},
+ {"ecy;", 4u, 1101u, 0u},
+ {"edot;", 5u, 279u, 0u},
+ {"ee;", 3u, 8519u, 0u},
+ {"efDot;", 6u, 8786u, 0u},
+ {"efr;", 4u, 120098u, 0u},
+ {"eg;", 3u, 10906u, 0u},
+ {"egrave", 6u, 232u, 0u},
+ {"egrave;", 7u, 232u, 0u},
+ {"egs;", 4u, 10902u, 0u},
+ {"egsdot;", 7u, 10904u, 0u},
+ {"el;", 3u, 10905u, 0u},
+ {"elinters;", 9u, 9191u, 0u},
+ {"ell;", 4u, 8467u, 0u},
+ {"els;", 4u, 10901u, 0u},
+ {"elsdot;", 7u, 10903u, 0u},
+ {"emacr;", 6u, 275u, 0u},
+ {"empty;", 6u, 8709u, 0u},
+ {"emptyset;", 9u, 8709u, 0u},
+ {"emptyv;", 7u, 8709u, 0u},
+ {"emsp13;", 7u, 8196u, 0u},
+ {"emsp14;", 7u, 8197u, 0u},
+ {"emsp;", 5u, 8195u, 0u},
+ {"eng;", 4u, 331u, 0u},
+ {"ensp;", 5u, 8194u, 0u},
+ {"eogon;", 6u, 281u, 0u},
+ {"eopf;", 5u, 120150u, 0u},
+ {"epar;", 5u, 8917u, 0u},
+ {"eparsl;", 7u, 10723u, 0u},
+ {"eplus;", 6u, 10865u, 0u},
+ {"epsi;", 5u, 949u, 0u},
+ {"epsilon;", 8u, 949u, 0u},
+ {"epsiv;", 6u, 1013u, 0u},
+ {"eqcirc;", 7u, 8790u, 0u},
+ {"eqcolon;", 8u, 8789u, 0u},
+ {"eqsim;", 6u, 8770u, 0u},
+ {"eqslantgtr;", 11u, 10902u, 0u},
+ {"eqslantless;", 12u, 10901u, 0u},
+ {"equals;", 7u, 61u, 0u},
+ {"equest;", 7u, 8799u, 0u},
+ {"equiv;", 6u, 8801u, 0u},
+ {"equivDD;", 8u, 10872u, 0u},
+ {"eqvparsl;", 9u, 10725u, 0u},
+ {"erDot;", 6u, 8787u, 0u},
+ {"erarr;", 6u, 10609u, 0u},
+ {"escr;", 5u, 8495u, 0u},
+ {"esdot;", 6u, 8784u, 0u},
+ {"esim;", 5u, 8770u, 0u},
+ {"eta;", 4u, 951u, 0u},
+ {"eth", 3u, 240u, 0u},
+ {"eth;", 4u, 240u, 0u},
+ {"euml", 4u, 235u, 0u},
+ {"euml;", 5u, 235u, 0u},
+ {"euro;", 5u, 8364u, 0u},
+ {"excl;", 5u, 33u, 0u},
+ {"exist;", 6u, 8707u, 0u},
+ {"expectation;", 12u, 8496u, 0u},
+ {"exponentiale;", 13u, 8519u, 0u},
+ {"fallingdotseq;", 14u, 8786u, 0u},
+ {"fcy;", 4u, 1092u, 0u},
+ {"female;", 7u, 9792u, 0u},
+ {"ffilig;", 7u, 64259u, 0u},
+ {"fflig;", 6u, 64256u, 0u},
+ {"ffllig;", 7u, 64260u, 0u},
+ {"ffr;", 4u, 120099u, 0u},
+ {"filig;", 6u, 64257u, 0u},
+ {"fjlig;", 6u, 102u, 106u},
+ {"flat;", 5u, 9837u, 0u},
+ {"fllig;", 6u, 64258u, 0u},
+ {"fltns;", 6u, 9649u, 0u},
+ {"fnof;", 5u, 402u, 0u},
+ {"fopf;", 5u, 120151u, 0u},
+ {"forall;", 7u, 8704u, 0u},
+ {"fork;", 5u, 8916u, 0u},
+ {"forkv;", 6u, 10969u, 0u},
+ {"fpartint;", 9u, 10765u, 0u},
+ {"frac12", 6u, 189u, 0u},
+ {"frac12;", 7u, 189u, 0u},
+ {"frac13;", 7u, 8531u, 0u},
+ {"frac14", 6u, 188u, 0u},
+ {"frac14;", 7u, 188u, 0u},
+ {"frac15;", 7u, 8533u, 0u},
+ {"frac16;", 7u, 8537u, 0u},
+ {"frac18;", 7u, 8539u, 0u},
+ {"frac23;", 7u, 8532u, 0u},
+ {"frac25;", 7u, 8534u, 0u},
+ {"frac34", 6u, 190u, 0u},
+ {"frac34;", 7u, 190u, 0u},
+ {"frac35;", 7u, 8535u, 0u},
+ {"frac38;", 7u, 8540u, 0u},
+ {"frac45;", 7u, 8536u, 0u},
+ {"frac56;", 7u, 8538u, 0u},
+ {"frac58;", 7u, 8541u, 0u},
+ {"frac78;", 7u, 8542u, 0u},
+ {"frasl;", 6u, 8260u, 0u},
+ {"frown;", 6u, 8994u, 0u},
+ {"fscr;", 5u, 119995u, 0u},
+ {"gE;", 3u, 8807u, 0u},
+ {"gEl;", 4u, 10892u, 0u},
+ {"gacute;", 7u, 501u, 0u},
+ {"gamma;", 6u, 947u, 0u},
+ {"gammad;", 7u, 989u, 0u},
+ {"gap;", 4u, 10886u, 0u},
+ {"gbreve;", 7u, 287u, 0u},
+ {"gcirc;", 6u, 285u, 0u},
+ {"gcy;", 4u, 1075u, 0u},
+ {"gdot;", 5u, 289u, 0u},
+ {"ge;", 3u, 8805u, 0u},
+ {"gel;", 4u, 8923u, 0u},
+ {"geq;", 4u, 8805u, 0u},
+ {"geqq;", 5u, 8807u, 0u},
+ {"geqslant;", 9u, 10878u, 0u},
+ {"ges;", 4u, 10878u, 0u},
+ {"gescc;", 6u, 10921u, 0u},
+ {"gesdot;", 7u, 10880u, 0u},
+ {"gesdoto;", 8u, 10882u, 0u},
+ {"gesdotol;", 9u, 10884u, 0u},
+ {"gesl;", 5u, 8923u, 65024u},
+ {"gesles;", 7u, 10900u, 0u},
+ {"gfr;", 4u, 120100u, 0u},
+ {"gg;", 3u, 8811u, 0u},
+ {"ggg;", 4u, 8921u, 0u},
+ {"gimel;", 6u, 8503u, 0u},
+ {"gjcy;", 5u, 1107u, 0u},
+ {"gl;", 3u, 8823u, 0u},
+ {"glE;", 4u, 10898u, 0u},
+ {"gla;", 4u, 10917u, 0u},
+ {"glj;", 4u, 10916u, 0u},
+ {"gnE;", 4u, 8809u, 0u},
+ {"gnap;", 5u, 10890u, 0u},
+ {"gnapprox;", 9u, 10890u, 0u},
+ {"gne;", 4u, 10888u, 0u},
+ {"gneq;", 5u, 10888u, 0u},
+ {"gneqq;", 6u, 8809u, 0u},
+ {"gnsim;", 6u, 8935u, 0u},
+ {"gopf;", 5u, 120152u, 0u},
+ {"grave;", 6u, 96u, 0u},
+ {"gscr;", 5u, 8458u, 0u},
+ {"gsim;", 5u, 8819u, 0u},
+ {"gsime;", 6u, 10894u, 0u},
+ {"gsiml;", 6u, 10896u, 0u},
+ {"gt", 2u, 62u, 0u},
+ {"gt;", 3u, 62u, 0u},
+ {"gtcc;", 5u, 10919u, 0u},
+ {"gtcir;", 6u, 10874u, 0u},
+ {"gtdot;", 6u, 8919u, 0u},
+ {"gtlPar;", 7u, 10645u, 0u},
+ {"gtquest;", 8u, 10876u, 0u},
+ {"gtrapprox;", 10u, 10886u, 0u},
+ {"gtrarr;", 7u, 10616u, 0u},
+ {"gtrdot;", 7u, 8919u, 0u},
+ {"gtreqless;", 10u, 8923u, 0u},
+ {"gtreqqless;", 11u, 10892u, 0u},
+ {"gtrless;", 8u, 8823u, 0u},
+ {"gtrsim;", 7u, 8819u, 0u},
+ {"gvertneqq;", 10u, 8809u, 65024u},
+ {"gvnE;", 5u, 8809u, 65024u},
+ {"hArr;", 5u, 8660u, 0u},
+ {"hairsp;", 7u, 8202u, 0u},
+ {"half;", 5u, 189u, 0u},
+ {"hamilt;", 7u, 8459u, 0u},
+ {"hardcy;", 7u, 1098u, 0u},
+ {"harr;", 5u, 8596u, 0u},
+ {"harrcir;", 8u, 10568u, 0u},
+ {"harrw;", 6u, 8621u, 0u},
+ {"hbar;", 5u, 8463u, 0u},
+ {"hcirc;", 6u, 293u, 0u},
+ {"hearts;", 7u, 9829u, 0u},
+ {"heartsuit;", 10u, 9829u, 0u},
+ {"hellip;", 7u, 8230u, 0u},
+ {"hercon;", 7u, 8889u, 0u},
+ {"hfr;", 4u, 120101u, 0u},
+ {"hksearow;", 9u, 10533u, 0u},
+ {"hkswarow;", 9u, 10534u, 0u},
+ {"hoarr;", 6u, 8703u, 0u},
+ {"homtht;", 7u, 8763u, 0u},
+ {"hookleftarrow;", 14u, 8617u, 0u},
+ {"hookrightarrow;", 15u, 8618u, 0u},
+ {"hopf;", 5u, 120153u, 0u},
+ {"horbar;", 7u, 8213u, 0u},
+ {"hscr;", 5u, 119997u, 0u},
+ {"hslash;", 7u, 8463u, 0u},
+ {"hstrok;", 7u, 295u, 0u},
+ {"hybull;", 7u, 8259u, 0u},
+ {"hyphen;", 7u, 8208u, 0u},
+ {"iacute", 6u, 237u, 0u},
+ {"iacute;", 7u, 237u, 0u},
+ {"ic;", 3u, 8291u, 0u},
+ {"icirc", 5u, 238u, 0u},
+ {"icirc;", 6u, 238u, 0u},
+ {"icy;", 4u, 1080u, 0u},
+ {"iecy;", 5u, 1077u, 0u},
+ {"iexcl", 5u, 161u, 0u},
+ {"iexcl;", 6u, 161u, 0u},
+ {"iff;", 4u, 8660u, 0u},
+ {"ifr;", 4u, 120102u, 0u},
+ {"igrave", 6u, 236u, 0u},
+ {"igrave;", 7u, 236u, 0u},
+ {"ii;", 3u, 8520u, 0u},
+ {"iiiint;", 7u, 10764u, 0u},
+ {"iiint;", 6u, 8749u, 0u},
+ {"iinfin;", 7u, 10716u, 0u},
+ {"iiota;", 6u, 8489u, 0u},
+ {"ijlig;", 6u, 307u, 0u},
+ {"imacr;", 6u, 299u, 0u},
+ {"image;", 6u, 8465u, 0u},
+ {"imagline;", 9u, 8464u, 0u},
+ {"imagpart;", 9u, 8465u, 0u},
+ {"imath;", 6u, 305u, 0u},
+ {"imof;", 5u, 8887u, 0u},
+ {"imped;", 6u, 437u, 0u},
+ {"in;", 3u, 8712u, 0u},
+ {"incare;", 7u, 8453u, 0u},
+ {"infin;", 6u, 8734u, 0u},
+ {"infintie;", 9u, 10717u, 0u},
+ {"inodot;", 7u, 305u, 0u},
+ {"int;", 4u, 8747u, 0u},
+ {"intcal;", 7u, 8890u, 0u},
+ {"integers;", 9u, 8484u, 0u},
+ {"intercal;", 9u, 8890u, 0u},
+ {"intlarhk;", 9u, 10775u, 0u},
+ {"intprod;", 8u, 10812u, 0u},
+ {"iocy;", 5u, 1105u, 0u},
+ {"iogon;", 6u, 303u, 0u},
+ {"iopf;", 5u, 120154u, 0u},
+ {"iota;", 5u, 953u, 0u},
+ {"iprod;", 6u, 10812u, 0u},
+ {"iquest", 6u, 191u, 0u},
+ {"iquest;", 7u, 191u, 0u},
+ {"iscr;", 5u, 119998u, 0u},
+ {"isin;", 5u, 8712u, 0u},
+ {"isinE;", 6u, 8953u, 0u},
+ {"isindot;", 8u, 8949u, 0u},
+ {"isins;", 6u, 8948u, 0u},
+ {"isinsv;", 7u, 8947u, 0u},
+ {"isinv;", 6u, 8712u, 0u},
+ {"it;", 3u, 8290u, 0u},
+ {"itilde;", 7u, 297u, 0u},
+ {"iukcy;", 6u, 1110u, 0u},
+ {"iuml", 4u, 239u, 0u},
+ {"iuml;", 5u, 239u, 0u},
+ {"jcirc;", 6u, 309u, 0u},
+ {"jcy;", 4u, 1081u, 0u},
+ {"jfr;", 4u, 120103u, 0u},
+ {"jmath;", 6u, 567u, 0u},
+ {"jopf;", 5u, 120155u, 0u},
+ {"jscr;", 5u, 119999u, 0u},
+ {"jsercy;", 7u, 1112u, 0u},
+ {"jukcy;", 6u, 1108u, 0u},
+ {"kappa;", 6u, 954u, 0u},
+ {"kappav;", 7u, 1008u, 0u},
+ {"kcedil;", 7u, 311u, 0u},
+ {"kcy;", 4u, 1082u, 0u},
+ {"kfr;", 4u, 120104u, 0u},
+ {"kgreen;", 7u, 312u, 0u},
+ {"khcy;", 5u, 1093u, 0u},
+ {"kjcy;", 5u, 1116u, 0u},
+ {"kopf;", 5u, 120156u, 0u},
+ {"kscr;", 5u, 120000u, 0u},
+ {"lAarr;", 6u, 8666u, 0u},
+ {"lArr;", 5u, 8656u, 0u},
+ {"lAtail;", 7u, 10523u, 0u},
+ {"lBarr;", 6u, 10510u, 0u},
+ {"lE;", 3u, 8806u, 0u},
+ {"lEg;", 4u, 10891u, 0u},
+ {"lHar;", 5u, 10594u, 0u},
+ {"lacute;", 7u, 314u, 0u},
+ {"laemptyv;", 9u, 10676u, 0u},
+ {"lagran;", 7u, 8466u, 0u},
+ {"lambda;", 7u, 955u, 0u},
+ {"lang;", 5u, 10216u, 0u},
+ {"langd;", 6u, 10641u, 0u},
+ {"langle;", 7u, 10216u, 0u},
+ {"lap;", 4u, 10885u, 0u},
+ {"laquo", 5u, 171u, 0u},
+ {"laquo;", 6u, 171u, 0u},
+ {"larr;", 5u, 8592u, 0u},
+ {"larrb;", 6u, 8676u, 0u},
+ {"larrbfs;", 8u, 10527u, 0u},
+ {"larrfs;", 7u, 10525u, 0u},
+ {"larrhk;", 7u, 8617u, 0u},
+ {"larrlp;", 7u, 8619u, 0u},
+ {"larrpl;", 7u, 10553u, 0u},
+ {"larrsim;", 8u, 10611u, 0u},
+ {"larrtl;", 7u, 8610u, 0u},
+ {"lat;", 4u, 10923u, 0u},
+ {"latail;", 7u, 10521u, 0u},
+ {"late;", 5u, 10925u, 0u},
+ {"lates;", 6u, 10925u, 65024u},
+ {"lbarr;", 6u, 10508u, 0u},
+ {"lbbrk;", 6u, 10098u, 0u},
+ {"lbrace;", 7u, 123u, 0u},
+ {"lbrack;", 7u, 91u, 0u},
+ {"lbrke;", 6u, 10635u, 0u},
+ {"lbrksld;", 8u, 10639u, 0u},
+ {"lbrkslu;", 8u, 10637u, 0u},
+ {"lcaron;", 7u, 318u, 0u},
+ {"lcedil;", 7u, 316u, 0u},
+ {"lceil;", 6u, 8968u, 0u},
+ {"lcub;", 5u, 123u, 0u},
+ {"lcy;", 4u, 1083u, 0u},
+ {"ldca;", 5u, 10550u, 0u},
+ {"ldquo;", 6u, 8220u, 0u},
+ {"ldquor;", 7u, 8222u, 0u},
+ {"ldrdhar;", 8u, 10599u, 0u},
+ {"ldrushar;", 9u, 10571u, 0u},
+ {"ldsh;", 5u, 8626u, 0u},
+ {"le;", 3u, 8804u, 0u},
+ {"leftarrow;", 10u, 8592u, 0u},
+ {"leftarrowtail;", 14u, 8610u, 0u},
+ {"leftharpoondown;", 16u, 8637u, 0u},
+ {"leftharpoonup;", 14u, 8636u, 0u},
+ {"leftleftarrows;", 15u, 8647u, 0u},
+ {"leftrightarrow;", 15u, 8596u, 0u},
+ {"leftrightarrows;", 16u, 8646u, 0u},
+ {"leftrightharpoons;", 18u, 8651u, 0u},
+ {"leftrightsquigarrow;", 20u, 8621u, 0u},
+ {"leftthreetimes;", 15u, 8907u, 0u},
+ {"leg;", 4u, 8922u, 0u},
+ {"leq;", 4u, 8804u, 0u},
+ {"leqq;", 5u, 8806u, 0u},
+ {"leqslant;", 9u, 10877u, 0u},
+ {"les;", 4u, 10877u, 0u},
+ {"lescc;", 6u, 10920u, 0u},
+ {"lesdot;", 7u, 10879u, 0u},
+ {"lesdoto;", 8u, 10881u, 0u},
+ {"lesdotor;", 9u, 10883u, 0u},
+ {"lesg;", 5u, 8922u, 65024u},
+ {"lesges;", 7u, 10899u, 0u},
+ {"lessapprox;", 11u, 10885u, 0u},
+ {"lessdot;", 8u, 8918u, 0u},
+ {"lesseqgtr;", 10u, 8922u, 0u},
+ {"lesseqqgtr;", 11u, 10891u, 0u},
+ {"lessgtr;", 8u, 8822u, 0u},
+ {"lesssim;", 8u, 8818u, 0u},
+ {"lfisht;", 7u, 10620u, 0u},
+ {"lfloor;", 7u, 8970u, 0u},
+ {"lfr;", 4u, 120105u, 0u},
+ {"lg;", 3u, 8822u, 0u},
+ {"lgE;", 4u, 10897u, 0u},
+ {"lhard;", 6u, 8637u, 0u},
+ {"lharu;", 6u, 8636u, 0u},
+ {"lharul;", 7u, 10602u, 0u},
+ {"lhblk;", 6u, 9604u, 0u},
+ {"ljcy;", 5u, 1113u, 0u},
+ {"ll;", 3u, 8810u, 0u},
+ {"llarr;", 6u, 8647u, 0u},
+ {"llcorner;", 9u, 8990u, 0u},
+ {"llhard;", 7u, 10603u, 0u},
+ {"lltri;", 6u, 9722u, 0u},
+ {"lmidot;", 7u, 320u, 0u},
+ {"lmoust;", 7u, 9136u, 0u},
+ {"lmoustache;", 11u, 9136u, 0u},
+ {"lnE;", 4u, 8808u, 0u},
+ {"lnap;", 5u, 10889u, 0u},
+ {"lnapprox;", 9u, 10889u, 0u},
+ {"lne;", 4u, 10887u, 0u},
+ {"lneq;", 5u, 10887u, 0u},
+ {"lneqq;", 6u, 8808u, 0u},
+ {"lnsim;", 6u, 8934u, 0u},
+ {"loang;", 6u, 10220u, 0u},
+ {"loarr;", 6u, 8701u, 0u},
+ {"lobrk;", 6u, 10214u, 0u},
+ {"longleftarrow;", 14u, 10229u, 0u},
+ {"longleftrightarrow;", 19u, 10231u, 0u},
+ {"longmapsto;", 11u, 10236u, 0u},
+ {"longrightarrow;", 15u, 10230u, 0u},
+ {"looparrowleft;", 14u, 8619u, 0u},
+ {"looparrowright;", 15u, 8620u, 0u},
+ {"lopar;", 6u, 10629u, 0u},
+ {"lopf;", 5u, 120157u, 0u},
+ {"loplus;", 7u, 10797u, 0u},
+ {"lotimes;", 8u, 10804u, 0u},
+ {"lowast;", 7u, 8727u, 0u},
+ {"lowbar;", 7u, 95u, 0u},
+ {"loz;", 4u, 9674u, 0u},
+ {"lozenge;", 8u, 9674u, 0u},
+ {"lozf;", 5u, 10731u, 0u},
+ {"lpar;", 5u, 40u, 0u},
+ {"lparlt;", 7u, 10643u, 0u},
+ {"lrarr;", 6u, 8646u, 0u},
+ {"lrcorner;", 9u, 8991u, 0u},
+ {"lrhar;", 6u, 8651u, 0u},
+ {"lrhard;", 7u, 10605u, 0u},
+ {"lrm;", 4u, 8206u, 0u},
+ {"lrtri;", 6u, 8895u, 0u},
+ {"lsaquo;", 7u, 8249u, 0u},
+ {"lscr;", 5u, 120001u, 0u},
+ {"lsh;", 4u, 8624u, 0u},
+ {"lsim;", 5u, 8818u, 0u},
+ {"lsime;", 6u, 10893u, 0u},
+ {"lsimg;", 6u, 10895u, 0u},
+ {"lsqb;", 5u, 91u, 0u},
+ {"lsquo;", 6u, 8216u, 0u},
+ {"lsquor;", 7u, 8218u, 0u},
+ {"lstrok;", 7u, 322u, 0u},
+ {"lt", 2u, 60u, 0u},
+ {"lt;", 3u, 60u, 0u},
+ {"ltcc;", 5u, 10918u, 0u},
+ {"ltcir;", 6u, 10873u, 0u},
+ {"ltdot;", 6u, 8918u, 0u},
+ {"lthree;", 7u, 8907u, 0u},
+ {"ltimes;", 7u, 8905u, 0u},
+ {"ltlarr;", 7u, 10614u, 0u},
+ {"ltquest;", 8u, 10875u, 0u},
+ {"ltrPar;", 7u, 10646u, 0u},
+ {"ltri;", 5u, 9667u, 0u},
+ {"ltrie;", 6u, 8884u, 0u},
+ {"ltrif;", 6u, 9666u, 0u},
+ {"lurdshar;", 9u, 10570u, 0u},
+ {"luruhar;", 8u, 10598u, 0u},
+ {"lvertneqq;", 10u, 8808u, 65024u},
+ {"lvnE;", 5u, 8808u, 65024u},
+ {"mDDot;", 6u, 8762u, 0u},
+ {"macr", 4u, 175u, 0u},
+ {"macr;", 5u, 175u, 0u},
+ {"male;", 5u, 9794u, 0u},
+ {"malt;", 5u, 10016u, 0u},
+ {"maltese;", 8u, 10016u, 0u},
+ {"map;", 4u, 8614u, 0u},
+ {"mapsto;", 7u, 8614u, 0u},
+ {"mapstodown;", 11u, 8615u, 0u},
+ {"mapstoleft;", 11u, 8612u, 0u},
+ {"mapstoup;", 9u, 8613u, 0u},
+ {"marker;", 7u, 9646u, 0u},
+ {"mcomma;", 7u, 10793u, 0u},
+ {"mcy;", 4u, 1084u, 0u},
+ {"mdash;", 6u, 8212u, 0u},
+ {"measuredangle;", 14u, 8737u, 0u},
+ {"mfr;", 4u, 120106u, 0u},
+ {"mho;", 4u, 8487u, 0u},
+ {"micro", 5u, 181u, 0u},
+ {"micro;", 6u, 181u, 0u},
+ {"mid;", 4u, 8739u, 0u},
+ {"midast;", 7u, 42u, 0u},
+ {"midcir;", 7u, 10992u, 0u},
+ {"middot", 6u, 183u, 0u},
+ {"middot;", 7u, 183u, 0u},
+ {"minus;", 6u, 8722u, 0u},
+ {"minusb;", 7u, 8863u, 0u},
+ {"minusd;", 7u, 8760u, 0u},
+ {"minusdu;", 8u, 10794u, 0u},
+ {"mlcp;", 5u, 10971u, 0u},
+ {"mldr;", 5u, 8230u, 0u},
+ {"mnplus;", 7u, 8723u, 0u},
+ {"models;", 7u, 8871u, 0u},
+ {"mopf;", 5u, 120158u, 0u},
+ {"mp;", 3u, 8723u, 0u},
+ {"mscr;", 5u, 120002u, 0u},
+ {"mstpos;", 7u, 8766u, 0u},
+ {"mu;", 3u, 956u, 0u},
+ {"multimap;", 9u, 8888u, 0u},
+ {"mumap;", 6u, 8888u, 0u},
+ {"nGg;", 4u, 8921u, 824u},
+ {"nGt;", 4u, 8811u, 8402u},
+ {"nGtv;", 5u, 8811u, 824u},
+ {"nLeftarrow;", 11u, 8653u, 0u},
+ {"nLeftrightarrow;", 16u, 8654u, 0u},
+ {"nLl;", 4u, 8920u, 824u},
+ {"nLt;", 4u, 8810u, 8402u},
+ {"nLtv;", 5u, 8810u, 824u},
+ {"nRightarrow;", 12u, 8655u, 0u},
+ {"nVDash;", 7u, 8879u, 0u},
+ {"nVdash;", 7u, 8878u, 0u},
+ {"nabla;", 6u, 8711u, 0u},
+ {"nacute;", 7u, 324u, 0u},
+ {"nang;", 5u, 8736u, 8402u},
+ {"nap;", 4u, 8777u, 0u},
+ {"napE;", 5u, 10864u, 824u},
+ {"napid;", 6u, 8779u, 824u},
+ {"napos;", 6u, 329u, 0u},
+ {"napprox;", 8u, 8777u, 0u},
+ {"natur;", 6u, 9838u, 0u},
+ {"natural;", 8u, 9838u, 0u},
+ {"naturals;", 9u, 8469u, 0u},
+ {"nbsp", 4u, 160u, 0u},
+ {"nbsp;", 5u, 160u, 0u},
+ {"nbump;", 6u, 8782u, 824u},
+ {"nbumpe;", 7u, 8783u, 824u},
+ {"ncap;", 5u, 10819u, 0u},
+ {"ncaron;", 7u, 328u, 0u},
+ {"ncedil;", 7u, 326u, 0u},
+ {"ncong;", 6u, 8775u, 0u},
+ {"ncongdot;", 9u, 10861u, 824u},
+ {"ncup;", 5u, 10818u, 0u},
+ {"ncy;", 4u, 1085u, 0u},
+ {"ndash;", 6u, 8211u, 0u},
+ {"ne;", 3u, 8800u, 0u},
+ {"neArr;", 6u, 8663u, 0u},
+ {"nearhk;", 7u, 10532u, 0u},
+ {"nearr;", 6u, 8599u, 0u},
+ {"nearrow;", 8u, 8599u, 0u},
+ {"nedot;", 6u, 8784u, 824u},
+ {"nequiv;", 7u, 8802u, 0u},
+ {"nesear;", 7u, 10536u, 0u},
+ {"nesim;", 6u, 8770u, 824u},
+ {"nexist;", 7u, 8708u, 0u},
+ {"nexists;", 8u, 8708u, 0u},
+ {"nfr;", 4u, 120107u, 0u},
+ {"ngE;", 4u, 8807u, 824u},
+ {"nge;", 4u, 8817u, 0u},
+ {"ngeq;", 5u, 8817u, 0u},
+ {"ngeqq;", 6u, 8807u, 824u},
+ {"ngeqslant;", 10u, 10878u, 824u},
+ {"nges;", 5u, 10878u, 824u},
+ {"ngsim;", 6u, 8821u, 0u},
+ {"ngt;", 4u, 8815u, 0u},
+ {"ngtr;", 5u, 8815u, 0u},
+ {"nhArr;", 6u, 8654u, 0u},
+ {"nharr;", 6u, 8622u, 0u},
+ {"nhpar;", 6u, 10994u, 0u},
+ {"ni;", 3u, 8715u, 0u},
+ {"nis;", 4u, 8956u, 0u},
+ {"nisd;", 5u, 8954u, 0u},
+ {"niv;", 4u, 8715u, 0u},
+ {"njcy;", 5u, 1114u, 0u},
+ {"nlArr;", 6u, 8653u, 0u},
+ {"nlE;", 4u, 8806u, 824u},
+ {"nlarr;", 6u, 8602u, 0u},
+ {"nldr;", 5u, 8229u, 0u},
+ {"nle;", 4u, 8816u, 0u},
+ {"nleftarrow;", 11u, 8602u, 0u},
+ {"nleftrightarrow;", 16u, 8622u, 0u},
+ {"nleq;", 5u, 8816u, 0u},
+ {"nleqq;", 6u, 8806u, 824u},
+ {"nleqslant;", 10u, 10877u, 824u},
+ {"nles;", 5u, 10877u, 824u},
+ {"nless;", 6u, 8814u, 0u},
+ {"nlsim;", 6u, 8820u, 0u},
+ {"nlt;", 4u, 8814u, 0u},
+ {"nltri;", 6u, 8938u, 0u},
+ {"nltrie;", 7u, 8940u, 0u},
+ {"nmid;", 5u, 8740u, 0u},
+ {"nopf;", 5u, 120159u, 0u},
+ {"not", 3u, 172u, 0u},
+ {"not;", 4u, 172u, 0u},
+ {"notin;", 6u, 8713u, 0u},
+ {"notinE;", 7u, 8953u, 824u},
+ {"notindot;", 9u, 8949u, 824u},
+ {"notinva;", 8u, 8713u, 0u},
+ {"notinvb;", 8u, 8951u, 0u},
+ {"notinvc;", 8u, 8950u, 0u},
+ {"notni;", 6u, 8716u, 0u},
+ {"notniva;", 8u, 8716u, 0u},
+ {"notnivb;", 8u, 8958u, 0u},
+ {"notnivc;", 8u, 8957u, 0u},
+ {"npar;", 5u, 8742u, 0u},
+ {"nparallel;", 10u, 8742u, 0u},
+ {"nparsl;", 7u, 11005u, 8421u},
+ {"npart;", 6u, 8706u, 824u},
+ {"npolint;", 8u, 10772u, 0u},
+ {"npr;", 4u, 8832u, 0u},
+ {"nprcue;", 7u, 8928u, 0u},
+ {"npre;", 5u, 10927u, 824u},
+ {"nprec;", 6u, 8832u, 0u},
+ {"npreceq;", 8u, 10927u, 824u},
+ {"nrArr;", 6u, 8655u, 0u},
+ {"nrarr;", 6u, 8603u, 0u},
+ {"nrarrc;", 7u, 10547u, 824u},
+ {"nrarrw;", 7u, 8605u, 824u},
+ {"nrightarrow;", 12u, 8603u, 0u},
+ {"nrtri;", 6u, 8939u, 0u},
+ {"nrtrie;", 7u, 8941u, 0u},
+ {"nsc;", 4u, 8833u, 0u},
+ {"nsccue;", 7u, 8929u, 0u},
+ {"nsce;", 5u, 10928u, 824u},
+ {"nscr;", 5u, 120003u, 0u},
+ {"nshortmid;", 10u, 8740u, 0u},
+ {"nshortparallel;", 15u, 8742u, 0u},
+ {"nsim;", 5u, 8769u, 0u},
+ {"nsime;", 6u, 8772u, 0u},
+ {"nsimeq;", 7u, 8772u, 0u},
+ {"nsmid;", 6u, 8740u, 0u},
+ {"nspar;", 6u, 8742u, 0u},
+ {"nsqsube;", 8u, 8930u, 0u},
+ {"nsqsupe;", 8u, 8931u, 0u},
+ {"nsub;", 5u, 8836u, 0u},
+ {"nsubE;", 6u, 10949u, 824u},
+ {"nsube;", 6u, 8840u, 0u},
+ {"nsubset;", 8u, 8834u, 8402u},
+ {"nsubseteq;", 10u, 8840u, 0u},
+ {"nsubseteqq;", 11u, 10949u, 824u},
+ {"nsucc;", 6u, 8833u, 0u},
+ {"nsucceq;", 8u, 10928u, 824u},
+ {"nsup;", 5u, 8837u, 0u},
+ {"nsupE;", 6u, 10950u, 824u},
+ {"nsupe;", 6u, 8841u, 0u},
+ {"nsupset;", 8u, 8835u, 8402u},
+ {"nsupseteq;", 10u, 8841u, 0u},
+ {"nsupseteqq;", 11u, 10950u, 824u},
+ {"ntgl;", 5u, 8825u, 0u},
+ {"ntilde", 6u, 241u, 0u},
+ {"ntilde;", 7u, 241u, 0u},
+ {"ntlg;", 5u, 8824u, 0u},
+ {"ntriangleleft;", 14u, 8938u, 0u},
+ {"ntrianglelefteq;", 16u, 8940u, 0u},
+ {"ntriangleright;", 15u, 8939u, 0u},
+ {"ntrianglerighteq;", 17u, 8941u, 0u},
+ {"nu;", 3u, 957u, 0u},
+ {"num;", 4u, 35u, 0u},
+ {"numero;", 7u, 8470u, 0u},
+ {"numsp;", 6u, 8199u, 0u},
+ {"nvDash;", 7u, 8877u, 0u},
+ {"nvHarr;", 7u, 10500u, 0u},
+ {"nvap;", 5u, 8781u, 8402u},
+ {"nvdash;", 7u, 8876u, 0u},
+ {"nvge;", 5u, 8805u, 8402u},
+ {"nvgt;", 5u, 62u, 8402u},
+ {"nvinfin;", 8u, 10718u, 0u},
+ {"nvlArr;", 7u, 10498u, 0u},
+ {"nvle;", 5u, 8804u, 8402u},
+ {"nvlt;", 5u, 60u, 8402u},
+ {"nvltrie;", 8u, 8884u, 8402u},
+ {"nvrArr;", 7u, 10499u, 0u},
+ {"nvrtrie;", 8u, 8885u, 8402u},
+ {"nvsim;", 6u, 8764u, 8402u},
+ {"nwArr;", 6u, 8662u, 0u},
+ {"nwarhk;", 7u, 10531u, 0u},
+ {"nwarr;", 6u, 8598u, 0u},
+ {"nwarrow;", 8u, 8598u, 0u},
+ {"nwnear;", 7u, 10535u, 0u},
+ {"oS;", 3u, 9416u, 0u},
+ {"oacute", 6u, 243u, 0u},
+ {"oacute;", 7u, 243u, 0u},
+ {"oast;", 5u, 8859u, 0u},
+ {"ocir;", 5u, 8858u, 0u},
+ {"ocirc", 5u, 244u, 0u},
+ {"ocirc;", 6u, 244u, 0u},
+ {"ocy;", 4u, 1086u, 0u},
+ {"odash;", 6u, 8861u, 0u},
+ {"odblac;", 7u, 337u, 0u},
+ {"odiv;", 5u, 10808u, 0u},
+ {"odot;", 5u, 8857u, 0u},
+ {"odsold;", 7u, 10684u, 0u},
+ {"oelig;", 6u, 339u, 0u},
+ {"ofcir;", 6u, 10687u, 0u},
+ {"ofr;", 4u, 120108u, 0u},
+ {"ogon;", 5u, 731u, 0u},
+ {"ograve", 6u, 242u, 0u},
+ {"ograve;", 7u, 242u, 0u},
+ {"ogt;", 4u, 10689u, 0u},
+ {"ohbar;", 6u, 10677u, 0u},
+ {"ohm;", 4u, 937u, 0u},
+ {"oint;", 5u, 8750u, 0u},
+ {"olarr;", 6u, 8634u, 0u},
+ {"olcir;", 6u, 10686u, 0u},
+ {"olcross;", 8u, 10683u, 0u},
+ {"oline;", 6u, 8254u, 0u},
+ {"olt;", 4u, 10688u, 0u},
+ {"omacr;", 6u, 333u, 0u},
+ {"omega;", 6u, 969u, 0u},
+ {"omicron;", 8u, 959u, 0u},
+ {"omid;", 5u, 10678u, 0u},
+ {"ominus;", 7u, 8854u, 0u},
+ {"oopf;", 5u, 120160u, 0u},
+ {"opar;", 5u, 10679u, 0u},
+ {"operp;", 6u, 10681u, 0u},
+ {"oplus;", 6u, 8853u, 0u},
+ {"or;", 3u, 8744u, 0u},
+ {"orarr;", 6u, 8635u, 0u},
+ {"ord;", 4u, 10845u, 0u},
+ {"order;", 6u, 8500u, 0u},
+ {"orderof;", 8u, 8500u, 0u},
+ {"ordf", 4u, 170u, 0u},
+ {"ordf;", 5u, 170u, 0u},
+ {"ordm", 4u, 186u, 0u},
+ {"ordm;", 5u, 186u, 0u},
+ {"origof;", 7u, 8886u, 0u},
+ {"oror;", 5u, 10838u, 0u},
+ {"orslope;", 8u, 10839u, 0u},
+ {"orv;", 4u, 10843u, 0u},
+ {"oscr;", 5u, 8500u, 0u},
+ {"oslash", 6u, 248u, 0u},
+ {"oslash;", 7u, 248u, 0u},
+ {"osol;", 5u, 8856u, 0u},
+ {"otilde", 6u, 245u, 0u},
+ {"otilde;", 7u, 245u, 0u},
+ {"otimes;", 7u, 8855u, 0u},
+ {"otimesas;", 9u, 10806u, 0u},
+ {"ouml", 4u, 246u, 0u},
+ {"ouml;", 5u, 246u, 0u},
+ {"ovbar;", 6u, 9021u, 0u},
+ {"par;", 4u, 8741u, 0u},
+ {"para", 4u, 182u, 0u},
+ {"para;", 5u, 182u, 0u},
+ {"parallel;", 9u, 8741u, 0u},
+ {"parsim;", 7u, 10995u, 0u},
+ {"parsl;", 6u, 11005u, 0u},
+ {"part;", 5u, 8706u, 0u},
+ {"pcy;", 4u, 1087u, 0u},
+ {"percnt;", 7u, 37u, 0u},
+ {"period;", 7u, 46u, 0u},
+ {"permil;", 7u, 8240u, 0u},
+ {"perp;", 5u, 8869u, 0u},
+ {"pertenk;", 8u, 8241u, 0u},
+ {"pfr;", 4u, 120109u, 0u},
+ {"phi;", 4u, 966u, 0u},
+ {"phiv;", 5u, 981u, 0u},
+ {"phmmat;", 7u, 8499u, 0u},
+ {"phone;", 6u, 9742u, 0u},
+ {"pi;", 3u, 960u, 0u},
+ {"pitchfork;", 10u, 8916u, 0u},
+ {"piv;", 4u, 982u, 0u},
+ {"planck;", 7u, 8463u, 0u},
+ {"planckh;", 8u, 8462u, 0u},
+ {"plankv;", 7u, 8463u, 0u},
+ {"plus;", 5u, 43u, 0u},
+ {"plusacir;", 9u, 10787u, 0u},
+ {"plusb;", 6u, 8862u, 0u},
+ {"pluscir;", 8u, 10786u, 0u},
+ {"plusdo;", 7u, 8724u, 0u},
+ {"plusdu;", 7u, 10789u, 0u},
+ {"pluse;", 6u, 10866u, 0u},
+ {"plusmn", 6u, 177u, 0u},
+ {"plusmn;", 7u, 177u, 0u},
+ {"plussim;", 8u, 10790u, 0u},
+ {"plustwo;", 8u, 10791u, 0u},
+ {"pm;", 3u, 177u, 0u},
+ {"pointint;", 9u, 10773u, 0u},
+ {"popf;", 5u, 120161u, 0u},
+ {"pound", 5u, 163u, 0u},
+ {"pound;", 6u, 163u, 0u},
+ {"pr;", 3u, 8826u, 0u},
+ {"prE;", 4u, 10931u, 0u},
+ {"prap;", 5u, 10935u, 0u},
+ {"prcue;", 6u, 8828u, 0u},
+ {"pre;", 4u, 10927u, 0u},
+ {"prec;", 5u, 8826u, 0u},
+ {"precapprox;", 11u, 10935u, 0u},
+ {"preccurlyeq;", 12u, 8828u, 0u},
+ {"preceq;", 7u, 10927u, 0u},
+ {"precnapprox;", 12u, 10937u, 0u},
+ {"precneqq;", 9u, 10933u, 0u},
+ {"precnsim;", 9u, 8936u, 0u},
+ {"precsim;", 8u, 8830u, 0u},
+ {"prime;", 6u, 8242u, 0u},
+ {"primes;", 7u, 8473u, 0u},
+ {"prnE;", 5u, 10933u, 0u},
+ {"prnap;", 6u, 10937u, 0u},
+ {"prnsim;", 7u, 8936u, 0u},
+ {"prod;", 5u, 8719u, 0u},
+ {"profalar;", 9u, 9006u, 0u},
+ {"profline;", 9u, 8978u, 0u},
+ {"profsurf;", 9u, 8979u, 0u},
+ {"prop;", 5u, 8733u, 0u},
+ {"propto;", 7u, 8733u, 0u},
+ {"prsim;", 6u, 8830u, 0u},
+ {"prurel;", 7u, 8880u, 0u},
+ {"pscr;", 5u, 120005u, 0u},
+ {"psi;", 4u, 968u, 0u},
+ {"puncsp;", 7u, 8200u, 0u},
+ {"qfr;", 4u, 120110u, 0u},
+ {"qint;", 5u, 10764u, 0u},
+ {"qopf;", 5u, 120162u, 0u},
+ {"qprime;", 7u, 8279u, 0u},
+ {"qscr;", 5u, 120006u, 0u},
+ {"quaternions;", 12u, 8461u, 0u},
+ {"quatint;", 8u, 10774u, 0u},
+ {"quest;", 6u, 63u, 0u},
+ {"questeq;", 8u, 8799u, 0u},
+ {"quot", 4u, 34u, 0u},
+ {"quot;", 5u, 34u, 0u},
+ {"rAarr;", 6u, 8667u, 0u},
+ {"rArr;", 5u, 8658u, 0u},
+ {"rAtail;", 7u, 10524u, 0u},
+ {"rBarr;", 6u, 10511u, 0u},
+ {"rHar;", 5u, 10596u, 0u},
+ {"race;", 5u, 8765u, 817u},
+ {"racute;", 7u, 341u, 0u},
+ {"radic;", 6u, 8730u, 0u},
+ {"raemptyv;", 9u, 10675u, 0u},
+ {"rang;", 5u, 10217u, 0u},
+ {"rangd;", 6u, 10642u, 0u},
+ {"range;", 6u, 10661u, 0u},
+ {"rangle;", 7u, 10217u, 0u},
+ {"raquo", 5u, 187u, 0u},
+ {"raquo;", 6u, 187u, 0u},
+ {"rarr;", 5u, 8594u, 0u},
+ {"rarrap;", 7u, 10613u, 0u},
+ {"rarrb;", 6u, 8677u, 0u},
+ {"rarrbfs;", 8u, 10528u, 0u},
+ {"rarrc;", 6u, 10547u, 0u},
+ {"rarrfs;", 7u, 10526u, 0u},
+ {"rarrhk;", 7u, 8618u, 0u},
+ {"rarrlp;", 7u, 8620u, 0u},
+ {"rarrpl;", 7u, 10565u, 0u},
+ {"rarrsim;", 8u, 10612u, 0u},
+ {"rarrtl;", 7u, 8611u, 0u},
+ {"rarrw;", 6u, 8605u, 0u},
+ {"ratail;", 7u, 10522u, 0u},
+ {"ratio;", 6u, 8758u, 0u},
+ {"rationals;", 10u, 8474u, 0u},
+ {"rbarr;", 6u, 10509u, 0u},
+ {"rbbrk;", 6u, 10099u, 0u},
+ {"rbrace;", 7u, 125u, 0u},
+ {"rbrack;", 7u, 93u, 0u},
+ {"rbrke;", 6u, 10636u, 0u},
+ {"rbrksld;", 8u, 10638u, 0u},
+ {"rbrkslu;", 8u, 10640u, 0u},
+ {"rcaron;", 7u, 345u, 0u},
+ {"rcedil;", 7u, 343u, 0u},
+ {"rceil;", 6u, 8969u, 0u},
+ {"rcub;", 5u, 125u, 0u},
+ {"rcy;", 4u, 1088u, 0u},
+ {"rdca;", 5u, 10551u, 0u},
+ {"rdldhar;", 8u, 10601u, 0u},
+ {"rdquo;", 6u, 8221u, 0u},
+ {"rdquor;", 7u, 8221u, 0u},
+ {"rdsh;", 5u, 8627u, 0u},
+ {"real;", 5u, 8476u, 0u},
+ {"realine;", 8u, 8475u, 0u},
+ {"realpart;", 9u, 8476u, 0u},
+ {"reals;", 6u, 8477u, 0u},
+ {"rect;", 5u, 9645u, 0u},
+ {"reg", 3u, 174u, 0u},
+ {"reg;", 4u, 174u, 0u},
+ {"rfisht;", 7u, 10621u, 0u},
+ {"rfloor;", 7u, 8971u, 0u},
+ {"rfr;", 4u, 120111u, 0u},
+ {"rhard;", 6u, 8641u, 0u},
+ {"rharu;", 6u, 8640u, 0u},
+ {"rharul;", 7u, 10604u, 0u},
+ {"rho;", 4u, 961u, 0u},
+ {"rhov;", 5u, 1009u, 0u},
+ {"rightarrow;", 11u, 8594u, 0u},
+ {"rightarrowtail;", 15u, 8611u, 0u},
+ {"rightharpoondown;", 17u, 8641u, 0u},
+ {"rightharpoonup;", 15u, 8640u, 0u},
+ {"rightleftarrows;", 16u, 8644u, 0u},
+ {"rightleftharpoons;", 18u, 8652u, 0u},
+ {"rightrightarrows;", 17u, 8649u, 0u},
+ {"rightsquigarrow;", 16u, 8605u, 0u},
+ {"rightthreetimes;", 16u, 8908u, 0u},
+ {"ring;", 5u, 730u, 0u},
+ {"risingdotseq;", 13u, 8787u, 0u},
+ {"rlarr;", 6u, 8644u, 0u},
+ {"rlhar;", 6u, 8652u, 0u},
+ {"rlm;", 4u, 8207u, 0u},
+ {"rmoust;", 7u, 9137u, 0u},
+ {"rmoustache;", 11u, 9137u, 0u},
+ {"rnmid;", 6u, 10990u, 0u},
+ {"roang;", 6u, 10221u, 0u},
+ {"roarr;", 6u, 8702u, 0u},
+ {"robrk;", 6u, 10215u, 0u},
+ {"ropar;", 6u, 10630u, 0u},
+ {"ropf;", 5u, 120163u, 0u},
+ {"roplus;", 7u, 10798u, 0u},
+ {"rotimes;", 8u, 10805u, 0u},
+ {"rpar;", 5u, 41u, 0u},
+ {"rpargt;", 7u, 10644u, 0u},
+ {"rppolint;", 9u, 10770u, 0u},
+ {"rrarr;", 6u, 8649u, 0u},
+ {"rsaquo;", 7u, 8250u, 0u},
+ {"rscr;", 5u, 120007u, 0u},
+ {"rsh;", 4u, 8625u, 0u},
+ {"rsqb;", 5u, 93u, 0u},
+ {"rsquo;", 6u, 8217u, 0u},
+ {"rsquor;", 7u, 8217u, 0u},
+ {"rthree;", 7u, 8908u, 0u},
+ {"rtimes;", 7u, 8906u, 0u},
+ {"rtri;", 5u, 9657u, 0u},
+ {"rtrie;", 6u, 8885u, 0u},
+ {"rtrif;", 6u, 9656u, 0u},
+ {"rtriltri;", 9u, 10702u, 0u},
+ {"ruluhar;", 8u, 10600u, 0u},
+ {"rx;", 3u, 8478u, 0u},
+ {"sacute;", 7u, 347u, 0u},
+ {"sbquo;", 6u, 8218u, 0u},
+ {"sc;", 3u, 8827u, 0u},
+ {"scE;", 4u, 10932u, 0u},
+ {"scap;", 5u, 10936u, 0u},
+ {"scaron;", 7u, 353u, 0u},
+ {"sccue;", 6u, 8829u, 0u},
+ {"sce;", 4u, 10928u, 0u},
+ {"scedil;", 7u, 351u, 0u},
+ {"scirc;", 6u, 349u, 0u},
+ {"scnE;", 5u, 10934u, 0u},
+ {"scnap;", 6u, 10938u, 0u},
+ {"scnsim;", 7u, 8937u, 0u},
+ {"scpolint;", 9u, 10771u, 0u},
+ {"scsim;", 6u, 8831u, 0u},
+ {"scy;", 4u, 1089u, 0u},
+ {"sdot;", 5u, 8901u, 0u},
+ {"sdotb;", 6u, 8865u, 0u},
+ {"sdote;", 6u, 10854u, 0u},
+ {"seArr;", 6u, 8664u, 0u},
+ {"searhk;", 7u, 10533u, 0u},
+ {"searr;", 6u, 8600u, 0u},
+ {"searrow;", 8u, 8600u, 0u},
+ {"sect", 4u, 167u, 0u},
+ {"sect;", 5u, 167u, 0u},
+ {"semi;", 5u, 59u, 0u},
+ {"seswar;", 7u, 10537u, 0u},
+ {"setminus;", 9u, 8726u, 0u},
+ {"setmn;", 6u, 8726u, 0u},
+ {"sext;", 5u, 10038u, 0u},
+ {"sfr;", 4u, 120112u, 0u},
+ {"sfrown;", 7u, 8994u, 0u},
+ {"sharp;", 6u, 9839u, 0u},
+ {"shchcy;", 7u, 1097u, 0u},
+ {"shcy;", 5u, 1096u, 0u},
+ {"shortmid;", 9u, 8739u, 0u},
+ {"shortparallel;", 14u, 8741u, 0u},
+ {"shy", 3u, 173u, 0u},
+ {"shy;", 4u, 173u, 0u},
+ {"sigma;", 6u, 963u, 0u},
+ {"sigmaf;", 7u, 962u, 0u},
+ {"sigmav;", 7u, 962u, 0u},
+ {"sim;", 4u, 8764u, 0u},
+ {"simdot;", 7u, 10858u, 0u},
+ {"sime;", 5u, 8771u, 0u},
+ {"simeq;", 6u, 8771u, 0u},
+ {"simg;", 5u, 10910u, 0u},
+ {"simgE;", 6u, 10912u, 0u},
+ {"siml;", 5u, 10909u, 0u},
+ {"simlE;", 6u, 10911u, 0u},
+ {"simne;", 6u, 8774u, 0u},
+ {"simplus;", 8u, 10788u, 0u},
+ {"simrarr;", 8u, 10610u, 0u},
+ {"slarr;", 6u, 8592u, 0u},
+ {"smallsetminus;", 14u, 8726u, 0u},
+ {"smashp;", 7u, 10803u, 0u},
+ {"smeparsl;", 9u, 10724u, 0u},
+ {"smid;", 5u, 8739u, 0u},
+ {"smile;", 6u, 8995u, 0u},
+ {"smt;", 4u, 10922u, 0u},
+ {"smte;", 5u, 10924u, 0u},
+ {"smtes;", 6u, 10924u, 65024u},
+ {"softcy;", 7u, 1100u, 0u},
+ {"sol;", 4u, 47u, 0u},
+ {"solb;", 5u, 10692u, 0u},
+ {"solbar;", 7u, 9023u, 0u},
+ {"sopf;", 5u, 120164u, 0u},
+ {"spades;", 7u, 9824u, 0u},
+ {"spadesuit;", 10u, 9824u, 0u},
+ {"spar;", 5u, 8741u, 0u},
+ {"sqcap;", 6u, 8851u, 0u},
+ {"sqcaps;", 7u, 8851u, 65024u},
+ {"sqcup;", 6u, 8852u, 0u},
+ {"sqcups;", 7u, 8852u, 65024u},
+ {"sqsub;", 6u, 8847u, 0u},
+ {"sqsube;", 7u, 8849u, 0u},
+ {"sqsubset;", 9u, 8847u, 0u},
+ {"sqsubseteq;", 11u, 8849u, 0u},
+ {"sqsup;", 6u, 8848u, 0u},
+ {"sqsupe;", 7u, 8850u, 0u},
+ {"sqsupset;", 9u, 8848u, 0u},
+ {"sqsupseteq;", 11u, 8850u, 0u},
+ {"squ;", 4u, 9633u, 0u},
+ {"square;", 7u, 9633u, 0u},
+ {"squarf;", 7u, 9642u, 0u},
+ {"squf;", 5u, 9642u, 0u},
+ {"srarr;", 6u, 8594u, 0u},
+ {"sscr;", 5u, 120008u, 0u},
+ {"ssetmn;", 7u, 8726u, 0u},
+ {"ssmile;", 7u, 8995u, 0u},
+ {"sstarf;", 7u, 8902u, 0u},
+ {"star;", 5u, 9734u, 0u},
+ {"starf;", 6u, 9733u, 0u},
+ {"straightepsilon;", 16u, 1013u, 0u},
+ {"straightphi;", 12u, 981u, 0u},
+ {"strns;", 6u, 175u, 0u},
+ {"sub;", 4u, 8834u, 0u},
+ {"subE;", 5u, 10949u, 0u},
+ {"subdot;", 7u, 10941u, 0u},
+ {"sube;", 5u, 8838u, 0u},
+ {"subedot;", 8u, 10947u, 0u},
+ {"submult;", 8u, 10945u, 0u},
+ {"subnE;", 6u, 10955u, 0u},
+ {"subne;", 6u, 8842u, 0u},
+ {"subplus;", 8u, 10943u, 0u},
+ {"subrarr;", 8u, 10617u, 0u},
+ {"subset;", 7u, 8834u, 0u},
+ {"subseteq;", 9u, 8838u, 0u},
+ {"subseteqq;", 10u, 10949u, 0u},
+ {"subsetneq;", 10u, 8842u, 0u},
+ {"subsetneqq;", 11u, 10955u, 0u},
+ {"subsim;", 7u, 10951u, 0u},
+ {"subsub;", 7u, 10965u, 0u},
+ {"subsup;", 7u, 10963u, 0u},
+ {"succ;", 5u, 8827u, 0u},
+ {"succapprox;", 11u, 10936u, 0u},
+ {"succcurlyeq;", 12u, 8829u, 0u},
+ {"succeq;", 7u, 10928u, 0u},
+ {"succnapprox;", 12u, 10938u, 0u},
+ {"succneqq;", 9u, 10934u, 0u},
+ {"succnsim;", 9u, 8937u, 0u},
+ {"succsim;", 8u, 8831u, 0u},
+ {"sum;", 4u, 8721u, 0u},
+ {"sung;", 5u, 9834u, 0u},
+ {"sup1", 4u, 185u, 0u},
+ {"sup1;", 5u, 185u, 0u},
+ {"sup2", 4u, 178u, 0u},
+ {"sup2;", 5u, 178u, 0u},
+ {"sup3", 4u, 179u, 0u},
+ {"sup3;", 5u, 179u, 0u},
+ {"sup;", 4u, 8835u, 0u},
+ {"supE;", 5u, 10950u, 0u},
+ {"supdot;", 7u, 10942u, 0u},
+ {"supdsub;", 8u, 10968u, 0u},
+ {"supe;", 5u, 8839u, 0u},
+ {"supedot;", 8u, 10948u, 0u},
+ {"suphsol;", 8u, 10185u, 0u},
+ {"suphsub;", 8u, 10967u, 0u},
+ {"suplarr;", 8u, 10619u, 0u},
+ {"supmult;", 8u, 10946u, 0u},
+ {"supnE;", 6u, 10956u, 0u},
+ {"supne;", 6u, 8843u, 0u},
+ {"supplus;", 8u, 10944u, 0u},
+ {"supset;", 7u, 8835u, 0u},
+ {"supseteq;", 9u, 8839u, 0u},
+ {"supseteqq;", 10u, 10950u, 0u},
+ {"supsetneq;", 10u, 8843u, 0u},
+ {"supsetneqq;", 11u, 10956u, 0u},
+ {"supsim;", 7u, 10952u, 0u},
+ {"supsub;", 7u, 10964u, 0u},
+ {"supsup;", 7u, 10966u, 0u},
+ {"swArr;", 6u, 8665u, 0u},
+ {"swarhk;", 7u, 10534u, 0u},
+ {"swarr;", 6u, 8601u, 0u},
+ {"swarrow;", 8u, 8601u, 0u},
+ {"swnwar;", 7u, 10538u, 0u},
+ {"szlig", 5u, 223u, 0u},
+ {"szlig;", 6u, 223u, 0u},
+ {"target;", 7u, 8982u, 0u},
+ {"tau;", 4u, 964u, 0u},
+ {"tbrk;", 5u, 9140u, 0u},
+ {"tcaron;", 7u, 357u, 0u},
+ {"tcedil;", 7u, 355u, 0u},
+ {"tcy;", 4u, 1090u, 0u},
+ {"tdot;", 5u, 8411u, 0u},
+ {"telrec;", 7u, 8981u, 0u},
+ {"tfr;", 4u, 120113u, 0u},
+ {"there4;", 7u, 8756u, 0u},
+ {"therefore;", 10u, 8756u, 0u},
+ {"theta;", 6u, 952u, 0u},
+ {"thetasym;", 9u, 977u, 0u},
+ {"thetav;", 7u, 977u, 0u},
+ {"thickapprox;", 12u, 8776u, 0u},
+ {"thicksim;", 9u, 8764u, 0u},
+ {"thinsp;", 7u, 8201u, 0u},
+ {"thkap;", 6u, 8776u, 0u},
+ {"thksim;", 7u, 8764u, 0u},
+ {"thorn", 5u, 254u, 0u},
+ {"thorn;", 6u, 254u, 0u},
+ {"tilde;", 6u, 732u, 0u},
+ {"times", 5u, 215u, 0u},
+ {"times;", 6u, 215u, 0u},
+ {"timesb;", 7u, 8864u, 0u},
+ {"timesbar;", 9u, 10801u, 0u},
+ {"timesd;", 7u, 10800u, 0u},
+ {"tint;", 5u, 8749u, 0u},
+ {"toea;", 5u, 10536u, 0u},
+ {"top;", 4u, 8868u, 0u},
+ {"topbot;", 7u, 9014u, 0u},
+ {"topcir;", 7u, 10993u, 0u},
+ {"topf;", 5u, 120165u, 0u},
+ {"topfork;", 8u, 10970u, 0u},
+ {"tosa;", 5u, 10537u, 0u},
+ {"tprime;", 7u, 8244u, 0u},
+ {"trade;", 6u, 8482u, 0u},
+ {"triangle;", 9u, 9653u, 0u},
+ {"triangledown;", 13u, 9663u, 0u},
+ {"triangleleft;", 13u, 9667u, 0u},
+ {"trianglelefteq;", 15u, 8884u, 0u},
+ {"triangleq;", 10u, 8796u, 0u},
+ {"triangleright;", 14u, 9657u, 0u},
+ {"trianglerighteq;", 16u, 8885u, 0u},
+ {"tridot;", 7u, 9708u, 0u},
+ {"trie;", 5u, 8796u, 0u},
+ {"triminus;", 9u, 10810u, 0u},
+ {"triplus;", 8u, 10809u, 0u},
+ {"trisb;", 6u, 10701u, 0u},
+ {"tritime;", 8u, 10811u, 0u},
+ {"trpezium;", 9u, 9186u, 0u},
+ {"tscr;", 5u, 120009u, 0u},
+ {"tscy;", 5u, 1094u, 0u},
+ {"tshcy;", 6u, 1115u, 0u},
+ {"tstrok;", 7u, 359u, 0u},
+ {"twixt;", 6u, 8812u, 0u},
+ {"twoheadleftarrow;", 17u, 8606u, 0u},
+ {"twoheadrightarrow;", 18u, 8608u, 0u},
+ {"uArr;", 5u, 8657u, 0u},
+ {"uHar;", 5u, 10595u, 0u},
+ {"uacute", 6u, 250u, 0u},
+ {"uacute;", 7u, 250u, 0u},
+ {"uarr;", 5u, 8593u, 0u},
+ {"ubrcy;", 6u, 1118u, 0u},
+ {"ubreve;", 7u, 365u, 0u},
+ {"ucirc", 5u, 251u, 0u},
+ {"ucirc;", 6u, 251u, 0u},
+ {"ucy;", 4u, 1091u, 0u},
+ {"udarr;", 6u, 8645u, 0u},
+ {"udblac;", 7u, 369u, 0u},
+ {"udhar;", 6u, 10606u, 0u},
+ {"ufisht;", 7u, 10622u, 0u},
+ {"ufr;", 4u, 120114u, 0u},
+ {"ugrave", 6u, 249u, 0u},
+ {"ugrave;", 7u, 249u, 0u},
+ {"uharl;", 6u, 8639u, 0u},
+ {"uharr;", 6u, 8638u, 0u},
+ {"uhblk;", 6u, 9600u, 0u},
+ {"ulcorn;", 7u, 8988u, 0u},
+ {"ulcorner;", 9u, 8988u, 0u},
+ {"ulcrop;", 7u, 8975u, 0u},
+ {"ultri;", 6u, 9720u, 0u},
+ {"umacr;", 6u, 363u, 0u},
+ {"uml", 3u, 168u, 0u},
+ {"uml;", 4u, 168u, 0u},
+ {"uogon;", 6u, 371u, 0u},
+ {"uopf;", 5u, 120166u, 0u},
+ {"uparrow;", 8u, 8593u, 0u},
+ {"updownarrow;", 12u, 8597u, 0u},
+ {"upharpoonleft;", 14u, 8639u, 0u},
+ {"upharpoonright;", 15u, 8638u, 0u},
+ {"uplus;", 6u, 8846u, 0u},
+ {"upsi;", 5u, 965u, 0u},
+ {"upsih;", 6u, 978u, 0u},
+ {"upsilon;", 8u, 965u, 0u},
+ {"upuparrows;", 11u, 8648u, 0u},
+ {"urcorn;", 7u, 8989u, 0u},
+ {"urcorner;", 9u, 8989u, 0u},
+ {"urcrop;", 7u, 8974u, 0u},
+ {"uring;", 6u, 367u, 0u},
+ {"urtri;", 6u, 9721u, 0u},
+ {"uscr;", 5u, 120010u, 0u},
+ {"utdot;", 6u, 8944u, 0u},
+ {"utilde;", 7u, 361u, 0u},
+ {"utri;", 5u, 9653u, 0u},
+ {"utrif;", 6u, 9652u, 0u},
+ {"uuarr;", 6u, 8648u, 0u},
+ {"uuml", 4u, 252u, 0u},
+ {"uuml;", 5u, 252u, 0u},
+ {"uwangle;", 8u, 10663u, 0u},
+ {"vArr;", 5u, 8661u, 0u},
+ {"vBar;", 5u, 10984u, 0u},
+ {"vBarv;", 6u, 10985u, 0u},
+ {"vDash;", 6u, 8872u, 0u},
+ {"vangrt;", 7u, 10652u, 0u},
+ {"varepsilon;", 11u, 1013u, 0u},
+ {"varkappa;", 9u, 1008u, 0u},
+ {"varnothing;", 11u, 8709u, 0u},
+ {"varphi;", 7u, 981u, 0u},
+ {"varpi;", 6u, 982u, 0u},
+ {"varpropto;", 10u, 8733u, 0u},
+ {"varr;", 5u, 8597u, 0u},
+ {"varrho;", 7u, 1009u, 0u},
+ {"varsigma;", 9u, 962u, 0u},
+ {"varsubsetneq;", 13u, 8842u, 65024u},
+ {"varsubsetneqq;", 14u, 10955u, 65024u},
+ {"varsupsetneq;", 13u, 8843u, 65024u},
+ {"varsupsetneqq;", 14u, 10956u, 65024u},
+ {"vartheta;", 9u, 977u, 0u},
+ {"vartriangleleft;", 16u, 8882u, 0u},
+ {"vartriangleright;", 17u, 8883u, 0u},
+ {"vcy;", 4u, 1074u, 0u},
+ {"vdash;", 6u, 8866u, 0u},
+ {"vee;", 4u, 8744u, 0u},
+ {"veebar;", 7u, 8891u, 0u},
+ {"veeeq;", 6u, 8794u, 0u},
+ {"vellip;", 7u, 8942u, 0u},
+ {"verbar;", 7u, 124u, 0u},
+ {"vert;", 5u, 124u, 0u},
+ {"vfr;", 4u, 120115u, 0u},
+ {"vltri;", 6u, 8882u, 0u},
+ {"vnsub;", 6u, 8834u, 8402u},
+ {"vnsup;", 6u, 8835u, 8402u},
+ {"vopf;", 5u, 120167u, 0u},
+ {"vprop;", 6u, 8733u, 0u},
+ {"vrtri;", 6u, 8883u, 0u},
+ {"vscr;", 5u, 120011u, 0u},
+ {"vsubnE;", 7u, 10955u, 65024u},
+ {"vsubne;", 7u, 8842u, 65024u},
+ {"vsupnE;", 7u, 10956u, 65024u},
+ {"vsupne;", 7u, 8843u, 65024u},
+ {"vzigzag;", 8u, 10650u, 0u},
+ {"wcirc;", 6u, 373u, 0u},
+ {"wedbar;", 7u, 10847u, 0u},
+ {"wedge;", 6u, 8743u, 0u},
+ {"wedgeq;", 7u, 8793u, 0u},
+ {"weierp;", 7u, 8472u, 0u},
+ {"wfr;", 4u, 120116u, 0u},
+ {"wopf;", 5u, 120168u, 0u},
+ {"wp;", 3u, 8472u, 0u},
+ {"wr;", 3u, 8768u, 0u},
+ {"wreath;", 7u, 8768u, 0u},
+ {"wscr;", 5u, 120012u, 0u},
+ {"xcap;", 5u, 8898u, 0u},
+ {"xcirc;", 6u, 9711u, 0u},
+ {"xcup;", 5u, 8899u, 0u},
+ {"xdtri;", 6u, 9661u, 0u},
+ {"xfr;", 4u, 120117u, 0u},
+ {"xhArr;", 6u, 10234u, 0u},
+ {"xharr;", 6u, 10231u, 0u},
+ {"xi;", 3u, 958u, 0u},
+ {"xlArr;", 6u, 10232u, 0u},
+ {"xlarr;", 6u, 10229u, 0u},
+ {"xmap;", 5u, 10236u, 0u},
+ {"xnis;", 5u, 8955u, 0u},
+ {"xodot;", 6u, 10752u, 0u},
+ {"xopf;", 5u, 120169u, 0u},
+ {"xoplus;", 7u, 10753u, 0u},
+ {"xotime;", 7u, 10754u, 0u},
+ {"xrArr;", 6u, 10233u, 0u},
+ {"xrarr;", 6u, 10230u, 0u},
+ {"xscr;", 5u, 120013u, 0u},
+ {"xsqcup;", 7u, 10758u, 0u},
+ {"xuplus;", 7u, 10756u, 0u},
+ {"xutri;", 6u, 9651u, 0u},
+ {"xvee;", 5u, 8897u, 0u},
+ {"xwedge;", 7u, 8896u, 0u},
+ {"yacute", 6u, 253u, 0u},
+ {"yacute;", 7u, 253u, 0u},
+ {"yacy;", 5u, 1103u, 0u},
+ {"ycirc;", 6u, 375u, 0u},
+ {"ycy;", 4u, 1099u, 0u},
+ {"yen", 3u, 165u, 0u},
+ {"yen;", 4u, 165u, 0u},
+ {"yfr;", 4u, 120118u, 0u},
+ {"yicy;", 5u, 1111u, 0u},
+ {"yopf;", 5u, 120170u, 0u},
+ {"yscr;", 5u, 120014u, 0u},
+ {"yucy;", 5u, 1102u, 0u},
+ {"yuml", 4u, 255u, 0u},
+ {"yuml;", 5u, 255u, 0u},
+ {"zacute;", 7u, 378u, 0u},
+ {"zcaron;", 7u, 382u, 0u},
+ {"zcy;", 4u, 1079u, 0u},
+ {"zdot;", 5u, 380u, 0u},
+ {"zeetrf;", 7u, 8488u, 0u},
+ {"zeta;", 5u, 950u, 0u},
+ {"zfr;", 4u, 120119u, 0u},
+ {"zhcy;", 5u, 1078u, 0u},
+ {"zigrarr;", 8u, 8669u, 0u},
+ {"zopf;", 5u, 120171u, 0u},
+ {"zscr;", 5u, 120015u, 0u},
+ {"zwj;", 4u, 8205u, 0u},
+ {"zwnj;", 5u, 8204u, 0u},
+};
+
+typedef struct { Py_UCS4 num; Py_UCS4 cp; } html5_charref;
+static const int invalid_charref_count = 34;
+static const html5_charref invalid_charrefs[] = {
+ {0u, 65533u},
+ {13u, 13u},
+ {128u, 8364u},
+ {129u, 129u},
+ {130u, 8218u},
+ {131u, 402u},
+ {132u, 8222u},
+ {133u, 8230u},
+ {134u, 8224u},
+ {135u, 8225u},
+ {136u, 710u},
+ {137u, 8240u},
+ {138u, 352u},
+ {139u, 8249u},
+ {140u, 338u},
+ {141u, 141u},
+ {142u, 381u},
+ {143u, 143u},
+ {144u, 144u},
+ {145u, 8216u},
+ {146u, 8217u},
+ {147u, 8220u},
+ {148u, 8221u},
+ {149u, 8226u},
+ {150u, 8211u},
+ {151u, 8212u},
+ {152u, 732u},
+ {153u, 8482u},
+ {154u, 353u},
+ {155u, 8250u},
+ {156u, 339u},
+ {157u, 157u},
+ {158u, 382u},
+ {159u, 376u},
+};
+
+static const int invalid_codepoint_count = 126;
+static const Py_UCS4 invalid_codepoints[] = {
+ 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 11u, 14u,
+ 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u,
+ 25u, 26u, 27u, 28u, 29u, 30u, 31u, 127u, 128u, 129u,
+ 130u, 131u, 132u, 133u, 134u, 135u, 136u, 137u, 138u, 139u,
+ 140u, 141u, 142u, 143u, 144u, 145u, 146u, 147u, 148u, 149u,
+ 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u, 158u, 159u,
+ 64976u, 64977u, 64978u, 64979u, 64980u, 64981u, 64982u, 64983u, 64984u, 64985u,
+ 64986u, 64987u, 64988u, 64989u, 64990u, 64991u, 64992u, 64993u, 64994u, 64995u,
+ 64996u, 64997u, 64998u, 64999u, 65000u, 65001u, 65002u, 65003u, 65004u, 65005u,
+ 65006u, 65007u, 65534u, 65535u, 131070u, 131071u, 196606u, 196607u, 262142u, 262143u,
+ 327678u, 327679u, 393214u, 393215u, 458750u, 458751u, 524286u, 524287u, 589822u, 589823u,
+ 655358u, 655359u, 720894u, 720895u, 786430u, 786431u, 851966u, 851967u, 917502u, 917503u,
+ 983038u, 983039u, 1048574u, 1048575u, 1114110u, 1114111u,
+};
diff --git a/PC/config.c b/PC/config.c
index 51b46c64d99b816..5dc57a385f19761 100644
--- a/PC/config.c
+++ b/PC/config.c
@@ -60,6 +60,7 @@ extern PyObject* PyInit_winreg(void);
extern PyObject* PyInit__struct(void);
extern PyObject* PyInit__datetime(void);
extern PyObject* PyInit__functools(void);
+extern PyObject* PyInit__html(void);
extern PyObject* PyInit__json(void);
#ifdef _Py_HAVE_ZLIB
extern PyObject* PyInit_zlib(void);
@@ -151,6 +152,7 @@ struct _inittab _PyImport_Inittab[] = {
{"_struct", PyInit__struct},
{"_datetime", PyInit__datetime},
{"_functools", PyInit__functools},
+ {"_html", PyInit__html},
{"_json", PyInit__json},
{"_suggestions", PyInit__suggestions},
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index e255ed5af19125d..85d4173868d8b76 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -464,6 +464,7 @@
/arch:AVX %(AdditionalOptions)
+
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 649ee1859ff9961..7cf2ce473f08b00 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -1013,6 +1013,9 @@
Modules
+
+ Modules
+
Modules
diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h
index 8937e666bbbdd5b..a45d8878917815e 100644
--- a/Python/stdlib_module_names.h
+++ b/Python/stdlib_module_names.h
@@ -40,6 +40,7 @@ static const char* _Py_stdlib_module_names[] = {
"_hashlib",
"_heapq",
"_hmac",
+"_html",
"_imp",
"_interpchannels",
"_interpqueues",
diff --git a/Tools/build/generate_html_entities.py b/Tools/build/generate_html_entities.py
new file mode 100644
index 000000000000000..b9ce96cc1f5a735
--- /dev/null
+++ b/Tools/build/generate_html_entities.py
@@ -0,0 +1,75 @@
+"""Generate Modules/html_entities.h from the html module data.
+
+The C accelerator :mod:`!_html` binary-searches the HTML5 named character
+references and the numeric-charref correction tables. Both are derived from
+the pure-Python source of truth in :mod:`html.entities` and :mod:`html`, so this
+script regenerates the C header to keep them in sync.
+
+Usage: python Tools/build/generate_html_entities.py Modules/html_entities.h
+"""
+
+import sys
+from html import _invalid_charrefs, _invalid_codepoints
+from html.entities import html5
+
+
+def generate(out) -> None:
+    """Write the complete text of the C header to *out*.
+
+    *out* only needs a ``write(str)`` method.  Three tables are emitted, each
+    in ascending key order (the module docstring states that the C
+    accelerator binary-searches them):
+
+    * ``html5_entities`` -- every name in ``html.entities.html5`` with its
+      length and its one or two replacement code points,
+    * ``invalid_charrefs`` -- numeric reference -> replacement code point,
+    * ``invalid_codepoints`` -- the members of ``html._invalid_codepoints``.
+
+    Every check below runs before the first ``write`` call, so nothing is
+    written to *out* when the input data is rejected.
+
+    Raises:
+        SystemExit: if the Python data cannot be represented in the C
+            layout: a named value that is empty or longer than two code
+            points, a name that is not plain ASCII, contains ``"`` or a
+            backslash, or is longer than 255 characters, or a charref
+            replacement that is not exactly one character.
+    """
+    max_value_len = max(len(v) for v in html5.values())
+    if max_value_len > 2:
+        raise SystemExit(f"named value longer than 2 code points: {max_value_len}")
+    max_name_len = max(len(k) for k in html5)
+    # name_len is declared ``unsigned char`` in the typedef written below, so
+    # a longer name would not fit in that field.
+    if max_name_len > 255:
+        raise SystemExit(f"named reference longer than 255 characters: {max_name_len}")
+
+    named = []
+    for name in sorted(html5):  # ASCII names: code-point order == byte order
+        # Enforce the assumption stated above instead of trusting it: only for
+        # ASCII names does sorted() give byte order and len(name) give the
+        # byte length.  The name is also pasted verbatim between double
+        # quotes, so it must not contain a quote or a backslash.
+        if not name.isascii() or '"' in name or "\\" in name:
+            raise SystemExit(f"named reference is not a plain ASCII literal: {name!r}")
+        value = html5[name]
+        if not value:
+            # value[0] below would otherwise fail with a bare IndexError.
+            raise SystemExit(f"named value is empty: {name!r}")
+        cp0 = ord(value[0])
+        cp1 = ord(value[1]) if len(value) == 2 else 0  # 0 marks "no second char"
+        named.append(f' {{"{name}", {len(name)}u, {cp0}u, {cp1}u}},')
+
+    charrefs = []
+    for num in sorted(_invalid_charrefs):
+        repl = _invalid_charrefs[num]
+        # The C struct holds a single replacement code point per entry.
+        if len(repl) != 1:
+            raise SystemExit(f"invalid charref value not a single char: {num:#x}")
+        charrefs.append(f" {{{num}u, {ord(repl)}u}},")
+
+    # Lay the code points out ten per line to keep the header readable.
+    codepoints = sorted(_invalid_codepoints)
+    cps = "\n".join(
+        " " + " ".join(f"{cp}u," for cp in codepoints[i : i + 10])
+        for i in range(0, len(codepoints), 10)
+    )
+
+    write = out.write
+    write("/* Auto-generated by Tools/build/generate_html_entities.py */\n")
+    write("/* HTML5 named references and numeric-charref correction tables. */\n\n")
+    write("typedef struct {\n")
+    write(" const char *name;\n")
+    write(" unsigned char name_len;\n")
+    write(" Py_UCS4 cp0;\n")
+    write(" Py_UCS4 cp1; /* second code point, or 0 if the value is one char */\n")
+    write("} html5_entity;\n\n")
+    write(f"#define HTML5_MAX_NAME_LEN {max_name_len}\n")
+    write(f"static const int html5_count = {len(named)};\n")
+    write("static const html5_entity html5_entities[] = {\n")
+    write("\n".join(named))
+    write("\n};\n\n")
+    write("typedef struct { Py_UCS4 num; Py_UCS4 cp; } html5_charref;\n")
+    write(f"static const int invalid_charref_count = {len(charrefs)};\n")
+    write("static const html5_charref invalid_charrefs[] = {\n")
+    write("\n".join(charrefs))
+    write("\n};\n\n")
+    write(f"static const int invalid_codepoint_count = {len(codepoints)};\n")
+    write("static const Py_UCS4 invalid_codepoints[] = {\n")
+    write(cps)
+    write("\n};\n")
+
+
+def main() -> None:
+    """Regenerate the header at the path given as the only CLI argument.
+
+    Exits with the module docstring as the usage message when the argument
+    count is wrong.  The header text is rendered into memory first and the
+    target file is opened only after rendering succeeded: opening with "w"
+    truncates the file, so opening it before generate() has validated its
+    input would wipe an existing header whenever generate() bails out.
+    """
+    import io  # function-local: not among this module's top-level imports
+
+    if len(sys.argv) != 2:
+        raise SystemExit(__doc__)
+    rendered = io.StringIO()
+    generate(rendered)
+    with open(sys.argv[1], "w", encoding="utf-8") as out:
+        out.write(rendered.getvalue())
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index d7248c34c59be45..bce6749a271bba6 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -70,6 +70,7 @@ def format_tsv_lines(lines):
# only huge constants (safe but parsing is slow)
'Modules/_ssl_data_*.h',
'Modules/cjkcodecs/mappings_*.h',
+ 'Modules/html_entities.h',
'Modules/unicodedata_db.h',
'Modules/unicodename_db.h',
'Objects/unicodetype_db.h',
diff --git a/configure b/configure
index eb53b200bf78bc5..a70224f1b7d4234 100755
--- a/configure
+++ b/configure
@@ -810,6 +810,8 @@ MODULE__LSPROF_FALSE
MODULE__LSPROF_TRUE
MODULE__JSON_FALSE
MODULE__JSON_TRUE
+MODULE__HTML_FALSE
+MODULE__HTML_TRUE
MODULE__HEAPQ_FALSE
MODULE__HEAPQ_TRUE
MODULE__CSV_FALSE
@@ -32322,6 +32324,28 @@ then :
+fi
+
+
+ if test "$py_cv_module__html" != "n/a"
+then :
+ py_cv_module__html=yes
+fi
+ if test "$py_cv_module__html" = yes; then
+ MODULE__HTML_TRUE=
+ MODULE__HTML_FALSE='#'
+else
+ MODULE__HTML_TRUE='#'
+ MODULE__HTML_FALSE=
+fi
+
+ as_fn_append MODULE_BLOCK "MODULE__HTML_STATE=$py_cv_module__html$as_nl"
+ if test "x$py_cv_module__html" = xyes
+then :
+
+
+
+
fi
@@ -35429,6 +35453,10 @@ if test -z "${MODULE__HEAPQ_TRUE}" && test -z "${MODULE__HEAPQ_FALSE}"; then
as_fn_error $? "conditional \"MODULE__HEAPQ\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${MODULE__HTML_TRUE}" && test -z "${MODULE__HTML_FALSE}"; then
+ as_fn_error $? "conditional \"MODULE__HTML\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${MODULE__JSON_TRUE}" && test -z "${MODULE__JSON_FALSE}"; then
as_fn_error $? "conditional \"MODULE__JSON\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
diff --git a/configure.ac b/configure.ac
index b2f3f7210050693..e71b5644dd2d57d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8141,6 +8141,7 @@ PY_STDLIB_MOD_SIMPLE([_asyncio])
PY_STDLIB_MOD_SIMPLE([_bisect])
PY_STDLIB_MOD_SIMPLE([_csv])
PY_STDLIB_MOD_SIMPLE([_heapq])
+PY_STDLIB_MOD_SIMPLE([_html])
PY_STDLIB_MOD_SIMPLE([_json])
PY_STDLIB_MOD_SIMPLE([_lsprof])
PY_STDLIB_MOD_SIMPLE([_pickle])