Skip to content

Commit f127bef

Browse files
committed
py/makeqstrdata.py: Compute the qstr hash from bytes, not characters.
1 parent f98bb2d commit f127bef

1 file changed

Lines changed: 10 additions & 9 deletions

File tree

py/makeqstrdata.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
# - codepoint2name lives in a different module
1515
import platform
1616
if platform.python_version_tuple()[0] == '2':
17-
ord_bytes = ord
17+
bytes_cons = lambda val, enc=None: bytearray(val)
1818
from htmlentitydefs import codepoint2name
1919
elif platform.python_version_tuple()[0] == '3':
20-
ord_bytes = lambda x:x
20+
bytes_cons = bytes
2121
from html.entities import codepoint2name
22+
# end compatibility code
23+
2224
codepoint2name[ord('-')] = 'hyphen';
2325

2426
# add some custom names to map characters that aren't in HTML
@@ -52,8 +54,8 @@
5254
# this must match the equivalent function in qstr.c
5355
def compute_hash(qstr, bytes_hash):
5456
hash = 5381
55-
for char in qstr:
56-
hash = (hash * 33) ^ ord(char)
57+
for b in qstr:
58+
hash = (hash * 33) ^ b
5759
# Make sure that a valid hash is never zero; zero means "hash not computed"
5860
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
5961

@@ -115,16 +117,15 @@ def parse_input_headers(infiles):
115117
return qcfgs, qstrs
116118

117119
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
118-
qhash = compute_hash(qstr, cfg_bytes_hash)
120+
qbytes = bytes_cons(qstr, 'utf8')
121+
qlen = len(qbytes)
122+
qhash = compute_hash(qbytes, cfg_bytes_hash)
119123
if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
120124
# qstr is all printable ASCII so render it as-is (for easier debugging)
121-
qlen = len(qstr)
122125
qdata = qstr
123126
else:
124127
# qstr contains non-printable codes, so render the entire thing as hex pairs
125-
qbytes = qstr.encode('utf8')
126-
qlen = len(qbytes)
127-
qdata = ''.join(('\\x%02x' % ord_bytes(b)) for b in qbytes)
128+
qdata = ''.join(('\\x%02x' % b) for b in qbytes)
128129
if qlen >= (1 << (8 * cfg_bytes_len)):
129130
print('qstr is too long:', qstr)
130131
assert False

0 commit comments

Comments
 (0)