Skip to content

Commit c3bd941

Browse files
committed
py: Make qstr hash size configurable, defaults to 2 bytes.
This patch makes configurable, via MICROPY_QSTR_BYTES_IN_HASH, the number of bytes used for a qstr hash. It was originally fixed at 2 bytes, and now defaults to 2 bytes. Setting it to 1 byte will save ROM and RAM at a small expense of hash collisions.
1 parent 1e8ca3a commit c3bd941

5 files changed

Lines changed: 45 additions & 29 deletions

File tree

bare-arm/mpconfigport.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// options to control how Micro Python is built
44

5+
#define MICROPY_QSTR_BYTES_IN_HASH (1)
56
#define MICROPY_ALLOC_PATH_MAX (512)
67
#define MICROPY_EMIT_X64 (0)
78
#define MICROPY_EMIT_THUMB (0)

py/makeqstrdata.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@
3737
codepoint2name[ord('\\')] = 'backslash'
3838

3939
# this must match the equivalent function in qstr.c
40-
def compute_hash(qstr):
40+
def compute_hash(qstr, bytes_hash):
4141
hash = 5381
4242
for char in qstr:
4343
hash = (hash * 33) ^ ord(char)
4444
# Make sure that valid hash is never zero, zero means "hash not computed"
45-
return (hash & 0xffff) or 1
45+
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
4646

4747
def do_work(infiles):
4848
# read the qstrs in from the input files
@@ -81,26 +81,28 @@ def do_work(infiles):
8181

8282
# get config variables
8383
cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
84+
cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
8485
cfg_max_len = 1 << (8 * cfg_bytes_len)
8586

8687
# print out the starte of the generated C header file
8788
print('// This file was automatically generated by makeqstrdata.py')
8889
print('')
8990

9091
# add NULL qstr with no hash or data
91-
print('QDEF(MP_QSTR_NULL, (const byte*)"\\x00\\x00%s" "")' % ('\\x00' * cfg_bytes_len))
92+
print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len))
9293

9394
# go through each qstr and print it out
9495
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
95-
qhash = compute_hash(qstr)
96+
qhash = compute_hash(qstr, cfg_bytes_hash)
9697
# Calculate len of str, taking escapes into account
9798
qlen = len(qstr.replace("\\\\", "-").replace("\\", ""))
9899
qdata = qstr.replace('"', '\\"')
99100
if qlen >= cfg_max_len:
100101
print('qstr is too long:', qstr)
101102
assert False
102103
qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
103-
print('QDEF(MP_QSTR_%s, (const byte*)"\\x%02x\\x%02x%s" "%s")' % (ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen_str, qdata))
104+
qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
105+
print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata))
104106

105107
if __name__ == "__main__":
106108
do_work(sys.argv[1:])

py/mpconfig.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,11 @@
152152
#define MICROPY_QSTR_BYTES_IN_LEN (1)
153153
#endif
154154

155+
// Number of bytes used to store qstr hash
156+
#ifndef MICROPY_QSTR_BYTES_IN_HASH
157+
#define MICROPY_QSTR_BYTES_IN_HASH (2)
158+
#endif
159+
155160
// Avoid using C stack when making Python function calls. C stack still
156161
// may be used if there's no free heap.
157162
#ifndef MICROPY_STACKLESS

py/qstr.c

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -43,22 +43,31 @@
4343
#endif
4444

4545
// A qstr is an index into the qstr pool.
46-
// The data for a qstr contains (hash, length, data).
47-
// For now we use very simple encoding, just to get the framework correct:
48-
// - hash is 2 bytes (see function below)
49-
// - length is 2 bytes
50-
// - data follows
51-
// - \0 terminated (for now, so they can be printed using printf)
52-
53-
#define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8))
54-
#define Q_GET_ALLOC(q) (2 + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1)
55-
#define Q_GET_DATA(q) ((q) + 2 + MICROPY_QSTR_BYTES_IN_LEN)
46+
// The data for a qstr contains (hash, length, data):
47+
// - hash (configurable number of bytes)
48+
// - length (configurable number of bytes)
49+
// - data ("length" number of bytes)
50+
// - \0 terminated (so they can be printed using printf)
51+
52+
#if MICROPY_QSTR_BYTES_IN_HASH == 1
53+
#define Q_HASH_MASK (0xff)
54+
#define Q_GET_HASH(q) ((mp_uint_t)(q)[0])
55+
#define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0)
56+
#elif MICROPY_QSTR_BYTES_IN_HASH == 2
57+
#define Q_HASH_MASK (0xffff)
58+
#define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8))
59+
#define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0)
60+
#else
61+
#error unimplemented qstr hash decoding
62+
#endif
63+
#define Q_GET_ALLOC(q) (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1)
64+
#define Q_GET_DATA(q) ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN)
5665
#if MICROPY_QSTR_BYTES_IN_LEN == 1
57-
#define Q_GET_LENGTH(q) ((q)[2])
58-
#define Q_SET_LENGTH(q, len) do { (q)[2] = (len); } while (0)
66+
#define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH])
67+
#define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0)
5968
#elif MICROPY_QSTR_BYTES_IN_LEN == 2
60-
#define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
61-
#define Q_SET_LENGTH(q, len) do { (q)[2] = (len); (q)[3] = (len) >> 8; } while (0)
69+
#define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8))
70+
#define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0)
6271
#else
6372
#error unimplemented qstr length decoding
6473
#endif
@@ -70,7 +79,7 @@ mp_uint_t qstr_compute_hash(const byte *data, mp_uint_t len) {
7079
for (const byte *top = data + len; data < top; data++) {
7180
hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data
7281
}
73-
hash &= 0xffff;
82+
hash &= Q_HASH_MASK;
7483
// Make sure that valid hash is never zero, zero means "hash not computed"
7584
if (hash == 0) {
7685
hash++;
@@ -156,7 +165,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
156165
// qstr does not exist in interned pool so need to add it
157166

158167
// compute number of bytes needed to intern this string
159-
mp_uint_t n_bytes = 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
168+
mp_uint_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
160169

161170
if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
162171
// not enough room at end of previously interned string so try to grow
@@ -193,19 +202,18 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
193202

194203
// store the interned strings' data
195204
mp_uint_t hash = qstr_compute_hash((const byte*)str, len);
196-
q_ptr[0] = hash;
197-
q_ptr[1] = hash >> 8;
205+
Q_SET_HASH(q_ptr, hash);
198206
Q_SET_LENGTH(q_ptr, len);
199-
memcpy(q_ptr + 2 + MICROPY_QSTR_BYTES_IN_LEN, str, len);
200-
q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
207+
memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len);
208+
q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
201209
q = qstr_add(q_ptr);
202210
}
203211
return q;
204212
}
205213

206214
byte *qstr_build_start(mp_uint_t len, byte **q_ptr) {
207215
assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN)));
208-
*q_ptr = m_new(byte, 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
216+
*q_ptr = m_new(byte, MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
209217
Q_SET_LENGTH(*q_ptr, len);
210218
return Q_GET_DATA(*q_ptr);
211219
}
@@ -215,9 +223,8 @@ qstr qstr_build_end(byte *q_ptr) {
215223
if (q == 0) {
216224
mp_uint_t len = Q_GET_LENGTH(q_ptr);
217225
mp_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len);
218-
q_ptr[0] = hash;
219-
q_ptr[1] = hash >> 8;
220-
q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
226+
Q_SET_HASH(q_ptr, hash);
227+
q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
221228
q = qstr_add(q_ptr);
222229
} else {
223230
m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr));

py/qstrdefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
// qstr configuration passed to makeqstrdata.py of the form QCFG(key, value)
3333
QCFG(BYTES_IN_LEN, MICROPY_QSTR_BYTES_IN_LEN)
34+
QCFG(BYTES_IN_HASH, MICROPY_QSTR_BYTES_IN_HASH)
3435

3536
Q()
3637
Q(*)

0 commit comments

Comments
 (0)