Skip to content

Commit c2a4e4e

Browse files
committed
py: Convert hash API to use MP_UNARY_OP_HASH instead of ad-hoc function.
Hashing is now done using the mp_unary_op function with MP_UNARY_OP_HASH as the operator argument. Hashing for int, str and bytes still goes via a fast path in mp_unary_op, since they are the most common objects which need to be hashed. This led to quite a bit of code cleanup, and should be more efficient if anything. It saves 176 bytes of code space on Thumb2, and 360 bytes on x86. The only loss is that the error message "unhashable type" is now the more generic "unsupported type for __hash__".
1 parent 6738c1d commit c2a4e4e

File tree

20 files changed

+85
-112
lines changed

20 files changed

+85
-112
lines changed

py/bc0.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@
116116
#define MP_BC_LOAD_CONST_SMALL_INT_MULTI (0x70) // + N(64)
117117
#define MP_BC_LOAD_FAST_MULTI (0xb0) // + N(16)
118118
#define MP_BC_STORE_FAST_MULTI (0xc0) // + N(16)
119-
#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(5)
120-
#define MP_BC_BINARY_OP_MULTI (0xd5) // + op(35)
119+
#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(6)
120+
#define MP_BC_BINARY_OP_MULTI (0xd6) // + op(35)
121121

122122
#endif // __MICROPY_INCLUDED_PY_BC0_H__

py/map.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131

3232
#include "py/mpconfig.h"
3333
#include "py/misc.h"
34-
#include "py/obj.h"
34+
#include "py/runtime0.h"
35+
#include "py/runtime.h"
3536

3637
// Fixed empty map. Useful when need to call kw-receiving functions
3738
// without any keywords from C, etc.
@@ -200,7 +201,7 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
200201
}
201202
}
202203

203-
mp_uint_t hash = mp_obj_hash(index);
204+
mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
204205
mp_uint_t pos = hash % map->alloc;
205206
mp_uint_t start_pos = pos;
206207
mp_map_elem_t *avail_slot = NULL;
@@ -308,7 +309,7 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku
308309
return NULL;
309310
}
310311
}
311-
mp_uint_t hash = mp_obj_hash(index);
312+
mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
312313
mp_uint_t pos = hash % set->alloc;
313314
mp_uint_t start_pos = pos;
314315
mp_obj_t *avail_slot = NULL;

py/modbuiltins.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,8 +272,8 @@ STATIC mp_obj_t mp_builtin_divmod(mp_obj_t o1_in, mp_obj_t o2_in) {
272272
MP_DEFINE_CONST_FUN_OBJ_2(mp_builtin_divmod_obj, mp_builtin_divmod);
273273

274274
STATIC mp_obj_t mp_builtin_hash(mp_obj_t o_in) {
275-
// TODO hash will generally overflow small integer; can we safely truncate it?
276-
return mp_obj_new_int(mp_obj_hash(o_in));
275+
// result is guaranteed to be a (small) int
276+
return mp_unary_op(MP_UNARY_OP_HASH, o_in);
277277
}
278278
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_hash_obj, mp_builtin_hash);
279279

py/obj.c

Lines changed: 7 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -147,61 +147,6 @@ bool mp_obj_is_callable(mp_obj_t o_in) {
147147
return mp_obj_instance_is_callable(o_in);
148148
}
149149

150-
mp_int_t mp_obj_hash(mp_obj_t o_in) {
151-
if (o_in == mp_const_false) {
152-
return 0; // needs to hash to same as the integer 0, since False==0
153-
} else if (o_in == mp_const_true) {
154-
return 1; // needs to hash to same as the integer 1, since True==1
155-
} else if (MP_OBJ_IS_SMALL_INT(o_in)) {
156-
return MP_OBJ_SMALL_INT_VALUE(o_in);
157-
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_int)) {
158-
return mp_obj_int_hash(o_in);
159-
} else if (MP_OBJ_IS_STR(o_in) || MP_OBJ_IS_TYPE(o_in, &mp_type_bytes)) {
160-
return mp_obj_str_get_hash(o_in);
161-
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_NoneType)) {
162-
return (mp_int_t)o_in;
163-
} else if (MP_OBJ_IS_FUN(o_in)) {
164-
return (mp_int_t)o_in;
165-
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_tuple)) {
166-
return mp_obj_tuple_hash(o_in);
167-
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_type)) {
168-
return (mp_int_t)o_in;
169-
} else if (mp_obj_is_instance_type(mp_obj_get_type(o_in))) {
170-
// if a valid __hash__ method exists, use it
171-
mp_obj_t method[2];
172-
mp_load_method_maybe(o_in, MP_QSTR___hash__, method);
173-
if (method[0] != MP_OBJ_NULL) {
174-
mp_obj_t hash_val = mp_call_method_n_kw(0, 0, method);
175-
if (MP_OBJ_IS_INT(hash_val)) {
176-
return mp_obj_int_get_truncated(hash_val);
177-
}
178-
goto error;
179-
}
180-
181-
mp_load_method_maybe(o_in, MP_QSTR___eq__, method);
182-
if (method[0] == MP_OBJ_NULL) {
183-
// https://docs.python.org/3/reference/datamodel.html#object.__hash__
184-
// "User-defined classes have __eq__() and __hash__() methods by default;
185-
// with them, all objects compare unequal (except with themselves) and
186-
// x.__hash__() returns an appropriate value such that x == y implies
187-
// both that x is y and hash(x) == hash(y)."
188-
return (mp_int_t)o_in;
189-
}
190-
// "A class that overrides __eq__() and does not define __hash__() will have its __hash__() implicitly set to None.
191-
// When the __hash__() method of a class is None, instances of the class will raise an appropriate TypeError"
192-
}
193-
194-
// TODO hash classes
195-
196-
error:
197-
if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
198-
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unhashable type"));
199-
} else {
200-
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
201-
"unhashable type: '%s'", mp_obj_get_type_str(o_in)));
202-
}
203-
}
204-
205150
// This function implements the '==' operator (and so the inverse of '!=').
206151
//
207152
// From the Python language reference:
@@ -540,3 +485,10 @@ void mp_get_buffer_raise(mp_obj_t obj, mp_buffer_info_t *bufinfo, mp_uint_t flag
540485
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "object with buffer protocol required"));
541486
}
542487
}
488+
489+
mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in) {
490+
switch (op) {
491+
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_uint_t)o_in);
492+
default: return MP_OBJ_NULL; // op not supported
493+
}
494+
}

py/obj.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,6 @@ void mp_obj_print_exception(const mp_print_t *print, mp_obj_t exc);
508508

509509
bool mp_obj_is_true(mp_obj_t arg);
510510
bool mp_obj_is_callable(mp_obj_t o_in);
511-
mp_int_t mp_obj_hash(mp_obj_t o_in);
512511
bool mp_obj_equal(mp_obj_t o1, mp_obj_t o2);
513512

514513
mp_int_t mp_obj_get_int(mp_const_obj_t arg);
@@ -525,6 +524,7 @@ mp_obj_t mp_obj_id(mp_obj_t o_in);
525524
mp_obj_t mp_obj_len(mp_obj_t o_in);
526525
mp_obj_t mp_obj_len_maybe(mp_obj_t o_in); // may return MP_OBJ_NULL
527526
mp_obj_t mp_obj_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t val);
527+
mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in);
528528

529529
// bool
530530
// TODO make lower case when it has proven itself

py/objbool.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,18 +69,21 @@ STATIC mp_obj_t bool_unary_op(mp_uint_t op, mp_obj_t o_in) {
6969
mp_int_t value = ((mp_obj_bool_t*)o_in)->value;
7070
switch (op) {
7171
case MP_UNARY_OP_BOOL: return o_in;
72+
// needs to hash to the same value as if converting to an integer
73+
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(value);
7274
case MP_UNARY_OP_POSITIVE: return MP_OBJ_NEW_SMALL_INT(value);
7375
case MP_UNARY_OP_NEGATIVE: return MP_OBJ_NEW_SMALL_INT(-value);
7476
case MP_UNARY_OP_INVERT: return MP_OBJ_NEW_SMALL_INT(~value);
7577

7678
// only bool needs to implement MP_UNARY_OP_NOT
7779
case MP_UNARY_OP_NOT:
78-
default: // no other cases
7980
if (value) {
8081
return mp_const_false;
8182
} else {
8283
return mp_const_true;
8384
}
85+
86+
default: return MP_OBJ_NULL; // op not supported
8487
}
8588
}
8689

py/objfun.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ const mp_obj_type_t mp_type_fun_builtin = {
9898
{ &mp_type_type },
9999
.name = MP_QSTR_function,
100100
.call = fun_builtin_call,
101+
.unary_op = mp_generic_unary_op,
101102
};
102103

103104
/******************************************************************************/
@@ -314,6 +315,7 @@ const mp_obj_type_t mp_type_fun_bc = {
314315
.print = fun_bc_print,
315316
#endif
316317
.call = fun_bc_call,
318+
.unary_op = mp_generic_unary_op,
317319
#if MICROPY_PY_FUNCTION_ATTRS
318320
.attr = fun_bc_attr,
319321
#endif
@@ -366,6 +368,7 @@ STATIC const mp_obj_type_t mp_type_fun_native = {
366368
{ &mp_type_type },
367369
.name = MP_QSTR_function,
368370
.call = fun_native_call,
371+
.unary_op = mp_generic_unary_op,
369372
};
370373

371374
mp_obj_t mp_obj_new_fun_native(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data) {
@@ -421,6 +424,7 @@ STATIC const mp_obj_type_t mp_type_fun_viper = {
421424
{ &mp_type_type },
422425
.name = MP_QSTR_function,
423426
.call = fun_viper_call,
427+
.unary_op = mp_generic_unary_op,
424428
};
425429

426430
mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig) {
@@ -533,6 +537,7 @@ STATIC const mp_obj_type_t mp_type_fun_asm = {
533537
{ &mp_type_type },
534538
.name = MP_QSTR_function,
535539
.call = fun_asm_call,
540+
.unary_op = mp_generic_unary_op,
536541
};
537542

538543
mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data) {

py/objint.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,10 +259,6 @@ char *mp_obj_int_formatted(char **buf, mp_uint_t *buf_size, mp_uint_t *fmt_size,
259259

260260
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
261261

262-
mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
263-
return MP_OBJ_SMALL_INT_VALUE(self_in);
264-
}
265-
266262
bool mp_obj_int_is_positive(mp_obj_t self_in) {
267263
return mp_obj_get_int(self_in) >= 0;
268264
}

py/objint_longlong.c

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,6 @@
5353
const mp_obj_int_t mp_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
5454
#endif
5555

56-
mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
57-
if (MP_OBJ_IS_SMALL_INT(self_in)) {
58-
return MP_OBJ_SMALL_INT_VALUE(self_in);
59-
}
60-
mp_obj_int_t *self = self_in;
61-
// truncate value to fit in mp_int_t, which gives the same hash as
62-
// small int if the value fits without truncation
63-
return self->val;
64-
}
65-
6656
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
6757
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
6858
mp_obj_int_t *self = self_in;
@@ -117,6 +107,11 @@ mp_obj_t mp_obj_int_unary_op(mp_uint_t op, mp_obj_t o_in) {
117107
mp_obj_int_t *o = o_in;
118108
switch (op) {
119109
case MP_UNARY_OP_BOOL: return MP_BOOL(o->val != 0);
110+
111+
// truncate value to fit in mp_int_t, which gives the same hash as
112+
// small int if the value fits without truncation
113+
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val);
114+
120115
case MP_UNARY_OP_POSITIVE: return o_in;
121116
case MP_UNARY_OP_NEGATIVE: return mp_obj_new_int_from_ll(-o->val);
122117
case MP_UNARY_OP_INVERT: return mp_obj_new_int_from_ll(~o->val);

py/objint_mpz.c

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,6 @@ char *mp_obj_int_formatted_impl(char **buf, mp_uint_t *buf_size, mp_uint_t *fmt_
9696
return str;
9797
}
9898

99-
mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
100-
if (MP_OBJ_IS_SMALL_INT(self_in)) {
101-
return MP_OBJ_SMALL_INT_VALUE(self_in);
102-
}
103-
mp_obj_int_t *self = self_in;
104-
return mpz_hash(&self->mpz);
105-
}
106-
10799
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
108100
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
109101
mp_obj_int_t *self = self_in;
@@ -143,6 +135,7 @@ mp_obj_t mp_obj_int_unary_op(mp_uint_t op, mp_obj_t o_in) {
143135
mp_obj_int_t *o = o_in;
144136
switch (op) {
145137
case MP_UNARY_OP_BOOL: return MP_BOOL(!mpz_is_zero(&o->mpz));
138+
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(mpz_hash(&o->mpz));
146139
case MP_UNARY_OP_POSITIVE: return o_in;
147140
case MP_UNARY_OP_NEGATIVE: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_neg_inpl(&o2->mpz, &o->mpz); return o2; }
148141
case MP_UNARY_OP_INVERT: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_not_inpl(&o2->mpz, &o->mpz); return o2; }

0 commit comments

Comments
 (0)