Skip to content

Commit d1e355e

Browse files
committed
py: Fix check of small-int overflow when parsing ints.
Also unifies use of SMALL_INT_FITS macro across parser and runtime.
1 parent 813ed3b commit d1e355e

11 files changed

Lines changed: 37 additions & 34 deletions

File tree

py/compile.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
249249
// shouldn't happen
250250
assert(0);
251251
}
252-
if (MP_PARSE_FITS_SMALL_INT(arg0)) {
252+
if (MP_SMALL_INT_FITS(arg0)) {
253253
//printf("%ld + %ld\n", arg0, arg1);
254254
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0);
255255
}
@@ -264,7 +264,7 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
264264
// int * int
265265
if (!mp_small_int_mul_overflow(arg0, arg1)) {
266266
arg0 *= arg1;
267-
if (MP_PARSE_FITS_SMALL_INT(arg0)) {
267+
if (MP_SMALL_INT_FITS(arg0)) {
268268
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0);
269269
}
270270
}
@@ -337,7 +337,7 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
337337
mp_load_method_maybe(elem->value, q_attr, dest);
338338
if (MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == NULL) {
339339
machine_int_t val = MP_OBJ_SMALL_INT_VALUE(dest[0]);
340-
if (MP_PARSE_FITS_SMALL_INT(val)) {
340+
if (MP_SMALL_INT_FITS(val)) {
341341
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, val);
342342
}
343343
}

py/obj.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ typedef struct _mp_obj_base_t mp_obj_base_t;
7272

7373
// These macros check for small int, qstr or object, and access small int and qstr values
7474

75-
// In SMALL_INT, next-to-highest bits is used as sign, so both must match for value in range
76-
#define MP_SMALL_INT_MIN ((mp_small_int_t)(((machine_int_t)WORD_MSBIT_HIGH) >> 1))
77-
#define MP_SMALL_INT_MAX ((mp_small_int_t)(~(MP_SMALL_INT_MIN)))
78-
#define MP_OBJ_FITS_SMALL_INT(n) ((((n) ^ ((n) << 1)) & WORD_MSBIT_HIGH) == 0)
7975
// these macros have now become inline functions; see below
8076
//#define MP_OBJ_IS_SMALL_INT(o) ((((mp_small_int_t)(o)) & 1) != 0)
8177
//#define MP_OBJ_IS_QSTR(o) ((((mp_small_int_t)(o)) & 3) == 2)

py/objint.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "qstr.h"
3636
#include "obj.h"
3737
#include "parsenum.h"
38+
#include "smallint.h"
3839
#include "mpz.h"
3940
#include "objint.h"
4041
#include "runtime0.h"
@@ -251,7 +252,7 @@ mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value) {
251252
}
252253

253254
mp_obj_t mp_obj_new_int(machine_int_t value) {
254-
if (MP_OBJ_FITS_SMALL_INT(value)) {
255+
if (MP_SMALL_INT_FITS(value)) {
255256
return MP_OBJ_NEW_SMALL_INT(value);
256257
}
257258
nlr_raise(mp_obj_new_exception_msg(&mp_type_OverflowError, "small int overflow"));

py/objint_longlong.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "misc.h"
3535
#include "qstr.h"
3636
#include "obj.h"
37+
#include "smallint.h"
3738
#include "mpz.h"
3839
#include "objint.h"
3940
#include "runtime0.h"
@@ -140,7 +141,7 @@ mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
140141
}
141142

142143
mp_obj_t mp_obj_new_int(machine_int_t value) {
143-
if (MP_OBJ_FITS_SMALL_INT(value)) {
144+
if (MP_SMALL_INT_FITS(value)) {
144145
return MP_OBJ_NEW_SMALL_INT(value);
145146
}
146147
return mp_obj_new_int_from_ll(value);

py/objint_mpz.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "qstr.h"
3636
#include "parsenumbase.h"
3737
#include "obj.h"
38+
#include "smallint.h"
3839
#include "mpz.h"
3940
#include "objint.h"
4041
#include "runtime0.h"
@@ -239,7 +240,7 @@ mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
239240
}
240241

241242
mp_obj_t mp_obj_new_int(machine_int_t value) {
242-
if (MP_OBJ_FITS_SMALL_INT(value)) {
243+
if (MP_SMALL_INT_FITS(value)) {
243244
return MP_OBJ_NEW_SMALL_INT(value);
244245
}
245246
return mp_obj_new_int_from_ll(value);

py/parse.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "lexer.h"
3737
#include "parsenumbase.h"
3838
#include "parse.h"
39+
#include "smallint.h"
3940

4041
#define RULE_ACT_KIND_MASK (0xf0)
4142
#define RULE_ACT_ARG_MASK (0x0f)
@@ -311,31 +312,32 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
311312
int i = mp_parse_num_base(str, len, &base);
312313
bool overflow = false;
313314
for (; i < len; i++) {
314-
machine_int_t old_val = int_val;
315+
int dig;
315316
if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
316-
int_val = base * int_val + str[i] - '0';
317+
dig = str[i] - '0';
317318
} else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
318-
int_val = base * int_val + str[i] - 'a' + 10;
319+
dig = str[i] - 'a' + 10;
319320
} else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
320-
int_val = base * int_val + str[i] - 'A' + 10;
321+
dig = str[i] - 'A' + 10;
321322
} else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
322323
dec = true;
323324
break;
324325
} else {
325326
small_int = false;
326327
break;
327328
}
328-
if (int_val < old_val) {
329-
// If new value became less than previous, it's overflow
329+
// add next digit and check for overflow
330+
if (mp_small_int_mul_overflow(int_val, base)) {
330331
overflow = true;
331-
} else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
332-
// If signed number changed sign - it's overflow
332+
}
333+
int_val = int_val * base + dig;
334+
if (!MP_SMALL_INT_FITS(int_val)) {
333335
overflow = true;
334336
}
335337
}
336338
if (dec) {
337339
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
338-
} else if (small_int && !overflow && MP_PARSE_FITS_SMALL_INT(int_val)) {
340+
} else if (small_int && !overflow && MP_SMALL_INT_FITS(int_val)) {
339341
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
340342
} else {
341343
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));

py/parse.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,6 @@ struct _mp_lexer_t;
3737
// - xx...x10010: a string of bytes; bits 5 and above are the qstr holding the value
3838
// - xx...x10110: a token; bits 5 and above are mp_token_kind_t
3939

40-
// TODO: these can now be unified with MP_OBJ_FITS_SMALL_INT(x)
41-
// makes sure the top 2 bits of x are all cleared (positive number) or all set (negative number)
42-
// these macros can probably go somewhere else because they are used more than just in the parser
43-
#define MP_UINT_HIGH_2_BITS (~((~((machine_uint_t)0)) >> 2))
44-
// parser's small ints are different from VM small int
45-
#define MP_PARSE_FITS_SMALL_INT(x) (((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == 0) || ((((machine_uint_t)(x)) & MP_UINT_HIGH_2_BITS) == MP_UINT_HIGH_2_BITS))
46-
4740
#define MP_PARSE_NODE_NULL (0)
4841
#define MP_PARSE_NODE_SMALL_INT (0x1)
4942
#define MP_PARSE_NODE_ID (0x02)

py/parsenum.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "obj.h"
3535
#include "parsenumbase.h"
3636
#include "parsenum.h"
37+
#include "smallint.h"
3738

3839
#if MICROPY_ENABLE_FLOAT
3940
#include <math.h>
@@ -70,16 +71,16 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
7071
machine_int_t int_val = 0;
7172
const char *restrict str_val_start = str;
7273
for (; str < top; str++) {
73-
machine_int_t old_val = int_val;
74+
// get next digit as a value
7475
int dig = *str;
7576
if (unichar_isdigit(dig) && dig - '0' < base) {
7677
// 0-9 digit
77-
int_val = base * int_val + dig - '0';
78+
dig = dig - '0';
7879
} else if (base == 16) {
7980
dig |= 0x20;
8081
if ('a' <= dig && dig <= 'f') {
8182
// a-f hex digit
82-
int_val = base * int_val + dig - 'a' + 10;
83+
dig = dig - 'a' + 10;
8384
} else {
8485
// unknown character
8586
break;
@@ -88,11 +89,13 @@ mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
8889
// unknown character
8990
break;
9091
}
91-
if (int_val < old_val) {
92-
// If new value became less than previous, it's overflow
92+
93+
// add next digit and check for overflow
94+
if (mp_small_int_mul_overflow(int_val, base)) {
9395
goto overflow;
94-
} else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
95-
// If signed number changed sign - it's overflow
96+
}
97+
int_val = int_val * base + dig;
98+
if (!MP_SMALL_INT_FITS(int_val)) {
9699
goto overflow;
97100
}
98101
}

py/runtime.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ mp_obj_t mp_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) {
413413
goto unsupported_op;
414414
}
415415
// TODO: We just should make mp_obj_new_int() inline and use that
416-
if (MP_OBJ_FITS_SMALL_INT(lhs_val)) {
416+
if (MP_SMALL_INT_FITS(lhs_val)) {
417417
return MP_OBJ_NEW_SMALL_INT(lhs_val);
418418
} else {
419419
return mp_obj_new_int(lhs_val);

py/smallint.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "mpconfig.h"
2929
#include "qstr.h"
3030
#include "obj.h"
31+
#include "smallint.h"
3132

3233
bool mp_small_int_mul_overflow(machine_int_t x, machine_int_t y) {
3334
// Check for multiply overflow; see CERT INT32-C

0 commit comments

Comments
 (0)