Skip to content

Commit 22b2265

Browse files
committed
py/parse: Improve constant folding to operate on small and big ints.
Constant folding in the parser can now operate on big ints, whatever their representation. This is now possible because the parser can create parse nodes holding arbitrary objects. For the case of small ints the folding is still efficient in RAM because the folded small int is stored inplace in the parse node. Adds 48 bytes to code size on Thumb2 architecture. Helps reduce heap usage because more constants can be computed at compile time, leading to a smaller parse tree, and most importantly means that the constants don't have to be computed at runtime (perhaps more than once). Parser will now be a little slower when folding due to calls to runtime to do the arithmetic.
1 parent d6b31e4 commit 22b2265

File tree

3 files changed

+81
-79
lines changed

3 files changed

+81
-79
lines changed

py/parse.c

Lines changed: 74 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@
3434
#include "py/lexer.h"
3535
#include "py/parse.h"
3636
#include "py/parsenum.h"
37-
#include "py/smallint.h"
37+
#include "py/runtime0.h"
3838
#include "py/runtime.h"
39+
#include "py/objint.h"
3940
#include "py/builtin.h"
4041

4142
#if MICROPY_ENABLE_COMPILER
@@ -234,6 +235,24 @@ mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg) {
234235
return (mp_parse_node_t)(kind | (arg << 4));
235236
}
236237

238+
bool mp_parse_node_get_int_maybe(mp_parse_node_t pn, mp_obj_t *o) {
239+
if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
240+
*o = MP_OBJ_NEW_SMALL_INT(MP_PARSE_NODE_LEAF_SMALL_INT(pn));
241+
return true;
242+
} else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, RULE_const_object)) {
243+
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
244+
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
245+
// nodes are 32-bit pointers, but need to extract 64-bit object
246+
*o = (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32);
247+
#else
248+
*o = (mp_obj_t)pns->nodes[0];
249+
#endif
250+
return MP_OBJ_IS_INT(*o);
251+
} else {
252+
return false;
253+
}
254+
}
255+
237256
int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes) {
238257
if (MP_PARSE_NODE_IS_NULL(*pn)) {
239258
*nodes = NULL;
@@ -445,119 +464,94 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
445464
// this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
446465
// it does not do partial folding, eg 1 + 2 + x -> 3 + x
447466

448-
mp_int_t arg0;
467+
mp_obj_t arg0;
449468
if (rule->rule_id == RULE_expr
450469
|| rule->rule_id == RULE_xor_expr
451470
|| rule->rule_id == RULE_and_expr) {
452471
// folding for binary ops: | ^ &
453472
mp_parse_node_t pn = peek_result(parser, num_args - 1);
454-
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
473+
if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
455474
return false;
456475
}
457-
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
476+
mp_binary_op_t op;
477+
if (rule->rule_id == RULE_expr) {
478+
op = MP_BINARY_OP_OR;
479+
} else if (rule->rule_id == RULE_xor_expr) {
480+
op = MP_BINARY_OP_XOR;
481+
} else {
482+
op = MP_BINARY_OP_AND;
483+
}
458484
for (ssize_t i = num_args - 2; i >= 0; --i) {
459485
pn = peek_result(parser, i);
460-
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
486+
mp_obj_t arg1;
487+
if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
461488
return false;
462489
}
463-
mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
464-
if (rule->rule_id == RULE_expr) {
465-
// int | int
466-
arg0 |= arg1;
467-
} else if (rule->rule_id == RULE_xor_expr) {
468-
// int ^ int
469-
arg0 ^= arg1;
470-
} else if (rule->rule_id == RULE_and_expr) {
471-
// int & int
472-
arg0 &= arg1;
473-
}
490+
arg0 = mp_binary_op(op, arg0, arg1);
474491
}
475492
} else if (rule->rule_id == RULE_shift_expr
476493
|| rule->rule_id == RULE_arith_expr
477494
|| rule->rule_id == RULE_term) {
478495
// folding for binary ops: << >> + - * / % //
479496
mp_parse_node_t pn = peek_result(parser, num_args - 1);
480-
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
497+
if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
481498
return false;
482499
}
483-
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
484500
for (ssize_t i = num_args - 2; i >= 1; i -= 2) {
485501
pn = peek_result(parser, i - 1);
486-
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
502+
mp_obj_t arg1;
503+
if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
487504
return false;
488505
}
489-
mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
490506
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
491-
if (tok == MP_TOKEN_OP_DBL_LESS) {
492-
// int << int
493-
if (arg1 >= (mp_int_t)BITS_PER_WORD
494-
|| arg0 > (MP_SMALL_INT_MAX >> arg1)
495-
|| arg0 < (MP_SMALL_INT_MIN >> arg1)) {
496-
return false;
497-
}
498-
arg0 <<= arg1;
499-
} else if (tok == MP_TOKEN_OP_DBL_MORE) {
500-
// int >> int
501-
if (arg1 >= (mp_int_t)BITS_PER_WORD) {
502-
// Shifting to big amounts is underfined behavior
503-
// in C and is CPU-dependent; propagate sign bit.
504-
arg1 = BITS_PER_WORD - 1;
505-
}
506-
arg0 >>= arg1;
507-
} else if (tok == MP_TOKEN_OP_PLUS) {
508-
// int + int
509-
arg0 += arg1;
510-
} else if (tok == MP_TOKEN_OP_MINUS) {
511-
// int - int
512-
arg0 -= arg1;
513-
} else if (tok == MP_TOKEN_OP_STAR) {
514-
// int * int
515-
if (mp_small_int_mul_overflow(arg0, arg1)) {
516-
return false;
517-
}
518-
arg0 *= arg1;
519-
} else if (tok == MP_TOKEN_OP_SLASH) {
520-
// int / int
507+
static const uint8_t token_to_op[] = {
508+
MP_BINARY_OP_ADD,
509+
MP_BINARY_OP_SUBTRACT,
510+
MP_BINARY_OP_MULTIPLY,
511+
255,//MP_BINARY_OP_POWER,
512+
255,//MP_BINARY_OP_TRUE_DIVIDE,
513+
MP_BINARY_OP_FLOOR_DIVIDE,
514+
MP_BINARY_OP_MODULO,
515+
255,//MP_BINARY_OP_LESS
516+
MP_BINARY_OP_LSHIFT,
517+
255,//MP_BINARY_OP_MORE
518+
MP_BINARY_OP_RSHIFT,
519+
};
520+
mp_binary_op_t op = token_to_op[tok - MP_TOKEN_OP_PLUS];
521+
if (op == 255) {
521522
return false;
522-
} else if (tok == MP_TOKEN_OP_PERCENT) {
523-
// int % int
524-
if (arg1 == 0) {
523+
}
524+
int rhs_sign = mp_obj_int_sign(arg1);
525+
if (op <= MP_BINARY_OP_RSHIFT) {
526+
// << and >> can't have negative rhs
527+
if (rhs_sign < 0) {
525528
return false;
526529
}
527-
arg0 = mp_small_int_modulo(arg0, arg1);
528-
} else {
529-
assert(tok == MP_TOKEN_OP_DBL_SLASH); // should be
530-
// int // int
531-
if (arg1 == 0) {
530+
} else if (op >= MP_BINARY_OP_FLOOR_DIVIDE) {
531+
// % and // can't have zero rhs
532+
if (rhs_sign == 0) {
532533
return false;
533534
}
534-
arg0 = mp_small_int_floor_divide(arg0, arg1);
535-
}
536-
if (!MP_SMALL_INT_FITS(arg0)) {
537-
return false;
538535
}
536+
arg0 = mp_binary_op(op, arg0, arg1);
539537
}
540538
} else if (rule->rule_id == RULE_factor_2) {
541539
// folding for unary ops: + - ~
542540
mp_parse_node_t pn = peek_result(parser, 0);
543-
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
541+
if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
544542
return false;
545543
}
546-
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
547544
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
545+
mp_binary_op_t op;
548546
if (tok == MP_TOKEN_OP_PLUS) {
549-
// +int
547+
op = MP_UNARY_OP_POSITIVE;
550548
} else if (tok == MP_TOKEN_OP_MINUS) {
551-
// -int
552-
arg0 = -arg0;
553-
if (!MP_SMALL_INT_FITS(arg0)) {
554-
return false;
555-
}
549+
op = MP_UNARY_OP_NEGATIVE;
556550
} else {
557551
assert(tok == MP_TOKEN_OP_TILDE); // should be
558-
// ~int
559-
arg0 = ~arg0;
552+
op = MP_UNARY_OP_INVERT;
560553
}
554+
arg0 = mp_unary_op(op, arg0);
561555

562556
#if MICROPY_COMP_CONST
563557
} else if (rule->rule_id == RULE_expr_stmt) {
@@ -625,10 +619,10 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
625619
}
626620
mp_obj_t dest[2];
627621
mp_load_method_maybe(elem->value, q_attr, dest);
628-
if (!(MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
622+
if (!(MP_OBJ_IS_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
629623
return false;
630624
}
631-
arg0 = MP_OBJ_SMALL_INT_VALUE(dest[0]);
625+
arg0 = dest[0];
632626
#endif
633627

634628
} else {
@@ -640,7 +634,12 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
640634
for (size_t i = num_args; i > 0; i--) {
641635
pop_result(parser);
642636
}
643-
push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0));
637+
if (MP_OBJ_IS_SMALL_INT(arg0)) {
638+
push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(arg0)));
639+
} else {
640+
// TODO reuse memory for parse node struct?
641+
push_result_node(parser, make_node_const_object(parser, 0, arg0));
642+
}
644643

645644
return true;
646645
}

py/parse.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#include <stddef.h>
3030
#include <stdint.h>
3131

32-
#include "py/mpconfig.h"
32+
#include "py/obj.h"
3333

3434
struct _mp_lexer_t;
3535

@@ -77,6 +77,7 @@ typedef struct _mp_parse_node_struct_t {
7777
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
7878

7979
mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg);
80+
bool mp_parse_node_get_int_maybe(mp_parse_node_t pn, mp_obj_t *o);
8081
int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes);
8182
void mp_parse_node_print(mp_parse_node_t pn, size_t indent);
8283

tests/misc/non_compliant.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,20 +70,22 @@
7070
except NotImplementedError:
7171
print('NotImplementedError')
7272

73+
mpz = 1 << 70
74+
7375
# mpz and with both args negative
7476
try:
75-
-(1<<70) & -2
77+
-mpz & -2
7678
except NotImplementedError:
7779
print('NotImplementedError')
7880

7981
# mpz or with args opposite sign
8082
try:
81-
-(1<<70) | 2
83+
-mpz | 2
8284
except NotImplementedError:
8385
print('NotImplementedError')
8486

8587
# mpz xor with args opposite sign
8688
try:
87-
-(1<<70) ^ 2
89+
-mpz ^ 2
8890
except NotImplementedError:
8991
print('NotImplementedError')

0 commit comments

Comments
 (0)