Skip to content

Commit 7d414a1

Browse files
committed
py: Parse big-int/float/imag constants directly in parser.
Prior to this patch, a big-int, float or imaginary constant was interned (made into a qstr) and then parsed at runtime to create an object each time it was needed. This wasted RAM and was inefficient. Now these constants are parsed directly in the parser and turned into objects. This allows constants with a large number of digits (and so addresses issue micropython#1103) and takes us a step closer to micropython#722.
1 parent 5f97aae commit 7d414a1

30 files changed

Lines changed: 174 additions & 211 deletions

extmod/modujson.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,9 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
161161
vstr_add_byte(&vstr, *s);
162162
}
163163
if (flt) {
164-
next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false);
164+
next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL);
165165
} else {
166-
next = mp_parse_num_integer(vstr.buf, vstr.len, 10);
166+
next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL);
167167
}
168168
break;
169169
}

py/bc0.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,10 @@
3434
#define MP_BC_LOAD_CONST_TRUE (0x12)
3535
#define MP_BC_LOAD_CONST_ELLIPSIS (0x13)
3636
#define MP_BC_LOAD_CONST_SMALL_INT (0x14) // signed var-int
37-
#define MP_BC_LOAD_CONST_INT (0x15) // qstr
38-
#define MP_BC_LOAD_CONST_DEC (0x16) // qstr
39-
#define MP_BC_LOAD_CONST_BYTES (0x17) // qstr
40-
#define MP_BC_LOAD_CONST_STRING (0x18) // qstr
41-
#define MP_BC_LOAD_CONST_OBJ (0x09) // ptr; TODO renumber to be in order
42-
#define MP_BC_LOAD_NULL (0x19)
37+
#define MP_BC_LOAD_CONST_BYTES (0x15) // qstr
38+
#define MP_BC_LOAD_CONST_STRING (0x16) // qstr
39+
#define MP_BC_LOAD_CONST_OBJ (0x17) // ptr
40+
#define MP_BC_LOAD_NULL (0x18)
4341

4442
#define MP_BC_LOAD_FAST_N (0x1a) // uint
4543
#define MP_BC_LOAD_DEREF (0x1b) // uint

py/compile.c

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ typedef enum {
4747
PN_maximum_number_of,
4848
PN_string, // special node for non-interned string
4949
PN_bytes, // special node for non-interned bytes
50+
PN_const_object, // special node for a constant, generic Python object
5051
} pn_kind_t;
5152

5253
#define EMIT(fun) (comp->emit_method_table->fun(comp->emit))
@@ -174,6 +175,7 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
174175
#endif
175176
case PN_string:
176177
case PN_bytes:
178+
case PN_const_object:
177179
return pn;
178180
}
179181

@@ -432,6 +434,9 @@ STATIC bool cpython_c_tuple_is_const(mp_parse_node_t pn) {
432434
if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_bytes)) {
433435
return true;
434436
}
437+
if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_const_object)) {
438+
return true;
439+
}
435440
if (!MP_PARSE_NODE_IS_LEAF(pn)) {
436441
return false;
437442
}
@@ -486,6 +491,12 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst
486491
return;
487492
}
488493

494+
if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_const_object)) {
495+
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
496+
mp_obj_print((mp_obj_t)pns->nodes[0], PRINT_REPR);
497+
return;
498+
}
499+
489500
assert(MP_PARSE_NODE_IS_LEAF(pn));
490501
if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
491502
vstr_printf(vstr, INT_FMT, MP_PARSE_NODE_LEAF_SMALL_INT(pn));
@@ -495,8 +506,6 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst
495506
mp_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
496507
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
497508
case MP_PARSE_NODE_ID: assert(0);
498-
case MP_PARSE_NODE_INTEGER: vstr_printf(vstr, "%s", qstr_str(arg)); break;
499-
case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
500509
case MP_PARSE_NODE_STRING:
501510
case MP_PARSE_NODE_BYTES: {
502511
mp_uint_t len;
@@ -2159,7 +2168,8 @@ STATIC void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
21592168
// for non-REPL, evaluate then discard the expression
21602169
if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0]))
21612170
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)
2162-
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_bytes)) {
2171+
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_bytes)
2172+
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)) {
21632173
// do nothing with a lonely constant
21642174
} else {
21652175
compile_node(comp, pns->nodes[0]); // just an expression
@@ -2954,6 +2964,10 @@ STATIC void compile_bytes(compiler_t *comp, mp_parse_node_struct_t *pns) {
29542964
}
29552965
}
29562966

2967+
STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
2968+
EMIT_ARG(load_const_obj, (mp_obj_t)pns->nodes[0]);
2969+
}
2970+
29572971
typedef void (*compile_function_t)(compiler_t*, mp_parse_node_struct_t*);
29582972
STATIC compile_function_t compile_function[] = {
29592973
#define nc NULL
@@ -2966,6 +2980,7 @@ STATIC compile_function_t compile_function[] = {
29662980
NULL,
29672981
compile_string,
29682982
compile_bytes,
2983+
compile_const_object,
29692984
};
29702985

29712986
STATIC void compile_node(compiler_t *comp, mp_parse_node_t pn) {
@@ -2978,8 +2993,6 @@ STATIC void compile_node(compiler_t *comp, mp_parse_node_t pn) {
29782993
mp_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
29792994
switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
29802995
case MP_PARSE_NODE_ID: EMIT_ARG(load_id, arg); break;
2981-
case MP_PARSE_NODE_INTEGER: EMIT_ARG(load_const_int, arg); break;
2982-
case MP_PARSE_NODE_DECIMAL: EMIT_ARG(load_const_dec, arg); break;
29832996
case MP_PARSE_NODE_STRING: EMIT_ARG(load_const_str, arg, false); break;
29842997
case MP_PARSE_NODE_BYTES: EMIT_ARG(load_const_str, arg, true); break;
29852998
case MP_PARSE_NODE_TOKEN: default:

py/emit.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ typedef struct _emit_method_table_t {
7777
void (*import_star)(emit_t *emit);
7878
void (*load_const_tok)(emit_t *emit, mp_token_kind_t tok);
7979
void (*load_const_small_int)(emit_t *emit, mp_int_t arg);
80-
void (*load_const_int)(emit_t *emit, qstr qst);
81-
void (*load_const_dec)(emit_t *emit, qstr qst);
8280
void (*load_const_str)(emit_t *emit, qstr qst, bool bytes);
8381
void (*load_const_obj)(emit_t *emit, void *obj);
8482
void (*load_null)(emit_t *emit);

py/emitbc.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -473,16 +473,6 @@ STATIC void emit_bc_load_const_small_int(emit_t *emit, mp_int_t arg) {
473473
}
474474
}
475475

476-
STATIC void emit_bc_load_const_int(emit_t *emit, qstr qst) {
477-
emit_bc_pre(emit, 1);
478-
emit_write_bytecode_byte_qstr(emit, MP_BC_LOAD_CONST_INT, qst);
479-
}
480-
481-
STATIC void emit_bc_load_const_dec(emit_t *emit, qstr qst) {
482-
emit_bc_pre(emit, 1);
483-
emit_write_bytecode_byte_qstr(emit, MP_BC_LOAD_CONST_DEC, qst);
484-
}
485-
486476
STATIC void emit_bc_load_const_str(emit_t *emit, qstr qst, bool bytes) {
487477
emit_bc_pre(emit, 1);
488478
if (bytes) {
@@ -932,8 +922,6 @@ const emit_method_table_t emit_bc_method_table = {
932922
emit_bc_import_star,
933923
emit_bc_load_const_tok,
934924
emit_bc_load_const_small_int,
935-
emit_bc_load_const_int,
936-
emit_bc_load_const_dec,
937925
emit_bc_load_const_str,
938926
emit_bc_load_const_obj,
939927
emit_bc_load_null,

py/emitcpy.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -171,20 +171,6 @@ STATIC void emit_cpy_load_const_small_int(emit_t *emit, mp_int_t arg) {
171171
}
172172
}
173173

174-
STATIC void emit_cpy_load_const_int(emit_t *emit, qstr qst) {
175-
emit_pre(emit, 1, 3);
176-
if (emit->pass == MP_PASS_EMIT) {
177-
printf("LOAD_CONST %s\n", qstr_str(qst));
178-
}
179-
}
180-
181-
STATIC void emit_cpy_load_const_dec(emit_t *emit, qstr qst) {
182-
emit_pre(emit, 1, 3);
183-
if (emit->pass == MP_PASS_EMIT) {
184-
printf("LOAD_CONST %s\n", qstr_str(qst));
185-
}
186-
}
187-
188174
STATIC void print_quoted_str(qstr qst, bool bytes) {
189175
const char *str = qstr_str(qst);
190176
int len = strlen(str);
@@ -839,8 +825,6 @@ const emit_method_table_t emit_cpython_method_table = {
839825
emit_cpy_import_star,
840826
emit_cpy_load_const_tok,
841827
emit_cpy_load_const_small_int,
842-
emit_cpy_load_const_int,
843-
emit_cpy_load_const_dec,
844828
emit_cpy_load_const_str,
845829
emit_cpy_load_const_obj,
846830
emit_cpy_load_null,

py/emitnative.c

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,21 +1142,6 @@ STATIC void emit_native_load_const_small_int(emit_t *emit, mp_int_t arg) {
11421142
}
11431143
}
11441144

1145-
STATIC void emit_native_load_const_int(emit_t *emit, qstr qst) {
1146-
DEBUG_printf("load_const_int %s\n", qstr_str(qst));
1147-
// for viper: load integer, check fits in 32 bits
1148-
emit_native_pre(emit);
1149-
emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_INT, qst, REG_ARG_1);
1150-
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
1151-
}
1152-
1153-
STATIC void emit_native_load_const_dec(emit_t *emit, qstr qst) {
1154-
// for viper, a float/complex is just a Python object
1155-
emit_native_pre(emit);
1156-
emit_call_with_imm_arg(emit, MP_F_LOAD_CONST_DEC, qst, REG_ARG_1);
1157-
emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
1158-
}
1159-
11601145
STATIC void emit_native_load_const_str(emit_t *emit, qstr qst, bool bytes) {
11611146
emit_native_pre(emit);
11621147
// TODO: Eventually we want to be able to work with raw pointers in viper to
@@ -2317,8 +2302,6 @@ const emit_method_table_t EXPORT_FUN(method_table) = {
23172302
emit_native_import_star,
23182303
emit_native_load_const_tok,
23192304
emit_native_load_const_small_int,
2320-
emit_native_load_const_int,
2321-
emit_native_load_const_dec,
23222305
emit_native_load_const_str,
23232306
emit_native_load_const_obj,
23242307
emit_native_load_null,

py/emitpass1.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,6 @@ const emit_method_table_t emit_pass1_method_table = {
192192
(void*)emit_pass1_dummy,
193193
(void*)emit_pass1_dummy,
194194
(void*)emit_pass1_dummy,
195-
(void*)emit_pass1_dummy,
196-
(void*)emit_pass1_dummy,
197195
#if MICROPY_PY_BUILTINS_SET
198196
(void*)emit_pass1_dummy,
199197
(void*)emit_pass1_dummy,

py/grammar.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ DEF_RULE(power_dbl_star, c(power_dbl_star), and(2), tok(OP_DBL_STAR), rule(facto
248248
// testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
249249
// trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
250250

251-
DEF_RULE(atom, nc, or(10), tok(NAME), tok(NUMBER), rule(atom_string), tok(ELLIPSIS), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace))
251+
DEF_RULE(atom, nc, or(11), tok(NAME), tok(INTEGER), tok(FLOAT_OR_IMAG), rule(atom_string), tok(ELLIPSIS), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace))
252252
DEF_RULE(atom_string, c(atom_string), one_or_more, rule(string_or_bytes))
253253
DEF_RULE(string_or_bytes, nc, or(2), tok(STRING), tok(BYTES))
254254
DEF_RULE(atom_paren, c(atom_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(atom_2b), tok(DEL_PAREN_CLOSE))

py/lexer.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ STATIC bool is_following_digit(mp_lexer_t *lex) {
104104
return unichar_isdigit(lex->chr1);
105105
}
106106

107+
STATIC bool is_following_letter(mp_lexer_t *lex) {
108+
return unichar_isalpha(lex->chr1);
109+
}
110+
107111
STATIC bool is_following_odigit(mp_lexer_t *lex) {
108112
return lex->chr1 >= '0' && lex->chr1 <= '7';
109113
}
@@ -540,22 +544,34 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
540544
}
541545

542546
} else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
543-
lex->tok_kind = MP_TOKEN_NUMBER;
547+
bool forced_integer = false;
548+
if (is_char(lex, '.')) {
549+
lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG;
550+
} else {
551+
lex->tok_kind = MP_TOKEN_INTEGER;
552+
if (is_char(lex, '0') && is_following_letter(lex)) {
553+
forced_integer = true;
554+
}
555+
}
544556

545557
// get first char
546558
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
547559
next_char(lex);
548560

549561
// get tail chars
550562
while (!is_end(lex)) {
551-
if (is_char_or(lex, 'e', 'E')) {
563+
if (!forced_integer && is_char_or(lex, 'e', 'E')) {
564+
lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG;
552565
vstr_add_char(&lex->vstr, 'e');
553566
next_char(lex);
554567
if (is_char(lex, '+') || is_char(lex, '-')) {
555568
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
556569
next_char(lex);
557570
}
558-
} else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
571+
} else if (is_letter(lex) || is_digit(lex) || is_char(lex, '.')) {
572+
if (is_char_or3(lex, '.', 'j', 'J')) {
573+
lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG;
574+
}
559575
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
560576
next_char(lex);
561577
} else {

0 commit comments

Comments
 (0)