Skip to content

Commit 69a818d

Browse files
committed
py: Improve memory management for parser; add lexer error for bad line cont.
1 parent 97eb73c commit 69a818d

3 files changed

Lines changed: 57 additions & 22 deletions

File tree

py/lexer.c

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -299,8 +299,15 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
299299
// backslash (outside string literals) must appear just before a physical newline
300300
next_char(lex);
301301
if (!is_physical_newline(lex)) {
302-
// TODO SyntaxError
303-
assert(0);
302+
// SyntaxError: unexpected character after line continuation character
303+
tok->src_name = lex->name;
304+
tok->src_line = lex->line;
305+
tok->src_column = lex->column;
306+
tok->kind = MP_TOKEN_BAD_LINE_CONTINUATION;
307+
vstr_reset(&lex->vstr);
308+
tok->str = vstr_str(&lex->vstr);
309+
tok->len = 0;
310+
return;
304311
} else {
305312
next_char(lex);
306313
}

py/lexer.h

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -10,19 +10,20 @@ typedef enum _mp_token_kind_t {
1010
MP_TOKEN_INVALID,
1111
MP_TOKEN_DEDENT_MISMATCH,
1212
MP_TOKEN_LONELY_STRING_OPEN,
13+
MP_TOKEN_BAD_LINE_CONTINUATION,
1314

14-
MP_TOKEN_NEWLINE, // 4
15-
MP_TOKEN_INDENT, // 5
16-
MP_TOKEN_DEDENT, // 6
15+
MP_TOKEN_NEWLINE, // 5
16+
MP_TOKEN_INDENT, // 6
17+
MP_TOKEN_DEDENT, // 7
1718

18-
MP_TOKEN_NAME, // 7
19+
MP_TOKEN_NAME, // 8
1920
MP_TOKEN_NUMBER,
2021
MP_TOKEN_STRING,
2122
MP_TOKEN_BYTES,
2223

2324
MP_TOKEN_ELLIPSIS,
2425

25-
MP_TOKEN_KW_FALSE, // 12
26+
MP_TOKEN_KW_FALSE, // 13
2627
MP_TOKEN_KW_NONE,
2728
MP_TOKEN_KW_TRUE,
2829
MP_TOKEN_KW_AND,
@@ -31,7 +32,7 @@ typedef enum _mp_token_kind_t {
3132
MP_TOKEN_KW_BREAK,
3233
MP_TOKEN_KW_CLASS,
3334
MP_TOKEN_KW_CONTINUE,
34-
MP_TOKEN_KW_DEF, // 21
35+
MP_TOKEN_KW_DEF, // 22
3536
MP_TOKEN_KW_DEL,
3637
MP_TOKEN_KW_ELIF,
3738
MP_TOKEN_KW_ELSE,
@@ -41,7 +42,7 @@ typedef enum _mp_token_kind_t {
4142
MP_TOKEN_KW_FROM,
4243
MP_TOKEN_KW_GLOBAL,
4344
MP_TOKEN_KW_IF,
44-
MP_TOKEN_KW_IMPORT, // 31
45+
MP_TOKEN_KW_IMPORT, // 32
4546
MP_TOKEN_KW_IN,
4647
MP_TOKEN_KW_IS,
4748
MP_TOKEN_KW_LAMBDA,
@@ -51,12 +52,12 @@ typedef enum _mp_token_kind_t {
5152
MP_TOKEN_KW_PASS,
5253
MP_TOKEN_KW_RAISE,
5354
MP_TOKEN_KW_RETURN,
54-
MP_TOKEN_KW_TRY, // 41
55+
MP_TOKEN_KW_TRY, // 42
5556
MP_TOKEN_KW_WHILE,
5657
MP_TOKEN_KW_WITH,
5758
MP_TOKEN_KW_YIELD,
5859

59-
MP_TOKEN_OP_PLUS, // 45
60+
MP_TOKEN_OP_PLUS, // 46
6061
MP_TOKEN_OP_MINUS,
6162
MP_TOKEN_OP_STAR,
6263
MP_TOKEN_OP_DBL_STAR,
@@ -66,7 +67,7 @@ typedef enum _mp_token_kind_t {
6667
MP_TOKEN_OP_LESS,
6768
MP_TOKEN_OP_DBL_LESS,
6869
MP_TOKEN_OP_MORE,
69-
MP_TOKEN_OP_DBL_MORE, // 55
70+
MP_TOKEN_OP_DBL_MORE, // 56
7071
MP_TOKEN_OP_AMPERSAND,
7172
MP_TOKEN_OP_PIPE,
7273
MP_TOKEN_OP_CARET,
@@ -76,7 +77,7 @@ typedef enum _mp_token_kind_t {
7677
MP_TOKEN_OP_DBL_EQUAL,
7778
MP_TOKEN_OP_NOT_EQUAL,
7879

79-
MP_TOKEN_DEL_PAREN_OPEN, // 64
80+
MP_TOKEN_DEL_PAREN_OPEN, // 65
8081
MP_TOKEN_DEL_PAREN_CLOSE,
8182
MP_TOKEN_DEL_BRACKET_OPEN,
8283
MP_TOKEN_DEL_BRACKET_CLOSE,
@@ -86,7 +87,7 @@ typedef enum _mp_token_kind_t {
8687
MP_TOKEN_DEL_COLON,
8788
MP_TOKEN_DEL_PERIOD,
8889
MP_TOKEN_DEL_SEMICOLON,
89-
MP_TOKEN_DEL_AT, // 74
90+
MP_TOKEN_DEL_AT, // 75
9091
MP_TOKEN_DEL_EQUAL,
9192
MP_TOKEN_DEL_PLUS_EQUAL,
9293
MP_TOKEN_DEL_MINUS_EQUAL,
@@ -96,7 +97,7 @@ typedef enum _mp_token_kind_t {
9697
MP_TOKEN_DEL_PERCENT_EQUAL,
9798
MP_TOKEN_DEL_AMPERSAND_EQUAL,
9899
MP_TOKEN_DEL_PIPE_EQUAL,
99-
MP_TOKEN_DEL_CARET_EQUAL, // 84
100+
MP_TOKEN_DEL_CARET_EQUAL, // 85
100101
MP_TOKEN_DEL_DBL_MORE_EQUAL,
101102
MP_TOKEN_DEL_DBL_LESS_EQUAL,
102103
MP_TOKEN_DEL_DBL_STAR_EQUAL,

py/parse.c

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ typedef struct _parser_t {
8888
uint rule_stack_top;
8989
rule_stack_t *rule_stack;
9090

91+
uint result_stack_alloc;
9192
uint result_stack_top;
9293
mp_parse_node_t *result_stack;
9394
} parser_t;
@@ -121,7 +122,7 @@ mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
121122

122123
int num_parse_nodes_allocated = 0;
123124
mp_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) {
124-
mp_parse_node_struct_t *pn = m_malloc(sizeof(mp_parse_node_struct_t) + num_args * sizeof(mp_parse_node_t));
125+
mp_parse_node_struct_t *pn = m_new_obj_var(mp_parse_node_struct_t, mp_parse_node_t, num_args);
125126
pn->source = 0; // TODO
126127
pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8);
127128
num_parse_nodes_allocated += 1;
@@ -180,6 +181,10 @@ static mp_parse_node_t peek_result(parser_t *parser, int pos) {
180181
}
181182

182183
static void push_result_node(parser_t *parser, mp_parse_node_t pn) { // store pn at the top of the parser's result stack, growing the stack first if it is full
184+
if (parser->result_stack_top >= parser->result_stack_alloc) { // no free slot left at the current capacity
185+
parser->result_stack = m_renew(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc * 2); // request twice the current number of entries
186+
parser->result_stack_alloc *= 2; // keep the recorded capacity in step with the size requested above
187+
}
183188
parser->result_stack[parser->result_stack_top++] = pn; // write pn into the next slot, then advance the top index
184189
}
185190

@@ -252,14 +257,20 @@ static void push_result_rule(parser_t *parser, const rule_t *rule, int num_args)
252257
}
253258

254259
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
255-
parser_t *parser = m_new(parser_t, 1);
260+
261+
// allocate memory for the parser and its stacks
262+
263+
parser_t *parser = m_new_obj(parser_t);
264+
256265
parser->rule_stack_alloc = 64;
257266
parser->rule_stack_top = 0;
258267
parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc);
259268

260-
parser->result_stack = m_new(mp_parse_node_t, 1000);
269+
parser->result_stack_alloc = 64;
261270
parser->result_stack_top = 0;
271+
parser->result_stack = m_new(mp_parse_node_t, parser->result_stack_alloc);
262272

273+
// work out the top-level rule to use, and push it on the stack
263274
int top_level_rule;
264275
switch (input_kind) {
265276
case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
@@ -268,6 +279,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
268279
}
269280
push_rule(parser, rules[top_level_rule], 0);
270281

282+
// parse!
283+
271284
uint n, i;
272285
bool backtrack = false;
273286
const rule_t *rule;
@@ -558,12 +571,25 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
558571

559572
//printf("--------------\n");
560573
//result_stack_show(parser);
561-
assert(parser->result_stack_top == 1);
562-
//printf("maximum depth: %d\n", parser->rule_stack_alloc);
574+
//printf("rule stack alloc: %d\n", parser->rule_stack_alloc);
575+
//printf("result stack alloc: %d\n", parser->result_stack_alloc);
563576
//printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
564-
return parser->result_stack[0];
577+
578+
// get the root parse node that we created
579+
assert(parser->result_stack_top == 1);
580+
mp_parse_node_t result = parser->result_stack[0];
581+
582+
finished:
583+
// free the memory that we don't need anymore
584+
m_del(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc);
585+
m_del(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc);
586+
m_del_obj(parser_t, parser);
587+
588+
// return the result
589+
return result;
565590

566591
syntax_error:
592+
// TODO these should raise a proper exception
567593
if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
568594
mp_lexer_show_error_pythonic(lex, "IndentationError: unexpected indent");
569595
} else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
@@ -575,5 +601,6 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
575601
#endif
576602
mp_token_show(mp_lexer_cur(lex));
577603
}
578-
return MP_PARSE_NODE_NULL;
604+
result = MP_PARSE_NODE_NULL;
605+
goto finished;
579606
}

0 commit comments

Comments
 (0)