Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(id)
STRUCT_FOR_ID(ident)
STRUCT_FOR_ID(ignore)
STRUCT_FOR_ID(ignore_unmatched_parens)
STRUCT_FOR_ID(imag)
STRUCT_FOR_ID(importlib)
STRUCT_FOR_ID(in_fd)
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Include/internal/pycore_unicodeobject_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions Lib/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,9 @@ def tokenize(readline):

def _tokenize(rl_gen, encoding):
source = b"".join(rl_gen).decode(encoding)
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
for token in _generate_tokens_from_c_tokenizer(source,
extra_tokens=True,
ignore_unmatched_parens=True):
Comment thread
pablogsal marked this conversation as resolved.
Outdated
yield token

def generate_tokens(readline):
Expand Down Expand Up @@ -531,10 +533,12 @@ def error(message, filename=None, location=None):
perror("unexpected error: %s" % err)
raise

def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
def _generate_tokens_from_c_tokenizer(source, extra_tokens=False, ignore_unmatched_parens=False):
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
import _tokenize as c_tokenizer
for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
for info in c_tokenizer.TokenizerIter(source,
extra_tokens=extra_tokens,
ignore_unmatched_parens=ignore_unmatched_parens):
yield TokenInfo._make(info)


Expand Down
60 changes: 31 additions & 29 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ tok_new(void)
tok->report_warnings = 1;
tok->tok_extra_tokens = 0;
tok->comment_newline = 0;
tok->ignore_unmatched_parens = 0;
tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
tok->tok_mode_stack_index = 0;
tok->tok_report_warnings = 1;
Expand Down Expand Up @@ -2496,41 +2497,42 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
case ')':
case ']':
case '}':
if (!tok->level) {
if (!tok->ignore_unmatched_parens && !tok->level) {
Comment thread
pablogsal marked this conversation as resolved.
Outdated
if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed"));
}
return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c));
}
tok->level--;
int opening = tok->parenstack[tok->level];
if (!((opening == '(' && c == ')') ||
(opening == '[' && c == ']') ||
(opening == '{' && c == '}')))
{
/* If the opening bracket belongs to an f-string's expression
part (e.g. f"{)}") and the closing bracket is an arbitrary
nested expression, then instead of matching a different
syntactical construct with it; we'll throw an unmatched
parentheses error. */
if (INSIDE_FSTRING(tok) && opening == '{') {
assert(current_tok->curly_bracket_depth >= 0);
int previous_bracket = current_tok->curly_bracket_depth - 1;
if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c));
if (tok->level > 0) {
tok->level--;
int opening = tok->parenstack[tok->level];
if (!tok->ignore_unmatched_parens && !((opening == '(' && c == ')') ||
(opening == '[' && c == ']') ||
(opening == '{' && c == '}'))) {
/* If the opening bracket belongs to an f-string's expression
part (e.g. f"{)}") and the closing bracket is an arbitrary
nested expression, then instead of matching a different
syntactical construct with it; we'll throw an unmatched
parentheses error. */
if (INSIDE_FSTRING(tok) && opening == '{') {
assert(current_tok->curly_bracket_depth >= 0);
int previous_bracket = current_tok->curly_bracket_depth - 1;
if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c));
}
}
if (tok->parenlinenostack[tok->level] != tok->lineno) {
return MAKE_TOKEN(syntaxerror(tok,
"closing parenthesis '%c' does not match "
"opening parenthesis '%c' on line %d",
c, opening, tok->parenlinenostack[tok->level]));
}
else {
return MAKE_TOKEN(syntaxerror(tok,
"closing parenthesis '%c' does not match "
"opening parenthesis '%c'",
c, opening));
}
}
if (tok->parenlinenostack[tok->level] != tok->lineno) {
return MAKE_TOKEN(syntaxerror(tok,
"closing parenthesis '%c' does not match "
"opening parenthesis '%c' on line %d",
c, opening, tok->parenlinenostack[tok->level]));
}
else {
return MAKE_TOKEN(syntaxerror(tok,
"closing parenthesis '%c' does not match "
"opening parenthesis '%c'",
c, opening));
}
}

Expand Down
1 change: 1 addition & 0 deletions Parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct tok_state {
int tok_report_warnings;
int tok_extra_tokens;
int comment_newline;
int ignore_unmatched_parens;
#ifdef Py_DEBUG
int debug;
#endif
Expand Down
13 changes: 10 additions & 3 deletions Python/Python-tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,13 @@ _tokenizer.tokenizeriter.__new__ as tokenizeriter_new
source: str
*
extra_tokens: bool
ignore_unmatched_parens: bool
[clinic start generated code]*/

static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, const char *source,
int extra_tokens)
/*[clinic end generated code: output=f6f9d8b4beec8106 input=90dc5b6a5df180c2]*/
int extra_tokens, int ignore_unmatched_parens)
/*[clinic end generated code: output=5437e7bbc30de3f4 input=7f6b22d7c235ffd7]*/
{
tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
if (self == NULL) {
Expand All @@ -64,6 +65,12 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
if (extra_tokens) {
self->tok->tok_extra_tokens = 1;
}
if (ignore_unmatched_parens) {
self->tok->ignore_unmatched_parens = 1;
}
if (ignore_unmatched_parens) {
Comment thread
pablogsal marked this conversation as resolved.
Outdated
self->tok->ignore_unmatched_parens = 1;
}
self->done = 0;
return (PyObject *)self;
}
Expand All @@ -82,7 +89,7 @@ _tokenizer_error(struct tok_state *tok)
msg = "invalid token";
break;
case E_EOF:
if (tok->level) {
if (tok->level > 0) {
PyErr_Format(PyExc_SyntaxError,
"parenthesis '%c' was never closed",
tok->parenstack[tok->level-1]);
Expand Down
21 changes: 13 additions & 8 deletions Python/clinic/Python-tokenize.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.