-
-
Notifications
You must be signed in to change notification settings - Fork 34.5k
bpo-43950: Specialize tracebacks for subscripts/binary ops #27037
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
0a825ea
d79d365
26430d4
081738d
045c491
012ae63
825e3b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,10 @@ | |
| #include "pycore_interp.h" // PyInterpreterState.gc | ||
| #include "frameobject.h" // PyFrame_GetBack() | ||
| #include "pycore_frame.h" // _PyFrame_GetCode() | ||
| #include "pycore_pyarena.h" // _PyArena_Free() | ||
| #include "pycore_ast.h" // asdl_seq_* | ||
| #include "pycore_compile.h" // _PyAST_Optimize | ||
| #include "pycore_parser.h" // _PyParser_ASTFromString | ||
| #include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset() | ||
| #include "structmember.h" // PyMemberDef | ||
| #include "osdefs.h" // SEP | ||
|
|
@@ -512,8 +516,136 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i | |
| return err; | ||
| } | ||
|
|
||
| /* AST based Traceback Specialization | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought from the title of the PR that it's related to Mark's bytecode specializations. Are you settled on using this word? Seems a bit overloaded now.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They are not relevant at all, but I can see the confusion. Any suggestions on alternative renaming?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's how clang's documentation talks about their carets: https://clang.llvm.org/diagnostics.html So following from there maybe something like |
||
| * | ||
| * When displaying a new traceback line, for certain syntactical constructs | ||
| * (e.g a subscript, an arithmetic operation) we try to create a representation | ||
| * that separates the primary source of error from the rest. | ||
| * | ||
| * Example specialization of BinOp nodes: | ||
| * Traceback (most recent call last): | ||
| * File "/home/isidentical/cpython/cpython/t.py", line 10, in <module> | ||
| * add_values(1, 2, 'x', 3, 4) | ||
| * ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
| * File "/home/isidentical/cpython/cpython/t.py", line 2, in add_values | ||
| * return a + b + c + d + e | ||
| * ~~~~~~^~~ | ||
| * TypeError: 'NoneType' object is not subscriptable | ||
| */ | ||
|
|
||
| #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f')) | ||
|
|
||
| static int | ||
| extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_anchor, int *right_anchor) | ||
| { | ||
| switch (expr->kind) { | ||
| case BinOp_kind: { | ||
| expr_ty left = expr->v.BinOp.left; | ||
| expr_ty right = expr->v.BinOp.right; | ||
| for (int i = left->end_col_offset + 1; i < right->col_offset; i++) { | ||
| if (IS_WHITESPACE(segment_str[i])) { | ||
| continue; | ||
| } | ||
|
|
||
| *left_anchor = i; | ||
| *right_anchor = i + 1; | ||
|
|
||
| // Check whether if this a two-character operator (e.g //) | ||
| if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) { | ||
| ++*right_anchor; | ||
| } | ||
| break; | ||
| } | ||
| return 1; | ||
| } | ||
| case Subscript_kind: { | ||
| *left_anchor = expr->v.Subscript.value->end_col_offset; | ||
| *right_anchor = expr->v.Subscript.slice->end_col_offset + 1; | ||
| return 1; | ||
| } | ||
| default: | ||
| return 0; | ||
| } | ||
| } | ||
|
|
||
| static int | ||
| extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_anchor, int *right_anchor) | ||
| { | ||
| switch (statement->kind) { | ||
| case Expr_kind: { | ||
| return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor); | ||
| } | ||
| default: | ||
| return 0; | ||
| } | ||
| } | ||
|
|
||
| static int | ||
| extract_anchors_from_line(PyObject *filename, PyObject *line, | ||
| Py_ssize_t start_offset, Py_ssize_t end_offset, | ||
| int *left_anchor, int *right_anchor) | ||
| { | ||
| int res = -1; | ||
| PyArena *arena = NULL; | ||
| PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset); | ||
| if (!segment) { | ||
| goto done; | ||
| } | ||
|
|
||
| const char *segment_str = PyUnicode_AsUTF8(segment); | ||
| if (!segment) { | ||
| goto done; | ||
| } | ||
|
|
||
| arena = _PyArena_New(); | ||
| if (!arena) { | ||
| goto done; | ||
| } | ||
|
|
||
| PyCompilerFlags flags = _PyCompilerFlags_INIT; | ||
|
|
||
| _PyASTOptimizeState state; | ||
|
isidentical marked this conversation as resolved.
|
||
| state.optimize = _Py_GetConfig()->optimization_level; | ||
| state.ff_features = 0; | ||
|
|
||
| mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input, | ||
| &flags, arena); | ||
| if (!module) { | ||
| goto done; | ||
| } | ||
| if (!_PyAST_Optimize(module, arena, &state)) { | ||
|
isidentical marked this conversation as resolved.
|
||
| goto done; | ||
| } | ||
|
|
||
| assert(module->kind == Module_kind); | ||
| if (asdl_seq_LEN(module->v.Module.body) == 1) { | ||
| stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0); | ||
| res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor); | ||
| } else { | ||
| res = 0; | ||
| } | ||
|
|
||
| done: | ||
| Py_XDECREF(segment); | ||
| if (arena) { | ||
| _PyArena_Free(arena); | ||
| } | ||
| return res; | ||
| } | ||
|
|
||
| #define _TRACEBACK_SOURCE_LINE_INDENT 4 | ||
|
|
||
| static inline int | ||
| ignore_source_errors(void) { | ||
| if (PyErr_Occurred()) { | ||
| if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { | ||
| return -1; | ||
| } | ||
| PyErr_Clear(); | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| static int | ||
| tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno, | ||
| PyFrameObject *frame, PyObject *name) | ||
|
|
@@ -544,7 +676,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen | |
| int start_col_byte_offset; | ||
| int end_col_byte_offset; | ||
| if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset, | ||
| &end_line, &end_col_byte_offset)) { | ||
| &end_line, &end_col_byte_offset)) { | ||
| goto done; | ||
| } | ||
| if (start_line != end_line) { | ||
|
|
@@ -554,29 +686,53 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen | |
| if (start_col_byte_offset < 0 || end_col_byte_offset < 0) { | ||
| goto done; | ||
| } | ||
|
|
||
| // Convert the utf-8 byte offset to the actual character offset so we | ||
| // print the right number of carets. | ||
| Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); | ||
| Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); | ||
| Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset; | ||
| Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset; | ||
|
|
||
| char offset = truncation; | ||
| while (++offset <= start_offset) { | ||
| err = PyFile_WriteString(" ", f); | ||
| if (err < 0) { | ||
| goto done; | ||
| if (source_line) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this guarded by
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @isidentical This still applies, could you please check out what's going on here? |
||
| start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); | ||
| end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); | ||
| } | ||
|
|
||
| const char *primary, *secondary; | ||
| primary = secondary = "^"; | ||
|
|
||
| int left_end_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int); | ||
| int right_start_offset = left_end_offset; | ||
|
|
||
| if (source_line) { | ||
| int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset, | ||
| &left_end_offset, &right_start_offset); | ||
| if (res < 0) { | ||
| err = ignore_source_errors(); | ||
| if (err < 0) { | ||
| goto done; | ||
| } | ||
| } else if (res > 0) { | ||
| primary = "^"; | ||
| secondary = "~"; | ||
| } | ||
| } | ||
| while (++offset <= end_offset + 1) { | ||
| err = PyFile_WriteString("^", f); | ||
| if (err < 0) { | ||
| goto done; | ||
|
|
||
| char offset = truncation; | ||
| while (++offset <= end_offset) { | ||
| if (offset <= start_offset) { | ||
| err = PyFile_WriteString(" ", f); | ||
| } else if (offset <= left_end_offset + start_offset) { | ||
| err = PyFile_WriteString(secondary, f); | ||
| } else if (offset <= right_start_offset + start_offset) { | ||
| err = PyFile_WriteString(primary, f); | ||
| } else { | ||
| err = PyFile_WriteString(secondary, f); | ||
| } | ||
| } | ||
| err = PyFile_WriteString("\n", f); | ||
| } | ||
|
|
||
| else { | ||
|
isidentical marked this conversation as resolved.
Outdated
|
||
| PyErr_Clear(); | ||
| err = ignore_source_errors(); | ||
| } | ||
|
|
||
| done: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.