Skip to content

Commit ef53c04

Browse files
committed
bpo-45848: Allow the parser to get error lines from encoded files
1 parent 3295910 commit ef53c04

5 files changed

Lines changed: 42 additions & 11 deletions

File tree

Include/cpython/pyerrors.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@ PyAPI_FUNC(PyObject *) PyErr_ProgramTextObject(
149149
PyObject *filename,
150150
int lineno);
151151

152+
PyAPI_FUNC(PyObject *) _PyErr_ProgramDecodedTextObject(
153+
PyObject *filename,
154+
int lineno,
155+
const char* encoding);
156+
152157
PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
153158
PyObject *object,
154159
Py_ssize_t start,

Lib/test/test_exceptions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2352,6 +2352,19 @@ def test_encodings(self):
23522352
finally:
23532353
unlink(TESTFN)
23542354

2355+
# Check backwards tokenizer errors
2356+
source = '# -*- coding: ascii -*-\n\n(\n'
2357+
try:
2358+
with open(TESTFN, 'w', encoding='ascii') as testfile:
2359+
testfile.write(source)
2360+
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
2361+
err = err.decode('utf-8').splitlines()
2362+
2363+
self.assertEqual(err[-3], ' (')
2364+
self.assertEqual(err[-2], ' ^')
2365+
finally:
2366+
unlink(TESTFN)
2367+
23552368
def test_attributes_new_constructor(self):
23562369
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
23572370
the_exception = SyntaxError("bad bad", args)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Allow the parser to obtain error lines directly from encoded files. Patch by
2+
Pablo Galindo.

Parser/pegen.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -480,14 +480,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
480480
goto error;
481481
}
482482

483-
// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
484-
// with an arbitrary encoding or otherwise we could get some badly decoded text.
485-
int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
486483
if (p->tok->fp_interactive) {
487484
error_line = get_error_line(p, lineno);
488485
}
489-
else if (uses_utf8_codec && p->start_rule == Py_file_input) {
490-
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
486+
else if (p->start_rule == Py_file_input) {
487+
error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
488+
(int) lineno, p->tok->encoding);
491489
}
492490

493491
if (!error_line) {
@@ -498,15 +496,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
498496
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
499497
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
500498
does not physically exist */
501-
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
499+
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
502500

503501
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
504502
Py_ssize_t size = p->tok->inp - p->tok->buf;
505503
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
506504
}
507-
else {
505+
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
508506
error_line = get_error_line(p, lineno);
509507
}
508+
else {
509+
error_line = PyUnicode_FromStringAndSize("", 0);
510+
}
510511
if (!error_line) {
511512
goto error;
512513
}

Python/errors.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,7 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
16921692
functionality in tb_displayline() in traceback.c. */
16931693

16941694
static PyObject *
1695-
err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
1695+
err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
16961696
{
16971697
int i;
16981698
char linebuf[1000];
@@ -1720,7 +1720,11 @@ err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
17201720
fclose(fp);
17211721
if (i == lineno) {
17221722
PyObject *res;
1723-
res = PyUnicode_FromString(linebuf);
1723+
if (encoding != NULL) {
1724+
res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
1725+
} else {
1726+
res = PyUnicode_FromString(linebuf);
1727+
}
17241728
if (res == NULL)
17251729
_PyErr_Clear(tstate);
17261730
return res;
@@ -1746,7 +1750,7 @@ PyErr_ProgramText(const char *filename, int lineno)
17461750
}
17471751

17481752
PyObject *
1749-
PyErr_ProgramTextObject(PyObject *filename, int lineno)
1753+
_PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
17501754
{
17511755
if (filename == NULL || lineno <= 0) {
17521756
return NULL;
@@ -1758,7 +1762,13 @@ PyErr_ProgramTextObject(PyObject *filename, int lineno)
17581762
_PyErr_Clear(tstate);
17591763
return NULL;
17601764
}
1761-
return err_programtext(tstate, fp, lineno);
1765+
return err_programtext(tstate, fp, lineno, encoding);
1766+
}
1767+
1768+
PyObject *
1769+
PyErr_ProgramTextObject(PyObject *filename, int lineno)
1770+
{
1771+
return _PyErr_ProgramDecodedTextObject(filename, lineno, NULL);
17621772
}
17631773

17641774
#ifdef __cplusplus

0 commit comments

Comments
 (0)