Move byte_offset_to_character_offset to the parser API

python · pablogsal · Jul 4, 2021 · Jul 4, 2021 · Jul 4, 2021 · Jul 4, 2021
commit 58e2f7fc035aec5f777168737b3e0ddbfde985b8
diff --git a/Include/internal/pycore_traceback.h b/Include/internal/pycore_traceback.h
@@ -87,27 +87,6 @@ PyAPI_FUNC(PyObject*) _PyTraceBack_FromFrame(
     PyObject *tb_next,
     PyFrameObject *frame);
 
-static inline Py_ssize_t
-_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
-{
-    const char *str = PyUnicode_AsUTF8(line);
-    if (!str) {
-        return 0;
-    }
-    Py_ssize_t len = strlen(str);
-    if (col_offset > len + 1) {
-        col_offset = len + 1;
-    }
-    assert(col_offset >= 0);
-    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
-    if (!text) {
-        return 0;
-    }
-    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
-    Py_DECREF(text);
-    return size;
-}
-
 #ifdef __cplusplus
 }
 #endif

diff --git a/Parser/pegen.c b/Parser/pegen.c
@@ -1,6 +1,5 @@
 #include <Python.h>
 #include "pycore_ast.h"           // _PyAST_Validate(),
-#include "pycore_traceback.h"     // _byte_offset_to_character_offset(),
 #include <errcode.h>
 #include "tokenizer.h"
 
@@ -398,6 +397,27 @@ get_error_line(Parser *p, Py_ssize_t lineno)
     return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
 }
 
+Py_ssize_t
+_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
+{
+    const char *str = PyUnicode_AsUTF8(line);
+    if (!str) {
+        return 0;
+    }
+    Py_ssize_t len = strlen(str);
+    if (col_offset > len + 1) {
+        col_offset = len + 1;
+    }
+    assert(col_offset >= 0);
+    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
+    if (!text) {
+        return 0;
+    }
+    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
+    Py_DECREF(text);
+    return size;
+}
+
 void *
 _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                     Py_ssize_t lineno, Py_ssize_t col_offset,
@@ -478,9 +498,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
     Py_ssize_t end_col_number = end_col_offset;
 
     if (p->tok->encoding != NULL) {
-        col_number = _byte_offset_to_character_offset(error_line, col_offset);
+        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
         end_col_number = end_col_number > 0 ?
-                         _byte_offset_to_character_offset(error_line, end_col_offset) :
+                         _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset) :
                          end_col_number;
     }
     tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);

diff --git a/Parser/pegen.h b/Parser/pegen.h
@@ -139,6 +139,7 @@ expr_ty _PyPegen_name_token(Parser *p);
 expr_ty _PyPegen_number_token(Parser *p);
 void *_PyPegen_string_token(Parser *p);
 const char *_PyPegen_get_expr_name(expr_ty);
+Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
 void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                           Py_ssize_t lineno, Py_ssize_t col_offset,

diff --git a/Python/traceback.c b/Python/traceback.c
@@ -7,7 +7,7 @@
 #include "pycore_interp.h"        // PyInterpreterState.gc
 #include "frameobject.h"          // PyFrame_GetBack()
 #include "pycore_frame.h"         // _PyFrame_GetCode()
-#include "pycore_traceback.h"     // _byte_offset_to_character_offset()
+#include "../Parser/pegen.h"      // _PyPegen_byte_offset_to_character_offset()
 #include "structmember.h"         // PyMemberDef
 #include "osdefs.h"               // SEP
 #ifdef HAVE_FCNTL_H
@@ -556,8 +556,8 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
         }
         // Convert the utf-8 byte offset to the actual character offset so we
         // print the right number of carets.
-        Py_ssize_t start_offset = _byte_offset_to_character_offset(source_line, start_col_byte_offset);
-        Py_ssize_t end_offset = _byte_offset_to_character_offset(source_line, end_col_byte_offset);
+        Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+        Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
 
         char offset = truncation;
         while (++offset <= start_offset) {