Skip to content

Commit a6473f9

Browse files
committed
Issues #15169, #14599: Make PyImport_ExecCodeModuleWithPathnames() use
Lib/imp.py for imp.source_from_cache() instead of its own C version. Also change PyImport_ExecCodeModuleObject() to not infer the source path from the bytecode path like PyImport_ExecCodeModuleWithPathnames() does. This makes the function less magical. This also has the side-effect of removing all uses of MAXPATHLEN in Python/import.c which can cause failures on really long filenames.
1 parent d104eef commit a6473f9

6 files changed

Lines changed: 4044 additions & 4023 deletions

File tree

Doc/c-api/import.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,14 @@ Importing Modules
163163
.. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, char *cpathname)
164164
165165
Like :c:func:`PyImport_ExecCodeModuleObject`, but *name*, *pathname* and
166-
*cpathname* are UTF-8 encoded strings.
166+
*cpathname* are UTF-8 encoded strings. Attempts are also made to figure out
167+
what the value for *pathname* should be from *cpathname* if the former is
168+
set to ``NULL``.
167169
168170
.. versionadded:: 3.2
171+
.. versionchanged:: 3.3
172+
Uses :func:`imp.source_from_cache()` in calculating the source path if
173+
only the bytecode path is provided.
169174
170175
171176
.. c:function:: long PyImport_GetMagicNumber()

Lib/imp.py

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
# Directly exposed by this module
1515
from importlib._bootstrap import new_module
16-
from importlib._bootstrap import cache_from_source
16+
from importlib._bootstrap import cache_from_source, source_from_cache
1717

1818

1919
from importlib import _bootstrap
@@ -58,29 +58,6 @@ def get_suffixes():
5858
return extensions + source + bytecode
5959

6060

61-
def source_from_cache(path):
62-
"""Given the path to a .pyc./.pyo file, return the path to its .py file.
63-
64-
The .pyc/.pyo file does not need to exist; this simply returns the path to
65-
the .py file calculated to correspond to the .pyc/.pyo file. If path does
66-
not conform to PEP 3147 format, ValueError will be raised. If
67-
sys.implementation.cache_tag is None then NotImplementedError is raised.
68-
69-
"""
70-
if sys.implementation.cache_tag is None:
71-
raise NotImplementedError('sys.implementation.cache_tag is None')
72-
head, pycache_filename = os.path.split(path)
73-
head, pycache = os.path.split(head)
74-
if pycache != _bootstrap._PYCACHE:
75-
raise ValueError('{} not bottom-level directory in '
76-
'{!r}'.format(_bootstrap._PYCACHE, path))
77-
if pycache_filename.count('.') != 2:
78-
raise ValueError('expected only 2 dots in '
79-
'{!r}'.format(pycache_filename))
80-
base_filename = pycache_filename.partition('.')[0]
81-
return os.path.join(head, base_filename + machinery.SOURCE_SUFFIXES[0])
82-
83-
8461
class NullImporter:
8562

8663
"""Null import object."""

Lib/importlib/_bootstrap.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,50 @@ def cache_from_source(path, debug_override=None):
428428
return _path_join(head, _PYCACHE, filename)
429429

430430

431+
def source_from_cache(path):
    """Return the .py source path that corresponds to a .pyc/.pyo path.

    Nothing is checked on disk: the result is computed purely from the
    text of *path*, which must follow the PEP 3147 layout
    ``<dir>/<_PYCACHE>/<name>.<tag>.<ext>``.

    Raises NotImplementedError when sys.implementation.cache_tag is None,
    and ValueError when *path* is not directly inside a ``_PYCACHE``
    directory or its file name does not contain exactly two dots.

    """
    if sys.implementation.cache_tag is None:
        raise NotImplementedError('sys.implementation.cache_tag is None')

    # Peel off the file name, then the directory that must be the cache dir.
    cache_dir_path, cached_name = _path_split(path)
    source_dir, cache_dir_name = _path_split(cache_dir_path)

    if cache_dir_name != _PYCACHE:
        message = '{} not bottom-level directory in {!r}'
        raise ValueError(message.format(_PYCACHE, path))
    if cached_name.count('.') != 2:
        message = 'expected only 2 dots in {!r}'
        raise ValueError(message.format(cached_name))

    # Everything before the first dot is the module's base name.
    module_name, _, _ = cached_name.partition('.')
    return _path_join(source_dir, module_name + SOURCE_SUFFIXES[0])
452+
453+
454+
def _get_sourcefile(bytecode_path):
    """Convert a bytecode file path to a source path (if possible).

    This function exists purely for backwards-compatibility for
    PyImport_ExecCodeModuleWithPathnames() in the C API.

    Returns None for an empty path.  Returns *bytecode_path* unchanged when
    it does not end in a three-character ``.py?`` extension (any case), or
    when the computed source path is not reported as a file by
    _path_isfile().  Otherwise returns the computed source path.

    """
    if not bytecode_path:
        return None

    # Only names of the form "<something>.py?" (e.g. .pyc/.pyo) are
    # candidates; anything else is handed back untouched.
    rest, _, extension = bytecode_path.rpartition('.')
    if not rest or len(extension) != 3 or extension[:2].lower() != 'py':
        return bytecode_path

    try:
        # Preferred: PEP 3147 layout (.../__pycache__/name.<tag>.pyc).
        source_path = source_from_cache(bytecode_path)
    except (NotImplementedError, ValueError):
        # Legacy layout: "name.pyc" sits beside "name.py", so dropping the
        # final character yields the source path.
        source_path = bytecode_path[:-1]

    return source_path if _path_isfile(source_path) else bytecode_path
473+
474+
431475
def _verbose_message(message, *args):
432476
"""Print the message to stderr if -v/PYTHONVERBOSE is turned on."""
433477
if sys.flags.verbose:

Misc/NEWS

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,15 @@ Library
9292
- Issue #10924: Fixed mksalt() to use a RNG that is suitable for cryptographic
9393
purpose.
9494

95+
C API
96+
-----
97+
98+
- Issues #15169, #14599: Strip out the C implementation of
99+
imp.source_from_cache() used by PyImport_ExecCodeModuleWithPathnames() and
100+
use the Python code instead. As a result, PyImport_ExecCodeModuleObject() does not
101+
try to infer the source path from the bytecode path as
102+
PyImport_ExecCodeModuleWithPathnames() does.
103+
95104
Extension Modules
96105
-----------------
97106

Python/import.c

Lines changed: 29 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -630,8 +630,6 @@ remove_module(PyObject *name)
630630
"sys.modules failed");
631631
}
632632

633-
static PyObject * get_sourcefile(PyObject *filename);
634-
static PyObject *make_source_pathname(PyObject *pathname);
635633

636634
/* Execute a code object in a module and return the module object
637635
* WITH INCREMENTED REFERENCE COUNT. If an error occurs, name is
@@ -668,18 +666,37 @@ PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname,
668666
if (nameobj == NULL)
669667
return NULL;
670668

671-
if (pathname != NULL) {
672-
pathobj = PyUnicode_DecodeFSDefault(pathname);
673-
if (pathobj == NULL)
674-
goto error;
675-
} else
676-
pathobj = NULL;
677669
if (cpathname != NULL) {
678670
cpathobj = PyUnicode_DecodeFSDefault(cpathname);
679671
if (cpathobj == NULL)
680672
goto error;
681-
} else
673+
}
674+
else
682675
cpathobj = NULL;
676+
677+
if (pathname != NULL) {
678+
pathobj = PyUnicode_DecodeFSDefault(pathname);
679+
if (pathobj == NULL)
680+
goto error;
681+
}
682+
else if (cpathobj != NULL) {
683+
PyInterpreterState *interp = PyThreadState_GET()->interp;
684+
_Py_IDENTIFIER(_get_sourcefile);
685+
686+
if (interp == NULL) {
687+
Py_FatalError("PyImport_ExecCodeModuleWithPathnames: "
688+
"no interpreter!");
689+
}
690+
691+
pathobj = _PyObject_CallMethodObjIdArgs(interp->importlib,
692+
&PyId__get_sourcefile, cpathobj,
693+
NULL);
694+
if (pathobj == NULL)
695+
PyErr_Clear();
696+
}
697+
else
698+
pathobj = NULL;
699+
683700
m = PyImport_ExecCodeModuleObject(nameobj, co, pathobj, cpathobj);
684701
error:
685702
Py_DECREF(nameobj);
@@ -706,18 +723,13 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
706723
PyEval_GetBuiltins()) != 0)
707724
goto error;
708725
}
709-
/* Remember the filename as the __file__ attribute */
710726
if (pathname != NULL) {
711-
v = get_sourcefile(pathname);
712-
if (v == NULL)
713-
PyErr_Clear();
727+
v = pathname;
714728
}
715-
else
716-
v = NULL;
717-
if (v == NULL) {
729+
else {
718730
v = ((PyCodeObject *)co)->co_filename;
719-
Py_INCREF(v);
720731
}
732+
Py_INCREF(v);
721733
if (PyDict_SetItemString(d, "__file__", v) != 0)
722734
PyErr_Clear(); /* Not important enough to report */
723735
Py_DECREF(v);
@@ -752,100 +764,6 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
752764
}
753765

754766

755-
/* Like rightmost_sep, but operate on unicode objects. */
756-
static Py_ssize_t
757-
rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end)
758-
{
759-
Py_ssize_t found, i;
760-
Py_UCS4 c;
761-
for (found = -1, i = start; i < end; i++) {
762-
c = PyUnicode_READ_CHAR(o, i);
763-
if (c == SEP
764-
#ifdef ALTSEP
765-
|| c == ALTSEP
766-
#endif
767-
)
768-
{
769-
found = i;
770-
}
771-
}
772-
return found;
773-
}
774-
775-
776-
/* Given a pathname to a Python byte compiled file, return the path to the
777-
source file, if the path matches the PEP 3147 format. This does not check
778-
for any file existence, however, if the pyc file name does not match PEP
779-
3147 style, NULL is returned. buf must be at least as big as pathname;
780-
the resulting path will always be shorter.
781-
782-
(...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
783-
784-
static PyObject*
785-
make_source_pathname(PyObject *path)
786-
{
787-
Py_ssize_t left, right, dot0, dot1, len;
788-
Py_ssize_t i, j;
789-
PyObject *result;
790-
int kind;
791-
void *data;
792-
793-
len = PyUnicode_GET_LENGTH(path);
794-
if (len > MAXPATHLEN)
795-
return NULL;
796-
797-
/* Look back two slashes from the end. In between these two slashes
798-
must be the string __pycache__ or this is not a PEP 3147 style
799-
path. It's possible for there to be only one slash.
800-
*/
801-
right = rightmost_sep_obj(path, 0, len);
802-
if (right == -1)
803-
return NULL;
804-
left = rightmost_sep_obj(path, 0, right);
805-
if (left == -1)
806-
left = 0;
807-
else
808-
left++;
809-
if (right-left != sizeof(CACHEDIR)-1)
810-
return NULL;
811-
for (i = 0; i < sizeof(CACHEDIR)-1; i++)
812-
if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i])
813-
return NULL;
814-
815-
/* Now verify that the path component to the right of the last slash
816-
has two dots in it.
817-
*/
818-
dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1);
819-
if (dot0 < 0)
820-
return NULL;
821-
dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1);
822-
if (dot1 < 0)
823-
return NULL;
824-
/* Too many dots? */
825-
if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1)
826-
return NULL;
827-
828-
/* This is a PEP 3147 path. Start by copying everything from the
829-
start of pathname up to and including the leftmost slash. Then
830-
copy the file's basename, removing the magic tag and adding a .py
831-
suffix.
832-
*/
833-
result = PyUnicode_New(left + (dot0-right) + 2,
834-
PyUnicode_MAX_CHAR_VALUE(path));
835-
if (!result)
836-
return NULL;
837-
kind = PyUnicode_KIND(result);
838-
data = PyUnicode_DATA(result);
839-
PyUnicode_CopyCharacters(result, 0, path, 0, (i = left));
840-
PyUnicode_CopyCharacters(result, left, path, right+1,
841-
(j = dot0-right));
842-
PyUnicode_WRITE(kind, data, i+j, 'p');
843-
PyUnicode_WRITE(kind, data, i+j+1, 'y');
844-
assert(_PyUnicode_CheckConsistency(result, 1));
845-
return result;
846-
}
847-
848-
849767
static void
850768
update_code_filenames(PyCodeObject *co, PyObject *oldname, PyObject *newname)
851769
{
@@ -911,61 +829,6 @@ imp_fix_co_filename(PyObject *self, PyObject *args)
911829
}
912830

913831

914-
/* Get source file -> unicode or None
915-
* Returns the path to the py file if available, else the given path
916-
*/
917-
static PyObject *
918-
get_sourcefile(PyObject *filename)
919-
{
920-
Py_ssize_t len;
921-
PyObject *py;
922-
struct stat statbuf;
923-
int err;
924-
void *data;
925-
unsigned int kind;
926-
927-
len = PyUnicode_GET_LENGTH(filename);
928-
if (len == 0)
929-
Py_RETURN_NONE;
930-
931-
/* don't match *.pyc or *.pyo? */
932-
data = PyUnicode_DATA(filename);
933-
kind = PyUnicode_KIND(filename);
934-
if (len < 5
935-
|| PyUnicode_READ(kind, data, len-4) != '.'
936-
|| (PyUnicode_READ(kind, data, len-3) != 'p'
937-
&& PyUnicode_READ(kind, data, len-3) != 'P')
938-
|| (PyUnicode_READ(kind, data, len-2) != 'y'
939-
&& PyUnicode_READ(kind, data, len-2) != 'Y'))
940-
goto unchanged;
941-
942-
/* Start by trying to turn PEP 3147 path into source path. If that
943-
* fails, just chop off the trailing character, i.e. legacy pyc path
944-
* to py.
945-
*/
946-
py = make_source_pathname(filename);
947-
if (py == NULL) {
948-
PyErr_Clear();
949-
py = PyUnicode_Substring(filename, 0, len - 1);
950-
}
951-
if (py == NULL)
952-
goto error;
953-
954-
err = _Py_stat(py, &statbuf);
955-
if (err == -2)
956-
goto error;
957-
if (err == 0 && S_ISREG(statbuf.st_mode))
958-
return py;
959-
Py_DECREF(py);
960-
goto unchanged;
961-
962-
error:
963-
PyErr_Clear();
964-
unchanged:
965-
Py_INCREF(filename);
966-
return filename;
967-
}
968-
969832
/* Forward */
970833
static struct _frozen * find_frozen(PyObject *);
971834

0 commit comments

Comments
 (0)