Skip to content

Commit 7242396

Browse files
committed
Implement unloading and reparsing bytecode files
1 parent 3b0a9f5 commit 7242396

18 files changed

Lines changed: 555 additions & 159 deletions

File tree

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
2+
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3+
#
4+
# The Universal Permissive License (UPL), Version 1.0
5+
#
6+
# Subject to the condition set forth below, permission is hereby granted to any
7+
# person obtaining a copy of this software, associated documentation and/or
8+
# data (collectively the "Software"), free of charge and under any and all
9+
# copyright rights in the Software, and any and all patent rights owned or
10+
# freely licensable by each licensor hereunder covering either (i) the
11+
# unmodified Software as contributed to or provided by such licensor, or (ii)
12+
# the Larger Works (as defined below), to deal in both
13+
#
14+
# (a) the Software, and
15+
#
16+
# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
17+
# one is included with the Software each a "Larger Work" to which the Software
18+
# is contributed by such licensors),
19+
#
20+
# without restriction, including without limitation the rights to copy, create
21+
# derivative works of, display, perform, and distribute the Software and make,
22+
# use, sell, offer for sale, import, export, have made, and have sold the
23+
# Software and the Larger Work(s), and to sublicense the foregoing rights on
24+
# either these or other terms.
25+
#
26+
# This license is subject to the following condition:
27+
#
28+
# The above copyright notice and either this complete permission notice or at a
29+
# minimum a reference to the UPL must be included in all copies or substantial
30+
# portions of the Software.
31+
#
32+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38+
# SOFTWARE.
39+
40+
import compileall
41+
import contextlib
42+
import os
43+
import re
44+
import socket
45+
import subprocess
46+
import sys
47+
import tempfile
48+
import time
49+
import unittest
50+
from pathlib import Path
51+
52+
SYNC_PREAMBLE = '''
53+
import sys
54+
import socket
55+
56+
with socket.create_connection(('localhost', int(sys.argv[1]))) as sock:
57+
sock.recv(1)
58+
'''
59+
60+
61+
@contextlib.contextmanager
def pyc_reparse(test_content, expect_success=True, python_options=()):
    """Run ``test_content`` in a child interpreter that pauses before executing it.

    The content is written (prefixed with ``SYNC_PREAMBLE``) to ``example.py`` in a
    temporary directory, compiled to a ``.pyc`` and started with ``-m example``. The
    preamble makes the child connect back to a local socket and block until a byte
    arrives. While the child is blocked, the ``with`` body of the caller runs and may
    tamper with the files; afterwards the child is released and its outcome checked.

    Args:
        test_content: Python source appended after the synchronization preamble.
        expect_success: If true, the child must exit with status 0. Otherwise it must
            exit with status 1 and its output must contain a ``SystemError`` that
            mentions ``--python.KeepBytecodeInMemory``.
        python_options: Extra command line options passed to the child interpreter.

    Yields:
        A ``(source_path, pyc_path)`` tuple of ``pathlib.Path`` objects.

    Raises:
        unittest.SkipTest: When not running on GraalPy's bytecode DSL interpreter.
    """
    if sys.implementation.name != "graalpy" or not __graalpython__.is_bytecode_dsl_interpreter:
        raise unittest.SkipTest("Reparsing tests are only meaningful on bytecode DSL interpreter")
    with tempfile.TemporaryDirectory() as tempdir:
        tempdir_path = Path(tempdir)
        example_module_path = tempdir_path / "example.py"
        with open(example_module_path, "w") as f:
            f.write(SYNC_PREAMBLE)
            f.write(test_content)
        # Change mtime of the example module source to the past a bit to avoid mtime resolution issues
        past = time.time() - 1000
        os.utime(example_module_path, (past, past))
        compileall.compile_file(example_module_path, force=True, quiet=True)
        pyc_files = list((tempdir_path / '__pycache__').glob('*.pyc'))
        assert len(pyc_files) == 1, "Didn't find a .pyc file"
        # The child only ever connects to localhost, so there is no reason to listen on all interfaces
        with socket.create_server(('127.0.0.1', 0)) as server:
            port = server.getsockname()[1]
            env = os.environ.copy()
            env['PYTHONPATH'] = str(tempdir_path)
            proc = subprocess.Popen(
                [sys.executable, *python_options, "-m", "example", str(port)],
                env=env,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            # Using Popen as a context manager closes the pipe and reaps the child on every exit path
            with proc:
                try:
                    server.settimeout(3.0)
                    for _ in range(20):
                        # Only accept() is guarded, so a timeout raised by the caller's
                        # body is not mistaken for a missing connection
                        try:
                            conn = server.accept()[0]
                        except socket.timeout:
                            # Still no connection; give up early if the child already died
                            assert proc.poll() is None, proc.communicate()[0]
                            continue
                        with conn:
                            yield example_module_path, pyc_files[0]
                            # Release the child so it proceeds to run the test content
                            conn.sendall(b"x")
                        break
                    else:
                        assert False, "Timed out waiting for connection"
                    out = proc.communicate()[0]
                finally:
                    # On failure the child may still be blocked or starting up; do not leak it
                    if proc.poll() is None:
                        proc.kill()
            if expect_success:
                assert proc.wait() == 0, out
            else:
                assert proc.wait() == 1 and re.search(r"SystemError:.*--python\.KeepBytecodeInMemory", out), out
105+
106+
107+
TRACING_TEST = '''
108+
import sys
109+
110+
def foo():
111+
a = 42
112+
return a
113+
114+
lines = []
115+
116+
def tracefunc(frame, event, arg):
117+
if event == "line" and frame.f_code is foo.__code__:
118+
lines.append(frame.f_lineno)
119+
return tracefunc
120+
121+
sys.settrace(tracefunc)
122+
assert foo() == 42
123+
firstlineno = foo.__code__.co_firstlineno
124+
assert lines == [firstlineno + 1, firstlineno + 2], "Code didn't trace when expected"
125+
'''
126+
127+
128+
def test_reparse():
    """Baseline: with the .pyc left untouched, the traced child must exit successfully."""
    # NOTE(review): presumably installing a trace function is what forces the
    # interpreter to reparse the bytecode from the .pyc -- confirm
    with pyc_reparse(TRACING_TEST):
        pass
131+
132+
133+
def test_reparse_deleted():
    """Delete the .pyc while the child is paused; the child is expected to fail."""
    with pyc_reparse(TRACING_TEST, expect_success=False) as (_source_path, cached_pyc):
        cached_pyc.unlink()
136+
137+
138+
def test_reparse_truncated():
    """Empty the .pyc while the child is paused; the child is expected to fail."""
    with pyc_reparse(TRACING_TEST, expect_success=False) as (_source_path, cached_pyc):
        with open(cached_pyc, 'r+') as handle:
            # A freshly opened handle sits at offset 0, so this cuts the file to zero length
            handle.truncate()
142+
143+
144+
def test_reparse_truncated_part():
    """Cut the .pyc down to its first 30 bytes while the child is paused; the child is expected to fail."""
    with pyc_reparse(TRACING_TEST, expect_success=False) as (_source_path, cached_pyc):
        with open(cached_pyc, 'r+') as handle:
            handle.truncate(30)
148+
149+
150+
def test_reparse_modified():
    """Replace source and .pyc with a recompiled, altered module while the child is paused.

    The child is expected to fail even though a .pyc exists again at the same path.
    """
    with pyc_reparse(TRACING_TEST, expect_success=False) as (source_path, cached_pyc):
        cached_pyc.unlink()
        altered_test = TRACING_TEST.replace('a = 42', 'a = 32')
        with open(source_path, 'w') as out:
            out.write(SYNC_PREAMBLE)
            out.write(altered_test)
        compileall.compile_file(source_path, force=True, quiet=True)
        # Make sure the scenario really is "changed .pyc" rather than "missing .pyc"
        assert cached_pyc.exists()
158+
159+
160+
def test_reparse_disabled():
    """With --python.KeepBytecodeInMemory, deleting the .pyc must not make the child fail."""
    keep_in_memory = ["--python.KeepBytecodeInMemory"]
    with pyc_reparse(TRACING_TEST, python_options=keep_in_memory, expect_success=True) as (_source_path, cached_pyc):
        cached_pyc.unlink()
164+
165+
166+
CO_CODE_TEST = '''
167+
def foo():
168+
a = 42
169+
return a
170+
171+
assert foo() == 42
172+
foo.__code__ = foo.__code__.replace(co_code=foo.__code__.co_code)
173+
assert foo() == 42
174+
'''
175+
176+
177+
def test_reparse_co_code():
    """Baseline for the co_code scenario: with the .pyc untouched, the child must exit successfully."""
    # NOTE(review): presumably reading ``co_code`` needs the serialized bytecode
    # again and therefore triggers a reparse -- confirm
    with pyc_reparse(CO_CODE_TEST):
        pass
180+
181+
182+
def test_reparse_co_code_deleted():
    """Delete the .pyc while the child is paused; the co_code scenario is expected to fail."""
    with pyc_reparse(CO_CODE_TEST, expect_success=False) as (_source_path, cached_pyc):
        cached_pyc.unlink()

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/PythonLanguage.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,16 @@ public final class PythonLanguage extends TruffleLanguage<PythonContext> {
207207
public static final int GRAALVM_MICRO;
208208
public static final String DEV_TAG;
209209

210+
/* Magic number used to mark pyc files; MAGIC_NUMBER_BYTES holds its low 16 bits (little-endian) followed by "\r\n" */
211+
public static final int MAGIC_NUMBER = 21000 + Compiler.BYTECODE_VERSION * 10;
212+
public static final byte[] MAGIC_NUMBER_BYTES = new byte[4];
213+
214+
static {
215+
PythonUtils.ARRAY_ACCESSOR_LE.putInt(PythonLanguage.MAGIC_NUMBER_BYTES, 0, PythonLanguage.MAGIC_NUMBER);
216+
PythonLanguage.MAGIC_NUMBER_BYTES[2] = '\r';
217+
PythonLanguage.MAGIC_NUMBER_BYTES[3] = '\n';
218+
}
219+
210220
/**
211221
* The version generated at build time is stored in an ASCII-compatible way. At build time, we
212222
* added the ordinal value of some base character (in this case {@code '!'}) to ensure that we

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,17 @@
4444
import static com.oracle.graal.python.PythonLanguage.GRAALVM_MICRO;
4545
import static com.oracle.graal.python.PythonLanguage.GRAALVM_MINOR;
4646
import static com.oracle.graal.python.PythonLanguage.J_GRAALPYTHON_ID;
47+
import static com.oracle.graal.python.PythonLanguage.MAGIC_NUMBER;
48+
import static com.oracle.graal.python.PythonLanguage.MAGIC_NUMBER_BYTES;
4749
import static com.oracle.graal.python.PythonLanguage.RELEASE_LEVEL;
4850
import static com.oracle.graal.python.PythonLanguage.RELEASE_LEVEL_FINAL;
4951
import static com.oracle.graal.python.nodes.BuiltinNames.J_EXTEND;
5052
import static com.oracle.graal.python.nodes.BuiltinNames.J___GRAALPYTHON__;
53+
import static com.oracle.graal.python.nodes.BuiltinNames.T_FORMAT;
54+
import static com.oracle.graal.python.nodes.BuiltinNames.T_MTIME;
5155
import static com.oracle.graal.python.nodes.BuiltinNames.T_SHA3;
56+
import static com.oracle.graal.python.nodes.BuiltinNames.T_SIZE;
57+
import static com.oracle.graal.python.nodes.BuiltinNames.T__IMP;
5258
import static com.oracle.graal.python.nodes.BuiltinNames.T___GRAALPYTHON__;
5359
import static com.oracle.graal.python.nodes.BuiltinNames.T___MAIN__;
5460
import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___NAME__;
@@ -63,6 +69,7 @@
6369
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ImportError;
6470
import static com.oracle.graal.python.runtime.exception.PythonErrorType.SystemError;
6571
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
72+
import static com.oracle.graal.python.util.PythonUtils.ARRAY_ACCESSOR_LE;
6673
import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING;
6774
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
6875
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
@@ -128,8 +135,10 @@
128135
import com.oracle.graal.python.builtins.objects.str.StringUtils;
129136
import com.oracle.graal.python.builtins.objects.tuple.PTuple;
130137
import com.oracle.graal.python.lib.OsEnvironGetNode;
138+
import com.oracle.graal.python.lib.PyNumberLongNode;
131139
import com.oracle.graal.python.lib.PyObjectCallMethodObjArgs;
132140
import com.oracle.graal.python.lib.PyObjectGetItem;
141+
import com.oracle.graal.python.lib.PyObjectStrAsTruffleStringNode;
133142
import com.oracle.graal.python.nodes.ErrorMessages;
134143
import com.oracle.graal.python.nodes.PConstructAndRaiseNode;
135144
import com.oracle.graal.python.nodes.PRaiseNode;
@@ -150,6 +159,9 @@
150159
import com.oracle.graal.python.nodes.object.GetClassNode;
151160
import com.oracle.graal.python.nodes.object.GetOrCreateDictNode;
152161
import com.oracle.graal.python.nodes.statement.AbstractImportNode;
162+
import com.oracle.graal.python.nodes.util.CannotCastException;
163+
import com.oracle.graal.python.nodes.util.CastToJavaLongLossyNode;
164+
import com.oracle.graal.python.nodes.util.CastToJavaStringNode;
153165
import com.oracle.graal.python.nodes.util.CastToTruffleStringNode;
154166
import com.oracle.graal.python.nodes.util.ToNativePrimitiveStorageNode;
155167
import com.oracle.graal.python.runtime.ExecutionContext;
@@ -457,6 +469,102 @@ private static Object[] convertToObjectArray(TruffleString[] arr) {
457469
return objectArr;
458470
}
459471

472+
@Builtin(name = "load_bytecode_file", minNumOfPositionalArgs = 3)
473+
@GenerateNodeFactory
474+
abstract static class LoadBytecodeFileNode extends PythonBuiltinNode {
475+
476+
static final TruffleString T_CHECK_HASH_BASED_PYCS = tsLiteral("check_hash_based_pycs");
477+
static final TruffleString T__BOOTSTRAP = tsLiteral("_bootstrap");
478+
public static final TruffleString T__VERBOSE_MESSAGE = tsLiteral("_verbose_message");
479+
public static final TruffleString MESSAGE = tsLiteral("'{} matches {}'");
480+
481+
@Specialization
482+
static Object doit(VirtualFrame frame, Object bytecodePath, Object sourcePath, Object statResult,
483+
@Bind Node inliningTarget,
484+
@Bind PythonContext context,
485+
@Cached("createFor($node)") BoundaryCallData boundaryCallData) {
486+
Object savedState = BoundaryCallContext.enter(frame, boundaryCallData);
487+
try {
488+
return doLoadBytecodeFile(bytecodePath, sourcePath, statResult, inliningTarget, context);
489+
} finally {
490+
BoundaryCallContext.exit(frame, boundaryCallData, savedState);
491+
}
492+
}
493+
494+
@TruffleBoundary
495+
private static Object doLoadBytecodeFile(Object bytecodePath, Object sourcePath, Object statResult, Node inliningTarget, PythonContext context) {
496+
/*
497+
* This builtin is used to load a bytecode file (.pyc) in a way that we can trust that
498+
* it really comes from that file. It enables unloading serialized DSL bytecode from
499+
* memory, so that it can be reparsed later from the same file. It also provides the
500+
* cache key for CallTarget cache in multicontext mode.
501+
*/
502+
try {
503+
// get_data
504+
TruffleString strBytecodePath = PyObjectStrAsTruffleStringNode.executeUncached(bytecodePath);
505+
TruffleFile bytecodeFile = context.getEnv().getPublicTruffleFile(strBytecodePath.toJavaStringUncached());
506+
byte[] bytes = bytecodeFile.readAllBytes();
507+
// _classify_pyc
508+
if (bytes.length < 16 || !Arrays.equals(bytes, 0, 4, MAGIC_NUMBER_BYTES, 0, 4)) {
509+
return PNone.NONE;
510+
}
511+
int flags = ARRAY_ACCESSOR_LE.getInt(bytes, 4);
512+
if ((flags & ~0b11) != 0) {
513+
return PNone.NONE;
514+
}
515+
long cacheKey;
516+
boolean hashBased = (flags & 0b1) != 0;
517+
// Note that mtime-based validation is the default, hashing is opt-in
518+
if (hashBased) {
519+
boolean checkSource = (flags & 0b10) != 0;
520+
cacheKey = ARRAY_ACCESSOR_LE.getLong(bytes, 16);
521+
String checkHashBasedPycs = "";
522+
try {
523+
checkHashBasedPycs = CastToJavaStringNode.getUncached().execute(context.lookupBuiltinModule(T__IMP).getAttribute(T_CHECK_HASH_BASED_PYCS));
524+
} catch (CannotCastException e) {
525+
// ignore
526+
}
527+
if (!checkHashBasedPycs.equals("never") && (checkSource || checkHashBasedPycs.equals("always"))) {
528+
// get_data
529+
TruffleString strSourcePath = PyObjectStrAsTruffleStringNode.executeUncached(sourcePath);
530+
TruffleFile sourceFile = context.getEnv().getPublicTruffleFile(strSourcePath.toJavaStringUncached());
531+
byte[] sourceBytes = sourceFile.readAllBytes();
532+
long sourceHash = ARRAY_ACCESSOR_LE.getLong(ImpModuleBuiltins.SourceHashNode.hashSource(MAGIC_NUMBER, sourceBytes, sourceBytes.length), 0);
533+
// _validate_hash_pyc
534+
if (cacheKey != sourceHash) {
535+
return PNone.NONE;
536+
}
537+
}
538+
} else {
539+
// _validate_timestamp_pyc
540+
Object mTimeObj = PyNumberLongNode.executeUncached(PyObjectGetItem.executeUncached(statResult, T_MTIME));
541+
long mTime = CastToJavaLongLossyNode.executeUncached(mTimeObj);
542+
if (Integer.toUnsignedLong(ARRAY_ACCESSOR_LE.getInt(bytes, 8)) != mTime) {
543+
return PNone.NONE;
544+
}
545+
Object sizeObj = PyObjectGetItem.executeUncached(statResult, T_SIZE);
546+
if (sizeObj != PNone.NONE) {
547+
long size = CastToJavaLongLossyNode.executeUncached(sizeObj);
548+
if (Integer.toUnsignedLong(ARRAY_ACCESSOR_LE.getInt(bytes, 12)) != size) {
549+
return PNone.NONE;
550+
}
551+
}
552+
cacheKey = ARRAY_ACCESSOR_LE.getLong(bytes, 8);
553+
}
554+
if (context.getOption(PythonOptions.VerboseFlag)) {
555+
Object message = PyObjectCallMethodObjArgs.executeUncached(MESSAGE, T_FORMAT, bytecodePath, sourcePath);
556+
CallNode.executeUncached(context.lookupBuiltinModule(T__BOOTSTRAP).getAttribute(T__VERBOSE_MESSAGE), message);
557+
}
558+
return MarshalModuleBuiltins.fromBytecodeFile(context, bytecodeFile, bytes, 16, bytes.length - 16, cacheKey);
559+
} catch (MarshalModuleBuiltins.Marshal.MarshalError me) {
560+
throw PRaiseNode.raiseStatic(inliningTarget, me.type, me.message, me.arguments);
561+
} catch (IOException | SecurityException | UnsupportedOperationException | IllegalArgumentException e) {
562+
LOGGER.fine(() -> PythonUtils.formatJString("Failed to load bytecode file using load_bytecode_file: %s", e));
563+
return PNone.NONE;
564+
}
565+
}
566+
}
567+
460568
@Builtin(name = "read_file", minNumOfPositionalArgs = 1)
461569
@GenerateNodeFactory
462570
public abstract static class ReadFileNode extends PythonUnaryBuiltinNode {

0 commit comments

Comments
 (0)