Skip to content

Commit 170d8fe

Browse files
committed
CPython-compatible marshal format
Unify marshal to a single CPython-compatible format. No separate "cpython_marshal" reader — one format for frozen modules, .pyc files, and the Python-level marshal module.

## Encoding changes

- ComparisonOperator: `(cmp_index << 5) | mask` matching COMPARE_OP
- MakeFunctionFlag: bit-position matching SET_FUNCTION_ATTRIBUTE
- Exception table varint: big-endian (matching Python/assemble.c)
- Linetable varint: little-endian (unchanged)
- Integer: TYPE_INT (i32) / TYPE_LONG (base-2^15 digits)
- Code objects: CPython field order (argcount, posonlyargcount, ..., co_localsplusnames, co_localspluskinds, ..., co_exceptiontable)

## Marshal module features

- FLAG_REF / TYPE_REF for object deduplication (version >= 3)
- allow_code keyword argument on dumps/loads/dump/load
- Subclass rejection (int/float/complex/tuple/list/dict/set/frozenset)
- Slice serialization (version >= 5)
- Buffer protocol fallback for memoryview/array
- Recursion depth limit (2000) for both reads and writes
- Streaming load (reads one object, seeks file position)
- TYPE_INT64, TYPE_FLOAT (text), TYPE_COMPLEX (text) for compat

## Code object roundtrip

serialize_code writes co_localsplusnames/co_localspluskinds from split varnames/cellvars/freevars. deserialize_code splits them back. Cell variable DEREF indices are translated between flat (wire) and cell-relative (internal) representations in both directions.

## eval_ord

Replace bitwise trick with match for new ComparisonOperator values.

## test_marshal

21 -> 3 expected failures. Remaining: test_bad_reader (IO layer), test_deterministic_sets (PYTHONHASHSEED), testIntern (string interning).
1 parent 2ef77f8 commit 170d8fe

File tree

8 files changed

+1450
-329
lines changed

8 files changed

+1450
-329
lines changed

Lib/test/test_marshal.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ def test_ints(self):
4949
self.helper(expected)
5050
n = n >> 1
5151

52-
@unittest.expectedFailure # TODO: RUSTPYTHON
5352
def test_int64(self):
5453
# Simulate int marshaling with TYPE_INT64.
5554
maxint64 = (1 << 63) - 1
@@ -141,7 +140,6 @@ def test_different_filenames(self):
141140
self.assertEqual(co1.co_filename, "f1")
142141
self.assertEqual(co2.co_filename, "f2")
143142

144-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument allow_code
145143
def test_no_allow_code(self):
146144
data = {'a': [({0},)]}
147145
dump = marshal.dumps(data, allow_code=False)
@@ -234,14 +232,12 @@ def test_bytearray(self):
234232
new = marshal.loads(marshal.dumps(b))
235233
self.assertEqual(type(new), bytes)
236234

237-
@unittest.expectedFailure # TODO: RUSTPYTHON
238235
def test_memoryview(self):
239236
b = memoryview(b"abc")
240237
self.helper(b)
241238
new = marshal.loads(marshal.dumps(b))
242239
self.assertEqual(type(new), bytes)
243240

244-
@unittest.expectedFailure # TODO: RUSTPYTHON
245241
def test_array(self):
246242
a = array.array('B', b"abc")
247243
new = marshal.loads(marshal.dumps(a))
@@ -274,7 +270,6 @@ def test_fuzz(self):
274270
except Exception:
275271
pass
276272

277-
@unittest.expectedFailure # TODO: RUSTPYTHON
278273
def test_loads_recursion(self):
279274
def run_tests(N, check):
280275
# (((...None...),),)
@@ -295,7 +290,6 @@ def check(s):
295290
run_tests(2**20, check)
296291

297292
@unittest.skipIf(support.is_android, "TODO: RUSTPYTHON; segfault")
298-
@unittest.expectedFailure # TODO: RUSTPYTHON; segfault
299293
def test_recursion_limit(self):
300294
# Create a deeply nested structure.
301295
head = last = []
@@ -324,7 +318,6 @@ def test_recursion_limit(self):
324318
last.append([0])
325319
self.assertRaises(ValueError, marshal.dumps, head)
326320

327-
@unittest.expectedFailure # TODO: RUSTPYTHON
328321
def test_exact_type_match(self):
329322
# Former bug:
330323
# >>> class Int(int): pass
@@ -348,7 +341,6 @@ def test_invalid_longs(self):
348341
invalid_string = b'l\x02\x00\x00\x00\x00\x00\x00\x00'
349342
self.assertRaises(ValueError, marshal.loads, invalid_string)
350343

351-
@unittest.expectedFailure # TODO: RUSTPYTHON
352344
def test_multiple_dumps_and_loads(self):
353345
# Issue 12291: marshal.load() should be callable multiple times
354346
# with interleaved data written by non-marshal code
@@ -532,66 +524,56 @@ def helper3(self, rsample, recursive=False, simple=False):
532524
else:
533525
self.assertGreaterEqual(len(s2), len(s3))
534526

535-
@unittest.expectedFailure # TODO: RUSTPYTHON
536527
def testInt(self):
537528
intobj = 123321
538529
self.helper(intobj)
539530
self.helper3(intobj, simple=True)
540531

541-
@unittest.expectedFailure # TODO: RUSTPYTHON
542532
def testFloat(self):
543533
floatobj = 1.2345
544534
self.helper(floatobj)
545535
self.helper3(floatobj)
546536

547-
@unittest.expectedFailure # TODO: RUSTPYTHON
548537
def testStr(self):
549538
strobj = "abcde"*3
550539
self.helper(strobj)
551540
self.helper3(strobj)
552541

553-
@unittest.expectedFailure # TODO: RUSTPYTHON
554542
def testBytes(self):
555543
bytesobj = b"abcde"*3
556544
self.helper(bytesobj)
557545
self.helper3(bytesobj)
558546

559-
@unittest.expectedFailure # TODO: RUSTPYTHON
560547
def testList(self):
561548
for obj in self.keys:
562549
listobj = [obj, obj]
563550
self.helper(listobj)
564551
self.helper3(listobj)
565552

566-
@unittest.expectedFailure # TODO: RUSTPYTHON
567553
def testTuple(self):
568554
for obj in self.keys:
569555
tupleobj = (obj, obj)
570556
self.helper(tupleobj)
571557
self.helper3(tupleobj)
572558

573-
@unittest.expectedFailure # TODO: RUSTPYTHON
574559
def testSet(self):
575560
for obj in self.keys:
576561
setobj = {(obj, 1), (obj, 2)}
577562
self.helper(setobj)
578563
self.helper3(setobj)
579564

580-
@unittest.expectedFailure # TODO: RUSTPYTHON
581565
def testFrozenSet(self):
582566
for obj in self.keys:
583567
frozensetobj = frozenset({(obj, 1), (obj, 2)})
584568
self.helper(frozensetobj)
585569
self.helper3(frozensetobj)
586570

587-
@unittest.expectedFailure # TODO: RUSTPYTHON
588571
def testDict(self):
589572
for obj in self.keys:
590573
dictobj = {"hello": obj, "goodbye": obj, obj: "hello"}
591574
self.helper(dictobj)
592575
self.helper3(dictobj)
593576

594-
@unittest.expectedFailure # TODO: RUSTPYTHON
595577
def testModule(self):
596578
with open(__file__, "rb") as f:
597579
code = f.read()
@@ -651,7 +633,6 @@ def testNoIntern(self):
651633
self.assertNotEqual(id(s2), id(s))
652634

653635
class SliceTestCase(unittest.TestCase, HelperMixin):
654-
@unittest.expectedFailure # TODO: RUSTPYTHON; NotImplementedError: TODO: not implemented yet or marshal unsupported type
655636
def test_slice(self):
656637
for obj in (
657638
slice(None), slice(1), slice(1, 2), slice(1, 2, 3),

crates/compiler-core/src/bytecode.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
use crate::{
55
marshal::MarshalError,
6-
varint::{read_varint, read_varint_with_start, write_varint, write_varint_with_start},
6+
varint::{read_varint, read_varint_with_start, write_varint_be, write_varint_with_start},
77
{OneIndexed, SourceLocation},
88
};
99
use alloc::{borrow::ToOwned, boxed::Box, collections::BTreeSet, fmt, string::String, vec::Vec};
@@ -71,9 +71,9 @@ pub fn encode_exception_table(entries: &[ExceptionTableEntry]) -> alloc::boxed::
7171
let depth_lasti = ((entry.depth as u32) << 1) | (entry.push_lasti as u32);
7272

7373
write_varint_with_start(&mut data, entry.start);
74-
write_varint(&mut data, size);
75-
write_varint(&mut data, entry.target);
76-
write_varint(&mut data, depth_lasti);
74+
write_varint_be(&mut data, size);
75+
write_varint_be(&mut data, entry.target);
76+
write_varint_be(&mut data, depth_lasti);
7777
}
7878
data.into_boxed_slice()
7979
}
@@ -204,7 +204,7 @@ impl PyCodeLocationInfoKind {
204204
}
205205
}
206206

207-
pub trait Constant: Sized {
207+
pub trait Constant: Sized + Clone {
208208
type Name: AsRef<str>;
209209

210210
/// Transforms the given Constant to a BorrowedConstant
@@ -559,6 +559,14 @@ impl Deref for CodeUnits {
559559
}
560560

561561
impl CodeUnits {
562+
/// Disable adaptive specialization by setting all counters to unreachable.
563+
/// Used for CPython-compiled bytecode where specialization may not be safe.
564+
pub fn disable_specialization(&self) {
565+
for counter in self.adaptive_counters.iter() {
566+
counter.store(UNREACHABLE_BACKOFF, Ordering::Relaxed);
567+
}
568+
}
569+
562570
/// Replace the opcode at `index` in-place without changing the arg byte.
563571
/// Uses atomic Release store to ensure prior cache writes are visible
564572
/// to threads that subsequently read the new opcode with Acquire.

crates/compiler-core/src/bytecode/oparg.rs

Lines changed: 75 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -382,15 +382,19 @@ oparg_enum!(
382382
);
383383

384384
bitflagset::bitflag! {
385+
/// `SET_FUNCTION_ATTRIBUTE` flags.
386+
/// Bitmask: Defaults=0x01, KwOnly=0x02, Annotations=0x04,
387+
/// Closure=0x08, TypeParams=0x10, Annotate=0x20.
388+
/// Stored as bit position (0-5) by `bitflag!` macro.
385389
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
386390
#[repr(u8)]
387391
pub enum MakeFunctionFlag {
388-
Closure = 0,
389-
Annotations = 1,
390-
KwOnlyDefaults = 2,
391-
Defaults = 3,
392+
Defaults = 0,
393+
KwOnlyDefaults = 1,
394+
Annotations = 2,
395+
Closure = 3,
392396
TypeParams = 4,
393-
/// PEP 649: __annotate__ function closure (instead of __annotations__ dict)
397+
/// PEP 649
394398
Annotate = 5,
395399
}
396400
}
@@ -403,33 +407,85 @@ bitflagset::bitflagset! {
403407
impl TryFrom<u32> for MakeFunctionFlag {
404408
type Error = MarshalError;
405409

410+
/// Decode from bitmask (0x01, 0x02, ...) or bit position (0-5).
406411
fn try_from(value: u32) -> Result<Self, Self::Error> {
407-
Self::try_from(value as u8).map_err(|_| MarshalError::InvalidBytecode)
412+
if let Ok(f) = Self::try_from(value as u8) {
413+
return Ok(f);
414+
}
415+
if value != 0 && value.is_power_of_two() {
416+
let bit_pos = value.trailing_zeros() as u8;
417+
return Self::try_from(bit_pos).map_err(|_| MarshalError::InvalidBytecode);
418+
}
419+
Err(MarshalError::InvalidBytecode)
408420
}
409421
}
410422

411423
impl From<MakeFunctionFlag> for u32 {
412424
fn from(flag: MakeFunctionFlag) -> Self {
425+
// bit position encoding for internal use
413426
flag as u32
414427
}
415428
}
416429

417430
impl OpArgType for MakeFunctionFlag {}
418431

419-
oparg_enum!(
420-
/// The possible comparison operators.
421-
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
422-
pub enum ComparisonOperator {
423-
// be intentional with bits so that we can do eval_ord with just a bitwise and
424-
// bits: | Equal | Greater | Less |
425-
Less = 0b001,
426-
Greater = 0b010,
427-
NotEqual = 0b011,
428-
Equal = 0b100,
429-
LessOrEqual = 0b101,
430-
GreaterOrEqual = 0b110,
432+
/// `COMPARE_OP` arg is `(cmp_index << 5) | mask`. Only the upper
433+
/// 3 bits identify the comparison; the lower 5 bits are an inline
434+
/// cache mask for adaptive specialization.
435+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
436+
pub enum ComparisonOperator {
437+
Less,
438+
LessOrEqual,
439+
Equal,
440+
NotEqual,
441+
Greater,
442+
GreaterOrEqual,
443+
}
444+
445+
impl TryFrom<u8> for ComparisonOperator {
446+
type Error = MarshalError;
447+
fn try_from(value: u8) -> Result<Self, Self::Error> {
448+
Self::try_from(value as u32)
431449
}
432-
);
450+
}
451+
452+
impl TryFrom<u32> for ComparisonOperator {
453+
type Error = MarshalError;
454+
/// Decode from `COMPARE_OP` arg: `(cmp_index << 5) | mask`.
455+
fn try_from(value: u32) -> Result<Self, Self::Error> {
456+
match value >> 5 {
457+
0 => Ok(Self::Less),
458+
1 => Ok(Self::LessOrEqual),
459+
2 => Ok(Self::Equal),
460+
3 => Ok(Self::NotEqual),
461+
4 => Ok(Self::Greater),
462+
5 => Ok(Self::GreaterOrEqual),
463+
_ => Err(MarshalError::InvalidBytecode),
464+
}
465+
}
466+
}
467+
468+
impl From<ComparisonOperator> for u8 {
469+
/// Encode as `cmp_index << 5` (mask bits zero).
470+
fn from(value: ComparisonOperator) -> Self {
471+
match value {
472+
ComparisonOperator::Less => 0,
473+
ComparisonOperator::LessOrEqual => 1 << 5,
474+
ComparisonOperator::Equal => 2 << 5,
475+
ComparisonOperator::NotEqual => 3 << 5,
476+
ComparisonOperator::Greater => 4 << 5,
477+
ComparisonOperator::GreaterOrEqual => 5 << 5,
478+
}
479+
}
480+
}
481+
482+
impl From<ComparisonOperator> for u32 {
483+
fn from(value: ComparisonOperator) -> Self {
484+
Self::from(u8::from(value))
485+
}
486+
}
487+
488+
impl OpArgType for ComparisonOperator {}
433489

434490
oparg_enum!(
435491
/// The possible Binary operators

0 commit comments

Comments
 (0)