def find_rustpython():
    """Locate the RustPython binary, allowing release builds only.

    Honors the RUSTPYTHON environment variable when set, rejecting paths
    that point at a debug build; otherwise falls back to the default
    release-build location under the project root.  Returns None when no
    usable binary is found.
    """
    env_path = os.environ.get("RUSTPYTHON")
    if env_path is not None:
        # Refuse debug builds: bytecode comparison should run against a
        # release binary.
        forbidden_suffix = os.path.join("target", "debug", "rustpython")
        if os.path.normpath(env_path).endswith(forbidden_suffix):
            raise ValueError(
                "RUSTPYTHON must point to a release binary, not target/debug/rustpython"
            )
        return env_path

    candidate = os.path.join(PROJECT_ROOT, "target", "release", "rustpython")
    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
        return candidate
    return None
def collect_targets(lib_dir, pattern=None):
    """Walk *lib_dir* and return [(relpath, abspath)] for every .py file.

    Hidden directories and __pycache__ are pruned from the walk, and both
    directories and files are visited in sorted order so the result is
    deterministic.  When *pattern* is given, only relative paths matching
    the glob are kept.
    """
    collected = []
    for root, subdirs, filenames in os.walk(lib_dir):
        # Prune in place so os.walk never descends into skipped dirs.
        subdirs[:] = sorted(
            name
            for name in subdirs
            if name != "__pycache__" and not name.startswith(".")
        )
        for filename in sorted(filenames):
            if not filename.endswith(".py"):
                continue
            full = os.path.join(root, filename)
            rel = os.path.relpath(full, lib_dir)
            if not pattern or fnmatch.fnmatch(rel, pattern):
                collected.append((rel, full))
    return collected
def compare_insts(cp_insts, rp_insts):
    """Compare two instruction lists. Returns list of (index, cp, rp) diffs.

    The shorter list is padded with None so that extra trailing
    instructions on either side show up as differences.
    """
    limit = max(len(cp_insts), len(rp_insts))
    padded_cp = list(cp_insts) + [None] * (limit - len(cp_insts))
    padded_rp = list(rp_insts) + [None] * (limit - len(rp_insts))
    return [
        (idx, left, right)
        for idx, (left, right) in enumerate(zip(padded_cp, padded_rp))
        if left != right
    ]
Returns [(code_path, diffs)].""" + results = [] + name = cp_code["name"] + full = (path + "/" + name) if path else name + + diffs = compare_insts(cp_code.get("insts", []), rp_code.get("insts", [])) + if diffs: + results.append((full, diffs)) + + cp_ch = cp_code.get("children", []) + rp_ch = rp_code.get("children", []) + + cp_by_name = defaultdict(list) + rp_by_name = defaultdict(list) + for c in cp_ch: + cp_by_name[c["name"]].append(c) + for c in rp_ch: + rp_by_name[c["name"]].append(c) + + all_names = list(dict.fromkeys(c["name"] for c in cp_ch)) + for c in rp_ch: + if c["name"] not in cp_by_name: + all_names.append(c["name"]) + + for name in all_names: + cp_list = cp_by_name.get(name, []) + rp_list = rp_by_name.get(name, []) + for i in range(max(len(cp_list), len(rp_list))): + if i < len(cp_list) and i < len(rp_list): + results.extend(compare_code(cp_list[i], rp_list[i], full)) + elif i < len(cp_list): + results.append((full + "/" + name, [(-1, "extra in CPython", None)])) + else: + results.append((full + "/" + name, [(-1, None, "extra in RustPython")])) + + return results + + +def compare_code_summary(cp_code, rp_code): + """Recursively compare code objects and return summary counts.""" + diff_code_objects = 0 + diff_instructions = compare_insts_count( + cp_code.get("insts", []), rp_code.get("insts", []) + ) + if diff_instructions: + diff_code_objects += 1 + + cp_ch = cp_code.get("children", []) + rp_ch = rp_code.get("children", []) + cp_by_name = defaultdict(list) + rp_by_name = defaultdict(list) + for child in cp_ch: + cp_by_name[child["name"]].append(child) + for child in rp_ch: + rp_by_name[child["name"]].append(child) + + all_names = list(dict.fromkeys(child["name"] for child in cp_ch)) + for child in rp_ch: + if child["name"] not in cp_by_name: + all_names.append(child["name"]) + + for name in all_names: + cp_list = cp_by_name.get(name, []) + rp_list = rp_by_name.get(name, []) + for i in range(max(len(cp_list), len(rp_list))): + if i < len(cp_list) and i < 
def compare_insts_count(cp_insts, rp_insts):
    """Count mismatched instruction slots without storing the full diff."""
    span = max(len(cp_insts), len(rp_insts))

    def slot(seq, i):
        # Missing slots compare as None so length differences count too.
        return seq[i] if i < len(seq) else None

    return sum(1 for i in range(span) if slot(cp_insts, i) != slot(rp_insts, i))
ValueError as exc: + print("Error: %s" % exc, file=sys.stderr) + sys.exit(1) + if not rp_bin: + print("Error: RustPython binary not found.", file=sys.stderr) + print(" Build with: cargo build --release", file=sys.stderr) + print(" Or set RUSTPYTHON=/path/to/binary", file=sys.stderr) + sys.exit(1) + if not os.path.isfile(DIS_DUMP): + print("Error: disassembler helper not found: %s" % DIS_DUMP, file=sys.stderr) + print(" Expected scripts/dis_dump.py from origin/bytecode-parity", file=sys.stderr) + sys.exit(1) + + targets = collect_targets(args.lib_dir, args.filter) + sample_seed = None + if args.sample is not None: + if args.sample < 1: + print("Error: --sample must be >= 1", file=sys.stderr) + sys.exit(1) + sample_seed = ( + args.seed + if args.seed is not None + else random.SystemRandom().randrange(2**32) + ) + rng = random.Random(sample_seed) + sample_size = min(args.sample, len(targets)) + targets = sorted(rng.sample(targets, sample_size), key=lambda item: item[0]) + if not targets: + print("Error: no Python files matched", file=sys.stderr) + sys.exit(1) + + report_path = args.output + log = lambda *a, **kw: print(*a, file=sys.stderr, **kw) + + log("Report: %s" % os.path.relpath(report_path, PROJECT_ROOT)) + log("Targets: %d file(s)" % len(targets)) + num_workers = args.jobs if args.jobs else os.cpu_count() or 4 + log("Workers: %d per interpreter" % num_workers) + sys.stderr.write("Dumping bytecode ") + sys.stderr.flush() + + cp_procs = start_dump(sys.executable, targets, args.lib_dir, num_workers) + rp_procs = start_dump(rp_bin, targets, args.lib_dir, num_workers) + cp_data = finish_dump(cp_procs) + rp_data = finish_dump(rp_procs) + sys.stderr.write("\n") + if not cp_data: + log("Error: CPython dump produced no data") + sys.exit(1) + + # Phase 2: Compare + all_files = sorted(set(cp_data) | set(rp_data)) + + match = 0 + differ = 0 + cp_err = 0 + rp_err = 0 + both_err = 0 + rp_miss = 0 + + diff_files = [] + rp_error_files = [] + diff_summaries = [] + 
need_detailed_diffs = args.detail + + for fp in all_files: + cp = cp_data.get(fp) + rp = rp_data.get(fp) + + if rp is None: + rp_miss += 1 + continue + + cp_ok = cp and cp.get("status") == "ok" + rp_ok = rp and rp.get("status") == "ok" + + if not cp_ok and not rp_ok: + both_err += 1 + elif not cp_ok: + cp_err += 1 + elif not rp_ok: + rp_err += 1 + rp_error_files.append((fp, rp.get("error", "?"))) + else: + if need_detailed_diffs: + code_diffs = compare_code(cp["code"], rp["code"]) + if code_diffs: + differ += 1 + diff_files.append((fp, code_diffs)) + else: + match += 1 + else: + diff_code_objects, diff_instructions = compare_code_summary( + cp["code"], rp["code"] + ) + if diff_code_objects: + differ += 1 + diff_summaries.append( + { + "path": fp, + "diff_code_objects": diff_code_objects, + "diff_instructions": diff_instructions, + } + ) + else: + match += 1 + + total = match + differ + cp_err + rp_err + both_err + rp_miss + + def pct(n): + return "%.1f%%" % (100.0 * n / total) if total else "0%" + + # Phase 3: Write report to file + with open(report_path, "w") as out: + p = lambda *a: print(*a, file=out) + + p("CPython: %s (%s)" % (sys.executable, sys.version.split()[0])) + p("RustPython: %s" % rp_bin) + p("Lib: %s" % args.lib_dir) + if sample_seed is not None: + p("Sample: %s file(s), seed=%s" % (len(targets), sample_seed)) + p() + p("=" * 60) + p(" Bytecode Comparison Report") + p("=" * 60) + p() + p(" Total files: %6d" % total) + p(" Match: %6d (%s)" % (match, pct(match))) + p(" Differ: %6d (%s)" % (differ, pct(differ))) + p(" RustPython error: %6d (%s)" % (rp_err, pct(rp_err))) + p(" CPython error: %6d (%s)" % (cp_err, pct(cp_err))) + p(" Both error: %6d (%s)" % (both_err, pct(both_err))) + if rp_miss: + p(" RustPython missing: %6d (%s)" % (rp_miss, pct(rp_miss))) + p() + + if args.detail: + if rp_error_files: + p("-" * 60) + p(" RustPython Compile Errors") + p("-" * 60) + for fp, err in rp_error_files[:50]: + p(" %s: %s" % (fp, err)) + if len(rp_error_files) > 
50: + p(" ... and %d more" % (len(rp_error_files) - 50)) + p() + + if diff_files: + p("-" * 60) + p(" Bytecode Differences") + p("-" * 60) + for fp, code_diffs in diff_files: + p() + p(" %s:" % fp) + for code_path, diffs in code_diffs: + shown = min(len(diffs), args.max_diffs) + p(" %s: %d difference(s)" % (code_path, len(diffs))) + for idx, cp_inst, rp_inst in diffs[:shown]: + if idx == -1: + p(" %s" % (cp_inst or rp_inst)) + else: + p(" [%3d] CPython: %s" % (idx, cp_inst)) + p(" RustPython: %s" % rp_inst) + if len(diffs) > shown: + p(" ... and %d more" % (len(diffs) - shown)) + p() + else: + list_limit = 0 if args.summary_json else max(args.list_limit, 0) + if diff_summaries and list_limit: + p("Top differing files (%d shown of %d):" % (min(list_limit, len(diff_summaries)), len(diff_summaries))) + top = sorted( + diff_summaries, + key=lambda item: (item["diff_instructions"], item["diff_code_objects"], item["path"]), + reverse=True, + )[:list_limit] + for item in top: + p( + " %s (%d code objects, %d instruction diffs)" + % ( + item["path"], + item["diff_code_objects"], + item["diff_instructions"], + ) + ) + p() + p("Use --detail to see specific instruction differences.") + p() + + # Summary JSON output + if args.summary_json: + summary = { + "total": total, + "sample": args.sample, + "sample_seed": sample_seed, + "match": match, + "differ": differ, + "rp_error": rp_err, + "cp_error": cp_err, + "both_error": both_err, + "rp_missing": rp_miss, + "match_pct": round(100.0 * match / total, 2) if total else 0, + "diff_files": [fp for fp, _ in diff_files] + if need_detailed_diffs + else [item["path"] for item in diff_summaries], + "top_diff_files": sorted( + diff_summaries, + key=lambda item: (item["diff_instructions"], item["diff_code_objects"], item["path"]), + reverse=True, + )[: min(20, len(diff_summaries))], + "rp_error_files": [fp for fp, _ in rp_error_files], + } + with open(args.summary_json, "w") as f: + json.dump(summary, f, indent=2) + log("Summary JSON: %s" 
% args.summary_json) + + log("Done: %d match, %d differ, %d errors" % (match, differ, rp_err)) + sys.exit(0 if differ == 0 and rp_err == 0 else 1) + + +if __name__ == "__main__": + main() diff --git a/scripts/dis_dump.py b/scripts/dis_dump.py new file mode 100644 index 00000000000..1e9aeaf5f5c --- /dev/null +++ b/scripts/dis_dump.py @@ -0,0 +1,383 @@ +#!/usr/bin/env python3 +"""Dump normalized bytecode for Python source files as JSON. + +Designed to produce comparable output across different Python implementations. +Normalizes away implementation-specific details (byte offsets, memory addresses) +while preserving semantic instruction content. + +Usage: + python dis_dump.py Lib/ + python dis_dump.py --base-dir Lib path/to/file.py +""" + +import argparse +import dis +import json +import os +import re +import sys +import types + +# Non-semantic filler instructions to skip +SKIP_OPS = frozenset({"CACHE", "PRECALL", "EXTENDED_ARG"}) + +# Opname normalization: map variant instructions to their base form. +# These variants differ only in optimization hints, not semantics. 
+_OPNAME_NORMALIZE = { + "LOAD_FAST_BORROW": "LOAD_FAST", + "LOAD_FAST_BORROW_LOAD_FAST_BORROW": "LOAD_FAST_LOAD_FAST", + "LOAD_FAST_CHECK": "LOAD_FAST", + "JUMP_BACKWARD_NO_INTERRUPT": "JUMP_BACKWARD", + "POP_ITER": "POP_TOP", + # Superinstruction normalization: these get decomposed in _extract_instructions + "STORE_FAST_LOAD_FAST_BORROW": "STORE_FAST_LOAD_FAST", +} + +# Jump instruction names (fallback when hasjrel/hasjabs is incomplete) +_JUMP_OPNAMES = frozenset( + { + "JUMP", + "JUMP_FORWARD", + "JUMP_BACKWARD", + "JUMP_BACKWARD_NO_INTERRUPT", + "POP_JUMP_IF_TRUE", + "POP_JUMP_IF_FALSE", + "POP_JUMP_IF_NONE", + "POP_JUMP_IF_NOT_NONE", + "JUMP_IF_TRUE_OR_POP", + "JUMP_IF_FALSE_OR_POP", + "FOR_ITER", + "SEND", + } +) + +_JUMP_OPCODES = None + + +def _jump_opcodes(): + global _JUMP_OPCODES + if _JUMP_OPCODES is None: + _JUMP_OPCODES = set() + if hasattr(dis, "hasjrel"): + _JUMP_OPCODES.update(dis.hasjrel) + if hasattr(dis, "hasjabs"): + _JUMP_OPCODES.update(dis.hasjabs) + return _JUMP_OPCODES + + +def _is_jump(inst): + """Check if an instruction is a jump (by opcode set or name).""" + return inst.opcode in _jump_opcodes() or inst.opname in _JUMP_OPNAMES + + +def _normalize_argrepr(argrepr): + """Strip runtime-specific details from arg repr.""" + if argrepr.startswith(" (CPython 3.14) + # (RustPython) + name = argrepr[len("= 0: + name = name[:idx] + return "" % name.rstrip(">").strip() + # Normalize COMPARE_OP: strip bool(...) wrapper from CPython 3.14 + # e.g. 
"bool(==)" -> "==", "bool(<)" -> "<" + m = re.match(r"^bool\((.+)\)$", argrepr) + if m: + return m.group(1) + # Remove memory addresses from other reprs + argrepr = re.sub(r" at 0x[0-9a-fA-F]+", "", argrepr) + # Remove LOAD_ATTR/LOAD_SUPER_ATTR suffixes: " + NULL|self", " + NULL" + argrepr = re.sub(r" \+ NULL\|self$", "", argrepr) + argrepr = re.sub(r" \+ NULL$", "", argrepr) + + # Normalize unicode escapes + def _unescape(m): + try: + cp = int(m.group(1), 16) + if 0xD800 <= cp <= 0xDFFF: + return m.group(0) + return chr(cp) + except (ValueError, OverflowError): + return m.group(0) + + argrepr = re.sub(r"\\u([0-9a-fA-F]{4})", _unescape, argrepr) + argrepr = re.sub(r"\\U([0-9a-fA-F]{8})", _unescape, argrepr) + return argrepr + + +_IS_RUSTPYTHON = ( + hasattr(sys, "implementation") and sys.implementation.name == "rustpython" +) + +# RustPython's ComparisonOperator enum values → operator strings +_RP_CMP_OPS = {0: "<", 1: "<", 2: ">", 3: "!=", 4: "==", 5: "<=", 6: ">="} + + +def _resolve_arg_fallback(code, opname, arg): + """Resolve a raw argument to its human-readable form. + + Used when the dis module doesn't populate argrepr (e.g., on RustPython). + """ + if not isinstance(arg, int): + return arg + try: + if "FAST" in opname: + if 0 <= arg < len(code.co_varnames): + return code.co_varnames[arg] + elif opname == "LOAD_CONST": + if 0 <= arg < len(code.co_consts): + return _normalize_argrepr(repr(code.co_consts[arg])) + elif opname in ( + "LOAD_DEREF", + "STORE_DEREF", + "DELETE_DEREF", + "LOAD_CLOSURE", + "MAKE_CELL", + "COPY_FREE_VARS", + ): + # arg is localsplus index: + # 0..nlocals-1 = varnames (parameter cells reuse these slots) + # nlocals.. 
= non-parameter cells + freevars + nlocals = len(code.co_varnames) + if arg < nlocals: + return code.co_varnames[arg] + varnames_set = set(code.co_varnames) + nonparam_cells = [v for v in code.co_cellvars if v not in varnames_set] + extra = nonparam_cells + list(code.co_freevars) + idx = arg - nlocals + if 0 <= idx < len(extra): + return extra[idx] + elif opname in ( + "LOAD_NAME", + "STORE_NAME", + "DELETE_NAME", + "LOAD_GLOBAL", + "STORE_GLOBAL", + "DELETE_GLOBAL", + "LOAD_ATTR", + "STORE_ATTR", + "DELETE_ATTR", + "IMPORT_NAME", + "IMPORT_FROM", + "LOAD_FROM_DICT_OR_GLOBALS", + ): + if 0 <= arg < len(code.co_names): + return code.co_names[arg] + elif opname == "LOAD_SUPER_ATTR": + name_idx = arg >> 2 + if 0 <= name_idx < len(code.co_names): + return code.co_names[name_idx] + except Exception: + pass + return arg + + +def _extract_instructions(code): + """Extract normalized instruction list from a code object. + + - Filters out CACHE/PRECALL instructions + - Converts jump targets from byte offsets to instruction indices + - Resolves argument names via fallback when argrepr is missing + - Normalizes argument representations + """ + try: + raw = list(dis.get_instructions(code)) + except Exception as e: + return [["ERROR", str(e)]] + + # Build filtered list and offset-to-index mapping + filtered = [] + offset_to_idx = {} + for inst in raw: + if inst.opname in SKIP_OPS: + continue + offset_to_idx[inst.offset] = len(filtered) + filtered.append(inst) + + # Map offsets that land on CACHE slots to the next real instruction + for inst in raw: + if inst.offset not in offset_to_idx: + for fi, finst in enumerate(filtered): + if finst.offset >= inst.offset: + offset_to_idx[inst.offset] = fi + break + + # Superinstruction decomposition: split into constituent parts + # so we compare individual operations regardless of combining. 
+ _SUPER_DECOMPOSE = { + "STORE_FAST_LOAD_FAST": ("STORE_FAST", "LOAD_FAST"), + "STORE_FAST_STORE_FAST": ("STORE_FAST", "STORE_FAST"), + "LOAD_FAST_LOAD_FAST": ("LOAD_FAST", "LOAD_FAST"), + } + + result = [] + for inst in filtered: + opname = _OPNAME_NORMALIZE.get(inst.opname, inst.opname) + + # Decompose superinstructions into individual ops + if opname in _SUPER_DECOMPOSE: + op1, op2 = _SUPER_DECOMPOSE[opname] + if isinstance(inst.arg, int): + idx1 = (inst.arg >> 4) & 0xF + idx2 = inst.arg & 0xF + else: + idx1, idx2 = 0, 0 + name1 = _resolve_arg_fallback(code, op1, idx1) + name2 = _resolve_arg_fallback(code, op2, idx2) + result.append([op1, name1]) + result.append([op2, name2]) + continue + + if _is_jump(inst) and isinstance(inst.argval, int): + target_idx = offset_to_idx.get(inst.argval) + # Detect unresolved argval (RustPython may not resolve jump targets): + # 1. argval not in offset_to_idx (not a valid byte offset) + # 2. argval == arg (raw arg returned as-is, not resolved to offset) + # 3. For backward jumps: argval should be < current offset + is_backward = "BACKWARD" in inst.opname + argval_is_raw = inst.argval == inst.arg and inst.arg is not None + if target_idx is None or argval_is_raw: + target_idx = None # force recalculation + if is_backward: + # Target = current_offset + INSTRUCTION_SIZE + cache_size - arg * INSTRUCTION_SIZE + # Try different cache sizes (NOT_TAKEN=1 for JUMP_BACKWARD, 0 for NO_INTERRUPT) + if "NO_INTERRUPT" in inst.opname: + cache_order = (0, 1, 2) + else: + cache_order = (1, 0, 2, 3) + for cache in cache_order: + target_off = inst.offset + 2 + cache * 2 - inst.arg * 2 + if target_off >= 0 and target_off in offset_to_idx: + target_idx = offset_to_idx[target_off] + break + elif inst.arg is not None: + # Forward jumps: compute target offset using cache entry count. + # POP_JUMP_IF_* have 1 cache entry (NOT_TAKEN), others have 0. 
def _dump_code(code):
    """Recursively dump a code object and every code object nested in it.

    Returns {"name", "insts"} plus a "children" list when the code object
    holds nested code constants (functions, classes, comprehensions).
    """
    # Prefer the qualified name when the interpreter provides one.
    label = getattr(code, "co_qualname", None) or code.co_name
    dumped = {"name": label, "insts": _extract_instructions(code)}
    nested = [
        _dump_code(const)
        for const in code.co_consts
        if isinstance(const, types.CodeType)
    ]
    if nested:
        dumped["children"] = nested
    return dumped
"code": _dump_code(code)} + except SyntaxError as e: + return {"status": "error", "error": "%s (line %s)" % (e.msg, e.lineno)} + except Exception as e: + return {"status": "error", "error": str(e)} + + +def main(): + parser = argparse.ArgumentParser(description="Dump normalized bytecode as JSON") + parser.add_argument( + "--base-dir", + default=None, + help="Base directory used to compute relative output paths", + ) + parser.add_argument( + "--files-from", + default=None, + help="Read newline-separated target paths from this file", + ) + parser.add_argument("targets", nargs="*", help="Python files or directories to process") + parser.add_argument( + "--progress", + type=int, + default=0, + help="Print a dot to stderr every N files processed", + ) + args = parser.parse_args() + + targets = list(args.targets) + if args.files_from: + with open(args.files_from, encoding="utf-8") as f: + targets.extend(line.strip() for line in f if line.strip()) + + results = {} + count = 0 + for target in targets: + if os.path.isdir(target): + for root, dirs, files in os.walk(target): + dirs[:] = sorted( + d for d in dirs if d != "__pycache__" and not d.startswith(".") + ) + for fname in sorted(files): + if fname.endswith(".py"): + fpath = os.path.join(root, fname) + rel_base = args.base_dir or target + relpath = os.path.relpath(fpath, rel_base) + results[relpath] = process_file(fpath) + count += 1 + if args.progress and count % args.progress == 0: + sys.stderr.write(".") + sys.stderr.flush() + elif target.endswith(".py"): + rel_base = args.base_dir or os.path.dirname(target) or "." 
+ relpath = os.path.relpath(target, rel_base) + results[relpath] = process_file(target) + count += 1 + if args.progress and count % args.progress == 0: + sys.stderr.write(".") + sys.stderr.flush() + + json.dump(results, sys.stdout, ensure_ascii=False, separators=(",", ":")) + + +if __name__ == "__main__": + main() From 10d7ecd9601ba4e6c4d21b841a1e29fb595e5d0c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Fri, 3 Apr 2026 21:43:32 +0900 Subject: [PATCH 2/6] Align except* bytecode chaining with CPython --- crates/codegen/src/compile.rs | 277 +++++++++++++++++++--------- crates/codegen/src/ir.rs | 327 ++++++++++++++++++++++++++++++++-- scripts/dis_dump.py | 0 3 files changed, 503 insertions(+), 101 deletions(-) mode change 100644 => 100755 scripts/dis_dump.py diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index fab31076218..2869af044d9 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -2592,6 +2592,44 @@ impl Compiler { self.switch_to_block(dead); } ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) => { + let direct_name_unpack = (|| { + let [target] = &targets[..] else { + return None; + }; + let target_elts = match target { + ast::Expr::Tuple(ast::ExprTuple { elts, .. }) + | ast::Expr::List(ast::ExprList { elts, .. }) => elts, + _ => return None, + }; + let value_elts = match &**value { + ast::Expr::Tuple(ast::ExprTuple { elts, .. }) + | ast::Expr::List(ast::ExprList { elts, .. 
}) => elts, + _ => return None, + }; + if target_elts.len() != value_elts.len() + || target_elts + .iter() + .any(|elt| !matches!(elt, ast::Expr::Name(_))) + || target_elts + .iter() + .chain(value_elts.iter()) + .any(|elt| matches!(elt, ast::Expr::Starred(_))) + { + return None; + } + Some((target_elts, value_elts)) + })(); + + if let Some((target_elts, value_elts)) = direct_name_unpack { + for elt in value_elts { + self.compile_expression(elt)?; + } + for target in target_elts.iter().rev() { + self.compile_store(target)?; + } + return Ok(()); + } + self.compile_expression(value)?; for (i, target) in targets.iter().enumerate() { @@ -3136,14 +3174,7 @@ impl Compiler { self.pop_fblock(FBlockType::FinallyEnd); } - // Restore prev_exc as current exception before RERAISE - // Stack: [prev_exc, exc] -> COPY 2 -> [prev_exc, exc, prev_exc] - // POP_EXCEPT pops prev_exc and sets exc_info->exc_value = prev_exc - // Stack after POP_EXCEPT: [prev_exc, exc] - emit!(self, Instruction::Copy { i: 2 }); - emit!(self, Instruction::PopExcept); - - // RERAISE 0: re-raise the original exception to outer handler + // CPython re-raises first and lets the cleanup block restore prev_exc. 
emit!(self, Instruction::Reraise { depth: 0 }); } @@ -3170,6 +3201,7 @@ impl Compiler { emit!(self, PseudoInstruction::PopBlock); self.pop_fblock(FBlockType::TryExcept); emit!(self, PseudoInstruction::Jump { delta: else_block }); + self.set_no_location(); // except handlers: self.switch_to_block(handler_block); @@ -3408,15 +3440,7 @@ impl Compiler { self.pop_fblock(FBlockType::FinallyEnd); } - // Restore prev_exc as current exception before RERAISE - // Stack: [lasti, prev_exc, exc] -> COPY 2 -> [lasti, prev_exc, exc, prev_exc] - // POP_EXCEPT pops prev_exc and sets exc_info->exc_value = prev_exc - // Stack after POP_EXCEPT: [lasti, prev_exc, exc] - emit!(self, Instruction::Copy { i: 2 }); - emit!(self, Instruction::PopExcept); - - // RERAISE 0: re-raise the original exception to outer handler - // Stack: [lasti, prev_exc, exc] - exception is on top + // CPython re-raises first and lets the cleanup block restore prev_exc. emit!(self, Instruction::Reraise { depth: 0 }); } @@ -3448,8 +3472,12 @@ impl Compiler { ) -> CompileResult<()> { let handler_block = self.new_block(); let cleanup_block = self.new_block(); - let orelse_block = self.new_block(); let end_block = self.new_block(); + let orelse_block = if orelse.is_empty() { + end_block + } else { + self.new_block() + }; emit!(self, Instruction::Nop); emit!( @@ -3596,14 +3624,16 @@ impl Compiler { emit!(self, Instruction::Reraise { depth: 1 }); self.set_no_location(); - self.switch_to_block(orelse_block); - self.set_no_location(); - self.compile_statements(orelse)?; - emit!( - self, - PseudoInstruction::JumpNoInterrupt { delta: end_block } - ); - self.set_no_location(); + if !orelse.is_empty() { + self.switch_to_block(orelse_block); + self.set_no_location(); + self.compile_statements(orelse)?; + emit!( + self, + PseudoInstruction::JumpNoInterrupt { delta: end_block } + ); + self.set_no_location(); + } self.switch_to_block(end_block); Ok(()) @@ -3620,7 +3650,7 @@ impl Compiler { // Stack layout during handler 
processing: [prev_exc, orig, list, rest] let handler_block = self.new_block(); let finally_block = self.new_block(); - let else_block = self.new_block(); + let cleanup_block = self.new_block(); let end_block = self.new_block(); let reraise_star_block = self.new_block(); let reraise_block = self.new_block(); @@ -3630,6 +3660,12 @@ impl Compiler { None }; let exit_block = self.new_block(); + let continuation_block = end_block; + let else_block = if orelse.is_empty() && finalbody.is_empty() { + continuation_block + } else { + self.new_block() + }; // Emit NOP at the try: line so LINE events fire for it emit!(self, Instruction::Nop); @@ -3667,14 +3703,23 @@ impl Compiler { self.switch_to_block(handler_block); // Stack: [exc] (from exception table) + emit!( + self, + PseudoInstruction::SetupCleanup { + delta: cleanup_block + } + ); + // PUSH_EXC_INFO emit!(self, Instruction::PushExcInfo); // Stack: [prev_exc, exc] // Push EXCEPTION_GROUP_HANDLER fblock - let eg_dummy1 = self.new_block(); - let eg_dummy2 = self.new_block(); - self.push_fblock(FBlockType::ExceptionGroupHandler, eg_dummy1, eg_dummy2)?; + self.push_fblock( + FBlockType::ExceptionGroupHandler, + cleanup_block, + cleanup_block, + )?; // Initialize handler stack before the loop // BUILD_LIST 0 + COPY 2 to set up [prev_exc, orig, list, rest] @@ -3695,17 +3740,24 @@ impl Compiler { delta: reraise_star_block } ); + self.set_no_location(); } for (i, handler) in handlers.iter().enumerate() { let ast::ExceptHandler::ExceptHandler(ast::ExceptHandlerExceptHandler { type_, name, body, + range: handler_range, .. 
}) = handler; + let is_last_handler = i == n - 1; let no_match_block = self.new_block(); - let next_block = self.new_block(); + let next_handler_block = if is_last_handler { + reraise_star_block + } else { + self.new_block() + }; // Compile exception type if let Some(exc_type) = type_ { @@ -3762,7 +3814,7 @@ impl Compiler { ); self.push_fblock_full( FBlockType::HandlerCleanup, - next_block, + next_handler_block, end_block, if let Some(alias) = name { FBlockDatum::ExceptionName(alias.as_str().to_owned()) @@ -3775,6 +3827,7 @@ impl Compiler { self.compile_statements(body)?; // Handler body completed normally + self.set_no_location(); emit!(self, PseudoInstruction::PopBlock); self.pop_fblock(FBlockType::HandlerCleanup); @@ -3785,8 +3838,15 @@ impl Compiler { self.compile_name(alias.as_str(), NameUsage::Delete)?; } - // Jump to next handler - emit!(self, PseudoInstruction::Jump { delta: next_block }); + if is_last_handler { + emit!(self, Instruction::ListAppend { i: 1 }); + } + emit!( + self, + PseudoInstruction::Jump { + delta: next_handler_block + } + ); // Handler raised an exception (cleanup_end label) self.switch_to_block(handler_except_block); @@ -3794,6 +3854,7 @@ impl Compiler { // (lasti is pushed because push_lasti=true in HANDLER_CLEANUP fblock) // Cleanup name binding + self.set_no_location(); if let Some(alias) = name { self.emit_load_const(ConstantData::None); self.store_name(alias.as_str())?; @@ -3812,36 +3873,43 @@ impl Compiler { emit!(self, Instruction::PopTop); // Stack: [prev_exc, orig, list, new_rest] - // JUMP except_with_error - // We directly JUMP to next_block since no_match_block falls through to it - emit!(self, PseudoInstruction::Jump { delta: next_block }); - - // No match - pop match (None) - self.switch_to_block(no_match_block); - emit!(self, Instruction::PopTop); // pop match (None) - // Stack: [prev_exc, orig, list, new_rest] - // Falls through to next_block + if is_last_handler { + emit!(self, Instruction::ListAppend { i: 1 }); + 
emit!( + self, + PseudoInstruction::Jump { + delta: reraise_star_block + } + ); + } else { + emit!( + self, + PseudoInstruction::Jump { + delta: next_handler_block + } + ); + } - // except_with_error label - // All paths merge here at next_block - self.switch_to_block(next_block); - // Stack: [prev_exc, orig, list, rest] + if is_last_handler { + self.switch_to_block(no_match_block); + self.set_source_range(*handler_range); + emit!(self, Instruction::PopTop); // pop match (None) + // Stack: [prev_exc, orig, list, new_rest] - // After last handler, append rest to list - if i == n - 1 { - // Stack: [prev_exc, orig, list, rest] - // ADDOP_I(c, NO_LOCATION, LIST_APPEND, 1); - // PEEK(1) = stack[len-1] after pop - // RustPython nth_value(i) = stack[len-i-1] after pop - // For LIST_APPEND 1: stack[len-1] = stack[len-i-1] -> i = 0 + self.set_no_location(); emit!(self, Instruction::ListAppend { i: 1 }); - // Stack: [prev_exc, orig, list] emit!( self, PseudoInstruction::Jump { delta: reraise_star_block } ); + } else { + self.switch_to_block(no_match_block); + self.set_source_range(*handler_range); + emit!(self, Instruction::PopTop); // pop match (None) + // Stack: [prev_exc, orig, list, new_rest] + self.switch_to_block(next_handler_block); } } @@ -3851,6 +3919,7 @@ impl Compiler { // Reraise star block self.switch_to_block(reraise_star_block); // Stack: [prev_exc, orig, list] + self.set_no_location(); // CALL_INTRINSIC_2 PREP_RERAISE_STAR // Takes 2 args (orig, list) and produces result @@ -3880,7 +3949,7 @@ impl Compiler { emit!(self, Instruction::PopTop); // Stack: [prev_exc] - // POP_BLOCK - no-op for us with exception tables (fblocks handle this) + emit!(self, PseudoInstruction::PopBlock); // POP_EXCEPT - restore previous exception context emit!(self, Instruction::PopExcept); // Stack: [] @@ -3890,14 +3959,19 @@ impl Compiler { self.pop_fblock(FBlockType::FinallyTry); } - emit!(self, PseudoInstruction::Jump { delta: end_block }); + emit!( + self, + 
PseudoInstruction::Jump { + delta: continuation_block + } + ); // Reraise the result self.switch_to_block(reraise_block); // Stack: [prev_exc, result] + self.set_no_location(); - // POP_BLOCK - no-op for us - // SWAP 2 + emit!(self, PseudoInstruction::PopBlock); emit!(self, Instruction::Swap { i: 2 }); // Stack: [result, prev_exc] @@ -3908,6 +3982,12 @@ impl Compiler { // RERAISE 0 emit!(self, Instruction::Reraise { depth: 0 }); + self.switch_to_block(cleanup_block); + self.set_no_location(); + emit!(self, Instruction::Copy { i: 3 }); + emit!(self, Instruction::PopExcept); + emit!(self, Instruction::Reraise { depth: 1 }); + // try-else path // NOTE: When we reach here in compilation, the nothing-to-reraise path above // has already popped FinallyTry. But else_block is a different execution path @@ -3927,19 +4007,26 @@ impl Compiler { FBlockDatum::FinallyBody(finalbody.to_vec()), )?; } - self.switch_to_block(else_block); - self.compile_statements(orelse)?; + if else_block != continuation_block { + self.switch_to_block(else_block); + self.compile_statements(orelse)?; - if !finalbody.is_empty() { - // Pop the FinallyTry fblock we just pushed for the else path - emit!(self, PseudoInstruction::PopBlock); - self.pop_fblock(FBlockType::FinallyTry); - } + if !finalbody.is_empty() { + // Pop the FinallyTry fblock we just pushed for the else path + emit!(self, PseudoInstruction::PopBlock); + self.pop_fblock(FBlockType::FinallyTry); + } - emit!(self, PseudoInstruction::Jump { delta: end_block }); + emit!( + self, + PseudoInstruction::Jump { + delta: continuation_block + } + ); + } - self.switch_to_block(end_block); if !finalbody.is_empty() { + self.switch_to_block(end_block); // Snapshot sub_tables before first finally compilation let sub_table_cursor = self.symbol_table_stack.last().map(|t| t.next_sub_table); @@ -3970,8 +4057,6 @@ impl Compiler { self.pop_fblock(FBlockType::FinallyEnd); } - emit!(self, Instruction::Copy { i: 2 }); - emit!(self, Instruction::PopExcept); 
emit!(self, Instruction::Reraise { depth: 0 }); if let Some(cleanup) = finally_cleanup_block { @@ -3982,7 +4067,11 @@ impl Compiler { } } - self.switch_to_block(exit_block); + self.switch_to_block(if finalbody.is_empty() { + end_block + } else { + exit_block + }); Ok(()) } @@ -5683,6 +5772,7 @@ impl Compiler { } // Pop fblock before normal exit + self.set_source_range(with_range); emit!(self, PseudoInstruction::PopBlock); self.pop_fblock(if is_async { FBlockType::AsyncWith @@ -5742,15 +5832,14 @@ impl Compiler { } ); - emit!(self, PseudoInstruction::PopBlock); - self.pop_fblock(FBlockType::ExceptionHandler); - emit!(self, Instruction::Reraise { depth: 2 }); // ===== Suppress block ===== // Stack: [..., exit_func, self_exit, lasti, prev_exc, exc, True] self.switch_to_block(suppress_block); emit!(self, Instruction::PopTop); // pop True + emit!(self, PseudoInstruction::PopBlock); + self.pop_fblock(FBlockType::ExceptionHandler); emit!(self, Instruction::PopExcept); // pop exc, restore prev_exc emit!(self, Instruction::PopTop); // pop lasti emit!(self, Instruction::PopTop); // pop self_exit @@ -8455,30 +8544,42 @@ impl Compiler { // Regular call: func → PUSH_NULL → args → CALL if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = &func { // Check for super() method call optimization - if !uses_ex_call - && let Some(super_type) = self.can_optimize_super_call(value, attr.as_str()) - { + if let Some(super_type) = self.can_optimize_super_call(value, attr.as_str()) { // super().method() or super(cls, self).method() optimization - // Stack: [global_super, class, self] → LOAD_SUPER_METHOD → [method, self] + // CALL path: [global_super, class, self] → LOAD_SUPER_METHOD → [method, self] + // CALL_FUNCTION_EX path: [global_super, class, self] → LOAD_SUPER_ATTR → [attr] // Set source range to the super() call for LOAD_GLOBAL/LOAD_DEREF/etc. 
let super_range = value.range(); self.set_source_range(super_range); self.load_args_for_super(&super_type)?; self.set_source_range(super_range); let idx = self.name(attr.as_str()); - match super_type { - SuperCallType::TwoArg { .. } => { - self.emit_load_super_method(idx); + if uses_ex_call { + match super_type { + SuperCallType::TwoArg { .. } => { + self.emit_load_super_attr(idx); + } + SuperCallType::ZeroArg => { + self.emit_load_zero_super_attr(idx); + } } - SuperCallType::ZeroArg => { - self.emit_load_zero_super_method(idx); + emit!(self, Instruction::PushNull); + self.codegen_call_helper(0, args, call_range)?; + } else { + match super_type { + SuperCallType::TwoArg { .. } => { + self.emit_load_super_method(idx); + } + SuperCallType::ZeroArg => { + self.emit_load_zero_super_method(idx); + } } + // NOP for line tracking at .method( line + self.set_source_range(attr.range()); + emit!(self, Instruction::Nop); + // CALL at .method( line (not the full expression line) + self.codegen_call_helper(0, args, attr.range())?; } - // NOP for line tracking at .method( line - self.set_source_range(attr.range()); - emit!(self, Instruction::Nop); - // CALL at .method( line (not the full expression line) - self.codegen_call_helper(0, args, attr.range())?; } else { self.compile_expression(value)?; let idx = self.name(attr.as_str()); diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index bc709147479..ba51d43775c 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -233,6 +233,8 @@ impl CodeInfo { duplicate_end_returns(&mut self.blocks); self.dce(); // truncate after terminal in blocks that got return duplicated self.eliminate_unreachable_blocks(); // remove now-unreachable last block + remove_redundant_nops_and_jumps(&mut self.blocks); + self.add_checks_for_loads_of_uninitialized_variables(); // optimize_load_fast: after normalize_jumps self.optimize_load_fast_borrow(); self.optimize_load_global_push_null(); @@ -1346,6 +1348,11 @@ impl CodeInfo { 
match (curr_instr, next_instr) { // LoadFast + LoadFast -> LoadFastLoadFast (if both indices < 16) (Instruction::LoadFast { .. }, Instruction::LoadFast { .. }) => { + let line1 = curr.location.line.get() as i32; + let line2 = next.location.line.get() as i32; + if line1 > 0 && line2 > 0 && line1 != line2 { + None + } else { let idx1 = u32::from(curr.arg); let idx2 = u32::from(next.arg); if idx1 < 16 && idx2 < 16 { @@ -1359,9 +1366,15 @@ impl CodeInfo { } else { None } + } } // StoreFast + StoreFast -> StoreFastStoreFast (if both indices < 16) (Instruction::StoreFast { .. }, Instruction::StoreFast { .. }) => { + let line1 = curr.location.line.get() as i32; + let line2 = next.location.line.get() as i32; + if line1 > 0 && line2 > 0 && line1 != line2 { + None + } else { let idx1 = u32::from(curr.arg); let idx2 = u32::from(next.arg); if idx1 < 16 && idx2 < 16 { @@ -1375,6 +1388,7 @@ impl CodeInfo { } else { None } + } } // Note: StoreFast + LoadFast → StoreFastLoadFast is done in a // separate pass AFTER optimize_load_fast_borrow, because CPython @@ -1699,6 +1713,183 @@ impl CodeInfo { } } + fn add_checks_for_loads_of_uninitialized_variables(&mut self) { + let nlocals = self.metadata.varnames.len(); + if nlocals == 0 { + return; + } + + let mut nparams = self.metadata.argcount as usize + self.metadata.kwonlyargcount as usize; + if self.flags.contains(CodeFlags::VARARGS) { + nparams += 1; + } + if self.flags.contains(CodeFlags::VARKEYWORDS) { + nparams += 1; + } + nparams = nparams.min(nlocals); + + let mut in_masks: Vec>> = vec![None; self.blocks.len()]; + let mut start_mask = vec![false; nlocals]; + for slot in start_mask.iter_mut().skip(nparams) { + *slot = true; + } + in_masks[0] = Some(start_mask); + + let mut worklist = vec![BlockIdx(0)]; + while let Some(block_idx) = worklist.pop() { + let idx = block_idx.idx(); + let Some(mut unsafe_mask) = in_masks[idx].clone() else { + continue; + }; + + let old_instructions = self.blocks[idx].instructions.clone(); + let mut 
new_instructions = Vec::with_capacity(old_instructions.len()); + let mut changed = false; + + for info in old_instructions { + let mut info = info; + if let Some(eh) = info.except_handler { + let target = next_nonempty_block(&self.blocks, eh.handler_block); + if target != BlockIdx::NULL + && merge_unsafe_mask(&mut in_masks[target.idx()], &unsafe_mask) + { + worklist.push(target); + } + } + match info.instr.real() { + Some(Instruction::DeleteFast { var_num }) => { + let var_idx = usize::from(var_num.get(info.arg)); + if var_idx < nlocals { + unsafe_mask[var_idx] = true; + } + new_instructions.push(info); + } + Some(Instruction::LoadFastAndClear { var_num }) => { + let var_idx = usize::from(var_num.get(info.arg)); + if var_idx < nlocals { + unsafe_mask[var_idx] = true; + } + new_instructions.push(info); + } + Some(Instruction::StoreFast { var_num }) => { + let var_idx = usize::from(var_num.get(info.arg)); + if var_idx < nlocals { + unsafe_mask[var_idx] = false; + } + new_instructions.push(info); + } + Some(Instruction::StoreFastStoreFast { var_nums }) => { + let packed = var_nums.get(info.arg); + let (idx1, idx2) = packed.indexes(); + let idx1 = usize::from(idx1); + let idx2 = usize::from(idx2); + if idx1 < nlocals { + unsafe_mask[idx1] = false; + } + if idx2 < nlocals { + unsafe_mask[idx2] = false; + } + new_instructions.push(info); + } + Some(Instruction::LoadFastCheck { var_num }) => { + let var_idx = usize::from(var_num.get(info.arg)); + if var_idx < nlocals { + unsafe_mask[var_idx] = false; + } + new_instructions.push(info); + } + Some(Instruction::LoadFast { var_num }) => { + let var_idx = usize::from(var_num.get(info.arg)); + if var_idx < nlocals && unsafe_mask[var_idx] { + info.instr = Instruction::LoadFastCheck { + var_num: Arg::marker(), + } + .into(); + changed = true; + } + if var_idx < nlocals { + unsafe_mask[var_idx] = false; + } + new_instructions.push(info); + } + Some(Instruction::LoadFastLoadFast { var_nums }) => { + let packed = 
var_nums.get(info.arg); + let (idx1, idx2) = packed.indexes(); + let idx1 = usize::from(idx1); + let idx2 = usize::from(idx2); + let needs_check_1 = idx1 < nlocals && unsafe_mask[idx1]; + let needs_check_2 = idx2 < nlocals && unsafe_mask[idx2]; + if needs_check_1 || needs_check_2 { + let mut first = info; + first.instr = if needs_check_1 { + Instruction::LoadFastCheck { + var_num: Arg::marker(), + } + } else { + Instruction::LoadFast { + var_num: Arg::marker(), + } + } + .into(); + first.arg = OpArg::new(idx1 as u32); + + let mut second = info; + second.instr = if needs_check_2 { + Instruction::LoadFastCheck { + var_num: Arg::marker(), + } + } else { + Instruction::LoadFast { + var_num: Arg::marker(), + } + } + .into(); + second.arg = OpArg::new(idx2 as u32); + + new_instructions.push(first); + new_instructions.push(second); + changed = true; + } else { + new_instructions.push(info); + } + if idx1 < nlocals { + unsafe_mask[idx1] = false; + } + if idx2 < nlocals { + unsafe_mask[idx2] = false; + } + } + _ => new_instructions.push(info), + } + } + + if changed { + self.blocks[idx].instructions = new_instructions; + } + + let block = &self.blocks[idx]; + if block_has_fallthrough(block) { + let next = next_nonempty_block(&self.blocks, block.next); + if next != BlockIdx::NULL + && merge_unsafe_mask(&mut in_masks[next.idx()], &unsafe_mask) + { + worklist.push(next); + } + } + + if let Some(last) = block.instructions.last() + && is_jump_instruction(last) + { + let target = next_nonempty_block(&self.blocks, last.target); + if target != BlockIdx::NULL + && merge_unsafe_mask(&mut in_masks[target.idx()], &unsafe_mask) + { + worklist.push(target); + } + } + } + } + fn max_stackdepth(&mut self) -> crate::InternalResult { let mut maxdepth = 0u32; let mut stack = Vec::with_capacity(self.blocks.len()); @@ -2164,16 +2355,6 @@ fn push_cold_blocks_to_end(blocks: &mut Vec) { for (cold_idx, warm_next) in fixups { let jump_block_idx = BlockIdx(blocks.len() as u32); - let loc = 
blocks[cold_idx.idx()] - .instructions - .last() - .map(|i| i.location) - .unwrap_or_default(); - let end_loc = blocks[cold_idx.idx()] - .instructions - .last() - .map(|i| i.end_location) - .unwrap_or_default(); let mut jump_block = Block { cold: true, ..Block::default() @@ -2185,10 +2366,10 @@ fn push_cold_blocks_to_end(blocks: &mut Vec) { .into(), arg: OpArg::new(0), target: warm_next, - location: loc, - end_location: end_loc, + location: SourceLocation::default(), + end_location: SourceLocation::default(), except_handler: None, - lineno_override: None, + lineno_override: Some(-1), cache_entries: 0, }); jump_block.next = blocks[cold_idx.idx()].next; @@ -2623,6 +2804,126 @@ fn inline_small_or_no_lineno_blocks(blocks: &mut [Block]) { } } +fn remove_redundant_nops_in_blocks(blocks: &mut [Block]) -> usize { + let mut changes = 0; + let mut block_order = Vec::new(); + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + block_order.push(current); + current = blocks[current.idx()].next; + } + + for block_idx in block_order { + let bi = block_idx.idx(); + let mut src_instructions = core::mem::take(&mut blocks[bi].instructions); + let mut kept = Vec::with_capacity(src_instructions.len()); + let mut prev_lineno = -1i32; + + for src in 0..src_instructions.len() { + let instr = src_instructions[src]; + let lineno = instruction_lineno(&instr); + let mut remove = false; + + if matches!(instr.instr.real(), Some(Instruction::Nop)) { + if lineno < 0 || prev_lineno == lineno { + remove = true; + } else if src < src_instructions.len() - 1 { + let next_lineno = instruction_lineno(&src_instructions[src + 1]); + if next_lineno == lineno { + remove = true; + } else if next_lineno < 0 { + src_instructions[src + 1].lineno_override = Some(lineno); + remove = true; + } + } else { + let next = next_nonempty_block(blocks, blocks[bi].next); + if next != BlockIdx::NULL { + let mut next_lineno = None; + for next_instr in &blocks[next.idx()].instructions { + let line = 
instruction_lineno(next_instr); + if matches!(next_instr.instr.real(), Some(Instruction::Nop)) && line < 0 + { + continue; + } + next_lineno = Some(line); + break; + } + if next_lineno.is_some_and(|line| line == lineno) { + remove = true; + } + } + } + } + + if remove { + changes += 1; + } else { + kept.push(instr); + prev_lineno = lineno; + } + } + + blocks[bi].instructions = kept; + } + + changes +} + +fn remove_redundant_jumps_in_blocks(blocks: &mut [Block]) -> usize { + let mut changes = 0; + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + let idx = current.idx(); + let next = next_nonempty_block(blocks, blocks[idx].next); + let jump_target = blocks[idx] + .instructions + .last() + .filter(|ins| ins.instr.is_unconditional_jump() && ins.target != BlockIdx::NULL) + .map(|ins| ins.target); + if next != BlockIdx::NULL + && let Some(target) = jump_target + && next_nonempty_block(blocks, target) == next + && let Some(last_instr) = blocks[idx].instructions.last_mut() + { + last_instr.instr = Instruction::Nop.into(); + last_instr.arg = OpArg::new(0); + last_instr.target = BlockIdx::NULL; + changes += 1; + } + current = blocks[idx].next; + } + changes +} + +fn remove_redundant_nops_and_jumps(blocks: &mut [Block]) { + loop { + let removed_nops = remove_redundant_nops_in_blocks(blocks); + let removed_jumps = remove_redundant_jumps_in_blocks(blocks); + if removed_nops + removed_jumps == 0 { + break; + } + } +} + +fn merge_unsafe_mask(slot: &mut Option>, incoming: &[bool]) -> bool { + match slot { + Some(existing) => { + let mut changed = false; + for (dst, src) in existing.iter_mut().zip(incoming.iter().copied()) { + if src && !*dst { + *dst = true; + changed = true; + } + } + changed + } + None => { + *slot = Some(incoming.to_vec()); + true + } + } +} + /// Follow chain of empty blocks to find first non-empty block. 
fn next_nonempty_block(blocks: &[Block], mut idx: BlockIdx) -> BlockIdx { while idx != BlockIdx::NULL diff --git a/scripts/dis_dump.py b/scripts/dis_dump.py old mode 100644 new mode 100755 From 2186be66adcf0bf2a1c090382752e641189fb3b5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 5 Apr 2026 14:05:02 +0900 Subject: [PATCH 3/6] Fix exception state model and finally handler cleanup - ExceptionStack: init with base slot instead of empty vec - with_frame_impl: save/restore exc_info value instead of push/pop slot, so callees see caller's handled exception - Remove unused with_frame_exc - resume_gen_frame: use push_exception/pop_exception methods - codegen: move RERAISE inside cleanup handler's exception table range in finally blocks (both try-finally and try-except-finally), so POP_EXCEPT runs before re-raising --- crates/codegen/src/compile.rs | 48 ++++++++++++------------ crates/vm/src/frame.rs | 1 - crates/vm/src/vm/mod.rs | 69 +++++++++++++++++++---------------- 3 files changed, 62 insertions(+), 56 deletions(-) diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 2869af044d9..e579aca5997 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -3166,16 +3166,18 @@ impl Compiler { } self.compile_statements(finalbody)?; - // Pop FinallyEnd fblock BEFORE emitting RERAISE - // This ensures RERAISE routes to outer exception handler, not cleanup block - // Cleanup block is only for new exceptions raised during finally body execution + // RERAISE must be inside the cleanup handler's exception table + // range. When RERAISE re-raises the exception, the cleanup + // handler (COPY 3, POP_EXCEPT, RERAISE 1) runs POP_EXCEPT to + // restore exc_info before the exception reaches the outer handler. + emit!(self, Instruction::Reraise { depth: 0 }); + + // PopBlock after RERAISE (dead code, but marks the exception + // table range end so the cleanup covers RERAISE). 
if finally_cleanup_block.is_some() { emit!(self, PseudoInstruction::PopBlock); self.pop_fblock(FBlockType::FinallyEnd); } - - // CPython re-raises first and lets the cleanup block restore prev_exc. - emit!(self, Instruction::Reraise { depth: 0 }); } if let Some(cleanup) = finally_cleanup_block { @@ -3432,16 +3434,18 @@ impl Compiler { // Run finally body self.compile_statements(finalbody)?; - // Pop FinallyEnd fblock BEFORE emitting RERAISE - // This ensures RERAISE routes to outer exception handler, not cleanup block - // Cleanup block is only for new exceptions raised during finally body execution + // RERAISE must be inside the cleanup handler's exception table + // range. The cleanup handler (COPY 3, POP_EXCEPT, RERAISE 1) + // runs POP_EXCEPT to restore exc_info before re-raising to + // the outer handler. + emit!(self, Instruction::Reraise { depth: 0 }); + + // PopBlock after RERAISE (dead code, but marks the exception + // table range end so the cleanup covers RERAISE). if finally_cleanup_block.is_some() { emit!(self, PseudoInstruction::PopBlock); self.pop_fblock(FBlockType::FinallyEnd); } - - // CPython re-raises first and lets the cleanup block restore prev_exc. 
- emit!(self, Instruction::Reraise { depth: 0 }); } // finally cleanup block @@ -3473,11 +3477,7 @@ impl Compiler { let handler_block = self.new_block(); let cleanup_block = self.new_block(); let end_block = self.new_block(); - let orelse_block = if orelse.is_empty() { - end_block - } else { - self.new_block() - }; + let orelse_block = self.new_block(); emit!(self, Instruction::Nop); emit!( @@ -3624,16 +3624,16 @@ impl Compiler { emit!(self, Instruction::Reraise { depth: 1 }); self.set_no_location(); + self.switch_to_block(orelse_block); + self.set_no_location(); if !orelse.is_empty() { - self.switch_to_block(orelse_block); - self.set_no_location(); self.compile_statements(orelse)?; - emit!( - self, - PseudoInstruction::JumpNoInterrupt { delta: end_block } - ); - self.set_no_location(); } + emit!( + self, + PseudoInstruction::JumpNoInterrupt { delta: end_block } + ); + self.set_no_location(); self.switch_to_block(end_block); Ok(()) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 7cb33ec95d9..aeba09445d7 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -6769,7 +6769,6 @@ impl ExecutingFrame<'_> { } bytecode::RaiseKind::BareRaise => { // RAISE_VARARGS 0: bare `raise` gets exception from VM state - // This is the current exception set by PUSH_EXC_INFO vm.topmost_exception() .ok_or_else(|| vm.new_runtime_error("No active exception to reraise"))? } diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 231ba5cde38..f946d26e248 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -108,7 +108,7 @@ pub struct VirtualMachine { } /// Non-owning frame pointer for the frames stack. -/// The pointed-to frame is kept alive by the caller of with_frame_exc/resume_gen_frame. +/// The pointed-to frame is kept alive by the caller of with_frame/resume_gen_frame. #[derive(Copy, Clone)] pub struct FramePtr(NonNull>); @@ -124,11 +124,23 @@ impl FramePtr { // FrameRef is alive on the call stack. 
The Vec is always empty when the VM moves between threads. unsafe impl Send for FramePtr {} -#[derive(Debug, Default)] +#[derive(Debug)] struct ExceptionStack { + /// Linked list of handled-exception slots (`_PyErr_StackItem` chain). + /// Bottom element is the thread's base slot; generator/coroutine resume + /// pushes an additional slot. Normal frame calls do **not** push/pop. stack: Vec>, } +impl Default for ExceptionStack { + fn default() -> Self { + // Thread's base `_PyErr_StackItem` – always present. + Self { + stack: vec![None], + } + } +} + /// Stop-the-world state for fork safety. Before `fork()`, the requester /// stops all other Python threads so they are not holding internal locks. #[cfg(all(unix, feature = "threading"))] @@ -1554,17 +1566,7 @@ impl VirtualMachine { frame: FrameRef, f: F, ) -> PyResult { - self.with_frame_impl(frame, None, true, f) - } - - /// Like `with_frame` but allows specifying the initial exception state. - pub fn with_frame_exc PyResult>( - &self, - frame: FrameRef, - exc: Option, - f: F, - ) -> PyResult { - self.with_frame_impl(frame, exc, true, f) + self.with_frame_impl(frame, true, f) } pub(crate) fn with_frame_untraced PyResult>( @@ -1572,13 +1574,12 @@ impl VirtualMachine { frame: FrameRef, f: F, ) -> PyResult { - self.with_frame_impl(frame, None, false, f) + self.with_frame_impl(frame, false, f) } fn with_frame_impl PyResult>( &self, frame: FrameRef, - exc: Option, traced: bool, f: F, ) -> PyResult { @@ -1597,19 +1598,22 @@ impl VirtualMachine { old_frame as *mut Frame, core::sync::atomic::Ordering::Relaxed, ); - // Push exception context for frame isolation. - // For normal calls: None (clean slate). - // For generators: the saved exception from last yield. - self.push_exception(exc); + // Normal frame calls share the caller's exc_info slot so that + // callees can see the caller's handled exception via sys.exc_info(). 
+ // Save the current value to restore on exit — this prevents + // exc_info pollution from frames with unbalanced + // PUSH_EXC_INFO/POP_EXCEPT (e.g., exception escaping an except block + // whose cleanup entry is missing from the exception table). + let saved_exc = self.current_exception(); let old_owner = frame.owner.swap( crate::frame::FrameOwner::Thread as i8, core::sync::atomic::Ordering::AcqRel, ); - // Ensure cleanup on panic: restore owner, pop exception, frame chain, and frames Vec. + // Ensure cleanup on panic: restore owner, exc_info, frame chain, and frames Vec. scopeguard::defer! { frame.owner.store(old_owner, core::sync::atomic::Ordering::Release); - self.pop_exception(); + self.set_exception(saved_exc); crate::vm::thread::set_current_frame(old_frame); self.frames.borrow_mut().pop(); #[cfg(feature = "threading")] @@ -1624,9 +1628,9 @@ impl VirtualMachine { }) } - /// Lightweight frame execution for generator/coroutine resume. - /// Pushes to the thread frame stack and fires trace/profile events, - /// but skips the thread exception update for performance. + /// Frame execution for generator/coroutine resume. + /// Pushes a new exc_info slot (gi_exc_state) onto the chain, + /// linking the generator's saved handled-exception. pub fn resume_gen_frame) -> PyResult>( &self, frame: &FrameRef, @@ -1649,20 +1653,20 @@ impl VirtualMachine { old_frame as *mut Frame, core::sync::atomic::Ordering::Relaxed, ); - // Inline exception push without thread exception update - self.exceptions.borrow_mut().stack.push(exc); + // Push generator's exc_info slot onto the chain + // (gi_exc_state.previous_item = tstate->exc_info; + // tstate->exc_info = &gi_exc_state;) + self.push_exception(exc); let old_owner = frame.owner.swap( crate::frame::FrameOwner::Thread as i8, core::sync::atomic::Ordering::AcqRel, ); - // Ensure cleanup on panic: restore owner, pop exception, frame chain, frames Vec, - // and recursion depth. 
+ // Ensure cleanup on panic: restore owner, pop exc_info slot, frame chain, + // frames Vec, and recursion depth. scopeguard::defer! { frame.owner.store(old_owner, core::sync::atomic::Ordering::Release); - self.exceptions.borrow_mut().stack - .pop() - .expect("pop_exception() without nested exc stack"); + self.pop_exception(); crate::vm::thread::set_current_frame(old_frame); self.frames.borrow_mut().pop(); #[cfg(feature = "threading")] @@ -2037,12 +2041,14 @@ impl VirtualMachine { } } + /// Push a new exc_info slot (for generator/coroutine resume). pub(crate) fn push_exception(&self, exc: Option) { self.exceptions.borrow_mut().stack.push(exc); #[cfg(feature = "threading")] thread::update_thread_exception(self.topmost_exception()); } + /// Pop the topmost exc_info slot (generator/coroutine yield/return). pub(crate) fn pop_exception(&self) -> Option { let exc = self .exceptions @@ -2059,6 +2065,7 @@ impl VirtualMachine { self.exceptions.borrow().stack.last().cloned().flatten() } + /// Set the current exc_info slot value (PUSH_EXC_INFO / POP_EXCEPT). 
pub(crate) fn set_exception(&self, exc: Option) { // don't be holding the RefCell guard while __del__ is called let mut excs = self.exceptions.borrow_mut(); From 8b264e59324694d44c0d7517d32ebdd24930f8fd Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 5 Apr 2026 16:37:21 +0900 Subject: [PATCH 4/6] Fix CI: dead store elimination, formatting, snapshot - Add eliminate_dead_stores pass before peephole_optimize: consecutive STORE_FAST to the same variable are replaced with POP_TOP (apply_static_swaps from CPython flowgraph.c) - Remove 3 expectedFailure decorators in test_peepholer (test_load_fast_unknown_* now pass) - Accept updated async_with snapshot - Fix formatting in ir.rs, mod.rs, Python scripts --- Lib/test/test_peepholer.py | 3 - crates/codegen/src/compile.rs | 12 ++ crates/codegen/src/ir.rs | 117 ++++++++++++++---- ...pile__tests__nested_double_async_with.snap | 58 ++++----- crates/vm/src/vm/mod.rs | 4 +- scripts/compare_bytecode.py | 31 ++++- scripts/dis_dump.py | 7 +- 7 files changed, 163 insertions(+), 69 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 08830c25ae7..0da0630e8ad 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -873,7 +873,6 @@ def f(): self.assertInBytecode(f, 'LOAD_FAST_CHECK') self.assertNotInBytecode(f, 'LOAD_FAST') - @unittest.expectedFailure # TODO: RUSTPYTHON; RETURN_VALUE def test_load_fast_unknown_because_del(self): def f(): x = 1 @@ -928,7 +927,6 @@ def f(): self.assertInBytecode(f, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f, 'LOAD_FAST_CHECK') - @unittest.expectedFailure # TODO: RUSTPYTHON; L5 to L6 -> L6 [1] lasti def test_load_fast_unknown_after_error(self): def f(): try: @@ -940,7 +938,6 @@ def f(): # Assert that it doesn't occur in the LOAD_FAST_CHECK branch. 
self.assertInBytecode(f, 'LOAD_FAST_CHECK') - @unittest.expectedFailure # TODO: RUSTPYTHON; L5 to L6 -> L6 [1] lasti def test_load_fast_unknown_after_error_2(self): def f(): try: diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index e579aca5997..2fddf040c52 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -2624,7 +2624,19 @@ impl Compiler { for elt in value_elts { self.compile_expression(elt)?; } + // Stores happen in reverse (TOS = last value → first store). + // When the same name appears multiple times in the target + // tuple, only the first store (getting the rightmost value) + // matters; later stores to the same name are dead. + // Replace them with POP_TOP (apply_static_swaps). + let mut seen = std::collections::HashSet::new(); for target in target_elts.iter().rev() { + if let ast::Expr::Name(ast::ExprName { id, .. }) = target + && !seen.insert(id.as_str()) + { + emit!(self, Instruction::PopTop); + continue; + } self.compile_store(target)?; } return Ok(()); diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index ba51d43775c..659736ca5bb 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -209,6 +209,9 @@ impl CodeInfo { // DCE always runs (removes dead code after terminal instructions) self.dce(); + // Dead store elimination for duplicate STORE_FAST targets + // (apply_static_swaps in CPython's flowgraph.c) + self.eliminate_dead_stores(); // Peephole optimizer creates superinstructions matching CPython self.peephole_optimize(); @@ -1328,6 +1331,65 @@ impl CodeInfo { } } + /// Eliminate dead stores in STORE_FAST sequences (apply_static_swaps). + /// + /// In sequences of consecutive STORE_FAST instructions (from tuple unpacking), + /// if the same variable is stored to more than once, only the first store + /// (which gets TOS — the rightmost value) matters. Later stores to the + /// same variable are dead and replaced with POP_TOP. 
+ /// Simplified apply_static_swaps (CPython flowgraph.c): + /// In STORE_FAST sequences that follow UNPACK_SEQUENCE / UNPACK_EX, + /// replace duplicate stores to the same variable with POP_TOP. + /// UNPACK pushes values so stores execute left-to-right; the LAST + /// store to a variable carries the final value, earlier ones are dead. + fn eliminate_dead_stores(&mut self) { + for block in &mut self.blocks { + let instrs = &mut block.instructions; + let len = instrs.len(); + let mut i = 0; + while i < len { + // Look for UNPACK_SEQUENCE or UNPACK_EX + let is_unpack = matches!( + instrs[i].instr, + AnyInstruction::Real( + Instruction::UnpackSequence { .. } | Instruction::UnpackEx { .. } + ) + ); + if !is_unpack { + i += 1; + continue; + } + // Scan the run of STORE_FAST right after the unpack + let run_start = i + 1; + let mut run_end = run_start; + while run_end < len + && matches!( + instrs[run_end].instr, + AnyInstruction::Real(Instruction::StoreFast { .. }) + ) + { + run_end += 1; + } + if run_end - run_start >= 2 { + // Pass 1: find the LAST occurrence of each variable + let mut last_occurrence = std::collections::HashMap::new(); + for (j, instr) in instrs[run_start..run_end].iter().enumerate() { + last_occurrence.insert(u32::from(instr.arg), j); + } + // Pass 2: non-last stores to the same variable are dead + for (j, instr) in instrs[run_start..run_end].iter_mut().enumerate() { + let idx = u32::from(instr.arg); + if last_occurrence[&idx] != j { + instr.instr = AnyInstruction::Real(Instruction::PopTop); + instr.arg = OpArg::new(0); + } + } + } + i = run_end.max(i + 1); + } + } + } + /// Peephole optimization: combine consecutive instructions into super-instructions fn peephole_optimize(&mut self) { for block in &mut self.blocks { @@ -1353,41 +1415,44 @@ impl CodeInfo { if line1 > 0 && line2 > 0 && line1 != line2 { None } else { - let idx1 = u32::from(curr.arg); - let idx2 = u32::from(next.arg); - if idx1 < 16 && idx2 < 16 { - let packed = (idx1 << 4) | idx2; - 
Some(( - Instruction::LoadFastLoadFast { - var_nums: Arg::marker(), - }, - OpArg::new(packed), - )) - } else { - None - } + let idx1 = u32::from(curr.arg); + let idx2 = u32::from(next.arg); + if idx1 < 16 && idx2 < 16 { + let packed = (idx1 << 4) | idx2; + Some(( + Instruction::LoadFastLoadFast { + var_nums: Arg::marker(), + }, + OpArg::new(packed), + )) + } else { + None + } } } // StoreFast + StoreFast -> StoreFastStoreFast (if both indices < 16) + // Dead store elimination: if both store to the same variable, + // the first store is dead. Replace it with POP_TOP (like + // apply_static_swaps in CPython's flowgraph.c). (Instruction::StoreFast { .. }, Instruction::StoreFast { .. }) => { let line1 = curr.location.line.get() as i32; let line2 = next.location.line.get() as i32; if line1 > 0 && line2 > 0 && line1 != line2 { None } else { - let idx1 = u32::from(curr.arg); - let idx2 = u32::from(next.arg); - if idx1 < 16 && idx2 < 16 { - let packed = (idx1 << 4) | idx2; - Some(( - Instruction::StoreFastStoreFast { - var_nums: Arg::marker(), - }, - OpArg::new(packed), - )) - } else { - None - } + let idx1 = u32::from(curr.arg); + let idx2 = u32::from(next.arg); + if idx1 < 16 && idx2 < 16 { + let packed = (idx1 << 4) | idx2; + Some(( + Instruction::StoreFastStoreFast { + var_nums: Arg::marker(), + }, + OpArg::new(packed), + )) + } else { + None + } } } // Note: StoreFast + LoadFast → StoreFastLoadFast is done in a diff --git a/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap index ba8be3589d3..4573b7a943f 100644 --- a/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap +++ b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap @@ -1,5 +1,6 @@ --- source: crates/codegen/src/compile.rs +assertion_line: 10960 expression: "compile_exec(\"\\\nasync def 
test():\n for stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with egg():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")" --- 1 0 RESUME (0) @@ -199,8 +200,8 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter 183 CACHE 184 CACHE 185 CACHE - 186 CACHE - >> 187 CACHE + >> 186 CACHE + 187 CACHE 188 CACHE 189 CACHE 190 CACHE @@ -214,41 +215,40 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter 198 CACHE 199 CACHE 200 POP_TOP - 201 NOP - 3 202 LOAD_CONST (None) - 203 LOAD_CONST (None) - >> 204 LOAD_CONST (None) - 205 CALL (3) + 3 201 LOAD_CONST (None) + 202 LOAD_CONST (None) + >> 203 LOAD_CONST (None) + 204 CALL (3) + 205 CACHE 206 CACHE 207 CACHE - 208 CACHE - 209 POP_TOP - 210 JUMP_BACKWARD (187) - 211 CACHE - 212 PUSH_EXC_INFO - 213 WITH_EXCEPT_START - 214 TO_BOOL + 208 POP_TOP + 209 JUMP_BACKWARD (186) + 210 CACHE + 211 PUSH_EXC_INFO + 212 WITH_EXCEPT_START + 213 TO_BOOL + 214 CACHE 215 CACHE 216 CACHE - 217 CACHE - 218 POP_JUMP_IF_TRUE (2) - 219 CACHE - 220 NOT_TAKEN - 221 RERAISE (2) - 222 POP_TOP - 223 POP_EXCEPT + 217 POP_JUMP_IF_TRUE (2) + 218 CACHE + 219 NOT_TAKEN + 220 RERAISE (2) + 221 POP_TOP + 222 POP_EXCEPT + 223 POP_TOP 224 POP_TOP 225 POP_TOP - 226 POP_TOP - 227 JUMP_BACKWARD (204) - 228 CACHE - 229 COPY (3) - 230 POP_EXCEPT - 231 RERAISE (1) + 226 JUMP_BACKWARD (203) + 227 CACHE + 228 COPY (3) + 229 POP_EXCEPT + 230 RERAISE (1) - 2 232 CALL_INTRINSIC_1 (StopIterationError) - 233 RERAISE (1) + 2 231 CALL_INTRINSIC_1 (StopIterationError) + 232 RERAISE (1) 2 MAKE_FUNCTION 3 STORE_NAME (0, test) diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index f946d26e248..882ffe5f54e 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -135,9 +135,7 @@ struct ExceptionStack { impl Default for ExceptionStack { fn default() -> 
Self { // Thread's base `_PyErr_StackItem` – always present. - Self { - stack: vec![None], - } + Self { stack: vec![None] } } } diff --git a/scripts/compare_bytecode.py b/scripts/compare_bytecode.py index 532da1bbc33..f7b5ed916ed 100644 --- a/scripts/compare_bytecode.py +++ b/scripts/compare_bytecode.py @@ -25,6 +25,8 @@ PROJECT_ROOT = os.path.dirname(SCRIPT_DIR) DIS_DUMP = os.path.join(SCRIPT_DIR, "dis_dump.py") DEFAULT_REPORT = os.path.join(PROJECT_ROOT, "compare_bytecode.report") + + def find_rustpython(): """Locate the RustPython binary, allowing release builds only.""" if "RUSTPYTHON" in os.environ: @@ -47,7 +49,9 @@ def collect_targets(lib_dir, pattern=None): """Collect Python files to compare, relative to lib_dir.""" targets = [] for root, dirs, files in os.walk(lib_dir): - dirs[:] = sorted(d for d in dirs if d != "__pycache__" and not d.startswith(".")) + dirs[:] = sorted( + d for d in dirs if d != "__pycache__" and not d.startswith(".") + ) for fname in sorted(files): if not fname.endswith(".py"): continue @@ -222,7 +226,9 @@ def compare_code_summary(cp_code, rp_code): rp_list = rp_by_name.get(name, []) for i in range(max(len(cp_list), len(rp_list))): if i < len(cp_list) and i < len(rp_list): - child_objects, child_insts = compare_code_summary(cp_list[i], rp_list[i]) + child_objects, child_insts = compare_code_summary( + cp_list[i], rp_list[i] + ) diff_code_objects += child_objects diff_instructions += child_insts else: @@ -308,7 +314,10 @@ def main(): sys.exit(1) if not os.path.isfile(DIS_DUMP): print("Error: disassembler helper not found: %s" % DIS_DUMP, file=sys.stderr) - print(" Expected scripts/dis_dump.py from origin/bytecode-parity", file=sys.stderr) + print( + " Expected scripts/dis_dump.py from origin/bytecode-parity", + file=sys.stderr, + ) sys.exit(1) targets = collect_targets(args.lib_dir, args.filter) @@ -467,10 +476,16 @@ def pct(n): else: list_limit = 0 if args.summary_json else max(args.list_limit, 0) if diff_summaries and list_limit: - 
p("Top differing files (%d shown of %d):" % (min(list_limit, len(diff_summaries)), len(diff_summaries))) + shown = min(list_limit, len(diff_summaries)) + total = len(diff_summaries) + p(f"Top differing files ({shown} shown of {total}):") top = sorted( diff_summaries, - key=lambda item: (item["diff_instructions"], item["diff_code_objects"], item["path"]), + key=lambda item: ( + item["diff_instructions"], + item["diff_code_objects"], + item["path"], + ), reverse=True, )[:list_limit] for item in top: @@ -504,7 +519,11 @@ def pct(n): else [item["path"] for item in diff_summaries], "top_diff_files": sorted( diff_summaries, - key=lambda item: (item["diff_instructions"], item["diff_code_objects"], item["path"]), + key=lambda item: ( + item["diff_instructions"], + item["diff_code_objects"], + item["path"], + ), reverse=True, )[: min(20, len(diff_summaries))], "rp_error_files": [fp for fp, _ in rp_error_files], diff --git a/scripts/dis_dump.py b/scripts/dis_dump.py index 1e9aeaf5f5c..38810ab0d0f 100755 --- a/scripts/dis_dump.py +++ b/scripts/dis_dump.py @@ -243,7 +243,8 @@ def _extract_instructions(code): if target_idx is None or argval_is_raw: target_idx = None # force recalculation if is_backward: - # Target = current_offset + INSTRUCTION_SIZE + cache_size - arg * INSTRUCTION_SIZE + # Target = current_offset + INSTR_SIZE + cache + # - arg * INSTR_SIZE # Try different cache sizes (NOT_TAKEN=1 for JUMP_BACKWARD, 0 for NO_INTERRUPT) if "NO_INTERRUPT" in inst.opname: cache_order = (0, 1, 2) @@ -335,7 +336,9 @@ def main(): default=None, help="Read newline-separated target paths from this file", ) - parser.add_argument("targets", nargs="*", help="Python files or directories to process") + parser.add_argument( + "targets", nargs="*", help="Python files or directories to process" + ) parser.add_argument( "--progress", type=int, From 4505676d2ae2c85cded9f73ef8dc2ffbb9d7198e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 8 Apr 2026 21:04:28 +0900 Subject: [PATCH 5/6] Fix 
RERAISE to only pop exception, preserve values below MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RERAISE was popping `depth` extra values in addition to the exception, causing stack underflow when unwinding through the exception table. CPython spec: `inst(RERAISE, (values[oparg], exc -- values[oparg]))` — only `exc` is consumed, `values[oparg]` stays on the stack so the outer handler's exception-table unwind pops them down to its configured depth. Without this fix, `async with` cleanup paths with nested exception handlers crashed with `pop stackref but null found` at POP_EXCEPT. --- crates/vm/src/frame.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index aeba09445d7..66b8a87f2e8 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3398,23 +3398,20 @@ impl ExecutingFrame<'_> { self.push_value(vm.ctx.new_bool(result).into()); Ok(None) } - Instruction::Reraise { depth } => { + Instruction::Reraise { depth: _ } => { // inst(RERAISE, (values[oparg], exc -- values[oparg])) // - // Stack layout: [values..., exc] where len(values) == oparg - // RERAISE pops exc and oparg additional values from the stack. - // values[0] is lasti used to set frame->instr_ptr for traceback. - // We skip the lasti update since RustPython's traceback is already correct. - let depth_val = depth.get(arg) as usize; - - // Pop exception from TOS + // RERAISE pops only `exc` from TOS. The `values` below it + // (lasti and optional prev_exc) stay on the stack — the + // outer exception handler's exception-table unwind will + // pop them down to its configured stack depth. + // + // `oparg` encodes how many values are preserved below exc + // (1 for simple reraise, 2 for with-block reraise where + // values[0]=lasti). Runtime-wise we don't need oparg since + // the exception table handles stack layout. 
let exc = self.pop_value(); - // Pop the depth values (lasti and possibly other items like prev_exc) - for _ in 0..depth_val { - self.pop_value(); - } - if let Some(exc_ref) = exc.downcast_ref::() { Err(exc_ref.to_owned()) } else { From 3b71dc41794a751640d96e31ceed37ece7ae7fa9 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 8 Apr 2026 21:23:47 +0900 Subject: [PATCH 6/6] Port apply_static_swaps and BUILD_TUPLE/UNPACK_SEQUENCE elimination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add optimize_build_tuple_unpack (flowgraph.c): BUILD_TUPLE n + UNPACK_SEQUENCE n → NOP for n=1, or NOP + SWAP n for n=2/3. - Add apply_static_swaps (flowgraph.c): eliminates SWAP by swapping target STORE_FAST/POP_TOP instructions in place. Checks line boundaries and detects store conflicts before reordering. - Remove direct_name_unpack fast path from compile.rs. The general UNPACK path now produces the same bytecode via the new flowgraph optimizations. - Drop expectedFailure on test_pack_unpack (now passes). 
--- Lib/test/test_peepholer.py | 3 +- crates/codegen/src/compile.rs | 50 ---------- crates/codegen/src/ir.rs | 174 ++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 52 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 0da0630e8ad..e20f712a31a 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -144,7 +144,6 @@ def f(): self.assertInBytecode(f, elem) self.check_lnotab(f) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_pack_unpack(self): for line, elem in ( ('a, = a,', 'LOAD_CONST',), @@ -158,7 +157,7 @@ def test_pack_unpack(self): self.assertNotInBytecode(code, 'UNPACK_SEQUENCE') self.check_lnotab(code) - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 1 != 2 + @unittest.expectedFailure # TODO: RUSTPYTHON; LOAD_CONST count mismatch in long-tuple branch def test_constant_folding_tuples_of_constants(self): for line, elem in ( ('a = 1,2,3', (1, 2, 3)), diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 2fddf040c52..f9cb4a1ddd1 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -2592,56 +2592,6 @@ impl Compiler { self.switch_to_block(dead); } ast::Stmt::Assign(ast::StmtAssign { targets, value, .. }) => { - let direct_name_unpack = (|| { - let [target] = &targets[..] else { - return None; - }; - let target_elts = match target { - ast::Expr::Tuple(ast::ExprTuple { elts, .. }) - | ast::Expr::List(ast::ExprList { elts, .. }) => elts, - _ => return None, - }; - let value_elts = match &**value { - ast::Expr::Tuple(ast::ExprTuple { elts, .. }) - | ast::Expr::List(ast::ExprList { elts, .. 
}) => elts, - _ => return None, - }; - if target_elts.len() != value_elts.len() - || target_elts - .iter() - .any(|elt| !matches!(elt, ast::Expr::Name(_))) - || target_elts - .iter() - .chain(value_elts.iter()) - .any(|elt| matches!(elt, ast::Expr::Starred(_))) - { - return None; - } - Some((target_elts, value_elts)) - })(); - - if let Some((target_elts, value_elts)) = direct_name_unpack { - for elt in value_elts { - self.compile_expression(elt)?; - } - // Stores happen in reverse (TOS = last value → first store). - // When the same name appears multiple times in the target - // tuple, only the first store (getting the rightmost value) - // matters; later stores to the same name are dead. - // Replace them with POP_TOP (apply_static_swaps). - let mut seen = std::collections::HashSet::new(); - for target in target_elts.iter().rev() { - if let ast::Expr::Name(ast::ExprName { id, .. }) = target - && !seen.insert(id.as_str()) - { - emit!(self, Instruction::PopTop); - continue; - } - self.compile_store(target)?; - } - return Ok(()); - } - self.compile_expression(value)?; for (i, target) in targets.iter().enumerate() { diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 659736ca5bb..9753961d12e 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -209,9 +209,13 @@ impl CodeInfo { // DCE always runs (removes dead code after terminal instructions) self.dce(); + // BUILD_TUPLE n + UNPACK_SEQUENCE n → NOP + SWAP (n=2,3) or NOP+NOP (n=1) + self.optimize_build_tuple_unpack(); // Dead store elimination for duplicate STORE_FAST targets // (apply_static_swaps in CPython's flowgraph.c) self.eliminate_dead_stores(); + // apply_static_swaps: reorder stores to eliminate SWAPs + self.apply_static_swaps(); // Peephole optimizer creates superinstructions matching CPython self.peephole_optimize(); @@ -1331,6 +1335,176 @@ impl CodeInfo { } } + /// BUILD_TUPLE n + UNPACK_SEQUENCE n optimization. 
+ /// + /// Ported from CPython flowgraph.c optimize_basic_block: + /// - n == 1: both become NOP (identity operation) + /// - n == 2 or 3: BUILD_TUPLE → NOP, UNPACK_SEQUENCE → SWAP + fn optimize_build_tuple_unpack(&mut self) { + for block in &mut self.blocks { + let instrs = &mut block.instructions; + let len = instrs.len(); + for i in 0..len.saturating_sub(1) { + let Some(Instruction::BuildTuple { .. }) = instrs[i].instr.real() else { + continue; + }; + let n = u32::from(instrs[i].arg); + let Some(Instruction::UnpackSequence { .. }) = instrs[i + 1].instr.real() else { + continue; + }; + if u32::from(instrs[i + 1].arg) != n { + continue; + } + match n { + 1 => { + instrs[i].instr = AnyInstruction::Real(Instruction::Nop); + instrs[i].arg = OpArg::new(0); + instrs[i + 1].instr = AnyInstruction::Real(Instruction::Nop); + instrs[i + 1].arg = OpArg::new(0); + } + 2 | 3 => { + instrs[i].instr = AnyInstruction::Real(Instruction::Nop); + instrs[i].arg = OpArg::new(0); + instrs[i + 1].instr = + AnyInstruction::Real(Instruction::Swap { i: Arg::marker() }); + instrs[i + 1].arg = OpArg::new(n); + } + _ => {} + } + } + } + } + + /// apply_static_swaps: eliminate SWAPs by reordering target stores/pops. + /// + /// Ported from CPython Python/flowgraph.c::apply_static_swaps. + /// For each SWAP N, find the 1st and N-th swappable instructions after + /// it. If both are STORE_FAST/POP_TOP and safe to swap, exchange them + /// in the bytecode and replace SWAP with NOP. + /// + /// Safety: abort if the two stores write the same variable, or if any + /// intervening swappable stores to one of the same variables. Do not + /// cross line-number boundaries (user-visible name bindings). + fn apply_static_swaps(&mut self) { + /// Instruction classes that are safe to reorder around SWAP. + fn is_swappable(instr: &AnyInstruction) -> bool { + matches!( + instr, + AnyInstruction::Real(Instruction::StoreFast { .. 
} | Instruction::PopTop) + ) + } + + /// Variable index that a STORE_FAST writes to, or None. + fn stores_to(info: &InstructionInfo) -> Option { + match info.instr { + AnyInstruction::Real(Instruction::StoreFast { .. }) => Some(u32::from(info.arg)), + _ => None, + } + } + + /// Next swappable index after `i` in `instrs`, skipping NOPs. + /// Returns None if a non-NOP non-swappable instruction blocks, or + /// if `lineno >= 0` and a different lineno is encountered. + fn next_swappable( + instrs: &[InstructionInfo], + mut i: usize, + lineno: i32, + ) -> Option { + loop { + i += 1; + if i >= instrs.len() { + return None; + } + let info = &instrs[i]; + let info_lineno = info.location.line.get() as i32; + if lineno >= 0 && info_lineno > 0 && info_lineno != lineno { + return None; + } + if matches!(info.instr, AnyInstruction::Real(Instruction::Nop)) { + continue; + } + if is_swappable(&info.instr) { + return Some(i); + } + return None; + } + } + + for block in &mut self.blocks { + let instrs = &mut block.instructions; + let len = instrs.len(); + // Walk forward; for each SWAP attempt elimination. + let mut i = 0; + while i < len { + let swap_arg = match instrs[i].instr { + AnyInstruction::Real(Instruction::Swap { .. }) => u32::from(instrs[i].arg), + _ => { + i += 1; + continue; + } + }; + // SWAP oparg < 2 is a no-op; the compiler should not emit + // these, but be defensive. + if swap_arg < 2 { + i += 1; + continue; + } + // Find first swappable after SWAP (lineno = -1 initially). + let Some(j) = next_swappable(instrs, i, -1) else { + i += 1; + continue; + }; + let lineno = instrs[j].location.line.get() as i32; + // Walk (swap_arg - 1) more swappable instructions, with + // lineno constraint. 
+ let mut k = j; + let mut ok = true; + for _ in 1..swap_arg { + match next_swappable(instrs, k, lineno) { + Some(next) => k = next, + None => { + ok = false; + break; + } + } + } + if !ok { + i += 1; + continue; + } + // Conflict check: if either j or k is a STORE_FAST, no + // intervening store may target the same variable, and + // they must not target the same variable themselves. + let store_j = stores_to(&instrs[j]); + let store_k = stores_to(&instrs[k]); + if store_j.is_some() || store_k.is_some() { + if store_j == store_k { + i += 1; + continue; + } + let mut conflict = false; + for idx in (j + 1)..k { + if let Some(store_idx) = stores_to(&instrs[idx]) + && (Some(store_idx) == store_j || Some(store_idx) == store_k) + { + conflict = true; + break; + } + } + if conflict { + i += 1; + continue; + } + } + // Safe to reorder. SWAP -> NOP, swap j and k. + instrs[i].instr = AnyInstruction::Real(Instruction::Nop); + instrs[i].arg = OpArg::new(0); + instrs.swap(j, k); + i += 1; + } + } + } + /// Eliminate dead stores in STORE_FAST sequences (apply_static_swaps). /// /// In sequences of consecutive STORE_FAST instructions (from tuple unpacking),