Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Bytecode parity
 Compiler changes:
    - Remove PUSH_NULL from decorator cal
ls, use CALL 0
    - Collect __static_attributes__ from self.xxx = patterns
    - Sort __static_attributes__ alphabetically
    - Move __classdict__ init before __doc__ in class prologue
    - Fold unary negative constants
    - Fold constant list/set literals (3+ elements)
    - Use BUILD_MAP 0 + MAP_ADD for 16+ dict pairs
    - Always run peephole optimizer for s
uperinstructions
    - Emit RETURN_GENERATOR for generator
 functions
    - Add is_generator flag to SymbolTabl
e
  • Loading branch information
youknowone committed Mar 25, 2026
commit d0cb8e1c64ba580b30d384b331536a60b62d7394
177 changes: 145 additions & 32 deletions crates/codegen/src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1200,8 +1200,10 @@ impl Compiler {
/// Emit RESUME instruction with proper handling for async preamble and module lineno.
/// codegen_enter_scope equivalent for RESUME emission.
fn emit_resume_for_scope(&mut self, scope_type: CompilerScope, lineno: u32) {
// For async functions/coroutines, emit RETURN_GENERATOR + POP_TOP before RESUME
if scope_type == CompilerScope::AsyncFunction {
// For generators and async functions, emit RETURN_GENERATOR + POP_TOP before RESUME
let is_gen = scope_type == CompilerScope::AsyncFunction
|| self.current_symbol_table().is_generator;
if is_gen {
Comment on lines +1203 to +1206
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Propagate generator classification to the async return value check.

This preamble now knows a scope is a generator before body compilation, but the async-generator guard still keys off CodeFlags::GENERATOR later in compile_statement. async def f(): return 1; yield 2 will still compile because the return is visited before mark_generator() runs. Either set the generator flag here when self.current_symbol_table().is_generator is true, or switch the Line 2498 check to the symbol-table bit.

Possible fix
-        let is_gen =
-            scope_type == CompilerScope::AsyncFunction || self.current_symbol_table().is_generator;
-        if is_gen {
+        let is_generator = self.current_symbol_table().is_generator;
+        let needs_return_generator =
+            scope_type == CompilerScope::AsyncFunction || is_generator;
+        if is_generator {
+            self.current_code_info().flags |= bytecode::CodeFlags::GENERATOR;
+        }
+        if needs_return_generator {
             emit!(self, Instruction::ReturnGenerator);
             emit!(self, Instruction::PopTop);
         }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// For generators and async functions, emit RETURN_GENERATOR + POP_TOP before RESUME
let is_gen =
scope_type == CompilerScope::AsyncFunction || self.current_symbol_table().is_generator;
if is_gen {
// For generators and async functions, emit RETURN_GENERATOR + POP_TOP before RESUME
let is_generator = self.current_symbol_table().is_generator;
let needs_return_generator =
scope_type == CompilerScope::AsyncFunction || is_generator;
if is_generator {
self.current_code_info().flags |= bytecode::CodeFlags::GENERATOR;
}
if needs_return_generator {
emit!(self, Instruction::ReturnGenerator);
emit!(self, Instruction::PopTop);
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/codegen/src/compile.rs` around lines 1203 - 1206, The async-generator
check is using CodeFlags::GENERATOR in compile_statement which runs before
mark_generator(), so async defs that are generators slip through; when you
compute is_gen (using scope_type == CompilerScope::AsyncFunction ||
self.current_symbol_table().is_generator) propagate that generator
classification into the code flags immediately (or alternatively change the
later check in compile_statement to consult
self.current_symbol_table().is_generator instead of CodeFlags::GENERATOR).
Concretely, set the generator bit on the current CodeFlags/state when is_gen is
true (or update the Line 2498 check to use current_symbol_table().is_generator)
so the async return-value guard sees the generator status before
mark_generator() is called.

emit!(self, Instruction::ReturnGenerator);
emit!(self, Instruction::PopTop);
}
Expand Down Expand Up @@ -2758,18 +2760,15 @@ impl Compiler {
fn prepare_decorators(&mut self, decorator_list: &[ast::Decorator]) -> CompileResult<()> {
for decorator in decorator_list {
self.compile_expression(&decorator.expression)?;
emit!(self, Instruction::PushNull);
}
Ok(())
}

/// Apply decorators in reverse order (LIFO from stack).
/// Stack [dec1, NULL, dec2, NULL, func] -> dec2(func) -> dec1(dec2(func))
/// The forward loop works because each Call pops from TOS, naturally
/// applying decorators bottom-up (innermost first).
/// Apply decorators: each decorator calls the function below it.
/// Stack: [dec1, dec2, func] → CALL 0 → [dec1, dec2(func)] → CALL 0 → [dec1(dec2(func))]
fn apply_decorators(&mut self, decorator_list: &[ast::Decorator]) {
for _ in decorator_list {
emit!(self, Instruction::Call { argc: 1 });
emit!(self, Instruction::Call { argc: 0 });
}
}

Expand Down Expand Up @@ -4510,6 +4509,100 @@ impl Compiler {
Ok(())
}

/// Collect attribute names assigned via `self.xxx = ...` in methods.
/// These are stored as __static_attributes__ in the class dict.
fn collect_static_attributes(body: &[ast::Stmt], attrs: Option<&mut IndexSet<String>>) {
let Some(attrs) = attrs else { return };
for stmt in body {
// Only scan def/async def at class body level
let (params, func_body) = match stmt {
ast::Stmt::FunctionDef(f) => (&f.parameters, &f.body),
_ => continue,
};
// Get first parameter name (usually "self" or "cls")
let first_param = params
.args
.first()
.or(params.posonlyargs.first())
.map(|p| &p.parameter.name);
let Some(self_name) = first_param else {
continue;
};
// Scan function body for self.xxx = ... (STORE_ATTR on first param)
Self::scan_store_attrs(func_body, self_name.as_str(), attrs);
}
}

/// Recursively scan statements for `name.attr = value` patterns.
fn scan_store_attrs(stmts: &[ast::Stmt], name: &str, attrs: &mut IndexSet<String>) {
for stmt in stmts {
match stmt {
ast::Stmt::Assign(a) => {
for target in &a.targets {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = target
{
if let ast::Expr::Name(n) = value.as_ref() {
if n.id.as_str() == name {
attrs.insert(attr.to_string());
}
}
}
}
}
ast::Stmt::AnnAssign(a) => {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) =
a.target.as_ref()
{
if let ast::Expr::Name(n) = value.as_ref() {
if n.id.as_str() == name {
attrs.insert(attr.to_string());
}
}
}
}
ast::Stmt::AugAssign(a) => {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) =
a.target.as_ref()
{
if let ast::Expr::Name(n) = value.as_ref() {
if n.id.as_str() == name {
attrs.insert(attr.to_string());
}
}
}
}
ast::Stmt::If(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
for clause in &s.elif_else_clauses {
Self::scan_store_attrs(&clause.body, name, attrs);
}
}
ast::Stmt::For(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
Self::scan_store_attrs(&s.orelse, name, attrs);
}
ast::Stmt::While(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
Self::scan_store_attrs(&s.orelse, name, attrs);
}
ast::Stmt::Try(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
for handler in &s.handlers {
if let ast::ExceptHandler::ExceptHandler(h) = handler {
Self::scan_store_attrs(&h.body, name, attrs);
}
}
Self::scan_store_attrs(&s.orelse, name, attrs);
Self::scan_store_attrs(&s.finalbody, name, attrs);
}
ast::Stmt::With(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
}
_ => {}
}
}
}
Comment on lines +4512 to +4597
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

The static-attribute scan misses some valid method shapes.

Two gaps here produce incomplete __static_attributes__: Line 4524 picks args before posonlyargs, so def f(self, /, x): self.a = 1 is skipped, and scan_store_attrs never descends into match cases. Both break parity on valid class bodies.

Possible fix
@@
-            let first_param = params
-                .args
-                .first()
-                .or(params.posonlyargs.first())
-                .map(|p| &p.parameter.name);
+            let first_param = params
+                .posonlyargs
+                .first()
+                .or(params.args.first())
+                .map(|p| &p.parameter.name);
@@
                 ast::Stmt::With(s) => {
                     Self::scan_store_attrs(&s.body, name, attrs);
                 }
+                ast::Stmt::Match(s) => {
+                    for case in &s.cases {
+                        Self::scan_store_attrs(&case.body, name, attrs);
+                    }
+                }
                 _ => {}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
/// Collect attribute names assigned via `self.xxx = ...` in methods.
/// These are stored as __static_attributes__ in the class dict.
fn collect_static_attributes(body: &[ast::Stmt], attrs: Option<&mut IndexSet<String>>) {
let Some(attrs) = attrs else { return };
for stmt in body {
// Only scan def/async def at class body level
let (params, func_body) = match stmt {
ast::Stmt::FunctionDef(f) => (&f.parameters, &f.body),
_ => continue,
};
// Get first parameter name (usually "self" or "cls")
let first_param = params
.args
.first()
.or(params.posonlyargs.first())
.map(|p| &p.parameter.name);
let Some(self_name) = first_param else {
continue;
};
// Scan function body for self.xxx = ... (STORE_ATTR on first param)
Self::scan_store_attrs(func_body, self_name.as_str(), attrs);
}
}
/// Recursively scan statements for `name.attr = value` patterns.
fn scan_store_attrs(stmts: &[ast::Stmt], name: &str, attrs: &mut IndexSet<String>) {
for stmt in stmts {
match stmt {
ast::Stmt::Assign(a) => {
for target in &a.targets {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = target
&& let ast::Expr::Name(n) = value.as_ref()
&& n.id.as_str() == name
{
attrs.insert(attr.to_string());
}
}
}
ast::Stmt::AnnAssign(a) => {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) =
a.target.as_ref()
&& let ast::Expr::Name(n) = value.as_ref()
&& n.id.as_str() == name
{
attrs.insert(attr.to_string());
}
}
ast::Stmt::AugAssign(a) => {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) =
a.target.as_ref()
&& let ast::Expr::Name(n) = value.as_ref()
&& n.id.as_str() == name
{
attrs.insert(attr.to_string());
}
}
ast::Stmt::If(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
for clause in &s.elif_else_clauses {
Self::scan_store_attrs(&clause.body, name, attrs);
}
}
ast::Stmt::For(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
Self::scan_store_attrs(&s.orelse, name, attrs);
}
ast::Stmt::While(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
Self::scan_store_attrs(&s.orelse, name, attrs);
}
ast::Stmt::Try(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
for handler in &s.handlers {
let ast::ExceptHandler::ExceptHandler(h) = handler;
Self::scan_store_attrs(&h.body, name, attrs);
}
Self::scan_store_attrs(&s.orelse, name, attrs);
Self::scan_store_attrs(&s.finalbody, name, attrs);
}
ast::Stmt::With(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
}
_ => {}
}
}
}
/// Collect attribute names assigned via `self.xxx = ...` in methods.
/// These are stored as __static_attributes__ in the class dict.
fn collect_static_attributes(body: &[ast::Stmt], attrs: Option<&mut IndexSet<String>>) {
let Some(attrs) = attrs else { return };
for stmt in body {
// Only scan def/async def at class body level
let (params, func_body) = match stmt {
ast::Stmt::FunctionDef(f) => (&f.parameters, &f.body),
_ => continue,
};
// Get first parameter name (usually "self" or "cls")
let first_param = params
.posonlyargs
.first()
.or(params.args.first())
.map(|p| &p.parameter.name);
let Some(self_name) = first_param else {
continue;
};
// Scan function body for self.xxx = ... (STORE_ATTR on first param)
Self::scan_store_attrs(func_body, self_name.as_str(), attrs);
}
}
/// Recursively scan statements for `name.attr = value` patterns.
fn scan_store_attrs(stmts: &[ast::Stmt], name: &str, attrs: &mut IndexSet<String>) {
for stmt in stmts {
match stmt {
ast::Stmt::Assign(a) => {
for target in &a.targets {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = target
&& let ast::Expr::Name(n) = value.as_ref()
&& n.id.as_str() == name
{
attrs.insert(attr.to_string());
}
}
}
ast::Stmt::AnnAssign(a) => {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) =
a.target.as_ref()
&& let ast::Expr::Name(n) = value.as_ref()
&& n.id.as_str() == name
{
attrs.insert(attr.to_string());
}
}
ast::Stmt::AugAssign(a) => {
if let ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) =
a.target.as_ref()
&& let ast::Expr::Name(n) = value.as_ref()
&& n.id.as_str() == name
{
attrs.insert(attr.to_string());
}
}
ast::Stmt::If(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
for clause in &s.elif_else_clauses {
Self::scan_store_attrs(&clause.body, name, attrs);
}
}
ast::Stmt::For(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
Self::scan_store_attrs(&s.orelse, name, attrs);
}
ast::Stmt::While(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
Self::scan_store_attrs(&s.orelse, name, attrs);
}
ast::Stmt::Try(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
for handler in &s.handlers {
let ast::ExceptHandler::ExceptHandler(h) = handler;
Self::scan_store_attrs(&h.body, name, attrs);
}
Self::scan_store_attrs(&s.orelse, name, attrs);
Self::scan_store_attrs(&s.finalbody, name, attrs);
}
ast::Stmt::With(s) => {
Self::scan_store_attrs(&s.body, name, attrs);
}
ast::Stmt::Match(s) => {
for case in &s.cases {
Self::scan_store_attrs(&case.body, name, attrs);
}
}
_ => {}
}
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/codegen/src/compile.rs` around lines 4512 - 4597, The collector misses
methods with a pos-only first parameter and doesn't descend into match
statements; update collect_static_attributes to pick the first param by checking
params.posonlyargs.first() before params.args.first() (or otherwise consider
both lists) when computing self_name in collect_static_attributes, and extend
scan_store_attrs to handle the match statement AST node (iterate each match
case/body and scan them) so assignments inside match arms are discovered; refer
to the functions collect_static_attributes and scan_store_attrs to locate where
to change the parameter selection and add the new match-arm recursion.


// Python/compile.c find_ann
fn find_ann(body: &[ast::Stmt]) -> bool {
for statement in body {
Expand Down Expand Up @@ -4617,6 +4710,13 @@ impl Compiler {
}
);

// PEP 649: Initialize __classdict__ cell (before __doc__)
if self.current_symbol_table().needs_classdict {
emit!(self, Instruction::LoadLocals);
let classdict_idx = self.get_cell_var_index("__classdict__")?;
emit!(self, Instruction::StoreDeref { i: classdict_idx });
}

// Store __doc__ only if there's an explicit docstring
if let Some(doc) = doc_str {
self.emit_load_const(ConstantData::Str { value: doc.into() });
Expand Down Expand Up @@ -4645,13 +4745,6 @@ impl Compiler {
);
}

// PEP 649: Initialize __classdict__ cell for class annotation scope
if self.current_symbol_table().needs_classdict {
emit!(self, Instruction::LoadLocals);
let classdict_idx = self.get_cell_var_index("__classdict__")?;
emit!(self, Instruction::StoreDeref { i: classdict_idx });
}

// Handle class annotations based on future_annotations flag
if Self::find_ann(body) {
if self.future_annotations {
Expand All @@ -4669,6 +4762,16 @@ impl Compiler {
}
}

// Collect __static_attributes__: scan methods for self.xxx = ... patterns
Self::collect_static_attributes(
body,
self.code_stack
.last_mut()
.unwrap()
.static_attributes
.as_mut(),
);

// 3. Compile the class body
self.compile_statements(body)?;

Expand All @@ -4684,14 +4787,15 @@ impl Compiler {

// Emit __static_attributes__ tuple
{
let attrs: Vec<String> = self
let mut attrs: Vec<String> = self
.code_stack
.last()
.unwrap()
.static_attributes
.as_ref()
.map(|s| s.iter().cloned().collect())
.unwrap_or_default();
attrs.sort();
self.emit_load_const(ConstantData::Tuple {
elements: attrs
.into_iter()
Expand Down Expand Up @@ -5091,8 +5195,7 @@ impl Compiler {
method: SpecialMethod::AEnter
}
); // [bound_aexit, bound_aenter]
// bound_aenter is already bound, call with NULL self_or_null
emit!(self, Instruction::PushNull); // [bound_aexit, bound_aenter, NULL]
emit!(self, Instruction::PushNull);
emit!(self, Instruction::Call { argc: 0 }); // [bound_aexit, awaitable]
emit!(self, Instruction::GetAwaitable { r#where: 1 });
self.emit_load_const(ConstantData::None);
Expand All @@ -5112,8 +5215,7 @@ impl Compiler {
method: SpecialMethod::Enter
}
); // [bound_exit, bound_enter]
// bound_enter is already bound, call with NULL self_or_null
emit!(self, Instruction::PushNull); // [bound_exit, bound_enter, NULL]
emit!(self, Instruction::PushNull);
emit!(self, Instruction::Call { argc: 0 }); // [bound_exit, enter_result]
}

Expand Down Expand Up @@ -5168,8 +5270,8 @@ impl Compiler {
});

// ===== Normal exit path =====
// Stack: [..., __exit__]
// Call __exit__(None, None, None)
// Stack: [..., bound_exit]
// Call bound_exit(None, None, None)
self.set_source_range(with_range);
emit!(self, Instruction::PushNull);
self.emit_load_const(ConstantData::None);
Expand Down Expand Up @@ -6894,17 +6996,28 @@ impl Compiler {
let has_unpacking = items.iter().any(|item| item.key.is_none());

if !has_unpacking {
// Simple case: no ** unpacking, build all pairs directly
for item in items {
self.compile_expression(item.key.as_ref().unwrap())?;
self.compile_expression(&item.value)?;
}
emit!(
self,
Instruction::BuildMap {
count: u32::try_from(items.len()).expect("too many dict items"),
// STACK_USE_GUIDELINE: for large dicts (16+ pairs), use
// BUILD_MAP 0 + MAP_ADD to avoid excessive stack usage
let big = items.len() * 2 > 30; // ~15 pairs threshold
if big {
emit!(self, Instruction::BuildMap { count: 0 });
for item in items {
self.compile_expression(item.key.as_ref().unwrap())?;
self.compile_expression(&item.value)?;
emit!(self, Instruction::MapAdd { i: 1 });
}
);
} else {
for item in items {
self.compile_expression(item.key.as_ref().unwrap())?;
self.compile_expression(&item.value)?;
}
emit!(
self,
Instruction::BuildMap {
count: u32::try_from(items.len()).expect("too many dict items"),
}
);
}
return Ok(());
}

Expand Down
Loading