Skip to content

Commit 940c41b

Browse files
committed
Reduce lalrpop generated code size by about half. Relates to issue RustPython#173.
1 parent bc23e60 commit 940c41b

File tree

7 files changed

+203
-140
lines changed

7 files changed

+203
-140
lines changed

parser/src/ast.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@ pub struct Node {
1212
}
1313
*/
1414

15+
#[derive(Debug, PartialEq)]
16+
pub enum Top {
17+
Program(Program),
18+
Statement(LocatedStatement),
19+
Expression(Expression),
20+
}
21+
1522
#[derive(Debug, PartialEq)]
1623
pub struct Program {
1724
pub statements: Vec<LocatedStatement>,

parser/src/parser.rs

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@ extern crate lalrpop_util;
33
use std::error::Error;
44
use std::fs::File;
55
use std::io::Read;
6+
use std::iter;
67
use std::path::Path;
78

89
use super::ast;
910
use super::lexer;
1011
use super::python;
12+
use super::token;
1113

1214
pub fn read_file(filename: &Path) -> Result<String, String> {
1315
match File::open(&filename) {
@@ -40,32 +42,35 @@ pub fn parse(filename: &Path) -> Result<ast::Program, String> {
4042
}
4143
}
4244

45+
/// Parses `$input` with the single generated `Top` parser and unwraps the
/// requested kind of result.
///
/// * `$input` - the source text handed to `lexer::Lexer::new`.
/// * `$pat`   - the `ast::Top` variant the caller expects (`Program`,
///              `Statement` or `Expression`).
/// * `$tok`   - the `token::Tok` start marker that selects the matching
///              production of the `Top` grammar rule.
///
/// Evaluates to `Result<T, String>`, where `T` is the payload of
/// `ast::Top::$pat`. Running out of tokens is reported as the fixed message
/// `"Unexpected end of input."`; every other parse error is reported using its
/// `Debug` representation.
macro_rules! do_lalr_parsing {
    ($input: expr, $pat: ident, $tok: ident) => {{
        let lxr = lexer::Lexer::new($input);
        // The marker token is not part of the source text, so it carries
        // default (empty) start and end locations.
        let marker_token = (Default::default(), token::Tok::$tok, Default::default());
        let tokenizer = iter::once(Ok(marker_token)).chain(lxr);

        match python::TopParser::new().parse(tokenizer) {
            // `token: None` means the parser ran out of input. Keep the
            // dedicated message so callers can tell "incomplete" from "wrong".
            Err(lalrpop_util::ParseError::UnrecognizedToken { token: None, .. }) => {
                Err(String::from("Unexpected end of input."))
            }
            Err(why) => Err(format!("{:?}", why)),
            Ok(ast::Top::$pat(x)) => Ok(x),
            // The start marker fixes which `Top` production can match, so any
            // other variant would be a grammar bug.
            Ok(_) => unreachable!("Top parser returned a variant not selected by the start token"),
        }
    }};
}
63+
4364
pub fn parse_program(source: &str) -> Result<ast::Program, String> {
44-
let lxr = lexer::Lexer::new(&source);
45-
match python::ProgramParser::new().parse(lxr) {
46-
Err(lalrpop_util::ParseError::UnrecognizedToken {
47-
token: None,
48-
expected: _,
49-
}) => Err(String::from("Unexpected end of input.")),
50-
Err(why) => Err(String::from(format!("{:?}", why))),
51-
Ok(p) => Ok(p),
52-
}
65+
do_lalr_parsing!(source, Program, StartProgram)
5366
}
5467

5568
pub fn parse_statement(source: &str) -> Result<ast::LocatedStatement, String> {
56-
let lxr = lexer::Lexer::new(&source);
57-
match python::StatementParser::new().parse(lxr) {
58-
Err(why) => Err(String::from(format!("{:?}", why))),
59-
Ok(p) => Ok(p),
60-
}
69+
do_lalr_parsing!(source, Statement, StartStatement)
6170
}
6271

6372
pub fn parse_expression(source: &str) -> Result<ast::Expression, String> {
64-
let lxr = lexer::Lexer::new(&source);
65-
match python::ExpressionParser::new().parse(lxr) {
66-
Err(why) => Err(String::from(format!("{:?}", why))),
67-
Ok(p) => Ok(p),
68-
}
73+
do_lalr_parsing!(source, Expression, StartExpression)
6974
}
7075

7176
#[cfg(test)]

parser/src/python.lalrpop

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,16 @@ use std::str::FromStr;
1111

1212
grammar;
1313

14-
pub Program: ast::Program = {
14+
// Workaround to keep the generated parser small:
// lalrpop emits a complete parse table for every `pub` nonterminal, so
// `Top` is the only public entry point and just one table is generated.
// The leading Start* marker token selects which production is parsed.
pub Top: ast::Top = {
    StartProgram <program:Program> => ast::Top::Program(program),
    StartStatement <statement:Statement> => ast::Top::Statement(statement),
    StartExpression <expression:Expression> => ast::Top::Expression(expression),
};
22+
23+
Program: ast::Program = {
1524
<lines:FileLine*> => ast::Program { statements: Vec::from_iter(lines.into_iter().filter_map(|e| e)) },
1625
};
1726

@@ -26,7 +35,7 @@ Suite: Vec<ast::LocatedStatement> = {
2635
"\n" indent <s:Statement+> dedent => s,
2736
};
2837

29-
pub Statement: ast::LocatedStatement = {
38+
Statement: ast::LocatedStatement = {
3039
SimpleStatement,
3140
CompoundStatement,
3241
};
@@ -608,7 +617,7 @@ CompOp: ast::Comparison = {
608617
"is" "not" => ast::Comparison::IsNot,
609618
};
610619

611-
pub Expression: ast::Expression = {
620+
Expression: ast::Expression = {
612621
<e1:Expression> "|" <e2:XorExpression> => ast::Expression::Binop { a: Box::new(e1), op: ast::Operator::BitOr, b: Box::new(e2) },
613622
<e:XorExpression> => e,
614623
};
@@ -914,6 +923,9 @@ extern {
914923
enum lexer::Tok {
915924
indent => lexer::Tok::Indent,
916925
dedent => lexer::Tok::Dedent,
926+
StartProgram => lexer::Tok::StartProgram,
927+
StartStatement => lexer::Tok::StartStatement,
928+
StartExpression => lexer::Tok::StartExpression,
917929
"+" => lexer::Tok::Plus,
918930
"-" => lexer::Tok::Minus,
919931
":" => lexer::Tok::Colon,

parser/src/token.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ pub enum Tok {
77
Newline,
88
Indent,
99
Dedent,
10+
StartProgram,
11+
StartStatement,
12+
StartExpression,
1013
Lpar,
1114
Rpar,
1215
Lsqb,

vm/src/bytecode.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@ pub struct CodeObject {
2121
pub instructions: Vec<Instruction>,
2222
pub label_map: HashMap<Label, usize>,
2323
pub locations: Vec<ast::Location>,
24-
pub arg_names: Vec<String>,
24+
pub arg_names: Vec<String>, // Names of positional arguments
25+
pub varargs: Option<String>,
2526
pub source_path: Option<String>,
2627
pub obj_name: String, // Name of the object that created this code object
2728
}
2829

2930
impl CodeObject {
3031
pub fn new(
3132
arg_names: Vec<String>,
33+
varargs: Option<String>,
3234
source_path: Option<String>,
3335
obj_name: String,
3436
) -> CodeObject {
@@ -37,6 +39,7 @@ impl CodeObject {
3739
label_map: HashMap::new(),
3840
locations: Vec::new(),
3941
arg_names: arg_names,
42+
varargs: varargs,
4043
source_path: source_path,
4144
obj_name: obj_name,
4245
}

vm/src/compile.rs

Lines changed: 47 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,12 @@ impl Compiler {
8383
}
8484

8585
fn push_new_code_object(&mut self, source_path: Option<String>, obj_name: String) {
86-
self.code_object_stack
87-
.push(CodeObject::new(Vec::new(), source_path.clone(), obj_name));
86+
self.code_object_stack.push(CodeObject::new(
87+
Vec::new(),
88+
None,
89+
source_path.clone(),
90+
obj_name,
91+
));
8892
}
8993

9094
fn pop_code_object(&mut self) -> CodeObject {
@@ -384,30 +388,15 @@ impl Compiler {
384388
decorator_list,
385389
} => {
386390
// Create bytecode for this function:
387-
let have_kwargs = args.defaults.len() > 0;
388-
if have_kwargs {
389-
// Construct a tuple:
390-
let size = args.defaults.len();
391-
for element in &args.defaults {
392-
self.compile_expression(element)?;
393-
}
394-
self.emit(Instruction::BuildTuple { size });
395-
}
396-
397-
self.code_object_stack.push(CodeObject::new(
398-
args.args.clone(),
399-
self.source_path.clone(),
400-
name.clone(),
401-
));
391+
let flags = self.enter_function(name, args)?;
402392
self.compile_statements(body)?;
403393

404394
// Emit None at end:
405395
self.emit(Instruction::LoadConst {
406396
value: bytecode::Constant::None,
407397
});
408398
self.emit(Instruction::ReturnValue);
409-
410-
let code = self.code_object_stack.pop().unwrap();
399+
let code = self.pop_code_object();
411400

412401
self.prepare_decorators(decorator_list)?;
413402
self.emit(Instruction::LoadConst {
@@ -420,10 +409,6 @@ impl Compiler {
420409
});
421410

422411
// Turn code object into function object:
423-
let mut flags = bytecode::FunctionOpArg::empty();
424-
if have_kwargs {
425-
flags = flags | bytecode::FunctionOpArg::HAS_DEFAULTS;
426-
}
427412
self.emit(Instruction::MakeFunction { flags: flags });
428413
self.apply_decorators(decorator_list);
429414

@@ -442,6 +427,7 @@ impl Compiler {
442427
self.emit(Instruction::LoadBuildClass);
443428
self.code_object_stack.push(CodeObject::new(
444429
vec![String::from("__locals__")],
430+
None,
445431
self.source_path.clone(),
446432
name.clone(),
447433
));
@@ -455,7 +441,7 @@ impl Compiler {
455441
});
456442
self.emit(Instruction::ReturnValue);
457443

458-
let code = self.code_object_stack.pop().unwrap();
444+
let code = self.pop_code_object();
459445
self.emit(Instruction::LoadConst {
460446
value: bytecode::Constant::Code { code: code },
461447
});
@@ -587,6 +573,36 @@ impl Compiler {
587573
Ok(())
588574
}
589575

576+
fn enter_function(
577+
&mut self,
578+
name: &String,
579+
args: &ast::Parameters,
580+
) -> Result<bytecode::FunctionOpArg, String> {
581+
let have_kwargs = args.defaults.len() > 0;
582+
if have_kwargs {
583+
// Construct a tuple:
584+
let size = args.defaults.len();
585+
for element in &args.defaults {
586+
self.compile_expression(element)?;
587+
}
588+
self.emit(Instruction::BuildTuple { size });
589+
}
590+
591+
self.code_object_stack.push(CodeObject::new(
592+
args.args.clone(),
593+
args.vararg.clone(),
594+
self.source_path.clone(),
595+
name.clone(),
596+
));
597+
598+
let mut flags = bytecode::FunctionOpArg::empty();
599+
if have_kwargs {
600+
flags = flags | bytecode::FunctionOpArg::HAS_DEFAULTS;
601+
}
602+
603+
Ok(flags)
604+
}
605+
590606
fn prepare_decorators(&mut self, decorator_list: &Vec<ast::Expression>) -> Result<(), String> {
591607
for decorator in decorator_list {
592608
self.compile_expression(decorator)?;
@@ -886,26 +902,19 @@ impl Compiler {
886902
});
887903
}
888904
ast::Expression::Lambda { args, body } => {
889-
self.code_object_stack.push(CodeObject::new(
890-
args.args.clone(),
891-
self.source_path.clone(),
892-
"<lambda>".to_string(),
893-
));
905+
let name = "<lambda>".to_string();
906+
let flags = self.enter_function(&name, args)?;
894907
self.compile_expression(body)?;
895908
self.emit(Instruction::ReturnValue);
896-
let code = self.code_object_stack.pop().unwrap();
909+
let code = self.pop_code_object();
897910
self.emit(Instruction::LoadConst {
898911
value: bytecode::Constant::Code { code: code },
899912
});
900913
self.emit(Instruction::LoadConst {
901-
value: bytecode::Constant::String {
902-
value: String::from("<lambda>"),
903-
},
914+
value: bytecode::Constant::String { value: name },
904915
});
905916
// Turn code object into function object:
906-
self.emit(Instruction::MakeFunction {
907-
flags: bytecode::FunctionOpArg::empty(),
908-
});
917+
self.emit(Instruction::MakeFunction { flags: flags });
909918
}
910919
ast::Expression::Comprehension { kind, generators } => {
911920
self.compile_comprehension(kind, generators)?;
@@ -943,6 +952,7 @@ impl Compiler {
943952
// Create magnificent function <listcomp>:
944953
self.code_object_stack.push(CodeObject::new(
945954
vec![".0".to_string()],
955+
None,
946956
self.source_path.clone(),
947957
name,
948958
));
@@ -1033,7 +1043,7 @@ impl Compiler {
10331043
self.emit(Instruction::ReturnValue);
10341044

10351045
// Fetch code for listcomp function:
1036-
let code = self.code_object_stack.pop().unwrap();
1046+
let code = self.pop_code_object();
10371047

10381048
// List comprehension code:
10391049
self.emit(Instruction::LoadConst {

0 commit comments

Comments
 (0)