Skip to content

Commit 400ad80

Browse files
fstrings
1 parent d048d0a commit 400ad80

File tree

10 files changed

+210
-23
lines changed

10 files changed

+210
-23
lines changed

parser/src/ast.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ pub enum Expression {
197197
elements: Vec<Expression>,
198198
},
199199
String {
200-
value: String,
200+
value: StringGroup,
201201
},
202202
Bytes {
203203
value: Vec<u8>,
@@ -312,3 +312,10 @@ pub enum Number {
312312
Float { value: f64 },
313313
Complex { real: f64, imag: f64 },
314314
}
315+
316+
/// The pieces a string expression is made of; needed because an f-string is not a single
/// constant but a mix of literal text and embedded expressions.
#[derive(Debug, PartialEq)]
317+
pub enum StringGroup {
318+
/// Literal text.
Constant { value: String },
319+
/// An embedded expression whose value is formatted into the string.
FormattedValue { value: Box<Expression> },
320+
/// A sequence of pieces that are concatenated in order.
Joined { values: Vec<StringGroup> },
321+
}

parser/src/lexer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,6 @@ where
274274
let mut saw_f = false;
275275
loop {
276276
// Detect r"", f"", b"" and u""
277-
// TODO: handle f-strings
278277
if !(saw_b || saw_u || saw_f) && (self.chr0 == Some('b') || self.chr0 == Some('B')) {
279278
saw_b = true;
280279
} else if !(saw_b || saw_r || saw_u || saw_f)
@@ -442,7 +441,7 @@ where
442441
is_bytes: bool,
443442
is_raw: bool,
444443
_is_unicode: bool,
445-
_is_fstring: bool,
444+
is_fstring: bool,
446445
) -> Spanned<Tok> {
447446
let quote_char = self.next_char().unwrap();
448447
let mut string_content = String::new();
@@ -533,6 +532,7 @@ where
533532
} else {
534533
Tok::String {
535534
value: string_content,
535+
is_fstring,
536536
}
537537
};
538538

parser/src/parser.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@ use std::error::Error;
44
use std::fs::File;
55
use std::io::Read;
66
use std::iter;
7+
use std::mem;
78
use std::path::Path;
89

10+
use self::lalrpop_util::ParseError;
11+
912
use super::ast;
1013
use super::lexer;
1114
use super::python;
@@ -92,6 +95,96 @@ pub fn parse_expression(source: &str) -> Result<ast::Expression, String> {
9295
do_lalr_parsing!(source, Expression, StartExpression)
9396
}
9497

98+
/// Reasons the body of an f-string can fail to parse.
///
/// Returned by `parse_fstring`. Deriving `Debug` and `PartialEq` lets callers print the error,
/// `unwrap` results that carry it, and compare it in tests.
#[derive(Debug, PartialEq)]
pub enum FStringError {
    /// The input ended while a `{` replacement field was still open.
    UnclosedLbrace,
    /// A single `}` appeared outside of any replacement field.
    UnopenedRbrace,
    /// The text between `{` and `}` could not be parsed as an expression.
    InvalidExpression,
}
103+
104+
impl From<FStringError> for ParseError<lexer::Location, token::Tok, lexer::LexicalError> {
105+
fn from(err: FStringError) -> Self {
106+
// TODO: we should have our own top-level ParseError to properly propagate f-string (and
107+
// other) syntax errors
108+
ParseError::User {
109+
error: lexer::LexicalError::StringError,
110+
}
111+
}
112+
}
113+
114+
pub fn parse_fstring(source: &str) -> Result<ast::StringGroup, FStringError> {
115+
let mut values = vec![];
116+
let mut start = 0;
117+
let mut depth = 0;
118+
let mut escaped = false;
119+
let mut content = String::new();
120+
121+
let mut chars = source.char_indices().peekable();
122+
while let Some((pos, ch)) = chars.next() {
123+
match ch {
124+
'{' | '}' if escaped => {
125+
content.push(ch);
126+
escaped = false;
127+
}
128+
'{' => {
129+
if let Some((_, '{')) = chars.peek() {
130+
escaped = true;
131+
continue;
132+
}
133+
134+
if depth == 0 {
135+
values.push(ast::StringGroup::Constant {
136+
value: mem::replace(&mut content, String::new()),
137+
});
138+
139+
start = pos + 1;
140+
}
141+
depth += 1;
142+
}
143+
'}' => {
144+
if let Some((_, '}')) = chars.peek() {
145+
escaped = true;
146+
continue;
147+
}
148+
149+
if depth == 0 {
150+
return Err(FStringError::UnopenedRbrace);
151+
}
152+
153+
depth -= 1;
154+
if depth == 0 {
155+
values.push(ast::StringGroup::FormattedValue {
156+
value: Box::new(match parse_expression(source[start..pos].trim()) {
157+
Ok(expr) => expr,
158+
Err(_) => return Err(FStringError::InvalidExpression),
159+
}),
160+
});
161+
}
162+
}
163+
ch => {
164+
if depth == 0 {
165+
content.push(ch);
166+
}
167+
}
168+
}
169+
}
170+
171+
if depth != 0 {
172+
return Err(FStringError::UnclosedLbrace);
173+
}
174+
175+
if !content.is_empty() {
176+
values.push(ast::StringGroup::Constant { value: content })
177+
}
178+
179+
Ok(match values.len() {
180+
0 => ast::StringGroup::Constant {
181+
value: "".to_string(),
182+
},
183+
1 => values.into_iter().next().unwrap(),
184+
_ => ast::StringGroup::Joined { values },
185+
})
186+
}
187+
95188
#[cfg(test)]
96189
mod tests {
97190
use super::ast;

parser/src/python.lalrpop

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
use super::ast;
88
use super::lexer;
9+
use super::parser;
910
use std::iter::FromIterator;
1011
use num_bigint::BigInt;
1112

@@ -789,7 +790,8 @@ SliceOp: ast::Expression = {
789790
}
790791

791792
Atom: ast::Expression = {
792-
StringConstant,
793+
<s:StringGroup> => ast::Expression::String { value: s },
794+
<b:Bytes> => ast::Expression::Bytes { value: b },
793795
<n:Number> => ast::Expression::Number { value: n },
794796
<i:Identifier> => ast::Expression::Identifier { name: i },
795797
"[" <e:TestListComp?> "]" => {
@@ -992,16 +994,30 @@ Number: ast::Number = {
992994
<s:complex> => { ast::Number::Complex { real: s.0, imag: s.1 } },
993995
};
994996

995-
StringConstant: ast::Expression = {
996-
<s:string+> => {
997-
let glued = s.join("");
998-
ast::Expression::String { value: glued }
997+
// One or more adjacent string tokens, combined into a single ast::StringGroup.
// Tokens flagged as f-strings are parsed with parser::parse_fstring; its error is propagated
// through `?`, which is why this is a fallible (`=>?`) action. All other tokens become
// constants.
StringGroup: ast::StringGroup = {
998+
<s:string+> =>? {
999+
let mut values = vec![];
1000+
for (value, is_fstring) in s {
1001+
values.push(if is_fstring {
1002+
parser::parse_fstring(&value)?
1003+
} else {
1004+
ast::StringGroup::Constant { value }
1005+
})
1006+
}
1007+
1008+
// Only wrap in Joined when there is more than one piece; `string+` matches at least one
// token, so the unwrap in the else branch always finds an element.
Ok(if values.len() > 1 {
1009+
ast::StringGroup::Joined { values }
1010+
} else {
1011+
values.into_iter().next().unwrap()
1012+
})
9991013
},
1014+
};
1015+
1016+
Bytes: Vec<u8> = {
10001017
<s:bytes+> => {
1001-
let glued = s.into_iter().flatten().collect::<Vec<u8>>();
1002-
ast::Expression::Bytes { value: glued }
1018+
s.into_iter().flatten().collect::<Vec<u8>>()
10031019
},
1004-
};
1020+
}
10051021

10061022
Identifier: String = <s:name> => s;
10071023

@@ -1096,7 +1112,7 @@ extern {
10961112
int => lexer::Tok::Int { value: <BigInt> },
10971113
float => lexer::Tok::Float { value: <f64> },
10981114
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
1099-
string => lexer::Tok::String { value: <String> },
1115+
string => lexer::Tok::String { value: <String>, is_fstring: <bool> },
11001116
bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
11011117
name => lexer::Tok::Name { name: <String> },
11021118
"\n" => lexer::Tok::Newline,

parser/src/token.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ pub enum Tok {
99
Int { value: BigInt },
1010
Float { value: f64 },
1111
Complex { real: f64, imag: f64 },
12-
String { value: String },
12+
String { value: String, is_fstring: bool },
1313
Bytes { value: Vec<u8> },
1414
Newline,
1515
Indent,

tests/snippets/fstrings.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Snippet test for f-strings: each assert compares an f-string with its expected text.
foo = 'bar'
2+
3+
# Expressions that are themselves string literals, including a nested f-string.
assert f"{''}" == ''
4+
assert f"{f'{foo}'}" == 'bar'
5+
# Literal text before, after, and on both sides of a replacement field.
assert f"foo{foo}" == 'foobar'
6+
assert f"{foo}foo" == 'barfoo'
7+
assert f"foo{foo}foo" == 'foobarfoo'
8+
# Doubled braces are literal braces, not a replacement field.
assert f"{{foo}}" == '{foo}'
9+
# With spaces, the inner braces are a set display inside the replacement field.
assert f"{ {foo} }" == "{'bar'}"

vm/src/bytecode.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ pub enum Instruction {
125125
Raise {
126126
argc: usize,
127127
},
128+
BuildString {
129+
size: usize,
130+
},
128131
BuildTuple {
129132
size: usize,
130133
unpack: bool,
@@ -164,6 +167,7 @@ pub enum Instruction {
164167
after: usize,
165168
},
166169
Unpack,
170+
FormatValue,
167171
}
168172

169173
#[derive(Debug, Clone, PartialEq)]

vm/src/compile.rs

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -980,11 +980,7 @@ impl Compiler {
980980
});
981981
}
982982
ast::Expression::String { value } => {
983-
self.emit(Instruction::LoadConst {
984-
value: bytecode::Constant::String {
985-
value: value.to_string(),
986-
},
987-
});
983+
self.compile_string(value)?;
988984
}
989985
ast::Expression::Bytes { value } => {
990986
self.emit(Instruction::LoadConst {
@@ -1316,6 +1312,29 @@ impl Compiler {
13161312
Ok(())
13171313
}
13181314

1315+
fn compile_string(&mut self, string: &ast::StringGroup) -> Result<(), String> {
1316+
match string {
1317+
ast::StringGroup::Joined { values } => {
1318+
for value in values {
1319+
self.compile_string(value)?;
1320+
}
1321+
self.emit(Instruction::BuildString { size: values.len() })
1322+
}
1323+
ast::StringGroup::Constant { value } => {
1324+
self.emit(Instruction::LoadConst {
1325+
value: bytecode::Constant::String {
1326+
value: value.to_string(),
1327+
},
1328+
});
1329+
}
1330+
ast::StringGroup::FormattedValue { value } => {
1331+
self.compile_expression(value)?;
1332+
self.emit(Instruction::FormatValue);
1333+
}
1334+
}
1335+
Ok(())
1336+
}
1337+
13191338
// Low level helper functions:
13201339
fn emit(&mut self, instruction: Instruction) {
13211340
self.current_code_object().instructions.push(instruction);

vm/src/frame.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,16 @@ impl Frame {
218218
}
219219
Ok(None)
220220
}
221+
bytecode::Instruction::BuildString { size } => {
222+
let s = self
223+
.pop_multiple(*size)
224+
.into_iter()
225+
.map(|pyobj| objstr::get_value(&pyobj))
226+
.collect::<String>();
227+
let str_obj = vm.ctx.new_str(s);
228+
self.push_value(str_obj);
229+
Ok(None)
230+
}
221231
bytecode::Instruction::BuildList { size, unpack } => {
222232
let elements = self.get_elements(vm, *size, *unpack)?;
223233
let list_obj = vm.ctx.new_list(elements);
@@ -630,6 +640,12 @@ impl Frame {
630640
}
631641
Ok(None)
632642
}
643+
bytecode::Instruction::FormatValue => {
644+
let value = self.pop_value();
645+
let formatted = vm.to_pystr(&value)?;
646+
self.push_value(vm.new_str(formatted));
647+
Ok(None)
648+
}
633649
}
634650
}
635651

vm/src/stdlib/ast.rs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -516,11 +516,7 @@ fn expression_to_ast(ctx: &PyContext, expression: &ast::Expression) -> PyObjectR
516516

517517
node
518518
}
519-
ast::Expression::String { value } => {
520-
let node = create_node(ctx, "Str");
521-
ctx.set_attr(&node, "s", ctx.new_str(value.clone()));
522-
node
523-
}
519+
ast::Expression::String { value } => string_to_ast(ctx, value),
524520
ast::Expression::Bytes { value } => {
525521
let node = create_node(ctx, "Bytes");
526522
ctx.set_attr(&node, "s", ctx.new_bytes(value.clone()));
@@ -567,6 +563,33 @@ fn comprehension_to_ast(ctx: &PyContext, comprehension: &ast::Comprehension) ->
567563
node
568564
}
569565

566+
fn string_to_ast(ctx: &PyContext, string: &ast::StringGroup) -> PyObjectRef {
567+
match string {
568+
ast::StringGroup::Constant { value } => {
569+
let node = create_node(ctx, "Str");
570+
ctx.set_attr(&node, "s", ctx.new_str(value.clone()));
571+
node
572+
}
573+
ast::StringGroup::FormattedValue { value } => {
574+
let node = create_node(ctx, "FormattedValue");
575+
let py_value = expression_to_ast(ctx, value);
576+
ctx.set_attr(&node, "value", py_value);
577+
node
578+
}
579+
ast::StringGroup::Joined { values } => {
580+
let node = create_node(ctx, "JoinedStr");
581+
let py_values = ctx.new_list(
582+
values
583+
.iter()
584+
.map(|value| string_to_ast(ctx, value))
585+
.collect(),
586+
);
587+
ctx.set_attr(&node, "values", py_values);
588+
node
589+
}
590+
}
591+
}
592+
570593
fn ast_parse(vm: &mut VirtualMachine, args: PyFuncArgs) -> PyResult {
571594
arg_check!(vm, args, required = [(source, Some(vm.ctx.str_type()))]);
572595

0 commit comments

Comments
 (0)