Skip to content

Commit 7338bb7

Browse files
committed
Support octet escaping
1 parent 395fd6b commit 7338bb7

1 file changed

Lines changed: 44 additions & 0 deletions

File tree

parser/src/lexer.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,8 +1238,27 @@ fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
12381238
let mut escape = false; //flag if previous was \
12391239
let mut hex_on = false; // hex mode on or off
12401240
let mut hex_value = String::new();
1241+
let mut octet_on = false;
1242+
let mut octet_value = String::new();
12411243

12421244
for c in s.chars() {
1245+
if octet_on {
1246+
let mut should_skip = false; // flag to indicate if we should skip the escape sequence.
1247+
if let '0'..='7' = c {
1248+
octet_value.push(c);
1249+
if octet_value.len() < 3 {
1250+
continue;
1251+
}
1252+
should_skip = true;
1253+
}
1254+
res.push(u8::from_str_radix(&octet_value, 8).unwrap());
1255+
octet_on = false;
1256+
escape = false;
1257+
octet_value.clear();
1258+
if should_skip {
1259+
continue;
1260+
}
1261+
}
12431262
if hex_on {
12441263
if c.is_ascii_hexdigit() {
12451264
if hex_value.is_empty() {
@@ -1269,6 +1288,11 @@ fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
12691288
('n', false) => res.push(b'n'),
12701289
('r', true) => res.push(b'\r'),
12711290
('r', false) => res.push(b'r'),
1291+
(val @ '0'..='7', true) => {
1292+
octet_on = true;
1293+
octet_value.push(val);
1294+
continue;
1295+
}
12721296
(x, true) => {
12731297
res.push(b'\\');
12741298
res.push(x as u8);
@@ -1278,6 +1302,11 @@ fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
12781302
escape = false;
12791303
}
12801304
}
1305+
if octet_on {
1306+
res.push(u8::from_str_radix(&octet_value, 8).unwrap());
1307+
} else if hex_on {
1308+
return Err(LexicalErrorType::StringError);
1309+
}
12811310
Ok(res)
12821311
}
12831312

@@ -1715,4 +1744,19 @@ mod tests {
17151744
]
17161745
)
17171746
}
1747+
1748+
// Checks that octal escapes inside a bytes literal are decoded to single bytes.
#[test]
1749+
fn test_escape_octet() {
1750+
// The literal covers three ways an octal escape can end:
//   `\43`   - two digits, ended by the non-octal char `a` -> b'#' (0o43 == 35)
//   `\4`    - one digit, ended by the following backslash  -> 0x04
//   `\1234` - only the first three digits are consumed (`\123` -> b'S');
//             the trailing `4` is kept as an ordinary character.
let source = r##"b'\43a\4\1234'"##;
1751+
let tokens = lex_source(source);
1752+
assert_eq!(
1753+
tokens,
1754+
vec![
1755+
Tok::Bytes {
1756+
// Expected decoded bytes: '#', 'a', 0x04, 'S', '4'.
value: b"#a\x04S4".to_vec()
1757+
},
1758+
// The expected token stream ends with a Newline token after the bytes token.
Tok::Newline
1759+
]
1760+
)
1761+
}
17181762
}

0 commit comments

Comments
 (0)