Skip to content

Commit 1a8de8f

Browse files
committed
Refactor lex_byte
1 parent efb6f6a commit 1a8de8f

File tree

1 file changed

+69
-62
lines changed

1 file changed

+69
-62
lines changed

parser/src/lexer.rs

Lines changed: 69 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1233,79 +1233,86 @@ where
12331233
}
12341234
}
12351235

1236+
#[derive(Debug)]
1237+
enum EscapeMode {
1238+
NORMAL,
1239+
HEX,
1240+
OCTET,
1241+
}
1242+
12361243
fn lex_byte(s: String) -> Result<Vec<u8>, LexicalErrorType> {
12371244
let mut res = vec![];
1238-
let mut escape = false; //flag if previous was \
1239-
let mut hex_on = false; // hex mode on or off
1240-
let mut hex_value = String::new();
1241-
let mut octet_on = false;
1242-
let mut octet_value = String::new();
1243-
1244-
for c in s.chars() {
1245-
if octet_on {
1246-
let mut should_skip = false; // flag to indicate if we should skip the escape sequence.
1247-
if let '0'..='7' = c {
1248-
octet_value.push(c);
1249-
if octet_value.len() < 3 {
1250-
continue;
1245+
let mut escape: Option<EscapeMode> = None;
1246+
let mut escape_buffer = String::new();
1247+
1248+
let mut chars_iter = s.chars();
1249+
let mut next_char = chars_iter.next();
1250+
1251+
while let Some(c) = next_char {
1252+
match escape {
1253+
Some(EscapeMode::OCTET) => {
1254+
if let '0'..='7' = c {
1255+
escape_buffer.push(c);
1256+
next_char = chars_iter.next();
1257+
if escape_buffer.len() < 3 {
1258+
continue;
1259+
}
12511260
}
1252-
should_skip = true;
1253-
}
1254-
res.push(u8::from_str_radix(&octet_value, 8).unwrap());
1255-
octet_on = false;
1256-
escape = false;
1257-
octet_value.clear();
1258-
if should_skip {
1259-
continue;
1260-
}
1261-
}
1262-
if hex_on {
1263-
if c.is_ascii_hexdigit() {
1264-
if hex_value.is_empty() {
1265-
hex_value.push(c);
1266-
continue;
1261+
res.push(u8::from_str_radix(&escape_buffer, 8).unwrap());
1262+
escape = None;
1263+
escape_buffer.clear();
1264+
}
1265+
Some(EscapeMode::HEX) => {
1266+
if c.is_ascii_hexdigit() {
1267+
if escape_buffer.is_empty() {
1268+
escape_buffer.push(c);
1269+
} else {
1270+
escape_buffer.push(c);
1271+
res.push(u8::from_str_radix(&escape_buffer, 16).unwrap());
1272+
escape = None;
1273+
escape_buffer.clear();
1274+
}
1275+
next_char = chars_iter.next();
12671276
} else {
1268-
hex_value.push(c);
1269-
res.push(u8::from_str_radix(&hex_value, 16).unwrap());
1270-
hex_on = false;
1271-
hex_value.clear();
1277+
return Err(LexicalErrorType::StringError);
12721278
}
1273-
} else {
1274-
return Err(LexicalErrorType::StringError);
12751279
}
1276-
} else {
1277-
match (c, escape) {
1278-
('\\', true) => res.push(b'\\'),
1279-
('\\', false) => {
1280-
escape = true;
1281-
continue;
1282-
}
1283-
('x', true) => hex_on = true,
1284-
('x', false) => res.push(b'x'),
1285-
('t', true) => res.push(b'\t'),
1286-
('t', false) => res.push(b't'),
1287-
('n', true) => res.push(b'\n'),
1288-
('n', false) => res.push(b'n'),
1289-
('r', true) => res.push(b'\r'),
1290-
('r', false) => res.push(b'r'),
1291-
(val @ '0'..='7', true) => {
1292-
octet_on = true;
1293-
octet_value.push(val);
1294-
continue;
1280+
Some(EscapeMode::NORMAL) => {
1281+
match c {
1282+
'\\' => res.push(b'\\'),
1283+
'x' => {
1284+
escape = Some(EscapeMode::HEX);
1285+
next_char = chars_iter.next();
1286+
continue;
1287+
}
1288+
't' => res.push(b'\t'),
1289+
'n' => res.push(b'\n'),
1290+
'r' => res.push(b'\r'),
1291+
'0'..='7' => {
1292+
escape = Some(EscapeMode::OCTET);
1293+
continue;
1294+
}
1295+
x => {
1296+
res.push(b'\\');
1297+
res.push(x as u8);
1298+
}
12951299
}
1296-
(x, true) => {
1297-
res.push(b'\\');
1298-
res.push(x as u8);
1300+
escape = None;
1301+
next_char = chars_iter.next();
1302+
}
1303+
None => {
1304+
match c {
1305+
'\\' => escape = Some(EscapeMode::NORMAL),
1306+
x => res.push(x as u8),
12991307
}
1300-
(x, false) => res.push(x as u8),
1308+
next_char = chars_iter.next();
13011309
}
1302-
escape = false;
13031310
}
13041311
}
1305-
if octet_on {
1306-
res.push(u8::from_str_radix(&octet_value, 8).unwrap());
1307-
} else if hex_on {
1308-
return Err(LexicalErrorType::StringError);
1312+
match escape {
1313+
Some(EscapeMode::OCTET) => res.push(u8::from_str_radix(&escape_buffer, 8).unwrap()),
1314+
Some(EscapeMode::HEX) => return Err(LexicalErrorType::StringError),
1315+
_ => (),
13091316
}
13101317
Ok(res)
13111318
}

0 commit comments

Comments
 (0)