From 93e3bff6373ec677d6205d56bb2e4151321e1d82 Mon Sep 17 00:00:00 2001 From: Protowalker Date: Fri, 3 Dec 2021 16:33:54 -0500 Subject: [PATCH 1/5] format + fix #136 --- web/src/index.html | 83 +++++++++++---------- web/src/index.js | 178 +++++++++++++++++++++++---------------------- 2 files changed, 139 insertions(+), 122 deletions(-) diff --git a/web/src/index.html b/web/src/index.html index bff98653..663220d4 100644 --- a/web/src/index.html +++ b/web/src/index.html @@ -1,45 +1,54 @@ - - - EndBASIC - + + + EndBASIC + - - - - + + + + - - - EndBASIC terminal window. - + + + EndBASIC terminal window. + - + -
- - - - - -
+
+ + + + + +
- - + + diff --git a/web/src/index.js b/web/src/index.js index 89f88c42..aa7253d5 100644 --- a/web/src/index.js +++ b/web/src/index.js @@ -17,27 +17,29 @@ import * as endbasic_web from "endbasic_web"; import $ from "jquery"; var UA = navigator.userAgent; -var isMobile = ( - /\b(BlackBerry|webOS|iPhone|IEMobile)\b/i.test(UA) || - /\b(Android|Windows Phone|iPad|iPod)\b/i.test(UA) || - (/\b(Macintosh)\b/i.test(UA) && 'ontouchend' in document) // For iPad Pro. -); +var isMobile = + /\b(BlackBerry|webOS|iPhone|IEMobile)\b/i.test(UA) || + /\b(Android|Windows Phone|iPad|iPod)\b/i.test(UA) || + (/\b(Macintosh)\b/i.test(UA) && "ontouchend" in document); // For iPad Pro. var isAndroid = /\bAndroid\b/i.test(UA); var buildId = endbasic_web.get_build_id(); -$('#build-id').text(buildId); +$("#build-id").text(buildId); var template = "Build ID: " + buildId; -$('#report-issue').attr( - "href", "https://github.com/endbasic/endbasic/issues/new?body=" + template); +$("#report-issue").attr( + "href", + "https://github.com/endbasic/endbasic/issues/new?body=" + template +); -let terminal = document.getElementById('terminal'); +let terminal = document.getElementById("terminal"); function fitTerminal() { - let footer = document.getElementsByTagName('footer'); - terminal.style.margin = "15px"; - terminal.width = document.documentElement.clientWidth - 30; - terminal.height = document.documentElement.clientHeight - footer[0].clientHeight - 30; + let footer = document.getElementsByTagName("footer"); + terminal.style.margin = "15px"; + terminal.width = document.documentElement.clientWidth - 30; + terminal.height = + document.documentElement.clientHeight - footer[0].clientHeight - 30; } fitTerminal(); @@ -46,91 +48,97 @@ fitTerminal(); // We only do this on the desktop because mobile browsers will change the size every time they // show the on-screen keyboard and that's not what we really want here. if (!isMobile) { - window.onresize = function() { - let label = document.getElementById('terminal-size'); - label.innerText = "SIZE CHANGED; MUST RELOAD PAGE"; - }; + window.onresize = function () { + let label = document.getElementById("terminal-size"); + label.innerText = "SIZE CHANGED; MUST RELOAD PAGE"; + }; } var wt = new endbasic_web.WebTerminal(terminal); var osk = wt.on_screen_keyboard(); -var mobileInput = document.getElementById('mobile-input'); +var mobileInput = document.getElementById("mobile-input"); if (isMobile) { - $('#button-esc').on('click', function() { - osk.press_escape(); - mobileInput.focus(); - }); - $('#button-up').on('click', function() { - osk.press_arrow_up(); - mobileInput.focus(); - }); - $('#button-down').on('click', function() { - osk.press_arrow_down(); - mobileInput.focus(); - }); - $('#button-left').on('click', function() { - osk.press_arrow_left(); - mobileInput.focus(); - }); - $('#button-right').on('click', function() { - osk.press_arrow_right(); - mobileInput.focus(); - }); + $("#button-esc").on("click", function () { + osk.press_escape(); + mobileInput.focus(); + }); + $("#button-up").on("click", function () { + osk.press_arrow_up(); + mobileInput.focus(); + }); + $("#button-down").on("click", function () { + osk.press_arrow_down(); + mobileInput.focus(); + }); + $("#button-left").on("click", function () { + osk.press_arrow_left(); + mobileInput.focus(); + }); + $("#button-right").on("click", function () { + osk.press_arrow_right(); + mobileInput.focus(); + }); - $('#controls').css('visibility', 'visible'); + $("#controls").css("visibility", "visible"); - if (isAndroid) { - // Handling the keyboard on Android is messy. If we have a real keyboard, we get keydown - // events as we expect. But if we have a soft keyboard, the keydown events are always - // "empty". Fortunately, we can use the input event to get data from the soft keyboard. - // Unfortunately, we can have both real and soft keyboards on the same device, which means - // we have to deal with possibly-duplicate events. And to make things worse, even if we - // just have a soft keyboard, some events (like Enter) come in only as keydown while others - // (like letter presses) come in as both keydown and input. - // - // To cope with these cases, we install handlers for both keydown and input, but we only - // recognize one of them within a short time period to avoid duplicate input. This is quite - // a hack that I'm sure has deficiencies, so restrict its use to Android. - // - // https://stackoverflow.com/questions/30743490/capture-keys-typed-on-android-virtual-keyboard-using-javascript + if (isAndroid) { + // Handling the keyboard on Android is messy. If we have a real keyboard, we get keydown + // events as we expect. But if we have a soft keyboard, the keydown events are always + // "empty". Fortunately, we can use the input event to get data from the soft keyboard. + // Unfortunately, we can have both real and soft keyboards on the same device, which means + // we have to deal with possibly-duplicate events. And to make things worse, even if we + // just have a soft keyboard, some events (like Enter) come in only as keydown while others + // (like letter presses) come in as both keydown and input. + // + // To cope with these cases, we install handlers for both keydown and input, but we only + // recognize one of them within a short time period to avoid duplicate input. This is quite + // a hack that I'm sure has deficiencies, so restrict its use to Android. + // + // https://stackoverflow.com/questions/30743490/capture-keys-typed-on-android-virtual-keyboard-using-javascript - var ignoreLastInput = false; - mobileInput.oninput = function(key) { - mobileInput.value = ""; - if (!ignoreLastInput && key.data != '') { - osk.inject_input_event(key); - ignoreLastInput = true; - setTimeout(function() { ignoreLastInput = false; }, 5); - } - }; - mobileInput.onkeydown = function(key) { - mobileInput.value = ""; - if (!ignoreLastInput && key.keyCode != 229) { - osk.inject_keyboard_event(key); - ignoreLastInput = true; - setTimeout(function() { ignoreLastInput = false; }, 5); - } - }; - } else { - mobileInput.onkeydown = function(key) { - mobileInput.value = ""; - osk.inject_keyboard_event(key); - }; - } - terminal.onclick = function() { - mobileInput.focus(); - } + var ignoreLastInput = false; + mobileInput.oninput = function (key) { + mobileInput.value = ""; + if (!ignoreLastInput && key.data != "") { + osk.inject_input_event(key); + ignoreLastInput = true; + setTimeout(function () { + ignoreLastInput = false; + }, 5); + } + }; + mobileInput.onkeydown = function (key) { + mobileInput.value = ""; + if (!ignoreLastInput && key.keyCode != 229) { + osk.inject_keyboard_event(key); + ignoreLastInput = true; + setTimeout(function () { + ignoreLastInput = false; + }, 5); + } + }; + } else { + mobileInput.onkeydown = function (key) { + mobileInput.value = ""; + osk.inject_keyboard_event(key); + }; + } + terminal.onclick = function () { mobileInput.focus(); + }; + mobileInput.focus(); } else { - mobileInput.hidden = true; - window.onkeydown = function(key) { - osk.inject_keyboard_event(key); - } - terminal.focus(); + mobileInput.hidden = true; + + $(document).keydown(function (evt) { + osk.inject_keyboard_event(evt); + if (evt.key === "Tab") return false; + }); + terminal.focus(); } var sizeInChars = wt.size_description(); -$('#terminal-size').text(sizeInChars); +$("#terminal-size").text(sizeInChars); wt.run_repl_loop(); From 816f64bee0ecb8bdcd83893c9bea7258da499e64 Mon Sep 17 00:00:00 2001 From: Protowalker Date: Fri, 3 Dec 2021 20:44:58 -0500 Subject: [PATCH 2/5] Revert "format + fix #136" This reverts commit 93e3bff6373ec677d6205d56bb2e4151321e1d82. --- web/src/index.html | 83 ++++++++++----------- web/src/index.js | 178 ++++++++++++++++++++++----------------------- 2 files changed, 122 insertions(+), 139 deletions(-) diff --git a/web/src/index.html b/web/src/index.html index 663220d4..bff98653 100644 --- a/web/src/index.html +++ b/web/src/index.html @@ -1,54 +1,45 @@ - - - EndBASIC - + + + EndBASIC + - - - - + + + + - - - EndBASIC terminal window. - + + + EndBASIC terminal window. + - + -
- - - - - -
+
+ + + + + +
- - + + diff --git a/web/src/index.js b/web/src/index.js index aa7253d5..89f88c42 100644 --- a/web/src/index.js +++ b/web/src/index.js @@ -17,29 +17,27 @@ import * as endbasic_web from "endbasic_web"; import $ from "jquery"; var UA = navigator.userAgent; -var isMobile = - /\b(BlackBerry|webOS|iPhone|IEMobile)\b/i.test(UA) || - /\b(Android|Windows Phone|iPad|iPod)\b/i.test(UA) || - (/\b(Macintosh)\b/i.test(UA) && "ontouchend" in document); // For iPad Pro. +var isMobile = ( + /\b(BlackBerry|webOS|iPhone|IEMobile)\b/i.test(UA) || + /\b(Android|Windows Phone|iPad|iPod)\b/i.test(UA) || + (/\b(Macintosh)\b/i.test(UA) && 'ontouchend' in document) // For iPad Pro. +); var isAndroid = /\bAndroid\b/i.test(UA); var buildId = endbasic_web.get_build_id(); -$("#build-id").text(buildId); +$('#build-id').text(buildId); var template = "Build ID: " + buildId; -$("#report-issue").attr( - "href", - "https://github.com/endbasic/endbasic/issues/new?body=" + template -); +$('#report-issue').attr( + "href", "https://github.com/endbasic/endbasic/issues/new?body=" + template); -let terminal = document.getElementById("terminal"); +let terminal = document.getElementById('terminal'); function fitTerminal() { - let footer = document.getElementsByTagName("footer"); - terminal.style.margin = "15px"; - terminal.width = document.documentElement.clientWidth - 30; - terminal.height = - document.documentElement.clientHeight - footer[0].clientHeight - 30; + let footer = document.getElementsByTagName('footer'); + terminal.style.margin = "15px"; + terminal.width = document.documentElement.clientWidth - 30; + terminal.height = document.documentElement.clientHeight - footer[0].clientHeight - 30; } fitTerminal(); @@ -48,97 +46,91 @@ fitTerminal(); // We only do this on the desktop because mobile browsers will change the size every time they // show the on-screen keyboard and that's not what we really want here. if (!isMobile) { - window.onresize = function () { - let label = document.getElementById("terminal-size"); - label.innerText = "SIZE CHANGED; MUST RELOAD PAGE"; - }; + window.onresize = function() { + let label = document.getElementById('terminal-size'); + label.innerText = "SIZE CHANGED; MUST RELOAD PAGE"; + }; } var wt = new endbasic_web.WebTerminal(terminal); var osk = wt.on_screen_keyboard(); -var mobileInput = document.getElementById("mobile-input"); +var mobileInput = document.getElementById('mobile-input'); if (isMobile) { - $("#button-esc").on("click", function () { - osk.press_escape(); - mobileInput.focus(); - }); - $("#button-up").on("click", function () { - osk.press_arrow_up(); - mobileInput.focus(); - }); - $("#button-down").on("click", function () { - osk.press_arrow_down(); - mobileInput.focus(); - }); - $("#button-left").on("click", function () { - osk.press_arrow_left(); - mobileInput.focus(); - }); - $("#button-right").on("click", function () { - osk.press_arrow_right(); - mobileInput.focus(); - }); + $('#button-esc').on('click', function() { + osk.press_escape(); + mobileInput.focus(); + }); + $('#button-up').on('click', function() { + osk.press_arrow_up(); + mobileInput.focus(); + }); + $('#button-down').on('click', function() { + osk.press_arrow_down(); + mobileInput.focus(); + }); + $('#button-left').on('click', function() { + osk.press_arrow_left(); + mobileInput.focus(); + }); + $('#button-right').on('click', function() { + osk.press_arrow_right(); + mobileInput.focus(); + }); - $("#controls").css("visibility", "visible"); + $('#controls').css('visibility', 'visible'); - if (isAndroid) { - // Handling the keyboard on Android is messy. If we have a real keyboard, we get keydown - // events as we expect. But if we have a soft keyboard, the keydown events are always - // "empty". Fortunately, we can use the input event to get data from the soft keyboard. - // Unfortunately, we can have both real and soft keyboards on the same device, which means - // we have to deal with possibly-duplicate events. And to make things worse, even if we - // just have a soft keyboard, some events (like Enter) come in only as keydown while others - // (like letter presses) come in as both keydown and input. - // - // To cope with these cases, we install handlers for both keydown and input, but we only - // recognize one of them within a short time period to avoid duplicate input. This is quite - // a hack that I'm sure has deficiencies, so restrict its use to Android. - // - // https://stackoverflow.com/questions/30743490/capture-keys-typed-on-android-virtual-keyboard-using-javascript + if (isAndroid) { + // Handling the keyboard on Android is messy. If we have a real keyboard, we get keydown + // events as we expect. But if we have a soft keyboard, the keydown events are always + // "empty". Fortunately, we can use the input event to get data from the soft keyboard. + // Unfortunately, we can have both real and soft keyboards on the same device, which means + // we have to deal with possibly-duplicate events. And to make things worse, even if we + // just have a soft keyboard, some events (like Enter) come in only as keydown while others + // (like letter presses) come in as both keydown and input. + // + // To cope with these cases, we install handlers for both keydown and input, but we only + // recognize one of them within a short time period to avoid duplicate input. This is quite + // a hack that I'm sure has deficiencies, so restrict its use to Android. + // + // https://stackoverflow.com/questions/30743490/capture-keys-typed-on-android-virtual-keyboard-using-javascript - var ignoreLastInput = false; - mobileInput.oninput = function (key) { - mobileInput.value = ""; - if (!ignoreLastInput && key.data != "") { - osk.inject_input_event(key); - ignoreLastInput = true; - setTimeout(function () { - ignoreLastInput = false; - }, 5); - } - }; - mobileInput.onkeydown = function (key) { - mobileInput.value = ""; - if (!ignoreLastInput && key.keyCode != 229) { - osk.inject_keyboard_event(key); - ignoreLastInput = true; - setTimeout(function () { - ignoreLastInput = false; - }, 5); - } - }; - } else { - mobileInput.onkeydown = function (key) { - mobileInput.value = ""; - osk.inject_keyboard_event(key); - }; - } - terminal.onclick = function () { + var ignoreLastInput = false; + mobileInput.oninput = function(key) { + mobileInput.value = ""; + if (!ignoreLastInput && key.data != '') { + osk.inject_input_event(key); + ignoreLastInput = true; + setTimeout(function() { ignoreLastInput = false; }, 5); + } + }; + mobileInput.onkeydown = function(key) { + mobileInput.value = ""; + if (!ignoreLastInput && key.keyCode != 229) { + osk.inject_keyboard_event(key); + ignoreLastInput = true; + setTimeout(function() { ignoreLastInput = false; }, 5); + } + }; + } else { + mobileInput.onkeydown = function(key) { + mobileInput.value = ""; + osk.inject_keyboard_event(key); + }; + } + terminal.onclick = function() { + mobileInput.focus(); + } mobileInput.focus(); - }; - mobileInput.focus(); } else { - mobileInput.hidden = true; - - $(document).keydown(function (evt) { - osk.inject_keyboard_event(evt); - if (evt.key === "Tab") return false; - }); - terminal.focus(); + mobileInput.hidden = true; + window.onkeydown = function(key) { + osk.inject_keyboard_event(key); + } + terminal.focus(); } var sizeInChars = wt.size_description(); -$("#terminal-size").text(sizeInChars); +$('#terminal-size').text(sizeInChars); wt.run_repl_loop(); From 04dd5e74ed716b812d714e60a3bf87d1eb8afda0 Mon Sep 17 00:00:00 2001 From: Protowalker Date: Fri, 3 Dec 2021 20:49:39 -0500 Subject: [PATCH 3/5] fix #136 --- web/src/index.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/web/src/index.js b/web/src/index.js index 89f88c42..95e7386a 100644 --- a/web/src/index.js +++ b/web/src/index.js @@ -124,6 +124,12 @@ if (isMobile) { mobileInput.focus(); } else { mobileInput.hidden = true; + $(document).keydown((evt) => { + osk.inject_keyboard_event(evt.originalEvent); + // Returning false here cancels all other keyboard events for the page, preventing tab from losing focus. + // TODO(protowalker): Come up with a solution that captures the input better for embedded scenarios (i.e. iframes) + return false; + }); window.onkeydown = function(key) { osk.inject_keyboard_event(key); } From a428904a3e410026c9dc5be1628c1a3d0bc61ada Mon Sep 17 00:00:00 2001 From: Protowalker Date: Fri, 3 Dec 2021 23:36:21 -0500 Subject: [PATCH 4/5] Add tokenspan --- core/src/lexer.rs | 230 ++++++++++++++++++++++++++++------------------ 1 file changed, 140 insertions(+), 90 deletions(-) diff --git a/core/src/lexer.rs b/core/src/lexer.rs index 46e29ac9..9adc2d0b 100644 --- a/core/src/lexer.rs +++ b/core/src/lexer.rs @@ -20,6 +20,20 @@ use crate::reader::CharReader; use std::io; use std::iter::Peekable; +#[derive(Clone, Debug, PartialEq)] +pub struct TokenSpan { + token: Token, + line: usize, + character: usize, + length: usize, +} + +impl TokenSpan { + pub fn new(token: Token, line: usize, character: usize, length: usize) -> Self { + Self { token, line, character, length } + } +} + /// Collection of valid tokens. /// /// Of special interest are the `Eof` and `Bad` tokens, both of which denote exceptional @@ -124,43 +138,71 @@ impl CharOps for char { pub struct Lexer<'a> { /// Peekable iterator over the characters to scan. input: Peekable>, + current_line: usize, + current_char: usize, } impl<'a> Lexer<'a> { /// Creates a new lexer from the given readable. pub fn from(input: &'a mut dyn io::Read) -> Self { - Self { input: CharReader::from(input).peekable() } + Self { input: CharReader::from(input).peekable(), current_line: 0, current_char: 0 } + } + + /// Moves the iterator forward. Also properly manages the values of current_line and + /// current_char. + fn next(&mut self) -> Option> { + let character = self.input.next(); + if let Some(Ok('\n')) = character { + self.current_line += 1; + self.current_char = 0; + } else if let Some(Ok(_)) = character { + self.current_char += 1; + } + + character + } + + /// Creates a TokenSpan that starts at the current character and goes forward len characters + fn token_span(&self, token: Token, len: usize) -> TokenSpan { + TokenSpan::new(token, self.current_line, self.current_char, len) + } + + /// Creates a TokenSpan that ends at the current character, starting len characters back + fn token_span_backward(&self, token: Token, len: usize) -> TokenSpan { + // TODO(protowalker): implement line memory so that token spans can go over multiple lines + // For now, using a saturating_sub + TokenSpan::new(token, self.current_line, self.current_char.saturating_sub(len - 1), len) } /// Handles a `input.read()` call that returned an unexpected character. /// /// This returns a `Token::Bad` with the provided `msg` and skips characters in the input /// stream until a field separator is found. - fn handle_bad_read>(&mut self, msg: S) -> io::Result { + fn handle_bad_read>(&mut self, msg: S) -> io::Result { loop { match self.input.peek() { Some(Ok(ch)) if ch.is_separator() => break, Some(Ok(_)) => { - self.input.next().unwrap()?; + self.next().unwrap()?; } - Some(Err(_)) => return Err(self.input.next().unwrap().unwrap_err()), + Some(Err(_)) => return Err(self.next().unwrap().unwrap_err()), None => break, } } - Ok(Token::Bad(msg.into())) + Ok(self.token_span(Token::Bad(msg.into()), 1)) } /// Handles a `input.peek()` call that returned an unexpected character. /// /// This returns a `Token::Bad` with the provided `msg`, consumes the peeked character, and /// then skips characters in the input stream until a field separator is found. - fn handle_bad_peek>(&mut self, msg: S) -> io::Result { - self.input.next(); + fn handle_bad_peek>(&mut self, msg: S) -> io::Result { + self.next(); self.handle_bad_read(msg) } /// Consumes the number at the current position, whose first digit is `first`. - fn consume_number(&mut self, first: char) -> io::Result { + fn consume_number(&mut self, first: char) -> io::Result { let mut s = String::new(); let mut found_dot = false; s.push(first); @@ -170,16 +212,16 @@ impl<'a> Lexer<'a> { if found_dot { return self.handle_bad_peek("Too many dots in numeric literal"); } - s.push(self.input.next().unwrap()?); + s.push(self.next().unwrap()?); found_dot = true; } - Some(Ok(ch)) if ch.is_digit(10) => s.push(self.input.next().unwrap()?), + Some(Ok(ch)) if ch.is_digit(10) => s.push(self.next().unwrap()?), Some(Ok(ch)) if ch.is_separator() => break, Some(Ok(ch)) => { let msg = format!("Unexpected character in numeric literal: {}", ch); return self.handle_bad_peek(msg); } - Some(Err(_)) => return Err(self.input.next().unwrap().unwrap_err()), + Some(Err(_)) => return Err(self.next().unwrap().unwrap_err()), None => break, } } @@ -191,38 +233,38 @@ impl<'a> Lexer<'a> { return self.handle_bad_read("Unknown character: ."); } match s.parse::() { - Ok(d) => Ok(Token::Double(d)), + Ok(d) => Ok(self.token_span(Token::Double(d), s.len())), Err(e) => self.handle_bad_read(format!("Bad double {}: {}", s, e)), } } else { match s.parse::() { - Ok(i) => Ok(Token::Integer(i)), + Ok(i) => Ok(self.token_span(Token::Integer(i), s.len())), Err(e) => self.handle_bad_read(format!("Bad integer {}: {}", s, e)), } } } /// Consumes the operator at the current position, whose first character is `first`. - fn consume_operator(&mut self, first: char) -> io::Result { + fn consume_operator(&mut self, first: char) -> io::Result { match (first, self.input.peek()) { - (_, Some(Err(_))) => Err(self.input.next().unwrap().unwrap_err()), + (_, Some(Err(_))) => Err(self.next().unwrap().unwrap_err()), ('<', Some(Ok('>'))) => { - self.input.next().unwrap()?; - Ok(Token::NotEqual) + self.next().unwrap()?; + Ok(self.token_span_backward(Token::NotEqual, 2)) } ('<', Some(Ok('='))) => { - self.input.next().unwrap()?; - Ok(Token::LessEqual) + self.next().unwrap()?; + Ok(self.token_span_backward(Token::LessEqual, 2)) } - ('<', _) => Ok(Token::Less), + ('<', _) => Ok(self.token_span(Token::Less, 1)), ('>', Some(Ok('='))) => { - self.input.next().unwrap()?; - Ok(Token::GreaterEqual) + self.next().unwrap()?; + Ok(self.token_span_backward(Token::GreaterEqual, 2)) } - ('>', _) => Ok(Token::Greater), + ('>', _) => Ok(self.token_span(Token::Greater, 1)), (_, _) => panic!("Should not have been called"), } @@ -231,110 +273,114 @@ impl<'a> Lexer<'a> { /// Consumes the symbol or keyword at the current position, whose first letter is `first`. /// /// The symbol may be a bare name, but it may also contain an optional type annotation. - fn consume_symbol(&mut self, first: char) -> io::Result { + fn consume_symbol(&mut self, first: char) -> io::Result { let mut s = String::new(); s.push(first); let mut vtype = VarType::Auto; loop { match self.input.peek() { - Some(Ok(ch)) if ch.is_word() => s.push(self.input.next().unwrap()?), + Some(Ok(ch)) if ch.is_word() => s.push(self.next().unwrap()?), Some(Ok(ch)) if ch.is_separator() => break, Some(Ok('?')) => { vtype = VarType::Boolean; - self.input.next().unwrap()?; + self.next().unwrap()?; break; } Some(Ok('#')) => { vtype = VarType::Double; - self.input.next().unwrap()?; + self.next().unwrap()?; break; } Some(Ok('%')) => { vtype = VarType::Integer; - self.input.next().unwrap()?; + self.next().unwrap()?; break; } Some(Ok('$')) => { vtype = VarType::Text; - self.input.next().unwrap()?; + self.next().unwrap()?; break; } Some(Ok(ch)) => { let msg = format!("Unexpected character in symbol: {}", ch); return self.handle_bad_peek(msg); } - Some(Err(_)) => return Err(self.input.next().unwrap().unwrap_err()), + Some(Err(_)) => return Err(self.next().unwrap().unwrap_err()), None => break, } } - match s.to_uppercase().as_str() { - "AND" => Ok(Token::And), - "AS" => Ok(Token::As), - "BOOLEAN" => Ok(Token::BooleanName), - "DIM" => Ok(Token::Dim), - "DOUBLE" => Ok(Token::DoubleName), - "ELSE" => Ok(Token::Else), - "ELSEIF" => Ok(Token::Elseif), - "END" => Ok(Token::End), - "FALSE" => Ok(Token::Boolean(false)), - "FOR" => Ok(Token::For), - "IF" => Ok(Token::If), - "INTEGER" => Ok(Token::IntegerName), - "MOD" => Ok(Token::Modulo), - "NEXT" => Ok(Token::Next), - "NOT" => Ok(Token::Not), - "OR" => Ok(Token::Or), - "REM" => self.consume_rest_of_line(), - "STEP" => Ok(Token::Step), - "STRING" => Ok(Token::TextName), - "THEN" => Ok(Token::Then), - "TO" => Ok(Token::To), - "TRUE" => Ok(Token::Boolean(true)), - "WEND" => Ok(Token::Wend), - "WHILE" => Ok(Token::While), - "XOR" => Ok(Token::Xor), - _ => Ok(Token::Symbol(VarRef::new(s, vtype))), - } + + let token_len = s.len(); + let tok = match s.to_uppercase().as_str() { + "AND" => Token::And, + "AS" => Token::As, + "BOOLEAN" => Token::BooleanName, + "DIM" => Token::Dim, + "DOUBLE" => Token::DoubleName, + "ELSE" => Token::Else, + "ELSEIF" => Token::Elseif, + "END" => Token::End, + "FALSE" => Token::Boolean(false), + "FOR" => Token::For, + "IF" => Token::If, + "INTEGER" => Token::IntegerName, + "MOD" => Token::Modulo, + "NEXT" => Token::Next, + "NOT" => Token::Not, + "OR" => Token::Or, + "REM" => return self.consume_rest_of_line(), + "STEP" => Token::Step, + "STRING" => Token::TextName, + "THEN" => Token::Then, + "TO" => Token::To, + "TRUE" => Token::Boolean(true), + "WEND" => Token::Wend, + "WHILE" => Token::While, + "XOR" => Token::Xor, + _ => Token::Symbol(VarRef::new(s, vtype)), + }; + Ok(self.token_span_backward(tok, token_len)) } /// Consumes the string at the current position, which was has to end with `delim`. /// /// This handles quoted characters within the string. - fn consume_text(&mut self, delim: char) -> io::Result { + fn consume_text(&mut self, delim: char) -> io::Result { let mut s = String::new(); let mut escaping = false; loop { match self.input.peek() { Some(Ok(ch)) => { if escaping { - s.push(self.input.next().unwrap()?); + s.push(self.next().unwrap()?); escaping = false; } else if *ch == '\\' { - self.input.next().unwrap()?; + self.next().unwrap()?; escaping = true; } else if *ch == delim { - self.input.next().unwrap()?; + self.next().unwrap()?; break; } else { - s.push(self.input.next().unwrap()?); + s.push(self.next().unwrap()?); } } - Some(Err(_)) => return Err(self.input.next().unwrap().unwrap_err()), + Some(Err(_)) => return Err(self.next().unwrap().unwrap_err()), None => { return self.handle_bad_peek(format!("Incomplete string due to EOF: {}", s)) } } } - Ok(Token::Text(s)) + let token_len = s.len(); + Ok(self.token_span_backward(Token::Text(s), token_len)) } /// Consumes the remainder of the line and returns the token that was encountered at the end /// (which may be EOF or end of line). - fn consume_rest_of_line(&mut self) -> io::Result { + fn consume_rest_of_line(&mut self) -> io::Result { loop { - match self.input.next() { - None => return Ok(Token::Eof), - Some(Ok('\n')) => return Ok(Token::Eol), + match self.next() { + None => return Ok(self.token_span(Token::Eof, 1)), + Some(Ok('\n')) => return Ok(self.token_span(Token::Eol, 1)), Some(Err(e)) => return Err(e), Some(Ok(_)) => (), } @@ -345,7 +391,7 @@ impl<'a> Lexer<'a> { /// character. fn advance_and_read_next(&mut self) -> io::Result> { loop { - match self.input.next() { + match self.next() { Some(Ok(ch)) if ch.is_space() => (), Some(Ok(ch)) => return Ok(Some(ch)), Some(Err(e)) => return Err(e), @@ -358,30 +404,30 @@ impl<'a> Lexer<'a> { /// /// Note that this returns errors only on fatal I/O conditions. EOF and malformed tokens are /// both returned as the special token types `Token::Eof` and `Token::Bad` respectively. - pub fn read(&mut self) -> io::Result { + pub fn read(&mut self) -> io::Result { let ch = self.advance_and_read_next()?; if ch.is_none() { - return Ok(Token::Eof); + return Ok(self.token_span(Token::Eof, 1)); } let ch = ch.unwrap(); match ch { - '\n' | ':' => Ok(Token::Eol), + '\n' | ':' => Ok(self.token_span(Token::Eol, 1)), '\'' => self.consume_rest_of_line(), '"' => self.consume_text('"'), - ';' => Ok(Token::Semicolon), - ',' => Ok(Token::Comma), + ';' => Ok(self.token_span(Token::Semicolon, 1)), + ',' => Ok(self.token_span(Token::Comma, 1)), - '(' => Ok(Token::LeftParen), - ')' => Ok(Token::RightParen), + '(' => Ok(self.token_span(Token::LeftParen, 1)), + ')' => Ok(self.token_span(Token::RightParen, 1)), - '+' => Ok(Token::Plus), - '-' => Ok(Token::Minus), - '*' => Ok(Token::Multiply), - '/' => Ok(Token::Divide), + '+' => Ok(self.token_span(Token::Plus, 1)), + '-' => Ok(self.token_span(Token::Minus, 1)), + '*' => Ok(self.token_span(Token::Multiply, 1)), + '/' => Ok(self.token_span(Token::Divide, 1)), - '=' => Ok(Token::Equal), + '=' => Ok(self.token_span(Token::Equal, 1)), '<' | '>' => self.consume_operator(ch), ch if ch.is_digit(10) => self.consume_number(ch), @@ -439,7 +485,7 @@ impl<'a> PeekableLexer<'a> { pub fn read(&mut self) -> io::Result { match self.peeked.take() { Some(t) => Ok(t), - None => self.lexer.read(), + None => self.lexer.read().map(|t| t.token), } } } @@ -455,9 +501,10 @@ mod tests { let mut input = input.as_bytes(); let mut lexer = Lexer::from(&mut input); + //TODO(protowalker): Add spans to tests let mut tokens = vec![]; loop { - let token = lexer.read().expect("Lexing failed"); + let token = lexer.read().expect("Lexing failed").token; if token == Token::Eof { break; } @@ -471,8 +518,9 @@ mod tests { fn test_empty() { let mut input = b"".as_ref(); let mut lexer = Lexer::from(&mut input); - assert_eq!(Token::Eof, lexer.read().unwrap()); - assert_eq!(Token::Eof, lexer.read().unwrap()); + //TODO(protowalker): Add spans to tests + assert_eq!(Token::Eof, lexer.read().unwrap().token); + assert_eq!(Token::Eof, lexer.read().unwrap().token); } #[test] @@ -816,10 +864,12 @@ mod tests { fn test_unrecoverable_io_error() { let mut reader = FaultyReader::new("3 + 5\n"); let mut lexer = Lexer::from(&mut reader); - assert_eq!(Token::Integer(3), lexer.read().unwrap()); - assert_eq!(Token::Plus, lexer.read().unwrap()); - assert_eq!(Token::Integer(5), lexer.read().unwrap()); - assert_eq!(Token::Eol, lexer.read().unwrap()); + + //TODO(protowalker): Add spans to tests + assert_eq!(Token::Integer(3), lexer.read().unwrap().token); + assert_eq!(Token::Plus, lexer.read().unwrap().token); + assert_eq!(Token::Integer(5), lexer.read().unwrap().token); + assert_eq!(Token::Eol, lexer.read().unwrap().token); let e = lexer.read().unwrap_err(); assert_eq!(io::ErrorKind::InvalidData, e.kind()); let e = lexer.read().unwrap_err(); From cffa3fe54e0de717c95113c6f8e93c5b887d64da Mon Sep 17 00:00:00 2001 From: Protowalker Date: Fri, 3 Dec 2021 23:52:56 -0500 Subject: [PATCH 5/5] change parser to use tokenspan --- core/src/lexer.rs | 35 ++++++++++++------------ core/src/parser.rs | 66 ++++++++++++++++++++++------------------------ 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/core/src/lexer.rs b/core/src/lexer.rs index 9adc2d0b..c37e8d84 100644 --- a/core/src/lexer.rs +++ b/core/src/lexer.rs @@ -22,10 +22,10 @@ use std::iter::Peekable; #[derive(Clone, Debug, PartialEq)] pub struct TokenSpan { - token: Token, - line: usize, - character: usize, - length: usize, + pub token: Token, + pub line: usize, + pub character: usize, + pub length: usize, } impl TokenSpan { @@ -453,7 +453,7 @@ pub struct PeekableLexer<'a> { /// If not none, contains the character read by `peek`, which will be consumed by the next call /// to `read` or `consume_peeked`. - peeked: Option, + peeked: Option, } impl<'a> PeekableLexer<'a> { @@ -461,7 +461,7 @@ impl<'a> PeekableLexer<'a> { /// /// Because `peek` reports read errors, this assumes that the caller already handled those /// errors and is thus not going to call this when an error is present. - pub fn consume_peeked(&mut self) -> Token { + pub fn consume_peeked(&mut self) -> TokenSpan { assert!(self.peeked.is_some()); self.peeked.take().unwrap() } @@ -470,7 +470,7 @@ impl<'a> PeekableLexer<'a> { /// /// It is OK to call this function several times on the same token before extracting it from /// the lexer. - pub fn peek(&mut self) -> io::Result<&Token> { + pub fn peek(&mut self) -> io::Result<&TokenSpan> { if self.peeked.is_none() { let n = self.read()?; self.peeked.replace(n); @@ -482,10 +482,10 @@ impl<'a> PeekableLexer<'a> { /// /// If the next token is invalid and results in a read error, the stream will remain valid and /// further tokens can be obtained with subsequent calls. - pub fn read(&mut self) -> io::Result { + pub fn read(&mut self) -> io::Result { match self.peeked.take() { Some(t) => Ok(t), - None => self.lexer.read().map(|t| t.token), + None => self.lexer.read(), } } } @@ -753,14 +753,15 @@ mod tests { fn test_peekable_lexer() { let mut input = b"a b 123".as_ref(); let mut lexer = Lexer::from(&mut input).peekable(); - assert_eq!(&new_auto_symbol("a"), lexer.peek().unwrap()); - assert_eq!(&new_auto_symbol("a"), lexer.peek().unwrap()); - assert_eq!(new_auto_symbol("a"), lexer.read().unwrap()); - assert_eq!(new_auto_symbol("b"), lexer.read().unwrap()); - assert_eq!(&Token::Integer(123), lexer.peek().unwrap()); - assert_eq!(Token::Integer(123), lexer.read().unwrap()); - assert_eq!(&Token::Eof, lexer.peek().unwrap()); - assert_eq!(Token::Eof, lexer.read().unwrap()); + //TODO(protowalker): Add spans to tests + assert_eq!(new_auto_symbol("a"), lexer.peek().unwrap().token); + assert_eq!(new_auto_symbol("a"), lexer.peek().unwrap().token); + assert_eq!(new_auto_symbol("a"), lexer.read().unwrap().token); + assert_eq!(new_auto_symbol("b"), lexer.read().unwrap().token); + assert_eq!(Token::Integer(123), lexer.peek().unwrap().token); + assert_eq!(Token::Integer(123), lexer.read().unwrap().token); + assert_eq!(Token::Eof, lexer.peek().unwrap().token); + assert_eq!(Token::Eof, lexer.read().unwrap().token); } #[test] diff --git a/core/src/parser.rs b/core/src/parser.rs index 523f5d92..23dd8b93 100644 --- a/core/src/parser.rs +++ b/core/src/parser.rs @@ -178,7 +178,7 @@ impl<'a> Parser<'a> { /// stream and fails with error `err`. fn expect_and_consume(&mut self, t: Token, err: &'static str) -> Result<()> { let peeked = self.lexer.peek()?; - if *peeked != t { + if peeked.token != t { return Err(Error::Bad(err.to_owned())); } self.lexer.consume_peeked(); @@ -191,9 +191,9 @@ impl<'a> Parser<'a> { let mut stmts = vec![]; loop { let peeked = self.lexer.peek()?; - if delims.contains(peeked) { + if delims.contains(&peeked.token) { break; - } else if *peeked == Token::Eol { + } else if peeked.token == Token::Eol { self.lexer.consume_peeked(); continue; } @@ -213,7 +213,7 @@ impl<'a> Parser<'a> { }; let next = self.lexer.peek()?; - match next { + match next.token { Token::Eof | Token::Eol => (), _ => return Err(Error::Bad("Unexpected token in assignment".to_owned())), } @@ -229,7 +229,7 @@ impl<'a> Parser<'a> { }; let next = self.lexer.peek()?; - match next { + match next.token { Token::Eof | Token::Eol => (), _ => return Err(Error::Bad("Unexpected token in array assignment".to_owned())), } @@ -249,7 +249,7 @@ impl<'a> Parser<'a> { let expr = self.parse_expr(first.take())?; let peeked = self.lexer.peek()?; - match peeked { + match peeked.token { Token::Eof | Token::Eol => { if expr.is_some() || !args.is_empty() { args.push((expr, ArgSep::End)); @@ -277,11 +277,11 @@ impl<'a> Parser<'a> { /// Starts processing either an array reference or a builtin call and disambiguates between the /// two. fn parse_array_or_builtin_call(&mut self, vref: VarRef) -> Result { - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::LeftParen => { self.lexer.consume_peeked(); let mut exprs = self.parse_comma_separated_exprs()?; - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Equal => { self.lexer.consume_peeked(); self.parse_array_assignment(vref, exprs) @@ -302,11 +302,11 @@ impl<'a> Parser<'a> { /// `AS` token. fn parse_dim_as(&mut self) -> Result { let peeked = self.lexer.peek()?; - let vartype = match peeked { + let vartype = match peeked.token { Token::Eof | Token::Eol => VarType::Integer, Token::As => { self.lexer.consume_peeked(); - match self.lexer.read()? { + match self.lexer.read()?.token { Token::BooleanName => VarType::Boolean, Token::DoubleName => VarType::Double, Token::IntegerName => VarType::Integer, @@ -323,7 +323,7 @@ impl<'a> Parser<'a> { }; let next = self.lexer.peek()?; - match next { + match next.token { Token::Eof | Token::Eol => (), _ => return Err(Error::Bad("Unexpected token in DIM statement".to_owned())), } @@ -333,14 +333,14 @@ impl<'a> Parser<'a> { /// Parses a `DIM` statement. fn parse_dim(&mut self) -> Result { - let vref = match self.lexer.read()? { + let vref = match self.lexer.read()?.token { Token::Symbol(vref) => vref, _ => return Err(Error::Bad("Expected variable name after DIM".to_owned())), }; let name = vref.into_unannotated_string()?; let peeked = self.lexer.peek()?; - match peeked { + match peeked.token { Token::LeftParen => { self.lexer.consume_peeked(); let subscripts = self.parse_comma_separated_exprs()?; @@ -369,7 +369,7 @@ impl<'a> Parser<'a> { loop { let peeked = self.lexer.peek()?; - match peeked { + match peeked.token { Token::RightParen => { self.lexer.consume_peeked(); break; @@ -424,7 +424,7 @@ impl<'a> Parser<'a> { // Stop processing if we encounter an expression separator, but don't consume it because // the caller needs to have access to it. - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Eof | Token::Eol | Token::Comma @@ -446,7 +446,7 @@ impl<'a> Parser<'a> { }; let token = self.lexer.consume_peeked(); - match token { + match token.token { Token::Boolean(b) => handle_operand(Expr::Boolean(b))?, Token::Double(d) => handle_operand(Expr::Double(d))?, Token::Integer(i) => handle_operand(Expr::Integer(i))?, @@ -536,7 +536,7 @@ impl<'a> Parser<'a> { | Token::And | Token::Or | Token::Xor => { - let op = ExprOp::from(token); + let op = ExprOp::from(token.token); while let Some(op2) = ops.last() { if *op2 == ExprOp::LeftParen || op2.priority() < op.priority() { break; @@ -606,7 +606,7 @@ impl<'a> Parser<'a> { vec![(expr, self.parse_until(&[Token::Elseif, Token::Else, Token::End])?)]; loop { let peeked = self.lexer.peek()?; - match peeked { + match peeked.token { Token::Elseif => { self.lexer.consume_peeked(); let expr = match self.parse_expr(None)? { @@ -625,12 +625,12 @@ impl<'a> Parser<'a> { } let peeked = self.lexer.peek()?; - if *peeked == Token::Else { + if peeked.token == Token::Else { self.lexer.consume_peeked(); self.expect_and_consume(Token::Eol, "Expecting newline after ELSE")?; let stmts2 = self.parse_until(&[Token::Elseif, Token::Else, Token::End])?; let peeked = self.lexer.peek()?; - match *peeked { + match peeked.token { Token::Elseif => return Err(Error::Bad("Unexpected ELSEIF after ELSE".to_owned())), Token::Else => return Err(Error::Bad("Duplicate ELSE after ELSE".to_owned())), _ => (), @@ -647,7 +647,7 @@ impl<'a> Parser<'a> { /// Advances until the next statement after failing to parse an `IF` statement. fn reset_if(&mut self) -> Result<()> { loop { - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Eof => break, Token::End => { self.lexer.consume_peeked(); @@ -664,22 +664,20 @@ impl<'a> Parser<'a> { /// Extracts the optional `STEP` part of a `FOR` statement, with a default of 1. fn parse_step(&mut self) -> Result { - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Step => self.lexer.consume_peeked(), _ => return Ok(1), }; - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Integer(i) => { - let i = *i; self.lexer.consume_peeked(); Ok(i) } Token::Minus => { self.lexer.consume_peeked(); - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Integer(i) => { - let i = *i; self.lexer.consume_peeked(); Ok(-i) } @@ -692,7 +690,7 @@ impl<'a> Parser<'a> { /// Parses a `FOR` statement. fn parse_for(&mut self) -> Result { - let iterator = match self.lexer.read()? { + let iterator = match self.lexer.read()?.token { Token::Symbol(iterator) => match iterator.ref_type() { // TODO(jmmv): Should we support doubles in for loops? VarType::Auto | VarType::Integer => iterator, @@ -744,7 +742,7 @@ impl<'a> Parser<'a> { /// Advances until the next statement after failing to parse a `FOR` statement. fn reset_for(&mut self) -> Result<()> { loop { - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Eof => break, Token::Next => { self.lexer.consume_peeked(); @@ -775,7 +773,7 @@ impl<'a> Parser<'a> { /// Advances until the next statement after failing to parse a `WHILE` statement. fn reset_while(&mut self) -> Result<()> { loop { - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Eof => break, Token::End => { self.lexer.consume_peeked(); @@ -796,7 +794,7 @@ impl<'a> Parser<'a> { /// On failure, the caller must advance the stream to the next statement by calling `reset`. fn parse_one(&mut self) -> Result> { loop { - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Eol => { self.lexer.consume_peeked(); } @@ -804,7 +802,7 @@ impl<'a> Parser<'a> { _ => break, } } - let res = match self.lexer.read()? { + let res = match self.lexer.read()?.token { Token::Eof => return Ok(None), Token::Eol => Ok(None), Token::Dim => Ok(Some(self.parse_dim()?)), @@ -824,7 +822,7 @@ impl<'a> Parser<'a> { } Token::Symbol(vref) => { let peeked = self.lexer.peek()?; - if *peeked == Token::Equal { + if peeked.token == Token::Equal { self.lexer.consume_peeked(); Ok(Some(self.parse_assignment(vref)?)) } else { @@ -841,7 +839,7 @@ impl<'a> Parser<'a> { t => return Err(Error::Bad(format!("Unexpected token {:?} in statement", t))), }; - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Eof => (), Token::Eol => { self.lexer.consume_peeked(); @@ -855,7 +853,7 @@ impl<'a> Parser<'a> { /// Advances until the next statement after failing to parse a single statement. fn reset(&mut self) -> Result<()> { loop { - match self.lexer.peek()? { + match self.lexer.peek()?.token { Token::Eof => break, Token::Eol => { self.lexer.consume_peeked();