Skip to content

Commit 23fc730

Browse files
committed
Refactor lexer to use regular expressions
1 parent e5e69d9 commit 23fc730

2 files changed

Lines changed: 56 additions & 103 deletions

File tree

src/parser.js

Lines changed: 47 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ function lex(text, parseStringsForObjects){
3232
index = 0,
3333
json = [],
3434
ch,
35-
lastCh = ':'; // can start regexp
35+
lastCh = ':';
3636

3737
while (index < text.length) {
3838
ch = text.charAt(index);
@@ -71,6 +71,9 @@ function lex(text, parseStringsForObjects){
7171
lastCh = ch;
7272
}
7373
return tokens;
74+
75+
76+
//////////////////////////////////////////////
7477

7578
function is(chars) {
7679
return chars.indexOf(ch) != -1;
@@ -95,10 +98,6 @@ function lex(text, parseStringsForObjects){
9598
'A' <= ch && ch <= 'Z' ||
9699
'_' == ch || ch == '$';
97100
}
98-
function isExpOperator(ch) {
99-
return ch == '-' || ch == '+' || isNumber(ch);
100-
}
101-
102101
function throwError(error, start, end) {
103102
end = end || index;
104103
throw Error("Lexer Error: " + error + " at column" +
@@ -107,103 +106,61 @@ function lex(text, parseStringsForObjects){
107106
" " + end) +
108107
" in expression [" + text + "].");
109108
}
109+
110+
function consume(regexp, processToken, errorMsg) {
111+
var match = text.substr(index).match(regexp);
112+
var token = {index: index};
113+
var start = index;
114+
if (!match) throwError(errorMsg);
115+
index += match[0].length;
116+
processToken(token, token.text = match[0], start);
117+
tokens.push(token);
118+
}
110119

111120
function readNumber() {
112-
var number = "";
113-
var start = index;
114-
while (index < text.length) {
115-
var ch = lowercase(text.charAt(index));
116-
if (ch == '.' || isNumber(ch)) {
117-
number += ch;
118-
} else {
119-
var peekCh = peek();
120-
if (ch == 'e' && isExpOperator(peekCh)) {
121-
number += ch;
122-
} else if (isExpOperator(ch) &&
123-
peekCh && isNumber(peekCh) &&
124-
number.charAt(number.length - 1) == 'e') {
125-
number += ch;
126-
} else if (isExpOperator(ch) &&
127-
(!peekCh || !isNumber(peekCh)) &&
128-
number.charAt(number.length - 1) == 'e') {
129-
throwError('Invalid exponent');
130-
} else {
131-
break;
132-
}
133-
}
134-
index++;
135-
}
136-
number = 1 * number;
137-
tokens.push({index:start, text:number, json:true,
138-
fn:function(){return number;}});
121+
consume(/^(\d+)?(\.\d+)?([eE][+-]?\d+)?/, function(token, number){
122+
token.text = number = 1 * number;
123+
token.json = true;
124+
token.fn = valueFn(number);
125+
}, "Not a valid number");
139126
}
127+
140128
function readIdent() {
141-
var ident = "";
142-
var start = index;
143-
var fn;
144-
while (index < text.length) {
145-
var ch = text.charAt(index);
146-
if (ch == '.' || isIdent(ch) || isNumber(ch)) {
147-
ident += ch;
148-
} else {
149-
break;
129+
consume(/^[\w_\$][\w_\$\d]*(\.[\w_\$][\w_\$\d]*)*/, function(token, ident){
130+
fn = OPERATORS[ident];
131+
if (!fn) {
132+
fn = getterFn(ident);
133+
fn.isAssignable = ident;
150134
}
151-
index++;
152-
}
153-
fn = OPERATORS[ident];
154-
tokens.push({
155-
index:start,
156-
text:ident,
157-
json: fn,
158-
fn:fn||extend(getterFn(ident), {
135+
token.fn = OPERATORS[ident]||extend(getterFn(ident), {
159136
assign:function(self, value){
160137
return setter(self, ident, value);
161138
}
162-
})
139+
});
140+
token.json = OPERATORS[ident];
163141
});
164142
}
165143

166144
function readString(quote) {
167-
var start = index;
168-
index++;
169-
var string = "";
170-
var rawString = quote;
171-
var escape = false;
172-
while (index < text.length) {
173-
var ch = text.charAt(index);
174-
rawString += ch;
175-
if (escape) {
176-
if (ch == 'u') {
177-
var hex = text.substring(index + 1, index + 5);
178-
if (!hex.match(/[\da-f]{4}/i))
179-
throwError( "Invalid unicode escape [\\u" + hex + "]");
180-
index += 4;
181-
string += String.fromCharCode(parseInt(hex, 16));
182-
} else {
183-
var rep = ESCAPE[ch];
184-
if (rep) {
185-
string += rep;
186-
} else {
187-
string += ch;
188-
}
189-
}
190-
escape = false;
191-
} else if (ch == '\\') {
192-
escape = true;
193-
} else if (ch == quote) {
194-
index++;
195-
tokens.push({index:start, text:rawString, string:string, json:true,
196-
fn:function(){
197-
return (string.length == dateParseLength) ?
198-
angular['String']['toDate'](string) : string;
199-
}});
200-
return;
201-
} else {
202-
string += ch;
203-
}
204-
index++;
205-
}
206-
throwError("Unterminated quote", start);
145+
consume(/^(('(\\'|[^'])*')|("(\\"|[^"])*"))/, function(token, rawString, start){
146+
var hasError;
147+
var string = token.string = rawString.substr(1, rawString.length - 2).
148+
replace(/(\\u(.?.?.?.?))|(\\(.))/g,
149+
function(match, wholeUnicode, unicode, wholeEscape, escape){
150+
if (unicode && !unicode.match(/[\da-fA-F]{4}/))
151+
hasError = hasError || bind(null, throwError, "Invalid unicode escape [\\u" + unicode + "]", start);
152+
return unicode ?
153+
String.fromCharCode(parseInt(unicode, 16)) :
154+
ESCAPE[escape] || escape;
155+
});
156+
(hasError||noop)();
157+
token.json = true;
158+
token.fn = function(){
159+
return (string.length == dateParseLength) ?
160+
angular['String']['toDate'](string) :
161+
string;
162+
};
163+
}, "Unterminated string");
207164
}
208165
}
209166

test/ParserSpec.js

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,15 @@ describe('parser', function() {
8282
expect(tokens.length).toEqual(1);
8383
expect(tokens[0].string).toEqual('\u00a0');
8484
});
85+
86+
it('should error when non terminated string', function(){
87+
expect(function(){
88+
lex('ignore "text');
89+
}).toThrow(new Error('Lexer Error: Unterminated string at column 7 in expression [ignore "text].'));
90+
});
8591

8692
it('should ignore whitespace', function() {
87-
var tokens = lex("a \t \n \r b");
93+
var tokens = lex("a \t \n \r \u00A0 b");
8894
expect(tokens[0].text).toEqual('a');
8995
expect(tokens[1].text).toEqual('b');
9096
});
@@ -130,25 +136,15 @@ describe('parser', function() {
130136
expect(tokens[0].text).toEqual(0.5E+10);
131137
});
132138

133-
it('should throws exception for invalid exponent', function() {
134-
expect(function() {
135-
lex("0.5E-");
136-
}).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-].'));
137-
138-
expect(function() {
139-
lex("0.5E-A");
140-
}).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-A].'));
141-
});
142-
143139
it('should tokenize number starting with a dot', function() {
144140
var tokens = lex(".5");
145141
expect(tokens[0].text).toEqual(0.5);
146142
});
147143

148144
it('should throw error on invalid unicode', function() {
149145
expect(function() {
150-
lex("'\\u1''bla'");
151-
}).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1''b] at column 2 in expression ['\\u1''bla']."));
146+
lex("'\\u1xbla'");
147+
}).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1xbl] at columns 0-9 ['\\u1xbla'] in expression ['\\u1xbla']."));
152148
});
153149
});
154150

0 commit comments

Comments
 (0)