Skip to content

Commit d11088e

Browse files
committed
Revert "Refactor lexer to use regular expressions"
We came across a major performance regression due to this change. I'm reverting it until we find a better solution. This reverts commit 23fc730.
1 parent a5df1fc commit d11088e

2 files changed

Lines changed: 103 additions & 56 deletions

File tree

src/parser.js

Lines changed: 90 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ function lex(text, parseStringsForObjects){
3232
index = 0,
3333
json = [],
3434
ch,
35-
lastCh = ':';
35+
lastCh = ':'; // can start regexp
3636

3737
while (index < text.length) {
3838
ch = text.charAt(index);
@@ -76,9 +76,6 @@ function lex(text, parseStringsForObjects){
7676
lastCh = ch;
7777
}
7878
return tokens;
79-
80-
81-
//////////////////////////////////////////////
8279

8380
function is(chars) {
8481
return chars.indexOf(ch) != -1;
@@ -103,6 +100,10 @@ function lex(text, parseStringsForObjects){
103100
'A' <= ch && ch <= 'Z' ||
104101
'_' == ch || ch == '$';
105102
}
103+
function isExpOperator(ch) {
104+
return ch == '-' || ch == '+' || isNumber(ch);
105+
}
106+
106107
function throwError(error, start, end) {
107108
end = end || index;
108109
throw Error("Lexer Error: " + error + " at column" +
@@ -111,61 +112,103 @@ function lex(text, parseStringsForObjects){
111112
" " + end) +
112113
" in expression [" + text + "].");
113114
}
114-
115-
function consume(regexp, processToken, errorMsg) {
116-
var match = text.substr(index).match(regexp);
117-
var token = {index: index};
118-
var start = index;
119-
if (!match) throwError(errorMsg);
120-
index += match[0].length;
121-
processToken(token, token.text = match[0], start);
122-
tokens.push(token);
123-
}
124115

125116
function readNumber() {
126-
consume(/^(\d+)?(\.\d+)?([eE][+-]?\d+)?/, function(token, number){
127-
token.text = number = 1 * number;
128-
token.json = true;
129-
token.fn = valueFn(number);
130-
}, "Not a valid number");
117+
var number = "";
118+
var start = index;
119+
while (index < text.length) {
120+
var ch = lowercase(text.charAt(index));
121+
if (ch == '.' || isNumber(ch)) {
122+
number += ch;
123+
} else {
124+
var peekCh = peek();
125+
if (ch == 'e' && isExpOperator(peekCh)) {
126+
number += ch;
127+
} else if (isExpOperator(ch) &&
128+
peekCh && isNumber(peekCh) &&
129+
number.charAt(number.length - 1) == 'e') {
130+
number += ch;
131+
} else if (isExpOperator(ch) &&
132+
(!peekCh || !isNumber(peekCh)) &&
133+
number.charAt(number.length - 1) == 'e') {
134+
throwError('Invalid exponent');
135+
} else {
136+
break;
137+
}
138+
}
139+
index++;
140+
}
141+
number = 1 * number;
142+
tokens.push({index:start, text:number, json:true,
143+
fn:function(){return number;}});
131144
}
132-
133145
function readIdent() {
134-
consume(/^[\w_\$][\w_\$\d]*(\.[\w_\$][\w_\$\d]*)*/, function(token, ident){
135-
fn = OPERATORS[ident];
136-
if (!fn) {
137-
fn = getterFn(ident);
138-
fn.isAssignable = ident;
146+
var ident = "";
147+
var start = index;
148+
var fn;
149+
while (index < text.length) {
150+
var ch = text.charAt(index);
151+
if (ch == '.' || isIdent(ch) || isNumber(ch)) {
152+
ident += ch;
153+
} else {
154+
break;
139155
}
140-
token.fn = OPERATORS[ident]||extend(getterFn(ident), {
156+
index++;
157+
}
158+
fn = OPERATORS[ident];
159+
tokens.push({
160+
index:start,
161+
text:ident,
162+
json: fn,
163+
fn:fn||extend(getterFn(ident), {
141164
assign:function(self, value){
142165
return setter(self, ident, value);
143166
}
144-
});
145-
token.json = OPERATORS[ident];
167+
})
146168
});
147169
}
148170

149171
function readString(quote) {
150-
consume(/^(('(\\'|[^'])*')|("(\\"|[^"])*"))/, function(token, rawString, start){
151-
var hasError;
152-
var string = token.string = rawString.substr(1, rawString.length - 2).
153-
replace(/(\\u(.?.?.?.?))|(\\(.))/g,
154-
function(match, wholeUnicode, unicode, wholeEscape, escape){
155-
if (unicode && !unicode.match(/[\da-fA-F]{4}/))
156-
hasError = hasError || bind(null, throwError, "Invalid unicode escape [\\u" + unicode + "]", start);
157-
return unicode ?
158-
String.fromCharCode(parseInt(unicode, 16)) :
159-
ESCAPE[escape] || escape;
160-
});
161-
(hasError||noop)();
162-
token.json = true;
163-
token.fn = function(){
164-
return (string.length == dateParseLength) ?
165-
angular['String']['toDate'](string) :
166-
string;
167-
};
168-
}, "Unterminated string");
172+
var start = index;
173+
index++;
174+
var string = "";
175+
var rawString = quote;
176+
var escape = false;
177+
while (index < text.length) {
178+
var ch = text.charAt(index);
179+
rawString += ch;
180+
if (escape) {
181+
if (ch == 'u') {
182+
var hex = text.substring(index + 1, index + 5);
183+
if (!hex.match(/[\da-f]{4}/i))
184+
throwError( "Invalid unicode escape [\\u" + hex + "]");
185+
index += 4;
186+
string += String.fromCharCode(parseInt(hex, 16));
187+
} else {
188+
var rep = ESCAPE[ch];
189+
if (rep) {
190+
string += rep;
191+
} else {
192+
string += ch;
193+
}
194+
}
195+
escape = false;
196+
} else if (ch == '\\') {
197+
escape = true;
198+
} else if (ch == quote) {
199+
index++;
200+
tokens.push({index:start, text:rawString, string:string, json:true,
201+
fn:function(){
202+
return (string.length == dateParseLength) ?
203+
angular['String']['toDate'](string) : string;
204+
}});
205+
return;
206+
} else {
207+
string += ch;
208+
}
209+
index++;
210+
}
211+
throwError("Unterminated quote", start);
169212
}
170213
}
171214

test/ParserSpec.js

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -82,15 +82,9 @@ describe('parser', function() {
8282
expect(tokens.length).toEqual(1);
8383
expect(tokens[0].string).toEqual('\u00a0');
8484
});
85-
86-
it('should error when non terminated string', function(){
87-
expect(function(){
88-
lex('ignore "text');
89-
}).toThrow(new Error('Lexer Error: Unterminated string at column 7 in expression [ignore "text].'));
90-
});
9185

9286
it('should ignore whitespace', function() {
93-
var tokens = lex("a \t \n \r \u00A0 b");
87+
var tokens = lex("a \t \n \r b");
9488
expect(tokens[0].text).toEqual('a');
9589
expect(tokens[1].text).toEqual('b');
9690
});
@@ -136,15 +130,25 @@ describe('parser', function() {
136130
expect(tokens[0].text).toEqual(0.5E+10);
137131
});
138132

133+
it('should throws exception for invalid exponent', function() {
134+
expect(function() {
135+
lex("0.5E-");
136+
}).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-].'));
137+
138+
expect(function() {
139+
lex("0.5E-A");
140+
}).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-A].'));
141+
});
142+
139143
it('should tokenize number starting with a dot', function() {
140144
var tokens = lex(".5");
141145
expect(tokens[0].text).toEqual(0.5);
142146
});
143147

144148
it('should throw error on invalid unicode', function() {
145149
expect(function() {
146-
lex("'\\u1xbla'");
147-
}).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1xbl] at columns 0-9 ['\\u1xbla'] in expression ['\\u1xbla']."));
150+
lex("'\\u1''bla'");
151+
}).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1''b] at column 2 in expression ['\\u1''bla']."));
148152
});
149153
});
150154

0 commit comments

Comments
 (0)