Skip to content

Commit b869ecb

Browse files
committed
move parser combinators into own file
1 parent 42675e5 commit b869ecb

3 files changed

Lines changed: 374 additions & 380 deletions

File tree

packages/jsparse/package.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ Package.describe({
33
});
44

55
Package.on_use(function (api) {
6-
api.add_files([
7-
'parser.js',
8-
'lexer.js'], ['client', 'server']);
6+
api.add_files(['lexer.js', 'parserlib.js', 'parser.js'],
7+
['client', 'server']);
98
});

packages/jsparse/parser.js

Lines changed: 2 additions & 377 deletions
Original file line numberDiff line numberDiff line change
@@ -1,381 +1,6 @@
1+
///// JAVASCRIPT PARSER
12

2-
// NOTES
3-
4-
// push/pop lexer position
5-
// need to support [no LineTerminator here]
6-
// need to be able to look back at original whitespace later,
7-
// find all the whitespace before a token
8-
// "token" means anything but whitespace, newline, or comment
9-
// multiline comments produce virtual newlines
10-
// maybe conform to the spec's token input to the syntactic grammar?
11-
12-
// XXX track line/col position, for errors and maybe token info
13-
14-
var isArray = function (obj) {
15-
return obj && (typeof obj === 'object') && (typeof obj.length === 'number');
16-
};
17-
18-
Tokenizer = function (codeOrLexer) {
19-
// XXX rethink codeOrLexer later
20-
this.lexer = (codeOrLexer instanceof Lexer ? codeOrLexer :
21-
new Lexer(codeOrLexer));
22-
this.peekType = null;
23-
this.peekText = null;
24-
this.tokenType = null;
25-
this.tokenText = null;
26-
this.lastPos = 0;
27-
this.pos = 0;
28-
this.isLineTerminatorHere = false;
29-
30-
// load peekType and peekText
31-
this.consume();
32-
};
33-
34-
_.extend(Tokenizer.prototype, {
35-
// consumes the token (peekType, peekText) and moves
36-
// it into (type, text), loading the next token
37-
// into (peekType, peekText). A token is a lexeme
38-
// besides WHITESPACE, COMMENT, and NEWLINE.
39-
consume: function () {
40-
var self = this;
41-
var lexer = self.lexer;
42-
self.type = self.peekType;
43-
self.text = self.peekText;
44-
self.lastPos = self.pos;
45-
self.isLineTerminatorHere = false;
46-
do {
47-
lexer.next();
48-
if (lexer.type === "ERROR")
49-
throw new Error("Bad token at position " + lexer.lastPos +
50-
", text `" + lexer.text + "`");
51-
else if (lexer.type === "NEWLINE")
52-
self.isLineTerminatorHere = true;
53-
else if (lexer.type === "COMMENT" && ! /^.*$/.test(lexer.text))
54-
// multiline comments containing line terminators count
55-
// as line terminators.
56-
self.isLineTerminatorHere = true;
57-
} while (lexer.type !== "EOF" && ! Lexer.isToken(lexer.type));
58-
self.peekType = lexer.type;
59-
self.peekText = lexer.text;
60-
self.pos = lexer.lastPos;
61-
}
62-
});
63-
64-
// A parser that consume()s has to succeed.
65-
// Similarly, a parser that fails can't have consumed.
66-
67-
// mutates the parser; don't describe an existing parser.
68-
var describe = function (description, parser) {
69-
parser.description = description;
70-
return parser;
71-
};
72-
73-
// Call this as `throw parseError(...)`.
74-
// `expected` is a parser, `after` is a string.
75-
var parseError = function (t, expected, after) {
76-
var str = (expected.description ? "Expected " + expected.description :
77-
// all parsers that might error should have descriptions,
78-
// but just in case:
79-
"Unexpected token");
80-
if (after)
81-
str += " after " + (after.text ? "`" + after.text + "`" : after);
82-
var pos = t.pos;
83-
str += " at position " + pos;
84-
str += ", found " + (t.peekText ? "`" + t.peekText + "`" : "EOF");
85-
var e = new Error(str);
86-
return e;
87-
};
88-
89-
///// TERMINAL PARSER CONSTRUCTORS
90-
91-
var _tokenClassImpl = function (type, text, dontConsume) {
92-
var textSet = (text ? makeSet(text.split(' ')) : null);
93-
var description = (text ? text.split(' ').join(', ') : type);
94-
return describe(
95-
description,
96-
function (t) {
97-
if (t.peekType == type && (!text || textSet[t.peekText])) {
98-
if (dontConsume)
99-
return [];
100-
var ret = {text: t.peekText, pos: t.pos};
101-
t.consume();
102-
return ret;
103-
}
104-
return null;
105-
});
106-
};
107-
108-
var _tokenImpl = function (text, dontConsume) {
109-
if (/\w/.test(text))
110-
return _tokenClassImpl('KEYWORD', text, dontConsume);
111-
return _tokenClassImpl('PUNCTUATION', text, dontConsume);
112-
};
113-
114-
var tokenClass = function (type, text) {
115-
if (type === "ERROR" || type === "EOF")
116-
throw new Error("Can't create EOF or ERROR tokens, can only look ahead");
117-
return _tokenClassImpl(type, text);
118-
};
119-
120-
var token = function (text) {
121-
return _tokenImpl(text);
122-
};
123-
124-
// Like token, but marks tokens that need to defy the lexer's
125-
// heuristic about whether the next '/' is a division or
126-
// starts a regex.
127-
var preSlashToken = function (text, divisionNotRegex) {
128-
var impl = _tokenImpl(text);
129-
return describe(impl.description,
130-
function (t) {
131-
// temporarily set divisionPermitted,
132-
// restoring it if we don't match.
133-
var oldValue = t.lexer.divisionPermitted;
134-
var result;
135-
try {
136-
t.lexer.divisionPermitted = divisionNotRegex;
137-
result = impl(t);
138-
return result;
139-
} finally {
140-
if (! result)
141-
t.lexer.divisionPermitted = oldValue;
142-
}
143-
});
144-
};
145-
146-
// NON-CONSUMING PARSER CONSTRUCTORS
147-
148-
var lookAheadTokenClass = function (type, text) {
149-
return _tokenClassImpl(type, text, true);
150-
};
151-
152-
var lookAheadToken = function (text) {
153-
return _tokenImpl(text, true);
154-
};
155-
156-
///// NON-TERMINAL PARSER CONSTRUCTORS
157-
158-
// call as: runRequired(parser, tokenizer[, prevToken])
159-
// to run parser(tokenizer) and assert it matches
160-
var runRequired = function (parser, tokenizer, prevToken) {
161-
return revalue(
162-
tokenizer ? parser(tokenizer) : parser,
163-
function (v, t) {
164-
if (! v)
165-
throw parseError(t || tokenizer, parser, prevToken);
166-
return v;
167-
});
168-
};
169-
170-
var runMaybeRequired = function (require, parser, tokenizer, prevToken) {
171-
if (require)
172-
return runRequired(parser, tokenizer, prevToken);
173-
else
174-
return parser(tokenizer);
175-
};
176-
177-
// Polymorphic in parsers and results; an experiment.
178-
var named = function (name, parserOrResult) {
179-
return describe(
180-
name,
181-
revalue(
182-
parserOrResult,
183-
function (value) {
184-
if (! value)
185-
return null;
186-
187-
var result;
188-
if (isArray(value) && ! value.named)
189-
// bare array, prepend the name
190-
result = [name].concat(Array.prototype.slice.call(value));
191-
else
192-
// token or named array; construct a new named array
193-
result = [name, value];
194-
195-
// don't name the same thing twice
196-
result.named = true;
197-
198-
return result;
199-
}));
200-
};
201-
202-
var or = function (/*parsers*/) {
203-
var args = arguments;
204-
return function (t) {
205-
var result;
206-
for(var i = 0, N = args.length; i < N; i++) {
207-
result = args[i](t);
208-
if (result)
209-
return result;
210-
}
211-
return null;
212-
};
213-
};
214-
215-
// Parses a left-recursive expression with zero or more occurrences
216-
// of a binary op. Leaves the term unwrapped if no op. For example
217-
// (in a hypothetical use case):
218-
// `1` => "1"
219-
// `1+2` => ["binary", "1", "+", "2"]
220-
// `1+2+3` => ["binary", ["binary", "1", "+", "2"], "+", "3"]
221-
//
222-
// opParser can also be an array of op parsers from high to low
223-
// precedence (tightest-binding first)
224-
var binaryLeft = function (termParser, opParser) {
225-
if (isArray(opParser)) {
226-
if (opParser.length === 1) {
227-
// take single opParser out of its array
228-
opParser = opParser[0];
229-
} else {
230-
// pop off last opParser (non-destructively) and replace
231-
// termParser with a recursive binaryLeft on the remaining
232-
// ops.
233-
termParser = binaryLeft(termParser, opParser.slice(0, -1));
234-
opParser = opParser[opParser.length - 1];
235-
}
236-
}
237-
238-
return describe(
239-
termParser.description,
240-
function (t) {
241-
var result = termParser(t);
242-
if (! result)
243-
return null;
244-
245-
var op;
246-
while ((op = opParser(t))) {
247-
result = named(
248-
'binary',
249-
[result, op, runRequired(termParser, t, op)]);
250-
}
251-
return result;
252-
});
253-
};
254-
255-
// Parses a list of one or more items with a separator, listing the
256-
// items and separators. (Separator is optional.) For example:
257-
// `x` => ["x"]
258-
// `x,y` => ["x", ",", "y"]
259-
// `x,y,z` => ["x", ",", "y", ",", "z"]
260-
var list = function (itemParser, sepParser) {
261-
return describe(
262-
itemParser.description,
263-
function (t) {
264-
var result = [itemParser(t)];
265-
if (! result[0])
266-
return null;
267-
268-
if (sepParser) {
269-
var sep;
270-
while ((sep = sepParser(t)))
271-
result.push(sep, runRequired(itemParser, t, sep));
272-
} else {
273-
var item;
274-
while ((item = itemParser(t)))
275-
result.push(item);
276-
}
277-
return result;
278-
});
279-
};
280-
281-
var seq = function (/*parsers*/) {
282-
var args = arguments;
283-
if (! args.length)
284-
return describe("(empty)",
285-
function (t) { return []; });
286-
287-
var description = args[0].description;
288-
for (var i = 1; i < args.length; i++)
289-
description += " " + args[i].description;
290-
return describe(
291-
description,
292-
function (t) {
293-
var result = [];
294-
for (var i = 0, N = args.length; i < N; i++) {
295-
// first item in sequence can fail, and we
296-
// fail (without error); after that, error on failure
297-
var r = runMaybeRequired(i > 0, args[i], t);
298-
if (! r)
299-
return null;
300-
301-
if (r.unpack) // append array!
302-
result.push.apply(result, r);
303-
else
304-
result.push(r);
305-
}
306-
return result;
307-
});
308-
};
309-
310-
var unpack = function (arrayParser) {
311-
return revalue(arrayParser, function (v) {
312-
if (v && isArray(v))
313-
v.unpack = true;
314-
return v;
315-
});
316-
};
317-
318-
// lookAhead parser must never consume
319-
var lookAhead = function (lookAheadParser, nextParser) {
320-
return describe(
321-
lookAheadParser.description,
322-
function (t) {
323-
if (! lookAheadParser(t))
324-
return null;
325-
return nextParser(t);
326-
});
327-
};
328-
var negLookAhead = function (lookAheadParser, nextParser) {
329-
if (! nextParser)
330-
return function (t) {
331-
return lookAheadParser(t) ? null : [];
332-
};
333-
334-
return describe(
335-
nextParser.description,
336-
function (t) {
337-
if (lookAheadParser(t))
338-
return null;
339-
return nextParser(t);
340-
});
341-
};
342-
343-
// parser that looks at nothing and returns result
344-
var constant = function (result) {
345-
// no description
346-
return function (t) {
347-
return result;
348-
};
349-
};
350-
351-
// afterLookAhead allows the parser to fail rather than
352-
// succeed if would otherwise fail at a position where
353-
// afterLookAhead doesn't match, potentially providing
354-
// a better error message. For example, the illegal
355-
// object literal `{true:1}` will stop at the `true`
356-
// and say something like "expected property name"
357-
// instead of "expected }". As another example,
358-
// `for(;var;) {}` will lead to "Expected expression"
359-
// instead of "Expected ;" when the optional expression
360-
// turns out to be an illegal `var`.
361-
var opt = function (parser, afterLookAhead) {
362-
return describe(parser.description,
363-
or(parser, afterLookAhead ? afterLookAhead : seq()));
364-
};
365-
366-
// note: valueTransformFunc gets the tokenizer as a second argument
367-
// if it's called on a parser. This func is allowed to then
368-
// run more parsers.
369-
var revalue = function (parserOrValue, valueTransformFunc) {
370-
if (typeof parserOrValue === 'function')
371-
// it's a parser
372-
return describe(parserOrValue.description,
373-
function (t) {
374-
return valueTransformFunc(parserOrValue(t), t);
375-
});
376-
else
377-
return valueTransformFunc(parserOrValue);
378-
};
3+
// XXX unit tests
3794

3805
var parse = function (tokenizer) {
3816
var noLineTerminatorHere = describe(

0 commit comments

Comments
 (0)