-
Notifications
You must be signed in to change notification settings - Fork 128
Expand file tree
/
Copy pathparser.pxi
More file actions
312 lines (271 loc) · 8.95 KB
/
parser.pxi
File metadata and controls
312 lines (271 loc) · 8.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
(ns pixie.parser
(:require [pixie.stdlib :as s]))
;; This file contans a small framework for writing generic parsers in Pixie. Although the generated
;; code is probably not the fastest, it is fairly simple, and that simplicity should open the road for
;; future optimizations. The parsers allowed support multiple inheritance and multiple input data types.
;; Backtracking is supported by snapshots taken at key parts of the parsing process. For a string parser
;; these snapshots are simply a integer index into the string being parsed.
;; Cursors
(defprotocol ICursor
(next! [this] "Advance to the next element")
(current [this] "Return the current element")
(snapshot [this] "Return a snapshot of the cursor's mutable state")
(rewind! [this snapshot] "Rewind the cursor to a previous snapshot")
(at-end? [this] "Is there more to parse?"))
(deftype StringCursor [idx s]
ICursor
(next! [this]
(set-field! this :idx (inc idx)))
(current [this]
(when (< idx (count s))
(nth s idx)))
(snapshot [this]
idx)
(rewind! [this val]
(set-field! this :idx val))
(at-end? [this]
(= idx (count s))))
;; Create a cursor from the given string
(defn string-cursor [s]
(->StringCursor 0 s))
;; Mechanics
(deftype ParseFailure [])
;; If a parser returns this value, parsing has failed
(def fail (->ParseFailure))
(defn failure?
"Returns true if return value from a parser is a parse failure"
[v]
(identical? v fail))
(defn parse-if
"Parse and return the current value of the cursor if this predicate succeeds against the cursor. Advances
the cursor to the next element."
[pred]
(fn [cursor]
(if (pred (current cursor))
(let [value (current cursor)]
(next! cursor)
value)
fail)))
(defprotocol IParserGenerator
(to-parser [this] "Convert the current object to a parser"))
(extend-protocol IParserGenerator
IFn
(to-parser [this]
this)
Character
(to-parser [this]
(parse-if #(= % this)))
String
(to-parser [this]
(println "bad-parser " this)
(assert false)))
(defn or
"Defines a parser that succeeds if one of the provided parsers succeeds. Parsers are tried in-order."
([a] a)
([a b]
(let [a (to-parser a)
b (to-parser b)
m (atom #{})]
(fn [cursor]
(let [key [cursor (snapshot cursor)]]
(if-let [v (contains? @m key)]
(b cursor)
(let [_ (swap! m conj key)
state (snapshot cursor)
val (a cursor)]
(swap! m disj key)
(if (identical? val fail)
(do (rewind! cursor state)
(b cursor))
val)))))))
([a b & more]
(apply or (or a b) more)))
(defn add-clauses [cursor-sym body [[sym goal] & more]]
(if sym
`(let [~sym (~sym ~cursor-sym)]
(if (identical? ~sym fail)
fail
~(add-clauses cursor-sym body more)))
body))
(defn -parse-args
[args]
(loop [args args
rules []
return nil]
(let [[arg & rest] args]
(assert (not (= '-> arg)) "invalid position for ->")
(if arg
(if (= '<- arg)
(let [return (first rest)]
(recur (next rest)
rules
return))
(if (= (first rest) '->)
(let [binding (-> rest next first)
rest (-> rest next next)]
(recur rest
(conj rules [binding arg])
return))
(recur rest
(conj rules [(gensym "_") arg])
return)))
[rules return]))))
(defmacro and
"Defines a parser that succeeds only if all parsers succeed. Tried in order. Each parser clause can be followed
by a -> to give the parser's output a name. There may also be a single <- followed by any Pixie code that can be used
to post-process the parsed output."
[& args]
(let [[parsed body] (-parse-args args)
cursor-sym (gensym "cursor")]
`(let [~@(mapcat
(fn [[sym parser]]
[sym `(to-parser ~parser)])
parsed)]
(fn [~cursor-sym]
(let [prev-pos# (snapshot ~cursor-sym)
result# ~(add-clauses cursor-sym body parsed)]
(if (identical? result# fail)
(do (rewind! ~cursor-sym prev-pos#)
fail)
result#))))))
(defprotocol IDeliverable
(-deliver [this val]))
(deftype PromiseFn [f name]
IDeliverable
(-deliver [this val]
(set-field! this :f val))
IFn
(-invoke [this val]
(assert f (str "PromiseFN " name " has not been delivered"))
(f val)))
(defn promise-fn
"Defines a promise that is callable."
[name]
(->PromiseFn nil name))
(defn -parse-parser-args [args]
(loop [rules {}
args args]
(if args
(let [name (first args)
body (second args)
args (-> args next next)]
(assert (symbol? name) "Must name all rules")
(if (= '<- (first args))
(let [return (first (next args))
full-rule `(let [p# (to-parser ~body)]
(fn [cursor#]
(let [~'value (p# cursor#)]
(if (failure? ~'value)
~'value
~return))))]
(recur (assoc rules name full-rule)
(next (next args))))
(recur (assoc rules name body)
args)))
rules)))
(defmacro parser
"(parser inherits & rules)
Creates a parser that inherits from zero or more other parsers defined in `inherits`. Rules are pairs
of names and rules that will be assigned to those names. Names are inherited from parent parsers in the
order they are defined."
[inherits & rules]
(let [parted (apply merge
(conj (mapv (fn [sym]
(::forms (deref (resolve-in *ns* sym))))
inherits)
(-parse-parser-args rules)))
rules (apply concat parted)
syms (keys parted)]
`(let [~@(mapcat (fn [s]
`[~s (promise-fn (quote ~s))])
syms)]
~@(map (fn [[s goal]]
`(-deliver ~s ~goal))
parted)
~(assoc (zipmap (map (comp keyword name) syms)
syms)
::forms (list 'quote (apply hashmap rules))))))
(defmacro defparser
"(defparser nm inherits rules)
Same as parser but assigns the resulting parser to a var with the name nm"
[nm inherits & rules]
`(def ~nm (parser ~inherits ~@rules)))
;; Common parsers
(defn char-range
"Defines a parser that parses a numerical range of characters"
[from to]
(parse-if (fn [v]
(when (char? v)
(<= (int from) (int v) (int to))))))
(defn one+
"Defines a parser that succeeds if the given parser succeeds once or more. Will return a vector, but any
reducing function can be provided via rf as well."
([g]
(one+ g conj))
([g rf]
(let [g (to-parser g)]
(fn [cursor]
(loop [acc (rf)
cnt 0]
(let [prev-pos (snapshot cursor)
v (g cursor)]
(if (identical? v fail)
(if (= 0 cnt)
(do (rewind! cursor prev-pos)
fail)
(rf acc))
(recur (rf acc v)
(inc cnt)))))))))
(def one+chars #(one+ % string-builder))
(defn zero+
"Defines a parser that succeeds if a given parser succeeds zero or more times. Will return a vector, but
any reducing function can be provided via rf as well."
([g]
(zero+ g conj))
([g rf]
(let [g (to-parser g)]
(fn [cursor]
(loop [acc (rf)]
(let [v (g cursor)]
(if (identical? v fail)
(rf acc)
(recur (rf acc v)))))))))
(def zero+chars #(zero+ % string-builder))
(defn eat
"Eagerly parses as many values as possible until g fails. Discards the result, returns nil."
[g]
(fn [cursor]
(loop []
(let [prev-pos (snapshot cursor)
v (g cursor)]
(if (identical? v fail)
(do (rewind! cursor prev-pos)
nil)
(recur))))))
(defn maybe
"Always succeeds, returns nil when the input did not match the parser."
([g]
(maybe g nil))
([g default]
(let [g (to-parser g)]
(fn [cursor]
(let [v (g cursor)]
(if (failure? v)
default
v))))))
(defmacro sequence
[coll arrow body]
(assert (= '<- arrow) "Middle argument to sequence must be a return arrow")
`(and ~@coll ~'<- ~body))
(defn end
"A parser that only succeeds if there is no more input left to process."
[cursor]
(if (at-end? cursor)
nil
fail))
(defn one-of
"Defines a parser that succeeds if the value being parsed is found in v"
[v]
(parse-if (partial contains? v)))
(def digits (parse-if (set "1234567890")))
(def whitespace (parse-if #{\newline \return \space \tab}))