forked from salesforce/agentscript
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.ts
More file actions
256 lines (223 loc) · 6.82 KB
/
parser.ts
File metadata and controls
256 lines (223 loc) · 6.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
/*
* Copyright (c) 2026, Salesforce, Inc.
* All rights reserved.
* SPDX-License-Identifier: Apache-2.0
* For full license text, see the LICENSE file in the repo root or https://www.apache.org/licenses/LICENSE-2.0
*/
/**
* Recursive descent parser for AgentScript.
*
* Core invariant: NEWLINE and DEDENT are unconditional synchronization points.
* Every parse function that encounters an unexpected token calls synchronize()
* which skips to the next NEWLINE/DEDENT/EOF.
*/
import { isTokenKind, TokenKind, type Token } from './token.js';
import { Lexer } from './lexer.js';
import { CSTNode } from './cst-node.js';
import { isSyncPoint } from './errors.js';
import {
synchronize,
skipNewlines,
consumeCommentsAndSkipNewlines,
isAtEnd,
} from './recovery.js';
import { parseMappingOrExpression } from './parse-mapping.js';
import { parseSequence } from './parse-sequence.js';
import invariant from 'tiny-invariant';
/**
* Token consumption — peek, advance, and query the token stream.
*
* Behavior notes on the members describe the `Parser` implementation in this
* file; other implementations are expected to follow the same contract.
*/
export interface TokenStream {
/** Source text the token stream was produced from. */
source: string;
/** Returns the token at the cursor without consuming it. */
peek(): Token;
/** Returns the token `offset` positions away from the cursor without consuming anything. */
peekAt(offset: number): Token;
/** Returns the token at absolute index `idx`; `Parser` falls back to its EOF token when no token exists there. */
peekAtIndex(idx: number): Token;
/** Returns the kind of the token at the cursor. */
peekKind(): TokenKind;
/** Returns the token at the cursor and advances the cursor by one. */
consume(): Token;
/** Like `consume()`, but first asserts that the token has kind `kind`; `Parser` throws via `invariant` on a mismatch. */
consumeKind<K extends TokenKind>(kind: K): Token<K>;
/** Start offset of the most recently consumed token, or of the first token when nothing has been consumed yet. */
currentOffset(): number;
/** Start offset of the token at the cursor. */
peekOffset(): number;
/** Whether the kind of the token at the cursor is a synchronization point, as decided by `isSyncPoint`. */
isAtSyncPoint(): boolean;
}
/**
* CST node construction — create, populate, and finalize nodes.
*
* Behavior notes on the members describe the `Parser` implementation in this
* file.
*/
export interface NodeBuilder {
/** Consumes the token at the cursor and returns a new node of `type` whose span is that token. */
consumeNamed(type: string): CSTNode;
/** Creates a node of `type` that begins at the token at the cursor; no token is consumed. */
startNode(type: string): CSTNode;
/** Creates a node of `type` with the same offsets and positions as `existingChild`; `Parser` does not append the child itself. */
startNodeAt(type: string, existingChild: CSTNode): CSTNode;
/** Finalizes `node`; a no-op in `Parser` because `appendChild()` tracks the end position incrementally. */
finishNode(node: CSTNode, startTok: Token): void;
/** Appends a child node built from `token` to `parent`; `Parser` uses the token text as the child's node type. */
addAnonymousChild(parent: CSTNode, token: Token): void;
}
/**
* Combined interface used by the expression parser to access parser state:
* token access (`TokenStream`) plus CST node construction (`NodeBuilder`).
* It adds no members of its own and is implemented by `Parser`.
* Avoids a circular dependency between parser.ts and expressions.ts.
*/
export interface ParserContext extends TokenStream, NodeBuilder {}
/**
 * Recursive descent parser that builds a CST from AgentScript source text.
 *
 * The constructor tokenizes the whole input up front; parsing then moves a
 * single cursor (`pos`) over the resulting token array. Reads outside the
 * array yield a lazily created, cached EOF token instead of `undefined`.
 */
export class Parser implements ParserContext {
  source: string;
  private tokens: Token[];
  private pos = 0;
  private _eof: Token | undefined;

  /**
   * @param source Text to parse; it is tokenized immediately.
   */
  constructor(source: string) {
    this.source = source;
    this.tokens = new Lexer(source).tokenize();
  }

  /**
   * Parses the token stream and returns the root `source_file` node.
   */
  parse(): CSTNode {
    return this.parseSourceFile();
  }

  // --- ParserContext implementation ---

  /** Returns the token at the cursor without consuming it. */
  peek(): Token {
    return this.peekAt(0);
  }

  /**
   * Returns the token `offset` positions away from the cursor.
   *
   * n.b. (Allen): this is called so frequently that bounds invariants here
   * cause significant runtime overhead, so they stay disabled:
   *   invariant(this.pos + offset >= 0, 'peekAt too small');
   *   invariant(this.pos + offset <= this.tokens.length, 'peekAt too large');
   */
  peekAt(offset: number): Token {
    const absoluteIdx = this.pos + offset;
    return this.peekAtIndex(absoluteIdx);
  }

  /** Returns the token at absolute index `idx`, or the EOF token when none exists there. */
  peekAtIndex(idx: number): Token {
    const found = this.tokens[idx];
    return found ?? this.eofToken();
  }

  /** Returns the kind of the token at the cursor. */
  peekKind(): TokenKind {
    const { kind } = this.peek();
    return kind;
  }

  /** Returns the token at the cursor and moves the cursor forward by one. */
  consume(): Token {
    const current = this.peek();
    this.pos += 1;
    return current;
  }

  /**
   * Consumes the token at the cursor after asserting that it has kind `kind`.
   *
   * @throws via `invariant` when the token at the cursor has a different kind;
   *   the cursor is not advanced in that case.
   */
  consumeKind<K extends TokenKind>(kind: K): Token<K> {
    const current = this.peek();
    invariant(
      isTokenKind(current, kind),
      `Expected token kind ${kind} but got ${current.kind}`
    );
    this.pos += 1;
    return current;
  }

  /** Consumes one token and wraps it in a new node of `type` spanning that token's text. */
  consumeNamed(type: string): CSTNode {
    const { startOffset, text, start, end } = this.consume();
    return new CSTNode(
      type,
      this.source,
      startOffset,
      startOffset + text.length,
      start,
      end
    );
  }

  /**
   * Start offset of the most recently consumed token, or of the first token
   * when nothing has been consumed yet.
   */
  currentOffset(): number {
    const lastConsumedIdx = Math.max(this.pos - 1, 0);
    const { startOffset } = this.peekAtIndex(lastConsumedIdx);
    return startOffset;
  }

  /** Start offset of the token at the cursor. */
  peekOffset(): number {
    const { startOffset } = this.peek();
    return startOffset;
  }

  /** Whether the token at the cursor is a synchronization point per `isSyncPoint`. */
  isAtSyncPoint(): boolean {
    const upcomingKind = this.peekKind();
    return isSyncPoint(upcomingKind);
  }

  /**
   * Creates a node of `type` anchored at the token at the cursor without
   * consuming it. Start and end offsets are both that token's start offset;
   * the start/end positions are taken from the token itself.
   */
  startNode(type: string): CSTNode {
    const { startOffset, start, end } = this.peek();
    return new CSTNode(type, this.source, startOffset, startOffset, start, end);
  }

  /**
   * Creates a node of `type` with the same offsets and positions as
   * `existingChild`. The child is not appended here.
   */
  startNodeAt(type: string, existingChild: CSTNode): CSTNode {
    const { startOffset, endOffset, startPosition, endPosition } =
      existingChild;
    return new CSTNode(
      type,
      this.source,
      startOffset,
      endOffset,
      startPosition,
      endPosition
    );
  }

  /** Intentionally empty: appendChild() tracks the end position incrementally. */
  finishNode(_node: CSTNode, _startTok: Token): void {
    // No-op.
  }

  /**
   * Appends to `parent` a child node whose type is the token's own text.
   */
  addAnonymousChild(parent: CSTNode, token: Token): void {
    const { text, startOffset, start, end } = token;
    parent.appendChild(
      new CSTNode(
        text,
        this.source,
        startOffset,
        startOffset + text.length,
        start,
        end,
        // NOTE(review): presumably flags the node as anonymous (not named) —
        // confirm against the CSTNode constructor.
        false
      )
    );
  }

  // --- Top-level parsing ---

  /**
   * Parses the whole input into a `source_file` node: leading trivia, then a
   * sequence or a mapping/expression, then trailing trivia, and finally a
   * catch-all pass over any tokens that were left unconsumed.
   */
  private parseSourceFile(): CSTNode {
    const root = this.startNode('source_file');

    // Drop leading newlines and one INDENT (handles template literals with
    // leading whitespace).
    skipNewlines(this);
    if (this.peekKind() === TokenKind.INDENT) {
      this.consume();
    }

    // Leading comments belong to source_file (tree-sitter treats them as extras).
    consumeCommentsAndSkipNewlines(this, root);

    if (this.peekKind() === TokenKind.DASH_SPACE) {
      // The file is a sequence.
      root.appendChild(parseSequence(this));
    } else {
      // The file is a mapping or an expression.
      // n.b. (Allen): Originally we didn't permit expressions at the top
      // level, but we did that to make testing easier in tree-sitter so I
      // suppose we can just make this a feature of the language.
      const body = parseMappingOrExpression(this, ctx => parseSequence(ctx));
      if (body) root.appendChild(body);
    }

    // Trailing comments also belong to source_file.
    consumeCommentsAndSkipNewlines(this, root);

    // Catch-all: anything still unconsumed is wrapped in ERROR nodes so that
    // every byte of source is represented in the CST.
    while (!isAtEnd(this)) {
      switch (this.peekKind()) {
        case TokenKind.NEWLINE:
        case TokenKind.DEDENT:
          this.consume();
          break;
        case TokenKind.COMMENT:
          root.appendChild(this.consumeNamed('comment'));
          break;
        default: {
          const errorNode = synchronize(this);
          if (errorNode) {
            root.appendChild(errorNode);
          } else {
            // Nothing was produced; eat one token to guarantee progress.
            this.consume();
          }
        }
      }
    }

    // The root must span the entire source (matches tree-sitter invariant).
    root.startOffset = 0;
    root.startPosition = { row: 0, column: 0 };
    root.endOffset = this.source.length;
    root.endPosition = this.eofToken().end;
    return root;
  }

  /**
   * Returns the EOF token, creating and caching it on first use. It is
   * zero-width, positioned at the end of the last lexed token (or row 0,
   * column 0 when there are no tokens), with its offset at the end of the
   * source.
   */
  private eofToken(): Token {
    if (this._eof) {
      return this._eof;
    }
    const last = this.tokens[this.tokens.length - 1];
    const pos = last ? last.end : { row: 0, column: 0 };
    const eof: Token = {
      kind: TokenKind.EOF,
      text: '',
      start: pos,
      end: pos,
      startOffset: this.source.length,
    };
    this._eof = eof;
    return eof;
  }
}