Skip to content

Commit 0f2eda8

Browse files
committed
feat: file nodes, arrow function extraction, parallel I/O
- Create file-kind nodes for each parsed source file
- Add isInsideClassLikeNode() for method vs function detection
- Extract arrow functions and function expressions from variable declarators
- Batch file I/O with FILE_IO_BATCH_SIZE=10 using Promise.all
- Add symlink cycle detection with visitedDirs Set in scanDirectory
- Add lazy grammar loading with exported getGrammar() function
- Add indexFileWithContent() for pre-read content processing
- Add tests for file nodes and arrow function extraction
1 parent 4825661 commit 0f2eda8

4 files changed

Lines changed: 384 additions & 56 deletions

File tree

__tests__/extraction.test.ts

Lines changed: 123 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -127,14 +127,19 @@ export function processPayment(amount: number): Promise<Receipt> {
127127
`;
128128
const result = extractFromSource('payment.ts', code);
129129

130-
expect(result.nodes).toHaveLength(1);
131-
expect(result.nodes[0]).toMatchObject({
130+
// File node + function node
131+
const fileNode = result.nodes.find((n) => n.kind === 'file');
132+
expect(fileNode).toBeDefined();
133+
expect(fileNode?.name).toBe('payment.ts');
134+
135+
const funcNode = result.nodes.find((n) => n.kind === 'function');
136+
expect(funcNode).toMatchObject({
132137
kind: 'function',
133138
name: 'processPayment',
134139
language: 'typescript',
135140
isExported: true,
136141
});
137-
expect(result.nodes[0]?.signature).toContain('amount: number');
142+
expect(funcNode?.signature).toContain('amount: number');
138143
});
139144

140145
it('should extract class declarations', () => {
@@ -175,8 +180,11 @@ export interface User {
175180
`;
176181
const result = extractFromSource('types.ts', code);
177182

178-
expect(result.nodes).toHaveLength(1);
179-
expect(result.nodes[0]).toMatchObject({
183+
const fileNode = result.nodes.find((n) => n.kind === 'file');
184+
expect(fileNode).toBeDefined();
185+
186+
const ifaceNode = result.nodes.find((n) => n.kind === 'interface');
187+
expect(ifaceNode).toMatchObject({
180188
kind: 'interface',
181189
name: 'User',
182190
isExported: true,
@@ -207,8 +215,9 @@ export const useAuth = (): AuthContextValue => {
207215
`;
208216
const result = extractFromSource('hooks.ts', code);
209217

210-
expect(result.nodes).toHaveLength(1);
211-
expect(result.nodes[0]).toMatchObject({
218+
const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'useAuth');
219+
expect(funcNode).toBeDefined();
220+
expect(funcNode).toMatchObject({
212221
kind: 'function',
213222
name: 'useAuth',
214223
isExported: true,
@@ -223,8 +232,9 @@ export const processData = function(input: string): string {
223232
`;
224233
const result = extractFromSource('utils.ts', code);
225234

226-
expect(result.nodes).toHaveLength(1);
227-
expect(result.nodes[0]).toMatchObject({
235+
const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'processData');
236+
expect(funcNode).toBeDefined();
237+
expect(funcNode).toMatchObject({
228238
kind: 'function',
229239
name: 'processData',
230240
isExported: true,
@@ -286,8 +296,9 @@ export const fetchData = async () => {
286296
`;
287297
const result = extractFromSource('api.js', code);
288298

289-
expect(result.nodes).toHaveLength(1);
290-
expect(result.nodes[0]).toMatchObject({
299+
const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'fetchData');
300+
expect(funcNode).toBeDefined();
301+
expect(funcNode).toMatchObject({
291302
kind: 'function',
292303
name: 'fetchData',
293304
isExported: true,
@@ -306,8 +317,8 @@ export type AuthContextValue = {
306317
`;
307318
const result = extractFromSource('types.ts', code);
308319

309-
expect(result.nodes).toHaveLength(1);
310-
expect(result.nodes[0]).toMatchObject({
320+
const typeNode = result.nodes.find((n) => n.kind === 'type_alias');
321+
expect(typeNode).toMatchObject({
311322
kind: 'type_alias',
312323
name: 'AuthContextValue',
313324
isExported: true,
@@ -323,8 +334,8 @@ type InternalState = {
323334
`;
324335
const result = extractFromSource('internal.ts', code);
325336

326-
expect(result.nodes).toHaveLength(1);
327-
expect(result.nodes[0]).toMatchObject({
337+
const typeNode = result.nodes.find((n) => n.kind === 'type_alias');
338+
expect(typeNode).toMatchObject({
328339
kind: 'type_alias',
329340
name: 'InternalState',
330341
isExported: false,
@@ -415,18 +426,18 @@ export const useAuth = () => {
415426
expect(varNodes).toHaveLength(0);
416427
});
417428

418-
it('should not extract non-exported const as exported variable', () => {
429+
it('should extract non-exported const as non-exported variable', () => {
419430
const code = `
420431
const internalConfig = {
421432
debug: true,
422433
};
423434
`;
424435
const result = extractFromSource('internal.ts', code);
425436

426-
// Non-exported const should NOT create a variable node
427-
// (only export_statement triggers extractExportedVariables)
428-
const varNodes = result.nodes.filter((n) => n.kind === 'variable' && n.name === 'internalConfig');
429-
expect(varNodes).toHaveLength(0);
437+
// Non-exported const at file level should be extracted as a constant (not exported)
438+
const varNodes = result.nodes.filter((n) => (n.kind === 'variable' || n.kind === 'constant') && n.name === 'internalConfig');
439+
expect(varNodes).toHaveLength(1);
440+
expect(varNodes[0]?.isExported).toBeFalsy();
430441
});
431442

432443
it('should extract Zod schema exports', () => {
@@ -463,6 +474,93 @@ export const authMachine = createMachine({
463474
});
464475
});
465476

477+
describe('File Node Extraction', () => {
478+
it('should create a file-kind node for each parsed file', () => {
479+
const code = `
480+
export function greet(name: string): string {
481+
return "Hello " + name;
482+
}
483+
`;
484+
const result = extractFromSource('greeter.ts', code);
485+
486+
const fileNode = result.nodes.find((n) => n.kind === 'file');
487+
expect(fileNode).toBeDefined();
488+
expect(fileNode?.name).toBe('greeter.ts');
489+
expect(fileNode?.filePath).toBe('greeter.ts');
490+
expect(fileNode?.language).toBe('typescript');
491+
expect(fileNode?.startLine).toBe(1);
492+
});
493+
494+
it('should create file nodes for Python files', () => {
495+
const code = `
496+
def main():
497+
pass
498+
`;
499+
const result = extractFromSource('main.py', code);
500+
501+
const fileNode = result.nodes.find((n) => n.kind === 'file');
502+
expect(fileNode).toBeDefined();
503+
expect(fileNode?.name).toBe('main.py');
504+
expect(fileNode?.language).toBe('python');
505+
});
506+
507+
it('should create containment edges from file node to top-level declarations', () => {
508+
const code = `
509+
export function foo() {}
510+
export function bar() {}
511+
`;
512+
const result = extractFromSource('fns.ts', code);
513+
514+
const fileNode = result.nodes.find((n) => n.kind === 'file');
515+
expect(fileNode).toBeDefined();
516+
517+
// There should be contains edges from the file node to each function
518+
const containsEdges = result.edges.filter(
519+
(e) => e.source === fileNode?.id && e.kind === 'contains'
520+
);
521+
expect(containsEdges.length).toBeGreaterThanOrEqual(2);
522+
});
523+
});
524+
525+
describe('Arrow Function Variable Extraction', () => {
526+
it('should extract const arrow functions as function nodes', () => {
527+
const code = `
528+
const handleClick = () => {
529+
console.log('clicked');
530+
};
531+
`;
532+
const result = extractFromSource('handler.ts', code);
533+
534+
const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'handleClick');
535+
expect(funcNode).toBeDefined();
536+
expect(funcNode?.kind).toBe('function');
537+
});
538+
539+
it('should detect async arrow functions', () => {
540+
const code = `
541+
export const fetchUser = async (id: string) => {
542+
return await db.find(id);
543+
};
544+
`;
545+
const result = extractFromSource('api.ts', code);
546+
547+
const funcNode = result.nodes.find((n) => n.kind === 'function' && n.name === 'fetchUser');
548+
expect(funcNode).toBeDefined();
549+
expect(funcNode?.isExported).toBe(true);
550+
});
551+
552+
it('should not create duplicate nodes for arrow functions in export statements', () => {
553+
const code = `
554+
export const compute = (x: number) => x * 2;
555+
`;
556+
const result = extractFromSource('math.ts', code);
557+
558+
const funcNodes = result.nodes.filter((n) => n.kind === 'function' && n.name === 'compute');
559+
// Should appear only once, not duplicated between extractFunctionVariable and extractFunction
560+
expect(funcNodes).toHaveLength(1);
561+
});
562+
});
563+
466564
describe('Python Extraction', () => {
467565
it('should extract function definitions', () => {
468566
const code = `
@@ -473,8 +571,11 @@ def calculate_total(items: list, tax_rate: float) -> float:
473571
`;
474572
const result = extractFromSource('calc.py', code);
475573

476-
expect(result.nodes).toHaveLength(1);
477-
expect(result.nodes[0]).toMatchObject({
574+
const fileNode = result.nodes.find((n) => n.kind === 'file');
575+
expect(fileNode).toBeDefined();
576+
577+
const funcNode = result.nodes.find((n) => n.kind === 'function');
578+
expect(funcNode).toMatchObject({
478579
kind: 'function',
479580
name: 'calculate_total',
480581
language: 'python',

src/extraction/grammars.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,14 @@ function loadGrammar(language: Language): unknown | null {
156156
}
157157
}
158158

159+
/**
160+
* Get a grammar by language, loading it lazily if needed.
161+
* Exported for direct grammar access without parser initialization.
*
* This is a thin public wrapper: it forwards to `loadGrammar` and returns its
* result unchanged.
*
* @param language - Language name as a plain string. It is cast to `Language`
*   without any runtime validation before being passed to `loadGrammar`.
* @returns The value produced by `loadGrammar` for that language, typed as
*   `unknown | null`.
*
* NOTE(review): because of the unchecked cast, an unsupported language name is
* not rejected here — confirm that `loadGrammar` returns null (rather than
* throwing) for values outside `Language`.
162+
*/
163+
export function getGrammar(language: string): unknown | null {
164+
return loadGrammar(language as Language);
165+
}
166+
159167
/**
160168
* Get a parser for the specified language
161169
*/

0 commit comments

Comments
 (0)