@@ -31,6 +31,22 @@ const FILE_IO_BATCH_SIZE = 10;
3131
3232// PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
3333
34+ /**
35+ * Maximum time (ms) to wait for a single file to parse in the worker thread.
36+ * If tree-sitter hangs or WASM runs out of memory, this prevents the entire
37+ * indexing run from freezing. The worker is restarted after a timeout.
38+ */
39+ const PARSE_TIMEOUT_MS = 10_000 ;
40+
41+ /**
42+ * Number of files to parse before recycling the worker thread.
43+ * WASM linear memory can grow but NEVER shrink (WebAssembly spec limitation).
44+ * The only way to reclaim tree-sitter's WASM heap is to destroy the entire
45+ * V8 isolate by terminating the worker thread and spawning a fresh one.
46+ * This interval balances memory usage against the cost of reloading grammars.
47+ */
48+ const WORKER_RECYCLE_INTERVAL = 500 ;
49+
3450/**
3551 * Progress callback for indexing operations
3652 */
@@ -395,7 +411,8 @@ export class ExtractionOrchestrator {
395411 */
396412 async indexAll (
397413 onProgress ?: ( progress : IndexProgress ) => void ,
398- signal ?: AbortSignal
414+ signal ?: AbortSignal ,
415+ verbose ?: boolean
399416 ) : Promise < IndexResult > {
400417 await initGrammars ( ) ;
401418 const startTime = Date . now ( ) ;
@@ -406,6 +423,10 @@ export class ExtractionOrchestrator {
406423 let totalNodes = 0 ;
407424 let totalEdges = 0 ;
408425
426+ const log = verbose
427+ ? ( msg : string ) => { console . log ( `[worker] ${ msg } ` ) ; }
428+ : ( _msg : string ) => { } ;
429+
409430 // Phase 1: Scan for files
410431 onProgress ?.( {
411432 phase : 'scanning' ,
@@ -446,58 +467,139 @@ export class ExtractionOrchestrator {
446467 // Falls back to in-process parsing if the compiled worker is unavailable (e.g. tests).
447468 const parseWorkerPath = path . join ( __dirname , 'parse-worker.js' ) ;
448469 const useWorker = fs . existsSync ( parseWorkerPath ) ;
449- let parseWorker : import ( 'worker_threads' ) . Worker | null = null ;
470+ let WorkerClass : typeof import ( 'worker_threads' ) . Worker | null = null ;
450471
451472 if ( useWorker ) {
452473 const { Worker } = await import ( 'worker_threads' ) ;
453- parseWorker = new Worker ( parseWorkerPath ) ;
474+ WorkerClass = Worker ;
454475 } else {
455476 // In-process fallback: load grammars locally
456477 await loadGrammarsForLanguages ( neededLanguages ) ;
457478 }
458479
459- // Set up worker-based or in-process parsing
480+ // --- Worker lifecycle management ---
481+ // The worker can crash (OOM in WASM) or hang on pathological files.
482+ // We track pending parse promises and handle both cases:
483+ // - Timeout: terminate + restart the worker, reject the timed-out request
484+ // - Crash: reject all pending promises, restart for remaining files
485+ let parseWorker : import ( 'worker_threads' ) . Worker | null = null ;
460486 let nextId = 0 ;
487+ let workerParseCount = 0 ;
461488 const pendingParses = new Map < number , {
462489 resolve : ( result : ExtractionResult ) => void ;
490+ reject : ( err : Error ) => void ;
491+ timer : ReturnType < typeof setTimeout > ;
463492 } > ( ) ;
464493
465- if ( parseWorker ) {
466- // Wait for grammars to load in the worker
467- await new Promise < void > ( ( resolve , reject ) => {
468- parseWorker ! . once ( 'message' , ( msg : { type : string } ) => {
469- if ( msg . type === 'grammars-loaded' ) resolve ( ) ;
470- else reject ( new Error ( `Unexpected message: ${ msg . type } ` ) ) ;
471- } ) ;
472- parseWorker ! . postMessage ( { type : 'load-grammars' , languages : neededLanguages } ) ;
473- } ) ;
494+ function rejectAllPending ( reason : string ) : void {
495+ for ( const [ id , pending ] of pendingParses ) {
496+ clearTimeout ( pending . timer ) ;
497+ pendingParses . delete ( id ) ;
498+ pending . reject ( new Error ( reason ) ) ;
499+ }
500+ }
474501
475- parseWorker . on ( 'message' , ( msg : { type : string ; id ?: number ; result ?: ExtractionResult } ) => {
502+ function attachWorkerHandlers ( w : import ( 'worker_threads' ) . Worker ) : void {
503+ w . on ( 'message' , ( msg : { type : string ; id ?: number ; result ?: ExtractionResult } ) => {
476504 if ( msg . type === 'parse-result' && msg . id !== undefined ) {
477505 const pending = pendingParses . get ( msg . id ) ;
478506 if ( pending ) {
507+ clearTimeout ( pending . timer ) ;
479508 pendingParses . delete ( msg . id ) ;
480509 pending . resolve ( msg . result ! ) ;
481510 }
482511 }
483512 } ) ;
513+
514+ w . on ( 'error' , ( err ) => {
515+ logWarn ( 'Parse worker error' , { error : err . message } ) ;
516+ rejectAllPending ( `Worker error: ${ err . message } ` ) ;
517+ } ) ;
518+
519+ w . on ( 'exit' , ( code ) => {
520+ if ( code !== 0 && pendingParses . size > 0 ) {
521+ logWarn ( 'Parse worker exited unexpectedly' , { code } ) ;
522+ rejectAllPending ( `Worker exited with code ${ code } ` ) ;
523+ }
524+ // Clear reference so we know to respawn
525+ if ( parseWorker === w ) parseWorker = null ;
526+ } ) ;
484527 }
485528
486- function requestParse ( filePath : string , content : string ) : Promise < ExtractionResult > {
487- if ( parseWorker ) {
488- return new Promise < ExtractionResult > ( ( resolve ) => {
489- const id = nextId ++ ;
490- pendingParses . set ( id , { resolve } ) ;
491- parseWorker ! . postMessage ( { type : 'parse' , id, filePath, content } ) ;
529+ async function ensureWorker ( ) : Promise < import ( 'worker_threads' ) . Worker > {
530+ if ( parseWorker ) return parseWorker ;
531+ log ( 'Spawning new parse worker...' ) ;
532+ parseWorker = new WorkerClass ! ( parseWorkerPath ) ;
533+ attachWorkerHandlers ( parseWorker ) ;
534+
535+ // Load grammars in the new worker
536+ await new Promise < void > ( ( resolve , reject ) => {
537+ parseWorker ! . once ( 'message' , ( msg : { type : string } ) => {
538+ if ( msg . type === 'grammars-loaded' ) resolve ( ) ;
539+ else reject ( new Error ( `Unexpected message: ${ msg . type } ` ) ) ;
492540 } ) ;
541+ parseWorker ! . postMessage ( { type : 'load-grammars' , languages : neededLanguages } ) ;
542+ } ) ;
543+
544+ return parseWorker ;
545+ }
546+
547+ if ( WorkerClass ) {
548+ await ensureWorker ( ) ;
549+ }
550+
551+ /**
552+ * Recycle the worker thread to reclaim WASM memory.
553+ * Terminates the current worker and clears the reference so
554+ * ensureWorker() will spawn a fresh one on the next call.
555+ */
556+ function recycleWorker ( ) : void {
557+ if ( ! parseWorker ) return ;
558+ log ( `Recycling worker after ${ workerParseCount } parses (heap: ${ Math . round ( process . memoryUsage ( ) . rss / 1024 / 1024 ) } MB RSS)` ) ;
559+ const w = parseWorker ;
560+ parseWorker = null ;
561+ workerParseCount = 0 ;
562+ // Fire-and-forget: worker.terminate() can hang if WASM is stuck
563+ w . terminate ( ) . catch ( ( ) => { } ) ;
564+ }
565+
566+ async function requestParse ( filePath : string , content : string ) : Promise < ExtractionResult > {
567+ if ( ! WorkerClass ) {
568+ // In-process fallback
569+ return extractFromSource ( filePath , content , detectLanguage ( filePath ) ) ;
493570 }
494- // In-process fallback
495- return Promise . resolve ( extractFromSource ( filePath , content , detectLanguage ( filePath ) ) ) ;
571+
572+ // Recycle the worker before the next parse if we've hit the threshold.
573+ // This destroys the WASM linear memory (which can grow but never shrink)
574+ // and starts a fresh worker with a clean heap.
575+ if ( workerParseCount >= WORKER_RECYCLE_INTERVAL ) {
576+ await recycleWorker ( ) ;
577+ }
578+
579+ const worker = await ensureWorker ( ) ;
580+ const id = nextId ++ ;
581+ workerParseCount ++ ;
582+
583+ return new Promise < ExtractionResult > ( ( resolve , reject ) => {
584+ const timer = setTimeout ( ( ) => {
585+ pendingParses . delete ( id ) ;
586+ log ( `TIMEOUT: ${ filePath } exceeded ${ PARSE_TIMEOUT_MS } ms — killing worker` ) ;
587+ // Reject FIRST — worker.terminate() can hang if WASM is stuck
588+ parseWorker = null ;
589+ workerParseCount = 0 ;
590+ reject ( new Error ( `Parse timed out after ${ PARSE_TIMEOUT_MS } ms` ) ) ;
591+ // Fire-and-forget: kill the stuck worker in the background
592+ worker . terminate ( ) . catch ( ( ) => { } ) ;
593+ } , PARSE_TIMEOUT_MS ) ;
594+
595+ pendingParses . set ( id , { resolve, reject, timer } ) ;
596+ worker . postMessage ( { type : 'parse' , id, filePath, content } ) ;
597+ } ) ;
496598 }
497599
498600 for ( let i = 0 ; i < files . length ; i += FILE_IO_BATCH_SIZE ) {
499601 if ( signal ?. aborted ) {
500- if ( parseWorker ) await parseWorker . terminate ( ) ;
602+ if ( parseWorker ) ( parseWorker as import ( 'worker_threads' ) . Worker ) . terminate ( ) . catch ( ( ) => { } ) ;
501603 return {
502604 success : false ,
503605 filesIndexed,
@@ -533,7 +635,7 @@ export class ExtractionOrchestrator {
533635 // Send to worker for parsing, store results on main thread
534636 for ( const { filePath, content, stats, error } of fileContents ) {
535637 if ( signal ?. aborted ) {
536- if ( parseWorker ) await parseWorker . terminate ( ) ;
638+ if ( parseWorker ) ( parseWorker as import ( 'worker_threads' ) . Worker ) . terminate ( ) . catch ( ( ) => { } ) ;
537639 return {
538640 success : false ,
539641 filesIndexed,
@@ -546,7 +648,7 @@ export class ExtractionOrchestrator {
546648 } ;
547649 }
548650
549- processed ++ ;
651+ // Report progress before parsing (show current file being worked on)
550652 onProgress ?.( {
551653 phase : 'parsing' ,
552654 current : processed ,
@@ -555,6 +657,7 @@ export class ExtractionOrchestrator {
555657 } ) ;
556658
557659 if ( error || content === null || stats === null ) {
660+ processed ++ ;
558661 filesErrored ++ ;
559662 errors . push ( {
560663 message : `Failed to read file: ${ error instanceof Error ? error . message : String ( error ) } ` ,
@@ -565,8 +668,24 @@ export class ExtractionOrchestrator {
565668 continue ;
566669 }
567670
568- // Parse in worker thread (main thread stays unblocked)
569- const result = await requestParse ( filePath , content ) ;
671+ // Parse in worker thread (main thread stays unblocked).
672+ // Wrapped in try/catch to handle worker timeouts and crashes gracefully.
673+ let result : ExtractionResult ;
674+ try {
675+ result = await requestParse ( filePath , content ) ;
676+ } catch ( parseErr ) {
677+ processed ++ ;
678+ filesErrored ++ ;
679+ errors . push ( {
680+ message : parseErr instanceof Error ? parseErr . message : String ( parseErr ) ,
681+ filePath,
682+ severity : 'error' ,
683+ code : 'parse_error' ,
684+ } ) ;
685+ continue ;
686+ }
687+
688+ processed ++ ;
570689
571690 // Store in database on main thread (SQLite is not thread-safe)
572691 if ( result . nodes . length > 0 || result . errors . length === 0 ) {
@@ -593,8 +712,11 @@ export class ExtractionOrchestrator {
593712 }
594713 }
595714
596- // Shut down parse worker
597- if ( parseWorker ) await parseWorker . terminate ( ) ;
715+ // Shut down parse worker and clear any pending timers
716+ rejectAllPending ( 'Indexing complete' ) ;
717+ if ( parseWorker ) {
718+ ( parseWorker as import ( 'worker_threads' ) . Worker ) . terminate ( ) . catch ( ( ) => { } ) ;
719+ }
598720
599721 // Phase 3: Resolve references
600722 onProgress ?.( {
0 commit comments