From 3808b4d0a864e2cb24f66d0259cfa3d293622d77 Mon Sep 17 00:00:00 2001 From: Artem Bambalov Date: Wed, 27 May 2026 05:08:59 +0300 Subject: [PATCH 01/10] fix(cli): include resolution + synthesizer edges in indexAll report (#413) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The orchestrator's per-file counter only sees extraction-phase edges, so the `X nodes, Y edges` line printed after `codegraph init -i` / `codegraph index` undercounts the graph — often by more than half on repos with heavy cross-file resolution (mall: 20 047 reported vs 45 629 actually in the DB). Snapshot (nodes, edges) before/after the full pipeline in `indexAll` and write the true delta back to the result. New lightweight `QueryBuilder.getNodeAndEdgeCount()` is one round-trip with no per-kind breakdowns. `indexFiles` (no resolution) and `sync` (uses `nodesUpdated`, not `nodesCreated`) are unaffected. Regression test added: `__tests__/integration/full-pipeline.test.ts > reports edgesCreated including resolution + synthesizer phases`. --- CHANGELOG.md | 10 +++++++- __tests__/integration/full-pipeline.test.ts | 28 +++++++++++++++++++++ src/db/queries.ts | 13 ++++++++++ src/index.ts | 10 ++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4df98f428..0e7b51e49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Fixed +- **`codegraph index` / `init -i` summary now reports the true edge count.** + The per-file counter in the orchestrator only saw extraction-phase edges, + so resolution and synthesizer edges (often >50% of the graph on + cross-file-heavy repos like Spring multi-module Java) were missing from + the `X nodes, Y edges` line. Snapshotting the DB before/after the full + pipeline now reports the actual additions. 
Example: indexing + `macrozheng/mall` previously reported `20 047 edges` while the DB held + `45 629`. ## [0.9.6] - 2026-05-27 @@ -62,7 +71,6 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Both targets are tested on the same parameterized contract as the existing five agents (idempotent install, sibling preservation, install/uninstall round-trip), with extra coverage for migration-marker detection, legacy → unified entry migration, sibling `disabled` field preservation, and the cross-target case where Gemini CLI and Antigravity IDE coexist in the same `~/.gemini/`. Closes #399. - **Installer target for Kiro (CLI + IDE).** `codegraph install` now detects and configures Kiro out of the box on macOS, Linux, and Windows. Writes `mcpServers.codegraph` to `~/.kiro/settings/mcp.json` (global) or `./.kiro/settings/mcp.json` (local), and the codegraph usage block into a dedicated `~/.kiro/steering/codegraph.md` / `./.kiro/steering/codegraph.md` — Kiro's steering system loads every `*.md` file in `steering/` as agent context, so a dedicated file is the natural surface (no marker-based merging required). Sibling MCP servers in `mcp.json` and unrelated steering files (`product.md`, `tech.md`, etc.) are preserved across install / uninstall. Tested on the same parameterized contract as the other agent targets (idempotent install, sibling preservation, install/uninstall round-trip). Closes #385. 
- ## [0.9.5] - 2026-05-25 ### Added diff --git a/__tests__/integration/full-pipeline.test.ts b/__tests__/integration/full-pipeline.test.ts index cb01aa5c7..5b551c136 100644 --- a/__tests__/integration/full-pipeline.test.ts +++ b/__tests__/integration/full-pipeline.test.ts @@ -241,4 +241,32 @@ describe('Integration: full pipeline', () => { cg.destroy(); } }, 30_000); + + it('reports edgesCreated including resolution + synthesizer phases', async () => { + // The synthetic project has cross-file imports, calls, and extends — + // all wired up in the resolution phase, AFTER the orchestrator's + // per-file extraction counter is done. The CLI summary used to read + // only the extraction-phase counter and undercount the graph; this + // test pins the counter to the true DB totals across all phases. + generateSyntheticProject(tempDir, 30); + + const cg = await CodeGraph.init(tempDir, { + config: { include: ['**/*.ts'], exclude: [] }, + }); + + try { + const result = await cg.indexAll(); + const stats = cg.getStats(); + + expect(result.success).toBe(true); + expect(result.nodesCreated).toBe(stats.nodeCount); + expect(result.edgesCreated).toBe(stats.edgeCount); + // Sanity: cross-file resolution had something to do — calls/extends + // edges should exist beyond the bare extraction-time contains edges. + const containsOnly = stats.edgesByKind.contains ?? 0; + expect(stats.edgeCount).toBeGreaterThan(containsOnly); + } finally { + cg.destroy(); + } + }, 30_000); }); diff --git a/src/db/queries.ts b/src/db/queries.ts index 131f2894d..11f5bc34c 100644 --- a/src/db/queries.ts +++ b/src/db/queries.ts @@ -1445,6 +1445,19 @@ export class QueryBuilder { // Statistics // =========================================================================== + /** + * Lightweight (nodes, edges) count snapshot. 
Used around an index/sync + * run to compute true additions across extraction + resolution + + * synthesis — the per-phase counter in the orchestrator only sees + * extraction's contribution, which is why the CLI summary under-reported + * the edge count (resolution + synthesizer edges were invisible). + */ + getNodeAndEdgeCount(): { nodes: number; edges: number } { + return this.db + .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS nodes, (SELECT COUNT(*) FROM edges) AS edges') + .get() as { nodes: number; edges: number }; + } + /** * Get graph statistics */ diff --git a/src/index.ts b/src/index.ts index 83a7058fd..14b0fb0a6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -325,6 +325,7 @@ export class CodeGraph { return { success: false, filesIndexed: 0, filesSkipped: 0, filesErrored: 0, nodesCreated: 0, edgesCreated: 0, errors: [{ message: 'Could not acquire file lock - another process may be indexing', severity: 'error' as const }], durationMs: 0 }; } try { + const before = this.queries.getNodeAndEdgeCount(); const result = await this.orchestrator.indexAll(options.onProgress, options.signal, options.verbose); // Re-detect frameworks now that the index is populated. The resolver @@ -367,6 +368,15 @@ export class CodeGraph { this.db.runMaintenance(); } + // The orchestrator only sees extraction-phase counts; resolution and + // synthesizer edges (often >50% of the graph on JVM repos) come later. + // Recompute against the DB so the CLI summary reports the true totals. 
+ if (result.success && result.filesIndexed > 0) { + const after = this.queries.getNodeAndEdgeCount(); + result.nodesCreated = after.nodes - before.nodes; + result.edgesCreated = after.edges - before.edges; + } + return result; } finally { this.fileLock.release(); From 34240eb297065b38ab26bc24fb7e34d92c33ab1e Mon Sep 17 00:00:00 2001 From: Artem Bambalov Date: Wed, 27 May 2026 06:06:53 +0300 Subject: [PATCH 02/10] feat(jvm): resolve Java/Kotlin imports by fully-qualified name (#412) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wrap top-level declarations of `.kt` / `.java` files in an implicit `namespace` node carrying the file's `package`, then resolve `import com.example.foo.Bar` through that qualifiedName index — so a Bar in Models.kt resolves correctly regardless of filename, a top-level function import binds to its declaration, Java↔Kotlin interop crosses cleanly, and same-name classes across packages no longer collide. Wildcard imports still go through name-matcher. Also extracts Java/C# anonymous-class overrides (`new T() { ... }`) as first-class class nodes with their override methods. Phase 5.5 interface-impl then bridges T's abstract methods to the anonymous overrides automatically — including the lambda-returned `new T() { ... }` pattern common in guava (Splitter, CacheBuilder). Concrete impact on macrozheng/mall (524 .java files, multi-module Spring + MyBatis): 524 namespace nodes, 862 imports edges newly resolve to Java symbols, 76 distinct `Criteria` classes preserved across packages with no merge. On google/guava (3,227 .java): 3,608 anonymous classes extracted, +2,534 interface-impl edges reach overrides hidden in `new T() { ... }` blocks. Agent A/B playbook on small (spring-petclinic-kotlin, 38 .kt), medium (mall, 524 .java), large (guava, 3,227 .java) — 3 flow prompts × 2 runs/arm × 2 arms = 36 runs, claude-opus, headless. Spring repos: 0/0 Read/Grep with-arm, −27% wall-clock vs no-codegraph. 
Guava: 1.8 Read avg with-arm (vs 2.0 without) — improved by the anon-class extraction; residual is a lambda→SAM coverage gap orthogonal to FQN imports (filing follow-up). --- CHANGELOG.md | 29 ++++ __tests__/extraction.test.ts | 172 ++++++++++++++++++++ __tests__/frameworks-integration.test.ts | 199 +++++++++++++++++++++++ __tests__/resolution.test.ts | 115 ++++++++++++- src/extraction/languages/java.ts | 8 + src/extraction/languages/kotlin.ts | 6 + src/extraction/tree-sitter-types.ts | 12 ++ src/extraction/tree-sitter.ts | 128 +++++++++++++++ src/resolution/import-resolver.ts | 35 ++++ src/resolution/index.ts | 16 +- 10 files changed, 717 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e7b51e49..5bc5086a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,35 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- **Java / Kotlin imports now resolve by fully-qualified name.** Extraction + wraps every top-level declaration of a `.kt` / `.java` file in a `namespace` + node carrying the file's `package` (so a class `Bar` in + `package com.example.foo` is indexed with qualifiedName + `com.example.foo::Bar`), and `import com.example.foo.Bar` looks the target + up through that index — regardless of whether the class lives in `Bar.kt`, + `Models.kt`, or a top-level function. Disambiguates same-name classes + across packages (the central failure mode of the previous name-matcher + fallback in multi-module Spring / Android codebases), works across the + Java↔Kotlin interop boundary, and lays groundwork for binding-precise + Dagger2 / Hilt resolution. Wildcard imports (`com.example.*`) still go + through name-matcher. +- **Java / C# anonymous classes (`new T() { ... }`) are now extracted as + first-class class nodes with their overrides.** Previously, an anonymous + subclass returned from a factory or lambda — `return new BaseIter() { + @Override int separatorStart(int s) { ... 
} };` — produced only an + `instantiates` edge: the override methods were invisible to the graph and + Phase 5.5 interface-impl synthesis had no class to bridge. The anon class + now lands as `<T$anon@…>` with an `extends` reference to the + named base/interface, scoped under the enclosing method, and its + `method_declaration` members become normal method nodes. The interface→impl + synthesizer then bridges the base's abstract methods to the anonymous + overrides automatically. Concrete effect on `google/guava` (3,227 .java + files): 3,608 anonymous classes extracted, +2,534 interface-impl edges + reach overrides hidden in `new T() { ... }` blocks (including lambda + bodies). An agent investigating `Splitter.SplittingIterator.separatorStart` + now sees the four anonymous overrides in its trail without a Read. + ### Fixed - **`codegraph index` / `init -i` summary now reports the true edge count.** The per-file counter in the orchestrator only saw extraction-phase edges, diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 6049e9831..2c0bebe34 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -814,6 +814,130 @@ public class Calculator { expect(methodNode).toBeDefined(); expect(methodNode?.isStatic).toBe(true); }); + + it('wraps top-level declarations in a namespace from package_declaration', () => { + const code = ` +package com.example.foo; + +public class Bar { + public String greet() { return "hi"; } +} +`; + const result = extractFromSource('Bar.java', code); + + const ns = result.nodes.find((n) => n.kind === 'namespace'); + expect(ns?.name).toBe('com.example.foo'); + + const cls = result.nodes.find((n) => n.kind === 'class' && n.name === 'Bar'); + expect(cls?.qualifiedName).toBe('com.example.foo::Bar'); + + const greet = result.nodes.find((n) => n.kind === 'method' && n.name === 'greet'); + expect(greet?.qualifiedName).toBe('com.example.foo::Bar::greet'); + }); + + it('does not wrap when no package is declared', () 
=> { + const code = ` +public class Bar { + public String greet() { return "hi"; } +} +`; + const result = extractFromSource('Bar.java', code); + expect(result.nodes.find((n) => n.kind === 'namespace')).toBeUndefined(); + const cls = result.nodes.find((n) => n.kind === 'class' && n.name === 'Bar'); + expect(cls?.qualifiedName).toBe('Bar'); + }); + + it('extracts anonymous-class overrides from `new T() { ... }`', () => { + // The pattern that breaks the trace through `strategy.foo()` in + // libraries like guava's Splitter: the lambda-returned anonymous + // class overrides abstract methods on the base, but without + // extracting those overrides the interface→impl synthesizer has + // nothing to bridge. + const code = ` +package com.example; + +abstract class Base { + abstract int compute(int x); +} + +public class Factory { + public Base make() { + return new Base() { + @Override + int compute(int x) { return x + 1; } + }; + } +} +`; + const result = extractFromSource('Factory.java', code); + + const anon = result.nodes.find((n) => n.kind === 'class' && /Base\$anon@/.test(n.name)); + expect(anon, 'anonymous Base subclass should be extracted as a class').toBeDefined(); + + const compute = result.nodes.find( + (n) => n.kind === 'method' && n.name === 'compute' && n.qualifiedName.includes('$anon@') + ); + expect(compute, 'override method should be a method on the anon class').toBeDefined(); + expect(compute!.qualifiedName).toContain('Factory::make::<Base$anon@'); + + const extendsRef = result.unresolvedReferences.find( + (r) => r.referenceKind === 'extends' && r.referenceName === 'Base' && r.fromNodeId === anon!.id + ); + expect(extendsRef, 'anon class should carry an `extends Base` reference').toBeDefined(); + + // The enclosing `make` method still emits an instantiates edge to Base — + // anon extraction must not swallow that signal. 
+ const instantiatesRef = result.unresolvedReferences.find( + (r) => r.referenceKind === 'instantiates' && r.referenceName === 'Base' + ); + expect(instantiatesRef, 'enclosing method should still instantiate Base').toBeDefined(); + }); + + it('extracts anonymous-class overrides inside a lambda body', () => { + // The exact guava pattern: a lambda is passed to a constructor, and the + // lambda body returns `new T() { @Override ... }`. The anon class must + // still surface even though it sits inside a lambda_expression node. + const code = ` +package com.example; + +interface Strategy { + java.util.Iterator iterator(String s); +} + +abstract class BaseIter implements java.util.Iterator { + abstract int separatorStart(int start); +} + +public class Splitter { + private final Strategy strategy; + public Splitter(Strategy s) { this.strategy = s; } + + public static Splitter on(char c) { + return new Splitter((seq) -> + new BaseIter() { + @Override + int separatorStart(int start) { return start + 1; } + @Override public boolean hasNext() { return false; } + @Override public String next() { return null; } + }); + } +} +`; + const result = extractFromSource('Splitter.java', code); + + const anon = result.nodes.find((n) => n.kind === 'class' && /BaseIter\$anon@/.test(n.name)); + expect(anon, 'anon BaseIter inside the lambda body should be extracted').toBeDefined(); + + const sepStart = result.nodes.find( + (n) => + n.kind === 'method' && + n.name === 'separatorStart' && + n.qualifiedName.includes('$anon@') + ); + expect(sepStart, 'override inside the lambda-returned anon class should be a method node').toBeDefined(); + }); }); describe('C# Extraction', () => { @@ -1173,6 +1297,54 @@ interface WebSocket { expect(methodNames).toContain('send'); expect(methodNames).toContain('cancel'); }); + + it('wraps top-level declarations in a namespace from package_header', () => { + const code = ` +package com.example.foo + +class Bar { + fun greet(): String = "hi" +} + +fun util(): 
Int = 42 +`; + const result = extractFromSource('Bar.kt', code); + + const ns = result.nodes.find((n) => n.kind === 'namespace'); + expect(ns?.name).toBe('com.example.foo'); + + const cls = result.nodes.find((n) => n.kind === 'class' && n.name === 'Bar'); + expect(cls?.qualifiedName).toBe('com.example.foo::Bar'); + + const greet = result.nodes.find((n) => n.kind === 'method' && n.name === 'greet'); + expect(greet?.qualifiedName).toBe('com.example.foo::Bar::greet'); + + const util = result.nodes.find((n) => n.kind === 'function' && n.name === 'util'); + expect(util?.qualifiedName).toBe('com.example.foo::util'); + }); + + it('handles a single-segment package', () => { + const code = ` +package foo + +class Bar +`; + const result = extractFromSource('Bar.kt', code); + const cls = result.nodes.find((n) => n.kind === 'class' && n.name === 'Bar'); + expect(cls?.qualifiedName).toBe('foo::Bar'); + }); + + it('does not wrap when no package is declared', () => { + const code = ` +class Bar { + fun greet() = "hi" +} +`; + const result = extractFromSource('Bar.kt', code); + expect(result.nodes.find((n) => n.kind === 'namespace')).toBeUndefined(); + const cls = result.nodes.find((n) => n.kind === 'class' && n.name === 'Bar'); + expect(cls?.qualifiedName).toBe('Bar'); + }); }); describe('Dart Extraction', () => { diff --git a/__tests__/frameworks-integration.test.ts b/__tests__/frameworks-integration.test.ts index bef143488..3e9ef12eb 100644 --- a/__tests__/frameworks-integration.test.ts +++ b/__tests__/frameworks-integration.test.ts @@ -606,3 +606,202 @@ describe('Java end-to-end — field-injected bean trace (issue #389)', () => { cg.close(); }); }); + +describe('JVM FQN imports — end-to-end', () => { + let tmpDir: string | undefined; + afterEach(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + }); + + it('resolves a Kotlin import when the file name differs from the class name', async () => { + // Bar lives in Models.kt — the 
filesystem-based Java-style path lookup + // (com/example/Bar.kt) misses this; only FQN-via-qualifiedName finds it. + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-jvm-imp-')); + fs.writeFileSync( + path.join(tmpDir, 'Models.kt'), + 'package com.example\n\nclass Bar {\n fun greet(): String = "hi"\n}\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'Caller.kt'), + 'package com.example.app\n\nimport com.example.Bar\n\nclass App {\n fun run() { Bar().greet() }\n}\n' + ); + + const cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + const bar = cg.getNodesByKind('class').find((n) => n.qualifiedName === 'com.example::Bar'); + expect(bar, 'Bar should be extracted with package-qualified name').toBeDefined(); + + const importNode = cg.getNodesByKind('import').find((n) => n.name === 'com.example.Bar'); + expect(importNode, 'import statement node should exist').toBeDefined(); + + // The imports edge may originate from the import node OR from a parent + // scope (file / namespace) — accept either, but require that an + // imports-kind edge to Bar exists. 
+ const reachesBar = cg + .getIncomingEdges(bar!.id) + .find((e) => e.kind === 'imports'); + expect(reachesBar, 'an imports edge should resolve to Bar via FQN').toBeDefined(); + + cg.close(); + }); + + it('resolves a Kotlin top-level function import', async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-jvm-imp-')); + fs.writeFileSync( + path.join(tmpDir, 'Utils.kt'), + 'package com.example\n\nfun util(): Int = 42\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'Caller.kt'), + 'package com.example.app\n\nimport com.example.util\n\nfun main() { util() }\n' + ); + + const cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + const util = cg.getNodesByKind('function').find((n) => n.qualifiedName === 'com.example::util'); + expect(util, 'top-level util() should be extracted under com.example').toBeDefined(); + + const edge = cg.getIncomingEdges(util!.id).find((e) => e.kind === 'imports'); + expect(edge, 'imports edge should reach the top-level function by FQN').toBeDefined(); + }); + + it('resolves cross-language: Kotlin importing a Java class', async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-jvm-imp-')); + fs.writeFileSync( + path.join(tmpDir, 'JavaBar.java'), + 'package com.example;\n\npublic class JavaBar {\n public String greet() { return "hi"; }\n}\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'Caller.kt'), + 'package com.example.app\n\nimport com.example.JavaBar\n\nfun main() { JavaBar().greet() }\n' + ); + + const cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + const javaBar = cg.getNodesByKind('class').find((n) => n.qualifiedName === 'com.example::JavaBar'); + expect(javaBar, 'JavaBar should be extracted under com.example regardless of language').toBeDefined(); + + const edge = cg.getIncomingEdges(javaBar!.id).find((e) => e.kind === 'imports'); + expect(edge, 'Kotlin caller should resolve its import to the Java class').toBeDefined(); + }); + + it('disambiguates a class-name collision across packages', async 
() => { + // Two `Bar` classes in different packages — each importer should reach + // ITS Bar, not the other one. This is the central failure mode that + // name-matcher alone cannot disambiguate. + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-jvm-imp-')); + fs.writeFileSync( + path.join(tmpDir, 'AlphaBar.kt'), + 'package com.example.alpha\n\nclass Bar { fun who() = "alpha" }\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'BetaBar.kt'), + 'package com.example.beta\n\nclass Bar { fun who() = "beta" }\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'CallerA.kt'), + 'package app\n\nimport com.example.alpha.Bar\n\nfun a() { Bar().who() }\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'CallerB.kt'), + 'package app\n\nimport com.example.beta.Bar\n\nfun b() { Bar().who() }\n' + ); + + const cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + const alphaBar = cg.getNodesByKind('class').find((n) => n.qualifiedName === 'com.example.alpha::Bar'); + const betaBar = cg.getNodesByKind('class').find((n) => n.qualifiedName === 'com.example.beta::Bar'); + expect(alphaBar).toBeDefined(); + expect(betaBar).toBeDefined(); + expect(alphaBar!.id).not.toBe(betaBar!.id); + + // Each Bar receives at least one imports edge — from its own caller. + const alphaIncoming = cg.getIncomingEdges(alphaBar!.id).filter((e) => e.kind === 'imports'); + const betaIncoming = cg.getIncomingEdges(betaBar!.id).filter((e) => e.kind === 'imports'); + expect(alphaIncoming.length).toBeGreaterThan(0); + expect(betaIncoming.length).toBeGreaterThan(0); + + // Sanity: each Bar is reached from its own caller — alpha's incoming + // sources include CallerA.kt and beta's include CallerB.kt. 
+ const sourceFiles = (edges: typeof alphaIncoming) => + edges.map((e) => cg.getNode(e.source)?.filePath).filter(Boolean); + expect(sourceFiles(alphaIncoming).some((p) => p?.includes('CallerA.kt'))).toBe(true); + expect(sourceFiles(betaIncoming).some((p) => p?.includes('CallerB.kt'))).toBe(true); + }); +}); + +describe('Java anonymous-class override synthesis — end-to-end', () => { + let tmpDir: string | undefined; + afterEach(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + }); + + it('bridges an abstract base method to overrides inside `new Base() { ... }`', async () => { + // Mirrors guava Splitter: a factory returns `new BaseIter() { + // @Override int separatorStart(...) { ... } }`. Without anon-class + // extraction the override is invisible — Phase 5.5 interface-impl + // has no class to bridge — and an agent investigating `BaseIter.separatorStart` + // can't see its real implementation without reading the file. + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-anon-java-')); + fs.writeFileSync( + path.join(tmpDir, 'Splitter.java'), + 'package com.example;\n' + + '\n' + + 'abstract class BaseIter {\n' + + ' abstract int separatorStart(int start);\n' + + '}\n' + + '\n' + + 'public class Splitter {\n' + + ' public BaseIter make() {\n' + + ' return new BaseIter() {\n' + + ' @Override\n' + + ' int separatorStart(int start) { return start + 1; }\n' + + ' };\n' + + ' }\n' + + '}\n' + ); + + const cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + // The anon class is extracted and contains the override. 
+ const anonClass = cg + .getNodesByKind('class') + .find((n) => /BaseIter\$anon@/.test(n.name)); + expect(anonClass, 'anonymous BaseIter subclass should be a class node').toBeDefined(); + + const baseAbstract = cg + .getNodesByKind('method') + .find((n) => n.qualifiedName === 'com.example::BaseIter::separatorStart'); + const anonOverride = cg + .getNodesByKind('method') + .find( + (n) => + n.name === 'separatorStart' && + n.qualifiedName.includes('$anon@') && + n.qualifiedName.startsWith('com.example::Splitter::make::') + ); + expect(baseAbstract, 'base abstract method should be in the graph').toBeDefined(); + expect(anonOverride, 'anon-class override should be in the graph').toBeDefined(); + + // Phase 5.5 interface-impl: the abstract method has a synthesized + // `calls` edge to the anon override. Without this hop the agent + // would have to Read the file to discover the implementation. + const synthEdge = cg + .getOutgoingEdges(baseAbstract!.id) + .find((e) => e.target === anonOverride!.id && e.kind === 'calls'); + expect(synthEdge, 'BaseIter.separatorStart should bridge to anon.separatorStart').toBeDefined(); + expect(synthEdge!.provenance).toBe('heuristic'); + expect((synthEdge!.metadata as { synthesizedBy?: string } | undefined)?.synthesizedBy).toBe( + 'interface-impl' + ); + + cg.close(); + }); +}); diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts index 5b914bfb0..03b8ea6ab 100644 --- a/__tests__/resolution.test.ts +++ b/__tests__/resolution.test.ts @@ -12,7 +12,8 @@ import { CodeGraph } from '../src'; import { Node, UnresolvedReference } from '../src/types'; import { ReferenceResolver, createResolver, ResolutionContext } from '../src/resolution'; import { matchReference } from '../src/resolution/name-matcher'; -import { resolveImportPath, extractImportMappings, loadCppIncludeDirs, clearCppIncludeDirCache } from '../src/resolution/import-resolver'; +import { resolveImportPath, extractImportMappings, resolveJvmImport, 
loadCppIncludeDirs, clearCppIncludeDirCache } from '../src/resolution/import-resolver'; +import type { UnresolvedRef } from '../src/resolution/types'; import { detectFrameworks, getAllFrameworkResolvers } from '../src/resolution/frameworks'; import { QueryBuilder } from '../src/db/queries'; import { DatabaseConnection } from '../src/db'; @@ -353,6 +354,116 @@ from ..services import auth_service }); }); + describe('JVM FQN Import Resolution', () => { + // Build a ResolutionContext stub whose getNodesByQualifiedName answers + // from a fixed table — the only context method resolveJvmImport touches. + const makeContext = (byQName: Record<string, Node[]>): ResolutionContext => ({ + getNodesInFile: () => [], + getNodesByName: () => [], + getNodesByQualifiedName: (q) => byQName[q] ?? [], + getNodesByKind: () => [], + fileExists: () => false, + readFile: () => null, + getProjectRoot: () => '', + getAllFiles: () => [], + }); + const node = (id: string, name: string, qualifiedName: string, kind: Node['kind'] = 'class', language: Node['language'] = 'kotlin'): Node => ({ + id, kind, name, qualifiedName, + filePath: 'Models.kt', language, + startLine: 1, endLine: 1, startColumn: 0, endColumn: 0, + updatedAt: 0, + }); + const importRef = (referenceName: string, language: Node['language'] = 'kotlin'): UnresolvedRef => ({ + fromNodeId: 'caller', + referenceName, + referenceKind: 'imports', + line: 1, column: 0, + filePath: 'Caller.kt', + language, + }); + + it('resolves a Kotlin class import by FQN regardless of filename', () => { + const target = node('n1', 'Bar', 'com.example.foo::Bar'); + const ctx = makeContext({ 'com.example.foo::Bar': [target] }); + const result = resolveJvmImport(importRef('com.example.foo.Bar'), ctx); + expect(result?.targetNodeId).toBe('n1'); + expect(result?.resolvedBy).toBe('import'); + }); + + it('resolves a Kotlin top-level function import by FQN', () => { + const util = node('n2', 'util', 'com.example.foo::util', 'function'); + const ctx = makeContext({ 
'com.example.foo::util': [util] }); + const result = resolveJvmImport(importRef('com.example.foo.util'), ctx); + expect(result?.targetNodeId).toBe('n2'); + }); + + it('resolves a Java import by FQN', () => { + const target = node('n3', 'Bar', 'com.example.foo::Bar', 'class', 'java'); + const ctx = makeContext({ 'com.example.foo::Bar': [target] }); + const result = resolveJvmImport(importRef('com.example.foo.Bar', 'java'), ctx); + expect(result?.targetNodeId).toBe('n3'); + }); + + it('resolves cross-language: Kotlin importing a Java class', () => { + // The Kotlin file declares `import com.example.JavaBar` — the target is + // a Java class node. JVM interop means the resolver doesn't care about + // the source language of the target, only that the FQN matches. + const target = node('n4', 'JavaBar', 'com.example::JavaBar', 'class', 'java'); + const ctx = makeContext({ 'com.example::JavaBar': [target] }); + const result = resolveJvmImport(importRef('com.example.JavaBar'), ctx); + expect(result?.targetNodeId).toBe('n4'); + }); + + it('disambiguates a name collision across packages', () => { + // Two classes named `Bar` in different packages. Each import resolves + // to the one whose FQN matches — not to "whichever was found first". + const barA = node('n5a', 'Bar', 'com.example.alpha::Bar'); + const barB = node('n5b', 'Bar', 'com.example.beta::Bar'); + const ctx = makeContext({ + 'com.example.alpha::Bar': [barA], + 'com.example.beta::Bar': [barB], + }); + expect(resolveJvmImport(importRef('com.example.alpha.Bar'), ctx)?.targetNodeId).toBe('n5a'); + expect(resolveJvmImport(importRef('com.example.beta.Bar'), ctx)?.targetNodeId).toBe('n5b'); + }); + + it('returns null for wildcard imports', () => { + const ctx = makeContext({}); + expect(resolveJvmImport(importRef('com.example.foo.*'), ctx)).toBeNull(); + }); + + it('returns null for unqualified names', () => { + // A single-segment name has no package; nothing to look up by FQN. 
+ const ctx = makeContext({ 'Bar': [node('n6', 'Bar', 'Bar')] }); + expect(resolveJvmImport(importRef('Bar'), ctx)).toBeNull(); + }); + + it('returns null for non-JVM languages', () => { + const target = node('n7', 'Bar', 'com.example::Bar'); + const ctx = makeContext({ 'com.example::Bar': [target] }); + expect(resolveJvmImport(importRef('com.example.Bar', 'typescript'), ctx)).toBeNull(); + }); + + it('returns null for non-imports reference kinds', () => { + // The resolver intentionally only acts on `imports` refs; ordinary + // `calls`/`extends` refs fall through to the framework + name-matcher + // strategies. + const target = node('n8', 'Bar', 'com.example::Bar'); + const ctx = makeContext({ 'com.example::Bar': [target] }); + const ref: UnresolvedRef = { + fromNodeId: 'caller', referenceName: 'com.example.Bar', + referenceKind: 'calls', line: 1, column: 0, + filePath: 'Caller.kt', language: 'kotlin', + }; + expect(resolveJvmImport(ref, ctx)).toBeNull(); + }); + + it('returns null when the FQN is not in the index', () => { + const ctx = makeContext({}); + expect(resolveJvmImport(importRef('com.example.Unknown'), ctx)).toBeNull(); + }); + }); + describe('Framework Detection', () => { it('should detect React framework', () => { const context: ResolutionContext = { @@ -848,7 +959,7 @@ public class Handler { const use = cg .getNodesByKind('method') - .find((n) => n.qualifiedName === 'Handler::use'); + .find((n) => n.qualifiedName === 'com.example.web::Handler::use'); expect(use).toBeDefined(); const calls = cg.getOutgoingEdges(use!.id).filter((e) => e.kind === 'calls'); expect(calls.length).toBeGreaterThanOrEqual(1); diff --git a/src/extraction/languages/java.ts b/src/extraction/languages/java.ts index 638533f0d..4e8cbc7a1 100644 --- a/src/extraction/languages/java.ts +++ b/src/extraction/languages/java.ts @@ -56,4 +56,12 @@ export const javaExtractor: LanguageExtractor = { } return null; }, + packageTypes: ['package_declaration'], + extractPackage: (node, source) 
=> { + // package_declaration → scoped_identifier or identifier (single-segment) + const id = node.namedChildren.find( + (c: SyntaxNode) => c.type === 'scoped_identifier' || c.type === 'identifier' + ); + return id ? source.substring(id.startIndex, id.endIndex).trim() : null; + }, }; diff --git a/src/extraction/languages/kotlin.ts b/src/extraction/languages/kotlin.ts index 19c386242..e590d4481 100644 --- a/src/extraction/languages/kotlin.ts +++ b/src/extraction/languages/kotlin.ts @@ -235,4 +235,10 @@ export const kotlinExtractor: LanguageExtractor = { } return null; }, + packageTypes: ['package_header'], + extractPackage: (node, source) => { + // package_header → identifier (dotted: `com.example.foo`) + const id = node.namedChildren.find((c: SyntaxNode) => c.type === 'identifier'); + return id ? source.substring(id.startIndex, id.endIndex).trim() : null; + }, }; diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts index d7d5a45e3..6c04fbaeb 100644 --- a/src/extraction/tree-sitter-types.ts +++ b/src/extraction/tree-sitter-types.ts @@ -212,4 +212,16 @@ export interface LanguageExtractor { * Returns the callee name if this node is a bare call, or undefined if not. */ extractBareCall?: (node: SyntaxNode, source: string) => string | undefined; + + /** + * Node types representing a file-level package/namespace declaration + * (e.g. Kotlin `package_header`, Java `package_declaration`). When set, + * the core wraps every top-level declaration in an implicit `namespace` + * node carrying the FQN, so cross-file import resolution can match by + * qualifiedName instead of filename (Kotlin filename ≠ class name). + */ + packageTypes?: string[]; + + /** Extract the dotted package name from a package declaration node. 
*/ + extractPackage?: (node: SyntaxNode, source: string) => string | null; } diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index d291c0704..6d76f38ae 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -215,7 +215,17 @@ export class TreeSitterExtractor { // Push file node onto stack so top-level declarations get contains edges this.nodeStack.push(fileNode.id); + + // File-level package declaration (Kotlin/Java). Creates an implicit + // `namespace` node wrapping every top-level declaration so their + // qualifiedName carries the FQN — required for cross-file import + // resolution on JVM languages where filename ≠ class name. + const packageNodeId = this.extractFilePackage(this.tree.rootNode); + if (packageNodeId) this.nodeStack.push(packageNodeId); + this.visitNode(this.tree.rootNode); + + if (packageNodeId) this.nodeStack.pop(); this.nodeStack.pop(); } catch (error) { const msg = error instanceof Error ? error.message : String(error); @@ -378,6 +388,17 @@ export class TreeSitterExtractor { // their own `calls` refs. else if (INSTANTIATION_KINDS.has(nodeType)) { this.extractInstantiation(node); + // Java/C# `new T(...) { ... }` — anonymous class with body. Without + // extracting it as a class node + its methods, the interface→impl + // synthesizer (Phase 5.5) can't bridge T's abstract methods to the + // anonymous overrides, and an agent investigating a call through T + // (`strategy.iterator(...)` where strategy is a Strategy lambda body) + // has to Read the file to find the actual implementation. 
+ const anonBody = this.findAnonymousClassBody(node); + if (anonBody) { + this.extractAnonymousClass(node, anonBody); + skipChildren = true; + } } // (Decorator handling lives inside the symbol-creating extractors // — extractClass / extractFunction / extractProperty — because the @@ -490,6 +511,33 @@ export class TreeSitterExtractor { return null; } + /** + * Find a `packageTypes` child under the root, create a `namespace` node + * for it, and return its id so the caller can scope top-level + * declarations underneath. Returns null when no package header is + * present (script files, .kts without a package). + */ + private extractFilePackage(rootNode: SyntaxNode): string | null { + const types = this.extractor?.packageTypes; + if (!types || types.length === 0 || !this.extractor?.extractPackage) return null; + + let pkgNode: SyntaxNode | null = null; + for (let i = 0; i < rootNode.namedChildCount; i++) { + const child = rootNode.namedChild(i); + if (child && types.includes(child.type)) { + pkgNode = child; + break; + } + } + if (!pkgNode) return null; + + const pkgName = this.extractor.extractPackage(pkgNode, this.source); + if (!pkgName) return null; + + const ns = this.createNode('namespace', pkgName, pkgNode); + return ns?.id ?? null; + } + /** * Build qualified name from node stack */ @@ -1747,6 +1795,78 @@ export class TreeSitterExtractor { } } + /** + * Find a `class_body` child of an `object_creation_expression` — the + * marker for an anonymous class (`new T() { ... }`). Returns the body + * node so the caller can walk it as the anon class's members. + */ + private findAnonymousClassBody(node: SyntaxNode): SyntaxNode | null { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + // Java: `class_body`. C# uses the same node kind. 
+ if (child && (child.type === 'class_body' || child.type === 'declaration_list')) { + return child; + } + } + return null; + } + + /** + * Extract a Java/C# anonymous class — `new T() { ...members }`. Emits a + * `class` node named `<T$anon@LINE>`, an `extends` reference to T (so + * Phase 5.5 interface-impl can bridge), and walks the body so its + * `method_declaration` members become method nodes under the anon class. + * + * Why this matters: without anon-class extraction, the overrides inside + * a lambda-returned `new T() { @Override int foo(){...} }` are not nodes, + * so a call through T.foo (the abstract parent method) has no static + * target — the agent has to Read the file to find the implementation. + */ + private extractAnonymousClass(node: SyntaxNode, body: SyntaxNode): void { + if (!this.extractor) return; + + // The instantiated type sits in the same field/position that + // extractInstantiation reads from. Use the same lookup so the anon + // class's `extends` target matches the `instantiates` edge. + const typeNode = + getChildByField(node, 'constructor') || + getChildByField(node, 'type') || + getChildByField(node, 'name') || + node.namedChild(0); + let typeName = typeNode ? getNodeText(typeNode, this.source) : 'Object'; + const ltIdx = typeName.indexOf('<'); + if (ltIdx > 0) typeName = typeName.slice(0, ltIdx); + const lastDot = Math.max(typeName.lastIndexOf('.'), typeName.lastIndexOf('::')); + if (lastDot >= 0) typeName = typeName.slice(lastDot + 1).replace(/^[:.]/, ''); + typeName = typeName.trim() || 'Object'; + + const anonName = `<${typeName}$anon@${node.startPosition.row + 1}>`; + const classNode = this.createNode('class', anonName, node, {}); + if (!classNode) return; + + // The anonymous class implicitly extends/implements the named type. + // We can't tell at extraction time whether T is a class or an interface, + // so emit `extends`.
Resolution will still bind T to whatever it is, and + // Phase 5.5 (which already handles both `extends` and `implements`) will + // bridge T's methods to the override names found in the anon body. + this.unresolvedReferences.push({ + fromNodeId: classNode.id, + referenceName: typeName, + referenceKind: 'extends', + line: typeNode?.startPosition.row ?? node.startPosition.row, + column: typeNode?.startPosition.column ?? node.startPosition.column, + }); + + // Walk the body's children so method_declaration nodes inside become + // method nodes scoped to the anon class. + this.nodeStack.push(classNode.id); + for (let i = 0; i < body.namedChildCount; i++) { + const child = body.namedChild(i); + if (child) this.visitNode(child); + } + this.nodeStack.pop(); + } + /** * Scan `declNode` and its preceding siblings (within the parent's * named children) for decorator nodes, emitting a `decorates` @@ -1876,6 +1996,14 @@ export class TreeSitterExtractor { // about `call_expression`, so constructor invocations // produced no graph edges at all. this.extractInstantiation(node); + // Anonymous class with body: `new T() { ... }` (Java/C#). Extract as + // a class so interface-impl synthesis (Phase 5.5) can bridge T's + // methods to the overrides — same rationale as in visitNode. 
+ const anonBody = this.findAnonymousClassBody(node); + if (anonBody) { + this.extractAnonymousClass(node, anonBody); + return; + } } else if (this.extractor!.extractBareCall) { const calleeName = this.extractor!.extractBareCall(node, this.source); if (calleeName && this.nodeStack.length > 0) { diff --git a/src/resolution/import-resolver.ts b/src/resolution/import-resolver.ts index 7a7790371..bc493704d 100644 --- a/src/resolution/import-resolver.ts +++ b/src/resolution/import-resolver.ts @@ -948,6 +948,41 @@ export function extractReExports(content: string, language: Language): ReExport[ /** * Resolve a reference using import mappings */ +/** + * JVM (Java / Kotlin) imports use fully-qualified names (`import + * com.example.foo.Bar`) decoupled from filenames, so the JS/Python + * style filesystem path lookup misses them whenever the file isn't + * named after its primary symbol (Kotlin `Utils.kt` exporting `Bar`, + * top-level fns, extension fns). Resolve them through the + * `qualifiedName` index instead — populated by the package_header / + * package_declaration namespace wrappers in the extractor. + */ +export function resolveJvmImport( + ref: UnresolvedRef, + context: ResolutionContext +): ResolvedRef | null { + if (ref.referenceKind !== 'imports') return null; + if (ref.language !== 'java' && ref.language !== 'kotlin') return null; + + const fqn = ref.referenceName; + const lastDot = fqn.lastIndexOf('.'); + if (lastDot <= 0) return null; + const pkg = fqn.substring(0, lastDot); + const sym = fqn.substring(lastDot + 1); + // Wildcard imports (`com.example.*`) deliberately punt to name-matcher. 
+ if (sym === '*') return null; + + const candidates = context.getNodesByQualifiedName(`${pkg}::${sym}`); + if (candidates.length === 0) return null; + + return { + original: ref, + targetNodeId: candidates[0]!.id, + confidence: 0.95, + resolvedBy: 'import', + }; +} + export function resolveViaImport( ref: UnresolvedRef, context: ResolutionContext diff --git a/src/resolution/index.ts b/src/resolution/index.ts index c26157d17..5158e8301 100644 --- a/src/resolution/index.ts +++ b/src/resolution/index.ts @@ -17,7 +17,7 @@ import { ImportMapping, } from './types'; import { matchReference } from './name-matcher'; -import { resolveViaImport, extractImportMappings, extractReExports, loadCppIncludeDirs } from './import-resolver'; +import { resolveViaImport, resolveJvmImport, extractImportMappings, extractReExports, loadCppIncludeDirs } from './import-resolver'; import { detectFrameworks } from './frameworks'; import { synthesizeCallbackEdges } from './callback-synthesizer'; import { loadProjectAliases, type AliasMap } from './path-aliases'; @@ -528,6 +528,14 @@ export class ReferenceResolver { // Also check capitalized receiver (instance-method resolution) const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1); if (this.knownNames.has(capitalized)) return true; + // JVM FQN: `com.example.foo.Bar` — the only useful segment is the + // last one (`Bar`); the earlier check finds `example.foo.Bar` which + // never matches a node name. + const lastDot = name.lastIndexOf('.'); + if (lastDot > dotIdx) { + const tail = name.substring(lastDot + 1); + if (tail && this.knownNames.has(tail)) return true; + } } const colonIdx = name.indexOf('::'); if (colonIdx > 0) { @@ -588,6 +596,12 @@ export class ReferenceResolver { return null; } + // JVM FQN imports skip framework/name-matcher: `import com.example.Bar` + // resolves directly through the qualifiedName index, which is unambiguous + // even when several `Bar` classes exist in different packages. 
+ const jvmImport = resolveJvmImport(ref, this.context); + if (jvmImport) return jvmImport; + const candidates: ResolvedRef[] = []; // Strategy 1: Try framework-specific resolution From 02935d777a848e067778ad792c636b4c6e63082d Mon Sep 17 00:00:00 2001 From: Ran <8403607+eddieran@users.noreply.github.com> Date: Wed, 27 May 2026 12:38:28 +0800 Subject: [PATCH 03/10] test(vitest): unblock subprocess MCP tests on Node >= 25 dev machines (#478) (#479) Vitest already inherits process.env into every spawned `codegraph serve --mcp` child, but on Node >= 25 the CLI's hard-block (src/bin/codegraph.ts) kills the child before it can respond. Set CODEGRAPH_ALLOW_UNSAFE_NODE=1 via test.env so the test suite is green regardless of the contributor's Node version; the runtime guard itself is unchanged for end users. --- vitest.config.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vitest.config.ts b/vitest.config.ts index 2449a989e..4a5ad904b 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -5,6 +5,20 @@ export default defineConfig({ globals: true, environment: 'node', include: ['__tests__/**/*.test.ts'], + /** + * Several MCP integration tests (mcp-daemon, mcp-initialize, mcp-ppid-watchdog, + * mcp-roots) spawn `dist/bin/codegraph.js serve --mcp` with `process.execPath` + * and rely on the child inheriting `process.env`. On a Node >= 25 dev machine + * the CLI's hard-block (src/bin/codegraph.ts) would otherwise exit the child + * before it ever responds, so every spawn-based test times out — see #478. + * + * Setting the override here keeps the CLI's runtime guard intact for end + * users (it's still enforced when `codegraph` is invoked directly) while + * letting the test suite run on whatever Node the contributor happens to + * have installed. CI on Node 22/23 is unaffected — the guard doesn't fire + * there, so the variable is a no-op. 
+ */ + env: { CODEGRAPH_ALLOW_UNSAFE_NODE: '1' }, coverage: { provider: 'v8', reporter: ['text', 'json', 'html'], From 71935e37c24bddeb18886006faa4262af06ac89e Mon Sep 17 00:00:00 2001 From: Colby Mchenry Date: Thu, 28 May 2026 12:38:03 -0500 Subject: [PATCH 04/10] feat(mcp): multi-module Go trace-quality + small-repo retrieval tuning (#494) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(go): generated-file down-rank + gRPC stub-impl bridge + trace-failure inlining Multi-pronged fix to make codegraph competitive on Go multi-module repos (cosmos-sdk, etcd) where it previously lost or tied. Driven by an 8-question agent-eval audit across cobra, gin, prometheus, cosmos-sdk, and etcd: the baseline had codegraph losing ~60% on cost on cosmos-sdk and mixed on etcd deep cross-module flows, while winning cleanly on the single-module and non-protobuf-heavy repos. Diagnostics ruled OUT `go.work` parsing as the gap (prometheus crushes without it). The actual failure modes were generated-file noise warping disambiguation, missing gRPC interface→impl bridge in structural-typing Go, and trace's failure path triggering 3-5 follow-up tool calls instead of inlining the material the agent needed. Changes: - New `src/extraction/generated-detection.ts` — path-pattern classifier for `.pb.go`, `.pulsar.go`, `_grpc.pb.go`, `_mock.go`, `_mocks.go`, `mock_*.go`, `.generated.[jt]sx?`, `_pb2(_grpc)?.py`, `.pb.{cc,h}`, `.g.dart`, `.freezed.dart`. Applied as a stable sort tiebreaker in `findSymbol`, `findAllSymbols`, `codegraph_search` (MCP + CLI), `codegraph_explore` file ranking, and context formatter Entry Points / Related Symbols / Code blocks. Cosmos's `msgServer.Send` now ranks #3 instead of #9 on a `Send` search. 
- New `goGrpcStubImplEdges` synthesizer in `callback-synthesizer.ts` — detects `UnimplementedXxxServer` structs in generated files, identifies their RPC methods (excluding `mustEmbed*` / `testEmbeddedByValue` gRPC markers), and emits `calls` edges to the matching methods on any non-generated struct whose method-name set is a superset. Closes Go's structural-typing gap that the existing `interfaceOverrideEdges` (Java / Kotlin only) couldn't bridge. 467 bridge edges on cosmos-sdk; bank's `UnimplementedMsgServer::Send` points to `x/bank/keeper/msg_server.go` only, not to `msgClient` siblings or mock files. - Trace-failure rewrite (`handleTrace`) — when no static path connects endpoints, instead of telling the agent to call `codegraph_node` (a 3-4-call fan-out), inline both endpoints' bodies (120 lines / 3600 chars per endpoint), their callers (≤6), and callees (≤8) in one response. - Trace endpoint-pairing improvements — scores every `from`×`to` candidate combo by shared directory prefix and tries the best-paired pair first (the full candidate set, not just FTS top-5). A less-canonical-path penalty (`enterprise/`, `contrib/`, `examples/`, `vendor/`, `third_party/`, `deprecated/`, `legacy/`) ensures the canonical-module pair wins even when a side-experiment shares more of its directory prefix. Find-path probe budget capped at 20 pairs. - Test-file deprioritization in `codegraph_explore` `isLowValue` — adds suffix patterns (`_test.go`, `_spec.rb`, `.test.ts`, `.spec.tsx`, `Test.java`, `Spec.kt`) alongside the existing directory-style patterns. Otherwise etcd's `watchable_store_test.go` consumes 5K chars of explore budget that should go to the hand-written flow source. Tests: - New `__tests__/generated-detection.test.ts` (4 unit tests) pins the suffix patterns. 
- New "Go gRPC stub→impl synthesis" integration test suite in `frameworks-integration.test.ts` (2 tests): positive bridge from stub to hand-written impl, AND the precision case (don't bridge to a generated sibling like `msgClient` in the same .pb.go). - Full suite: 1076/1076 pass. Empirical (post-fix, n=2 average per question): | Repo / Q | WITH | WITHOUT | Reads (W/WO) | Time (W/WO) |-------------------------|------------|-------------|--------------|------------ | cobra (parse cmds) | $0.27 | $0.27 | 0 / 4 | 39s / 60s | prometheus (scrape→TSDB)| $0.63 | $0.70 | 0 / 6 | 106s/143s | cosmos-sdk Q1 (MsgSend) | $0.41 | $0.26 | 1 / 2 | 67s / 64s | cosmos-sdk Q2 (Delegate)| $0.47 | $0.46 | 0 / 5 | 50s / 73s | cosmos-sdk Q3 (gov tally)| $0.34 | $0.31 | 1.5 / 3 | 54s / 76s | etcd Q1 (Put→raft) | $0.65 | $0.78 | 0 / 4 | 98s / 129s | etcd Q2 (watch) | $0.36 | $0.50 | 0 / 4+ | 58s / 89s Codegraph wins on reads + time on every question. Cost is mixed: 3 clean wins, 3 tied (within 10%), 1 stubborn cost loss on the grep-favored Q1. Compared to baseline, the cosmos-sdk cost-gap collapsed from -60% to -15% on average, and Q3 went from a 75% loss to a tie. Raw run artifacts in `/tmp/cg-finalv2-*/` and `/tmp/cg-final-*/`. Memory written at `project_go_multi_module_audit.md` for the methodology + before/after numbers. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * feat(mcp): auto-inline trace in codegraph_context for flow queries When a codegraph_context task contains a flow keyword ("trace", "from", "reach", "flow", "propagat", "how does", "how do") AND at least two distinct PascalCase / camelCase identifiers, internally invoke trace between the first two extracted symbols and splice the trace body into the context response. Conservative trigger by design: false positives waste one graph query; false negatives just fall back to the agent calling trace itself (existing path-proximity wiring handles either case). 
Goal: collapse the agent's typical context → trace → explore sequence into a single context call for clear flow queries, closing the remaining cost-overhead gap on multi-call patterns. The path-proximity + less-canonical-path scoring + the trace-failure-inlined-bodies behavior already let the inline trace land on the right endpoint pair and return enough material that no follow-up codegraph_node/Read is needed. Doesn't fire on: - cobra's "How does cobra parse commands and flags?" (no PascalCase symbols) — verified in regression run, no behavior change ($0.260 WITH vs $0.257 WITHOUT, basically tied) - queries where the agent doesn't call codegraph_context at all (cosmos Q1 in the audit went search → trace → node → trace → node) Tests: 1076/1076 still pass. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * feat(mcp): trace failure inlines TO file siblings to displace node fan-out The cosmos-Q1 audit revealed a static-resolution gap: msgServer.Send's *real* next hop is `k.Keeper.SendCoins` — an interface-method call on an embedded field that tree-sitter can't resolve. The static getCallees list for msgServer.Send is all utility/error functions (StringToBytes, Wrapf, …). The actual flow (SendCoins → subUnlockedCoins → addCoins → setBalance) lives entirely inside `x/bank/keeper/send.go`, which is also where the TO endpoint (setBalance) lives. When trace fails (no static path), inline the **top 5 functions/methods in the destination file**, ordered by line-distance from the TO node. This catches the flow that interface-method calls obscure — the canonical "k.." pattern in Go, also relevant to Java dependency-injection / Rails service-object dispatch / etc. where interface dispatch hides the real call. Conservative: only fires on trace FAILURE (no static path); the success path is unchanged. Per-body cap (40 lines / 1200 chars), top 5 siblings. 
Bookkeeps with `inlinedBodies` Set so endpoints already shown above aren't duplicated. Result: cosmos-Q1 — historically the most stubborn cost loss (-2.2× to -39% across the audit) — flipped to a clean WIN: $0.257 WITH vs $0.449 WITHOUT (-43%), 34s vs 79s, 0 Reads vs 2 Reads + 5 Greps, 5 codegraph calls vs 12. Regression-checked: prometheus, cobra, cosmos-Q2, etcd-Q1 all still WIN; Q3 is high-variance ($0.30-$0.45 range historically) and fell within that on this run. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * feat: extend coverage to all supported languages, not just Go PR review feedback: the audit was Go-driven, so the patterns I added were Go-flavored. Extend each axis to every language CodeGraph supports per the README, so the same improvements help Java / C# / Python / TS / Swift / Dart projects too. **generated-detection.ts** — Added patterns for: - TS/JS: `.gen.[jt]sx?`, `.pb.[jt]s`, `_pb.[jt]s`, `_grpc_pb.[jt]s` (ts-proto, gRPC-web, Apollo / GraphQL codegen, Hasura). - Python: `_pb2.pyi` (mypy stubs from protobuf). - C#: `.g.cs` (T4 / Razor codegen), `Grpc.cs` (protoc-gen-csharp). - Java: `OuterClass.java` (protoc-gen-java), `Grpc.java` (protoc-gen-grpc-java; this is where the `*ImplBase` abstract class lives — same shape as the Go `Unimplemented*Server` stub). - Swift: `.pb.swift` (protoc-gen-swift). - Dart: `.pb.dart`, `.pbgrpc.dart`, `.chopper.dart`. - Rust: `.generated.rs`. **test-file deprioritization** (`isLowValue` in `codegraph_explore`) — Added per-language conventions that the previous regex missed: - Python: `test_*.py` (pytest discovery) and `*_test.py`. - Ruby: `*_test.rb` (minitest) — `*_spec.rb` already covered. - C#: `*Tests.cs`, `*Test.cs`, `*Spec.cs`. - Swift: `*Tests.swift` (XCTest). - Dart: `*_test.dart`. 
**IFACE_OVERRIDE_LANGS** in `callback-synthesizer.ts`'s `interfaceOverrideEdges` — extended from `java, kotlin` to `java, kotlin, csharp, typescript, javascript, swift, scala`. Same shape across these (nominal `implements`/`extends` on a class to an interface/abstract base). Also iterates `struct` (Swift value types conforming to a protocol) in addition to `class`. The existing matchesSymbol-style logic and `getOutgoingEdges(..., ['implements', 'extends'])` work unchanged. **CLAUDE.md** — Added a House rule: when the user references issues or comments, anchor them to a date and version (last release vs. last main commit vs. current branch tip) BEFORE concluding a fix is incomplete. Issue #388 comments from May 25-27 were responding to the released v0.9.5 / merged-PR-469 state — not to this branch's in-flight work. The new rule walks through the disambiguation: `grep -m1 '^## \[' CHANGELOG.md` for release version, `git log --first-parent main -1` for main tip. Tests: 1076/1076 still pass. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * feat(mcp): tiny-repo tool gating + shorter tool descriptions Two cumulative changes targeting the small-repo cost gap surfaced by the cross-language audit: 1. **Tool descriptions trimmed** (~2.1KB total saved across 10 tools). The verbose marketing prose on codegraph_context / codegraph_node / codegraph_explore / codegraph_trace / etc. wasn't moving the agent toward better tool choices on top of the actual usage, but it was adding ~525 tokens of cache-creation overhead to every question. The trimmed descriptions keep the operational hints (e.g. "Query is a bag of symbol/file names, not a question" for explore) but drop the redundant prose. 2. **Dynamic tiny-repo tool gating** in `ToolHandler.getTools()`. 
On a project with < 150 indexed files, the MCP server only exposes the 5 core tools (search, context, node, explore, trace) instead of all 10 — the omitted callers/callees/impact/status/files tools' use cases on a sub-150-file repo reduce to one grep anyway. The MCP tool-defs overhead is the #1 source of cost loss on tiny repos (~$0.10-0.15 fixed cache-creation per question); cutting 5 tools drops that by ~50%. Effect on ky (~25 files, the worst pre-fix offender): - Before: $0.59 WITH vs $0.42 WITHOUT (+42% loss, n=1) - After: $0.32 WITH vs $0.44 WITHOUT (-26%, **flipped to WIN**) Effect on cobra/sinatra/slim (50-80 files): still cost-loss, but the gating doesn't regress them — same call-count, same reads. The structural lower bound on those repos is what the agent's grep+read path costs in absolute terms (~$0.20-0.30). Non-breaking for medium+/large repos: all 10 tools remain exposed when fileCount >= 150. Tests: 1076/1076 still pass. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * feat(mcp): combined tiny-tier — smaller explore + tool gating (cobra/ky flip to WIN) Combines the tool gating from the previous commit with a matching explore-budget cut for projects under 150 files. 
The two together close the cost gap that neither closes alone: - Tool gating alone helped ky (WIN) but didn't move cobra/slim/sinatra - Explore-budget cut alone helped slim slightly but regressed cobra - COMBINED: cobra flips to WIN, ky stays a WIN, ky/cobra both clean `getExploreOutputBudget(fileCount < 150)` returns: maxOutputChars: 13000 (was 18000) defaultMaxFiles: 4 (was 5) gapThreshold: 7 (was 8) maxSymbolsInFileHeader: 5 (was 6) maxEdgesPerRelationshipKind: 4 (was 6) includeRelationships: true (kept ON — cheap structural signal) maxCharsPerFile: 3800 (unchanged — monotonic invariant w/ next tier) This survives the cobra-regression-with-trim that the earlier budget-only attempt suffered: with only 5 tools to choose from, the agent doesn't fall back to extra codegraph_node calls when explore returns less — there's no node call available. Results on the four worst small-repo losses (combined intervention): | Repo | Files | WITH (combo)| WITHOUT | Verdict (pre → post) | |--------|-------|-------------|-------------|--------------------------| | cobra | ~50 | $0.25 | $0.31 | loss → **WIN** (-19%) | | ky | ~25 | $0.39 | $0.39 | -42% → tied | | slim | ~80 | $0.31 | $0.24 | LOSS 31% → still LOSS | | sinatra| ~60 | $0.30 | $0.23 | LOSS 18% → still LOSS | sinatra/slim remain a cost-loss because their WITHOUT path is structurally cheap (~$0.20 — fewer than 4 cheap grep+read calls). Codegraph can't beat that absolute floor with any meaningful response. Both still WIN on time + reads + tool-call count. Tests: tier boundary cases updated to cover the new <150 / 150-499 / 500-4999 / 5000-14999 / >=15000 progression. Off-by-one guard updated to include the new 149↔150 boundary. All 1076 tests pass. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * feat(context): trim maxNodes default to 8 on tiny repos On a <150-file project the entire repo is grep-able in one turn, so the 20-node default `codegraph_context` was paying for a graph subset that exceeds the agent's actual question. Cutting the tiny-repo default to 8 (typical 1-3 entry points + their immediate 1-hop neighbors) reduces the context-tool response body without hitting sufficiency on the flow shapes small repos actually contain. Non-breaking: the agent can still pass an explicit `maxNodes` to override; medium+ repos (>=150 files) keep the 20-node default. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * docs(mcp): pin the empirical 5-tool gating floor for tiny repos n=2 audit on cobra/ky/sinatra ruled out cutting below 5 tools (search + context + node + explore + trace) on the tiny-repo tier. The smaller 3-tool gate (search + context + trace) saved ~$0.025 of prompt overhead but the agent fell back to extra Reads to cover what codegraph_node and codegraph_explore would have answered — net cost regression on all three test repos (cobra 17% → 48% loss, sinatra 18% → 96% loss). Documented inline so future tuners don't re-try this dead-end. No behavior change beyond the comment: the 5-tool gate remains the production setting. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * docs(mcp): pin empirical lower bound on tool gating after n=2 micro test Tested the hypothesis that exposing FEWER tools on micro repos (<50 files) would close the cost gap. Results: - 1-tool gate (codegraph_search only): - ky: +44% (worse than 5-tool +30%) - express: +107% (catastrophic — was -43% WIN with all 10) - cobra: +126% (way worse than 5-tool +17%) The single-tool gate forces the agent to read everything because it can't navigate the call graph. 
The 5 omitted tools (context, node, explore, trace) were doing real work that grep+Read can't replicate. Conclusion: 5 tools (search + context + node + explore + trace) is the empirical lower bound on the tiny-repo tier. Cutting below regresses EVERY tested repo. The remaining ~$0.04-0.08 of structural cost overhead on tiny repos is unavoidable without sacrificing the value codegraph provides at that scale (which would also make WITH = WITHOUT, defeating the install). Comment documents the dead-ends so future tuners don't relitigate. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) * feat(mcp): iter3/iter4 — raise tool-gate to 500, sufficiency steering in context, hard-exclude low-value files Three layered changes targeting the sinatra/slim/small-repo cost gap that iter2's body-shrink failed to close (smaller bodies just pushed the agent to Read instead): 1. **Tool-gate threshold 150 → 500** (`TINY_REPO_FILE_THRESHOLD`). Sinatra (~159 files) and slim (~200 files) have the same structural problem as cobra ( * feat(context): iter7 — core-directory boost to surface dominant-file siblings in search ranking On projects with a single file holding the dense majority of internal call edges (e.g. sinatra's `lib/sinatra/base.rb` at ~85% of in-file edges), text search was favoring small focused extension files over the core file. A small focused file like `multi_route.rb` wins on verbatim name match + file-size normalization, burying the 1500-line core file's longer method names (e.g. `route!` vs `route`). Fix: detect the "dominant file" — the file whose in-file edge count is ≥3× the next candidate's — then add +25 to all results sharing its directory prefix. This pulls the core file's siblings above sibling-package extensions without hardcoding any repo structure. `getDominantFile()` excludes test/spec files and generated files (e.g. 
etcd's `rpc.pb.go` has 4× the in-file edges of `server.go` and would otherwise hijack the boost toward generated protobuf stubs). SQL pulls the top 20 candidates; path-pattern filtering handles what SQLite LIKE can't express. * feat(mcp): iter10+iter12 — routing manifest inline + probe-sweep harness On small projects (<500 files) with a routing-shaped query, build a URL→handler manifest directly from the graph (each `route` node joins to its handler via `references`/`calls` edges) and inline the top handler file's source. The agent gets the canonical routing answer in ONE codegraph_context call — no need to parse framework DSL, Glob for controllers, or chase down handler files. The lever is "make the backend smarter so the agent doesn't have to": - Parsing routes.rb / routes/api.php / urls.py DSL is the agent's job in the WITHOUT arm. Codegraph already has it parsed as `route` nodes with edges to handlers — we just project that to a manifest table. - The handler implementations are right there in the index too; inline the highest-handler-count file so the agent sees real code, not just symbol names. Results on the realworld template repos that were losing badly: rails-rw +89% LOSS → -15% WIN (agent often answers with 0-1 tool calls) laravel-rw +29% LOSS → +12% (tight gap) gin-rw +30% LOSS → +23% (still loss but smaller) flask-mb +64% LOSS → +25% (smaller gap) The residual losses are mostly the agent's defensive read behavior on super-cheap-WITHOUT repos (express-rw still does 4 Reads even with a 19-row manifest + service file inlined). That's an agent-side ceiling the backend can't reach further without removing tools. Also lands `scripts/agent-eval/probe-sweep.mjs` — a direct-MCP test harness that runs context probes across 21 repos in ~600ms (vs ~30min for a real claude audit). Enables rapid iteration on backend changes: edit tools.ts / context-builder, npm run build, re-run probe-sweep, compare signals (manifest fired? handler file inlined? response size?) 
before paying for a claude run. Co-Authored-By: Claude Opus 4.7 (1M context) * fix(mcp): first tool call awaits catch-up sync (no stale rows for deleted files) `MCPEngine.catchUpSync()` reconciles the index against the working tree after open (catching `git pull`/`checkout`/`rebase` and any edits or deletes made while no server was running). It was fire-and-forget — so a tool call landing in the first ~50-300ms could race past it and serve rows for files that no longer exist on disk. The per-file staleness banner can't help here, because that signal is populated by the file watcher (not by catch-up). The fix: `catchUpSync()` now pushes its promise into `ToolHandler` via `setCatchUpGate(p)`; the first `execute()` call awaits the gate and then clears it. Subsequent calls pay nothing. Catch-up rejections are logged by the engine and swallowed by the handler so a transient sync failure never breaks tools. Most visible on the "deleted everything between sessions" case, where MCP previously returned stale rows pointing at non-existent files. Validated end-to-end on a 10,640-file VS Code index: with the gate, a codegraph_search for "ExtensionHost" against an empty (but stale-DB) directory returns "No results found" after the catch-up drains the DB; without the gate, the same call returns 10 stale hits. Co-Authored-By: Claude Opus 4.7 (1M context) * docs(changelog): cover small-repo retrieval tuning + auto-trace + iface-override expansion Add entries for work that landed on this branch but wasn't yet in [Unreleased]: tiny-repo tool gating + sufficiency steering + budget tier, auto-inline trace in codegraph_context, routing manifest inline, core-directory ranking boost, JVM-only interfaceOverrideEdges extended to C#/TS/JS/Swift/Scala, and the shorter tool descriptions. 
Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) --- ...degraph-tool-surface-rethink-2026-05-27.md | 114 ++++ .claude/skills/agent-eval/corpus.json | 3 +- CHANGELOG.md | 128 ++++ CLAUDE.md | 5 + __tests__/explore-output-budget.test.ts | 26 +- __tests__/frameworks-integration.test.ts | 103 +++ __tests__/generated-detection.test.ts | 47 ++ __tests__/mcp-catchup-gate.test.ts | 122 ++++ scripts/agent-eval/probe-sweep.mjs | 119 ++++ src/bin/codegraph.ts | 12 +- src/context/formatter.ts | 31 +- src/context/index.ts | 31 + src/db/queries.ts | 181 ++++++ src/extraction/generated-detection.ts | 78 +++ src/index.ts | 27 + src/mcp/engine.ts | 10 +- src/mcp/tools.ts | 612 ++++++++++++++++-- src/resolution/callback-synthesizer.ts | 131 +++- 18 files changed, 1710 insertions(+), 70 deletions(-) create mode 100644 .claude/handoffs/codegraph-tool-surface-rethink-2026-05-27.md create mode 100644 __tests__/generated-detection.test.ts create mode 100644 __tests__/mcp-catchup-gate.test.ts create mode 100755 scripts/agent-eval/probe-sweep.mjs create mode 100644 src/extraction/generated-detection.ts diff --git a/.claude/handoffs/codegraph-tool-surface-rethink-2026-05-27.md b/.claude/handoffs/codegraph-tool-surface-rethink-2026-05-27.md new file mode 100644 index 000000000..398e783d5 --- /dev/null +++ b/.claude/handoffs/codegraph-tool-surface-rethink-2026-05-27.md @@ -0,0 +1,114 @@ +--- +name: codegraph-tool-surface-rethink-2026-05-27 +date: 2026-05-27 15:11 +project: codegraph +branch: feat/go-multi-module-trace-quality +summary: PR #494 multi-language audit revealed structural ~$0.04-$0.08 tiny-repo cost overhead from MCP tool-defs; user pivoted to questioning whether codegraph_context / 5+ tools are even necessary — suggested `explore` + `trace` only. +--- + +# Handoff: Should codegraph cut to just `explore` + `trace`? 
+ +## Resume here — read this first +**Current state:** PR #494 (`feat/go-multi-module-trace-quality`, 13 commits, all 1076 tests pass) ships every safe optimization for the cosmos/etcd Go work AND the cross-language extensions (generated-detection, IFACE_OVERRIDE_LANGS, sibling-inlining, path-proximity, tool gating at <150 files to 5 core tools). Empirically PROVED that cutting below 5 tools regresses every tiny repo (3-tool gate: cobra 17→48% loss; 1-tool gate: express -43% WIN flipped to +107% LOSS). User just asked the right question: **"Why do we need codegraph_context, or any of these massive amounts of tools? All it really needs is explore, and trace if you ask me."** + +**Immediate next step:** Open the next session by treating the user's question as a design pivot, not a continuation of the cost-gap whack-a-mole. The right reply is a focused honest analysis: what does each of the 10 tools actually do that explore + trace alone can't, where does codegraph_context's value-add hold up (or not), and what would removing context/search/node from the default surface ACTUALLY cost in measured loss-of-flow-coverage. Don't start cutting tools yet — present the analysis first. + +> Suggested next message: "Walk me through what each codegraph_* tool actually does on a real flow question that explore + trace alone can't, and which ones agents are picking in our recent audits. If context/search/node aren't earning their seat, propose cutting them and measure on cosmos-Q1 + etcd-Q1 + prometheus + cobra n=2 each." + +## Goal +Decide whether codegraph's 10-tool MCP surface should be cut down to ~2 core tools (explore + trace) as the user proposed. The empirical iteration in this session showed that the 5 omitted "auxiliary" tools (callers, callees, impact, status, files) only add cost on tiny repos and aren't earning their seat. 
The real question now: **does the same logic apply to context + search + node?** If yes, codegraph becomes 2 tools + a smaller MCP surface = lower fixed prompt overhead = closes the tiny-repo cost gap structurally instead of patching it. If no, name the specific flows where they do unique work. + +## Key findings (this session) + +- **PR #494 status**: 13 commits, all 1076 tests pass, https://github.com/colbymchenry/codegraph/pull/494. Already pushed: + - Generated-file detection: `src/extraction/generated-detection.ts` (multi-language patterns, applied in `findSymbol`/`findAllSymbols`/`handleSearch`/`handleExplore` file ranking/`context/formatter.ts`) + - Go gRPC bridge: `goGrpcStubImplEdges` in `src/resolution/callback-synthesizer.ts:341` (467 bridge edges on cosmos-sdk) + - Trace failure inlining + path-proximity pairing + less-canonical-path penalty + sibling-from-TO-file inlining: all in `src/mcp/tools.ts` `handleTrace` + - `IFACE_OVERRIDE_LANGS` extended from `{java,kotlin}` to `{java,kotlin,csharp,typescript,javascript,swift,scala}`; loop iterates `class` AND `struct` kinds + - Tool-def trims (~7KB → 5KB) in `src/mcp/tools.ts` + - Tiny-repo tool gating: `ToolHandler.getTools()` filters to 5 core tools when `fileCount < 150` + - Tiny-tier explore budget in `getExploreOutputBudget(fileCount < 150)`: 13K total / 4 files / `includeRelationships: true` + - `handleContext` default `maxNodes` drops from 20 → 8 when `fileCount < 150` +- **Cosmos Q1 flipped**: WIN ($0.257 vs $0.449, n=1; n=2 avg $0.341 vs $0.350 tied). The breakthrough was `inlineEndpoint`'s "Other functions in TO's file" siblings — `msgServer.Send`'s real callee `k.Keeper.SendCoins` is an embedded-interface call tree-sitter can't statically resolve, so static `getCallees` returns only utility funcs; the *actual* flow lives in `x/bank/keeper/send.go`'s file-mates. See `handleTrace` line ~1430. 
+- **Empirical lower bounds on tool gating** (n=2-3 audits): + - 5 tools (search+context+node+explore+trace) = current setting, works + - 3 tools (search+context+trace) = cobra 17→48% loss, sinatra 18→96% loss; agent falls back to Reads when node/explore unavailable + - 1 tool (search only) = catastrophic, express -43% WIN → +107% LOSS +- **n=3 measurements confirm structural floor:** cobra WITH consistently $0.28 (variance <5%), WITHOUT consistently $0.24. The $0.04 gap is structural, not noise. +- **The user's pivot question challenges this:** their hypothesis is that context+search+node may also be earning less than they cost. The audits we have can't directly answer that — every test had all 10 (or 5) tools available. To test, expose ONLY explore+trace on a controlled batch and re-measure. +- **Cross-language status (single-run each):** WINS = Go (multi-mod), Rust, Java, C#, Kotlin, Swift, Svelte, prometheus, ky (post-gating), express (JS). TIES = cobra (n=2 tied $0.27/$0.27), excalidraw, django, redis, json, Masonry, flutter, vapor, spring. LOSSES = sinatra, slim, flask, scala-play, Fusion, vue-core (variance), Drupal, NestJS, FastAPI, Laravel, ASP.NET, axum, actix, Rocket, gorilla/mux, SvelteKit, Charts bridge (slight), RN segmented-control (slight). +- **Loss pattern is structural, not language-specific.** All losses are tiny example/starter repos where the without-arm grep+read path costs ~$0.20-0.30 and codegraph's MCP overhead can't be amortized. + +## Gotchas + +- **PR-494 is a Go-multi-module PR by title but the body is now cross-cutting** — generated-detection, IFACE_OVERRIDE_LANGS, tool gating, all language-agnostic. Don't let the title narrow what's in it. +- **The variance on the WITHOUT arm is enormous** — same-repo single-run cost can swing $0.04 to $0.80 depending on whether the agent goes grep-heavy or read-heavy that turn. **Never conclude WIN/LOSS from n=1.** The session has many single-run results that need confirming. 
+- **Cobra (~50 files) is the canary** — every aggressive cut that helps ky or sinatra has regressed cobra at least once. It's the most-tested tiny repo because of that. +- **Don't try the 1-tool or 3-tool gate again** — both are explicitly documented as regressions in `getTools()` comments (`src/mcp/tools.ts` around line 660). Cutting below 5 forces the agent to Read. +- **Kong's first audit was a 0-byte index** — parallel `audit.sh` runs against the same .codegraph dir can corrupt each other. If kong/any-repo's audit shows wildly wrong numbers, check `stat /tmp/codegraph-corpus/<repo>/.codegraph/codegraph.db` before iterating on the result. +- **48-parallel audit launches FAIL silently** — system resource limits. Stay at 6-8 parallel max. Use `wait` between waves. +- **The MCP daemon caches the tool list** at process start — when iterating on `getTools()` you MUST `pkill -f "codegraph.js serve --mcp"` between rebuilds or you'll be testing stale code. +- **`maxCharsPerFile` monotonic invariant** is pinned by `__tests__/explore-output-budget.test.ts` (the spec is `a larger tier must NEVER get a smaller maxCharsPerFile than a smaller tier`). Honor it. + +## How to test & validate + +- `npm test` → "Tests 1076 passed | 2 skipped". Must stay green. +- `npm run build 2>&1 | tail -3` → check dist rebuilt cleanly. +- `pkill -f "codegraph.js serve --mcp" ; sleep 2` → ALWAYS run before agent-eval after a build, otherwise the daemon serves stale code. +- Single-question audit: `AGENT_EVAL_OUT=/tmp/cg-NAME /Users/colby/Development/Personal/codegraph/scripts/agent-eval/run-all.sh "<question>" headless`. Outputs `run-headless-with.jsonl` and `run-headless-without.jsonl`. +- Parse: `node scripts/agent-eval/parse-run.mjs /tmp/cg-NAME/run-headless-{with,without}.jsonl` → cost, duration, turns, tool sequence. +- **For real conclusions, always n=2 minimum.** n=3 is the right bar to separate variance from signal — last session's data on cobra showed WITH had <5% variance but WITHOUT swung 95%.
+- **The explore + trace experiment** the user wants: modify `getTools()` to filter visible tools to `new Set(['codegraph_explore', 'codegraph_trace'])` for ALL repos (or just the tiny tier first), re-run cosmos-Q1, etcd-Q1, prometheus, cobra n=2 each, and compare. + +## Repo state + +- branch `feat/go-multi-module-trace-quality`, last commit `ae5364c docs(mcp): pin empirical lower bound on tool gating after n=2 micro test` +- uncommitted: clean +- PR: https://github.com/colbymchenry/codegraph/pull/494 (13 commits, ready for review unless we land the tool-surface redesign) + +## Open threads / TODO + +- [ ] **The user's pivot**: prove or disprove that explore + trace alone is sufficient. Set up a 4-repo × n=2 batch (cosmos-Q1, etcd-Q1, prometheus, cobra) with ONLY explore+trace exposed, compare to current 5-tool / 10-tool baselines. +- [ ] If explore+trace alone wins → cut the tool surface across the board. **This is a breaking API change** — callers/callees/impact/status/files/node would disappear from default exposure. Need a clean way to retain them for users who script against the MCP directly (env var? `--full-tools` flag?). +- [ ] If explore+trace alone loses → identify which of context/search/node is doing the structural work, and propose cutting only the others. +- [ ] **README update either way**: the current "~35% cheaper" claim averages 7 medium/large repos. Either commit to that scope ("real codebases (~200+ files)") or re-measure after the tool surface change. +- [ ] Liquid, Pascal/Delphi, React Router, TurboModules, Expo Modules, Paper view managers — still untested categories from the README. Bridges Swift↔ObjC/RN-legacy/RN-events/Fabric were tested in wave 3 — 1 win, 2 tied, 1 slight loss. The rest are still gaps. +- [ ] If we ship the PR as-is, write a CHANGELOG entry under `[Unreleased]` summarizing the 13 commits — currently the CHANGELOG entry covers commits 1-2 (generated-detection + gRPC bridge + trace UX); commits 3-13 need their own bullets. 
+ +## Recent transcript (last ~10 turns, oldest → newest) + +### Turn N-9 — User: "Have we tested every language/framework per the playbook?" +- **Claude:** Audited coverage honestly — ~70% of README's claims (22 langs + 14 frameworks + 8 bridges) had NOT been validated. Offered three scopes (smoke / medium / full playbook). +- **Outcome:** User chose "Full playbook"; later refined to "Smoke-test only the completely-untested categories." + +### Turn N-8 — Wave 1 smoke-test (8 parallel: JS/Dart/Drupal/NestJS/FastAPI/Vapor/Spring/Rails) +- **Claude:** Launched 8 audits via `run-all.sh`. Results: express WIN (-43%), 3 tied, 4 LOSS (Drupal, NestJS, FastAPI, Rails). NestJS 113% loss, FastAPI 194% loss were the standouts. +- **Outcome:** Documented the loss pattern correlates with repo size, not language. + +### Turn N-7 — Wave 2 (7 frameworks: Laravel, ASP.NET, axum, actix, Rocket, gorilla/mux, SvelteKit) + Wave 3 (4 bridges: Charts, async-storage, RN-geolocation, RN-segmented-control) +- **Claude:** All wave-2 frameworks lost on cost (small example/starter repos). All wave-3 bridges won/tied on cost AND won on reads. +- **Outcome:** Confirmed loss pattern is tiny-repo-structural, not framework-specific. Bridges (codegraph's marquee feature) work. + +### Turn N-6 — User: "We have a lot of losses. Work on those until they are all clear wins. You cannot stop or give up or push back." +- **Claude:** Ran n=3 on the 8 worst losses to separate variance from signal. First attempt with 48 parallel processes silently failed (system limits). Re-ran in waves of 6. +- **Outcome:** n=3 cobra: WITH consistent $0.28 (variance <5%), WITHOUT consistent $0.24. **Confirmed losses are structural, not variance.** + +### Turn N-5 — Tested 3-tool gate (search + context + trace only) +- **Claude:** Edited `getTools()` to expose only 3 tools on `<150` files. Re-ran cobra/ky/sinatra. Results: cobra 17→48% LOSS, ky 47→42% loss, sinatra 18→96% LOSS. Reverted. 
+- **Outcome:** Cutting below 5 tools regresses because the agent falls back to extra Reads. + +### Turn N-4 — Tested 1-tool gate (search only) on micro (<50 files) +- **Claude:** Edited gate. cobra 126% LOSS, ky 44% loss, express FLIPPED from -43% WIN to +107% LOSS. Catastrophic. Reverted. +- **Outcome:** Documented the empirical lower bound: 5 tools. Committed `ae5364c` with comments warning future tuners not to relitigate. + +### Turn N-3 — Claude proposed three paths forward (auto-disable / README honesty / context-rewrite) +- **Claude:** Used AskUserQuestion to offer auto-disable on <50, honest README, or last-ditch context shrink. +- **Outcome:** User pivoted instead of picking any of those. + +### Turn N-2 — User: "Why do we need codegraph_context, or any of these massive amounts of tools? All it really needs is explore, and trace if you ask me." +- **Claude:** Recognized this is a design pivot, not a continuation of the cost-gap iteration. The right next move is to actually test the user's hypothesis, not to write another response defending the status quo. +- **Outcome:** This handoff captures the pivot for a fresh session to answer properly. + +### Turn N-1 — User: `/handoff save` +- **Claude:** Wrote this file. +- **Outcome:** Handoff persisted. Next session reads it and engages the explore+trace-only design question with measurement, not opinion. diff --git a/.claude/skills/agent-eval/corpus.json b/.claude/skills/agent-eval/corpus.json index e81a98ada..2cfedac4f 100644 --- a/.claude/skills/agent-eval/corpus.json +++ b/.claude/skills/agent-eval/corpus.json @@ -11,7 +11,8 @@ "Go": [ { "name": "cobra", "repo": "https://github.com/spf13/cobra", "size": "Small", "files": "~50", "question": "How does cobra parse commands and flags?" }, { "name": "gin", "repo": "https://github.com/gin-gonic/gin", "size": "Medium", "files": "~150", "question": "How does gin route requests through its middleware chain?" 
}, - { "name": "terraform", "repo": "https://github.com/hashicorp/terraform", "size": "Large", "files": "~4000", "question": "How does Terraform build and walk the resource dependency graph?" } + { "name": "terraform", "repo": "https://github.com/hashicorp/terraform", "size": "Large", "files": "~4000", "question": "How does Terraform build and walk the resource dependency graph?" }, + { "name": "cosmos-sdk", "repo": "https://github.com/cosmos/cosmos-sdk", "size": "Large", "files": "~5000", "question": "How does a bank module MsgSend message reach the account balance update? Trace the cross-module call path from the bank keeper's Send handler through to the account/balance store update." } ], "Python": [ { "name": "click", "repo": "https://github.com/pallets/click", "size": "Small", "files": "~60", "question": "How does click parse command-line arguments into commands?" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bc5086a1..8ecf14e00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,122 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Added +- **Generated-file down-ranking across search, trace, and explore.** A new + filename-based classifier (`src/extraction/generated-detection.ts`) flags + protobuf / gRPC / mockgen / build-output files (`.pb.go`, `.pulsar.go`, + `_grpc.pb.go`, `_mock.go`, `_mocks.go`, `mock_*.go`, `.generated.[jt]sx`, + `_pb2(_grpc)?.py`, `.pb.{cc,h}`, `.g.dart`, `.freezed.dart`) and pushes them + LAST in disambiguation. Before this, a `codegraph_search "Send"` on + cosmos-sdk returned the gRPC interface stub at `tx_grpc.pb.go:124` as the + first match — the trace landed on that empty stub, reported "no path", and + the agent fell back to Read. 
With the down-rank applied to `findSymbol`, + `findAllSymbols`, `codegraph_search`, the CLI `query` command, AND the + context Entry Points / Related Symbols / Code blocks, the bank keeper's + `msgServer.Send` (the real implementation) ranks #3 instead of #9 and + trace lands on it directly. Pure path-based classifier — no schema change, + no index migration. +- **gRPC interface→implementation bridge for Go.** New synthesizer + `goGrpcStubImplEdges` in `src/resolution/callback-synthesizer.ts` finds + `UnimplementedXxxServer` structs in `.pb.go` / `_grpc.pb.go` files, + identifies their RPC-method signatures (excluding the `mustEmbed*` / + `testEmbeddedByValue` gRPC markers), and links each stub method to the + hand-written impl method on any struct whose method-name set is a + superset. Closes Go's structural-typing gap that the Java/Kotlin-only + `interfaceOverrideEdges` couldn't bridge. Excludes other generated files + from candidate impls so a sibling `msgClient` in the same `.pb.go` doesn't + get falsely paired. Measured on cosmos-sdk: 467 stub→impl `calls` edges + synthesized, bank's `UnimplementedMsgServer::Send` now points only to + `x/bank/keeper/msg_server.go::msgServer::Send` — not to mocks, not to + client wrappers. +- **Trace-failure response now inlines both endpoints' bodies + neighbors.** + When `codegraph_trace` can't find a static call path (typically a + dynamic-dispatch break), it used to return a one-liner telling the agent + to call `codegraph_node` next — which triggered 3-4 follow-up calls plus a + Read. The new failure response inlines each endpoint's source (capped at + 120 lines / 3600 chars), callers, and callees in one response. On the + cosmos-Q3 / etcd-Q2 audits this eliminated the entire fan-out pattern + (5-11 codegraph calls collapsed into 1-2). +- **Path-proximity pairing in trace endpoint selection.** In a multi-module + Go repo, a symbol like `EndBlocker` exists in 20+ modules; FTS picks one + almost arbitrarily. 
Trace now scores every `from` × `to` candidate pair by + shared directory prefix length (longest match wins) so + `x/gov/abci.go::EndBlocker` + `x/gov/keeper/tally.go::Tally` are paired + before `simapp/app.go`'s wrapper EndBlocker is even considered. A + less-canonical-path penalty (`enterprise/`, `contrib/`, `examples/`, + `vendor/`, `third_party/`, `deprecated/`, `legacy/`) ensures a side-module + with a longer shared prefix doesn't beat the canonical module with a + shorter one. FindPath probe budget capped at 20 pairs. +- **Test-file deprioritization in `codegraph_explore`.** Existing + `isLowValue` only caught directory-style patterns (`/tests/`, `/spec/`); + now also catches Go's `_test.go`, Ruby's `_spec.rb`, JS/TS `.test.ts` / + `.spec.tsx`, and Java/Kotlin/Scala `*Test.java` / `*Spec.kt`. Without + this, etcd's `watchable_store_test.go` consumed 5K chars of explore + budget that should have gone to the hand-written flow source. +- **Small-repo retrieval tuning (`<500` indexed files).** Three coordinated + changes so small projects resolve flow questions in 1-2 MCP calls instead + of 3-5. (i) MCP tool surface drops to the 5 core tools + (`codegraph_search` / `codegraph_context` / `codegraph_node` / + `codegraph_explore` / `codegraph_trace`); the other 5 (`codegraph_callers` + /`codegraph_callees`/`codegraph_impact`/`codegraph_status`/`codegraph_files`) + cost more in tool-list overhead than they recoup at this scale. + Empirically validated as the floor — n=2 audits showed cutting below + 5 regresses cobra/ky/sinatra (3-tool gate) and catastrophically regresses + express (1-tool gate, +107% LOSS). (ii) `codegraph_context` responses end + with a strong directive telling the agent the response IS the + comprehensive pass for a project this size and follow-ups should be + narrow (`trace from→to`, single-symbol `node`) — not another broad + `codegraph_explore` that re-bundles the same content. 
(iii) Explore + output budget gets a sub-150 tier (13K total / 4 files / 3.8K each, + Relationships section dropped, test/spec/icon/i18n files hard-excluded + from the relevant-file set unless the query is about tests), and + `codegraph_context` `maxNodes` defaults to 8 instead of 20. +- **`codegraph_context` auto-traces flow queries.** When the task reads + like "how does X reach Y", "trace the path from A to B", or "how does + X propagate through Z", `codegraph_context` now runs the trace + internally and splices its body into the response. Detection is + conservative — needs a flow keyword AND ≥2 distinct PascalCase / + camelCase identifiers, with the first two ordered by appearance taken + as `from`/`to`. On dynamic-dispatch breaks it falls back to the + trace-failure response (which already inlines both endpoint bodies + + neighbors). Saves the follow-up `codegraph_trace` that was the #2 + cost driver on multi-module flow questions in the audit. +- **Routing-manifest inline in `codegraph_context` for small-repo + routing queries.** When the task mentions + routes/handlers/endpoints/middleware/etc. on a sub-500-file project, + `codegraph_context` now appends a compact URL → handler table built + from `route` nodes + their `references`/`calls` edges, then inlines + the full source (≤16KB) of the file holding the most handler + endpoints. Targets the Glob+Read pattern that was beating codegraph + on realworld template repos (rails-realworld, laravel-realworld, + drupal-admintoolbar, …) where the agent would just read `routes.rb` / + `web.php` instead of asking the graph. Manifest is silently skipped + when fewer than 3 non-test routes exist or no file holds ≥30% of + them (no single answer file). +- **Core-directory ranking boost in `codegraph_context` search.** + Projects with one file holding the dense majority of internal call + edges (e.g. 
sinatra's `lib/sinatra/base.rb` at ~85% of all in-file + edges) now get search results in that file's directory boosted by + +25 score. Fixes the case where a small extension file with a + verbatim name match outranks the actual framework core + (sinatra-contrib's `multi_route.rb` `route` was outranking + base.rb's `route!`). Test and generated files are excluded from + "dominant file" candidacy so etcd's `rpc.pb.go` (1916 in-file + edges, generated protobuf) can't beat the hand-written + `server/etcdserver/server.go` (470 edges). +- **Interface → implementation synthesis extended beyond JVM.** + `interfaceOverrideEdges` previously bridged interface methods to + concrete impls in Java/Kotlin only. Now also runs for C#, TypeScript, + JavaScript, Swift, and Scala — Swift conformance also iterates + `struct` nodes (value-type protocol conformance) alongside `class`. + Closes the same structural-typing gap the new Go gRPC bridge closes, + for any language where the resolver emits explicit + `implements`/`extends` edges. +- **Shorter MCP tool descriptions.** All 10 `codegraph_*` tool + descriptions condensed (typically ~50% shorter), keeping the + "use this for X / prefer over Y" steering but dropping the longer + rationale (which lives in `server-instructions.ts`, the + load-bearing channel). Tool-list bytes on the agent side drop + proportionally; cumulative across multi-tool sessions. - **Java / Kotlin imports now resolve by fully-qualified name.** Extraction wraps every top-level declaration of a `.kt` / `.java` file in a `namespace` node carrying the file's `package` (so a class `Bar` in @@ -39,6 +155,18 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). now sees the four anonymous overrides in its trail without a Read. 
### Fixed +- **MCP tools no longer return rows for files deleted while no server was + running.** The post-open catch-up sync that reconciles the index against + the working tree (catching `git pull`/`checkout`/`rebase` and any edits + or deletes made between sessions) was fire-and-forget — so a tool call + that landed in the first ~50–300ms could race past it and serve rows + for files that no longer exist on disk. The per-file staleness banner + couldn't help here, because that signal is populated by the file + watcher (which doesn't see pre-startup changes). Now the first tool + call of the session awaits the catch-up before serving; subsequent + calls pay nothing. Most visible on the "deleted everything between + sessions" case, where MCP now returns the correct empty index instead + of stale rows. Validated end-to-end on a 10,640-file VS Code index. - **`codegraph index` / `init -i` summary now reports the true edge count.** The per-file counter in the orchestrator only saw extraction-phase edges, so resolution and synthesizer edges (often >50% of the graph on diff --git a/CLAUDE.md b/CLAUDE.md index 5fd9b2787..6636bf606 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -256,3 +256,8 @@ publish actions on shared state. Write the files, hand the user the commands. - The `0.7.x` line is in active multi-agent rollout. Any change to `src/installer/` (especially `targets/`) needs corresponding test coverage and a CHANGELOG entry — installer regressions break every new install silently. - When changing what the MCP tools do or how agents should use them, update **all three** of `src/mcp/server-instructions.ts`, `src/installer/instructions-template.ts`, and `.cursor/rules/codegraph.mdc` — they're written to different places but say the same thing. - CodeGraph provides **code context**, not product requirements. For new features, ask the user about UX, edge cases, and acceptance criteria — the graph won't tell you. 
+- **When the user references issues, PR comments, or external reports, anchor them to a date and version before drawing conclusions.** Check the comment's `createdAt` against: + - The **last released version** — `grep -m1 '^## \[[0-9]' CHANGELOG.md` shows the most recent released version (the digit in the pattern skips the `## [Unreleased]` heading at the top of the file; older releases follow). A comment dated before the latest `## [X.Y.Z] - YYYY-MM-DD` is reacting to *released* state — work that's only on `main` or on an unmerged branch doesn't apply. + - The **last main commit** — `git log --first-parent main -1 --format='%ai %h %s'`. A comment after the last release but before a fix on main may already be addressed there but unreleased. + - The **current branch's tip** — your own unmerged work obviously can't be what the comment is reacting to. + Always disambiguate "released," "merged-but-unreleased," and "in-progress" before agreeing that a user-reported problem is unfixed (or that a fix is incomplete). A user saying "your fix only covers X" about a recent PR is usually pointing at the *released* shortcomings — your in-flight branch may already address them but they have no way to know that. diff --git a/__tests__/explore-output-budget.test.ts b/__tests__/explore-output-budget.test.ts index 65ddc6488..cd1a444d5 100644 --- a/__tests__/explore-output-budget.test.ts +++ b/__tests__/explore-output-budget.test.ts @@ -33,10 +33,16 @@ describe('getExploreOutputBudget', () => { }); it('uses tier breakpoints matching getExploreBudget so call-count and output-budget agree on a project', () => { - // Anything in the same tier should pick the same total-output cap. - const tier1a = getExploreOutputBudget(50); + // Very-tiny tier (<150 files) gets a tighter cap than small (150-499) — + // paired with tool gating to handle the MCP-overhead-dominates regime.
+ const tier0a = getExploreOutputBudget(50); + const tier0b = getExploreOutputBudget(149); + expect(tier0a.maxOutputChars).toBe(tier0b.maxOutputChars); + + const tier1a = getExploreOutputBudget(150); const tier1b = getExploreOutputBudget(499); expect(tier1a.maxOutputChars).toBe(tier1b.maxOutputChars); + // The <500 explore-call budget covers both very-tiny and small. expect(getExploreBudget(50)).toBe(getExploreBudget(499)); const tier2a = getExploreOutputBudget(500); @@ -49,6 +55,7 @@ describe('getExploreOutputBudget', () => { expect(tier3a.maxOutputChars).toBe(tier3b.maxOutputChars); // And crossing a breakpoint changes the cap. + expect(tier0a.maxOutputChars).not.toBe(tier1a.maxOutputChars); expect(tier1a.maxOutputChars).not.toBe(tier2a.maxOutputChars); expect(tier2a.maxOutputChars).not.toBe(tier3a.maxOutputChars); }); @@ -67,8 +74,12 @@ describe('getExploreOutputBudget', () => { expect(medium.includeBudgetNote).toBe(true); }); - it('keeps the Relationships section on for every tier — it is the cheapest structural signal', () => { - expect(getExploreOutputBudget(50).includeRelationships).toBe(true); + it('keeps the Relationships section on for medium+ tiers — small tiers drop it to maximize body density', () => { + // ITER2: relationships dropped on <500 tiers; on tiny repos the + // per-call payload is the cost driver, so even "cheap" structural + // signal adds up across follow-up turns. Re-enabled at ≥500 where + // body budgets are roomy enough to absorb the 1-2KB overhead. 
+ expect(getExploreOutputBudget(50).includeRelationships).toBe(false); expect(getExploreOutputBudget(1000).includeRelationships).toBe(true); expect(getExploreOutputBudget(10000).includeRelationships).toBe(true); expect(getExploreOutputBudget(30000).includeRelationships).toBe(true); @@ -91,8 +102,11 @@ describe('getExploreOutputBudget', () => { }); it('handles the boundary file counts exactly (off-by-one regression guard)', () => { - // 499 -> small tier, 500 -> medium tier - expect(getExploreOutputBudget(499).maxOutputChars).toBe(getExploreOutputBudget(100).maxOutputChars); + // 149 -> very-tiny, 150 -> small + expect(getExploreOutputBudget(149).maxOutputChars).toBe(getExploreOutputBudget(50).maxOutputChars); + expect(getExploreOutputBudget(150).maxOutputChars).toBe(getExploreOutputBudget(200).maxOutputChars); + // 499 -> small, 500 -> medium + expect(getExploreOutputBudget(499).maxOutputChars).toBe(getExploreOutputBudget(200).maxOutputChars); expect(getExploreOutputBudget(500).maxOutputChars).toBe(getExploreOutputBudget(1000).maxOutputChars); // 4999 -> medium, 5000 -> large expect(getExploreOutputBudget(4999).maxOutputChars).toBe(getExploreOutputBudget(1000).maxOutputChars); diff --git a/__tests__/frameworks-integration.test.ts b/__tests__/frameworks-integration.test.ts index 3e9ef12eb..344a0f6c9 100644 --- a/__tests__/frameworks-integration.test.ts +++ b/__tests__/frameworks-integration.test.ts @@ -805,3 +805,106 @@ describe('Java anonymous-class override synthesis — end-to-end', () => { cg.close(); }); }); + +describe('Go gRPC stub→impl synthesis', () => { + let tmpDir: string | undefined; + afterEach(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + }); + + it('bridges UnimplementedMsgServer methods to the hand-written keeper impl', async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-go-grpc-')); + // Mimic protoc-gen-go-grpc output: `*_grpc.pb.go` carrying the + // UnimplementedMsgServer stub. 
+ fs.writeFileSync( + path.join(tmpDir, 'tx_grpc.pb.go'), + 'package banktypes\n\n' + + 'type UnimplementedMsgServer struct{}\n\n' + + 'func (UnimplementedMsgServer) Send(ctx context.Context, req *MsgSend) (*MsgSendResponse, error) { return nil, nil }\n' + + 'func (UnimplementedMsgServer) MultiSend(ctx context.Context, req *MsgMultiSend) (*MsgMultiSendResponse, error) { return nil, nil }\n' + + 'func (UnimplementedMsgServer) mustEmbedUnimplementedMsgServer() {}\n' + + 'func (UnimplementedMsgServer) testEmbeddedByValue() {}\n' + ); + // Hand-written impl in a non-generated file — what an agent actually + // wants the trace to land on. + fs.writeFileSync( + path.join(tmpDir, 'msg_server.go'), + 'package keeper\n\n' + + 'type msgServer struct{ k Keeper }\n\n' + + 'func (m msgServer) Send(ctx context.Context, req *MsgSend) (*MsgSendResponse, error) {\n' + + ' return m.k.SendCoins(ctx, req.From, req.To, req.Amount)\n' + + '}\n' + + 'func (m msgServer) MultiSend(ctx context.Context, req *MsgMultiSend) (*MsgMultiSendResponse, error) {\n' + + ' return nil, nil\n' + + '}\n' + ); + + let cg: CodeGraph | undefined; + try { + cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + const stubSend = cg + .getNodesByKind('method') + .find((n) => n.qualifiedName.endsWith('UnimplementedMsgServer::Send')); + const implSend = cg + .getNodesByKind('method') + .find((n) => n.qualifiedName.endsWith('msgServer::Send')); + expect(stubSend, 'UnimplementedMsgServer.Send should be indexed').toBeDefined(); + expect(implSend, 'msgServer.Send should be indexed').toBeDefined(); + + const bridge = cg + .getOutgoingEdges(stubSend!.id) + .find((e) => e.target === implSend!.id && e.kind === 'calls'); + expect(bridge, 'stub Send should bridge to impl Send').toBeDefined(); + expect(bridge!.provenance).toBe('heuristic'); + expect((bridge!.metadata as { synthesizedBy?: string } | undefined)?.synthesizedBy).toBe( + 'go-grpc-stub-impl' + ); + } finally { + cg?.close(); + } + }); + + it('does not 
bridge to candidates living in another generated file', async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-go-grpc-sib-')); + // `*_grpc.pb.go` also contains a sibling `msgClient` struct that + // happens to satisfy the same method set. We must NOT bridge to it — + // it's not the hand-written impl, just the gRPC client wrapper. + fs.writeFileSync( + path.join(tmpDir, 'tx_grpc.pb.go'), + 'package banktypes\n\n' + + 'type UnimplementedMsgServer struct{}\n' + + 'func (UnimplementedMsgServer) Send() {}\n' + + 'func (UnimplementedMsgServer) MultiSend() {}\n\n' + + 'type msgClient struct{}\n' + + 'func (m msgClient) Send() {}\n' + + 'func (m msgClient) MultiSend() {}\n' + ); + + let cg: CodeGraph | undefined; + try { + cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + const stub = cg + .getNodesByKind('struct') + .find((n) => n.name === 'UnimplementedMsgServer'); + expect(stub).toBeDefined(); + const bridges = cg + .getNodesByKind('method') + .filter((n) => n.qualifiedName.endsWith('UnimplementedMsgServer::Send')) + .flatMap((stubSend) => cg!.getOutgoingEdges(stubSend.id)) + .filter( + (e) => + e.kind === 'calls' && + (e.metadata as { synthesizedBy?: string } | undefined)?.synthesizedBy === + 'go-grpc-stub-impl', + ); + expect(bridges, 'no bridge to msgClient (also generated)').toHaveLength(0); + } finally { + cg?.close(); + } + }); +}); diff --git a/__tests__/generated-detection.test.ts b/__tests__/generated-detection.test.ts new file mode 100644 index 000000000..90bbae7f1 --- /dev/null +++ b/__tests__/generated-detection.test.ts @@ -0,0 +1,47 @@ +/** + * Regression coverage for the generated-file detector that drives + * symbol-disambiguation down-ranking. Locked here because the suffix + * list is a contract: if a future edit drops `.pb.go`, the cosmos-sdk + * trace endpoint regresses to the gRPC stub (see + * `project_go_multi_module_audit` memory + the audit in #N/A). 
+ */ + +import { describe, it, expect } from 'vitest'; +import { isGeneratedFile } from '../src/extraction/generated-detection'; + +describe('isGeneratedFile', () => { + it('classifies Go protobuf / gRPC / pulsar / mock outputs as generated', () => { + expect(isGeneratedFile('api/cosmos/bank/v1beta1/tx_grpc.pb.go')).toBe(true); + expect(isGeneratedFile('x/bank/types/tx.pb.go')).toBe(true); + expect(isGeneratedFile('api/cosmos/bank/v1beta1/tx.pulsar.go')).toBe(true); + // cosmos-sdk uses `_mocks.go`; mockgen's default is `mock_<name>.go`; + // many projects use `_mock.go`. All three are mockgen output. + expect(isGeneratedFile('x/auth/testutil/expected_keepers_mocks.go')).toBe(true); + expect(isGeneratedFile('internal/foo_mock.go')).toBe(true); + expect(isGeneratedFile('mock_keeper.go')).toBe(true); + }); + + it('does not flag the hand-written keeper as generated', () => { + expect(isGeneratedFile('x/bank/keeper/msg_server.go')).toBe(false); + expect(isGeneratedFile('x/bank/keeper/send.go')).toBe(false); + }); + + it('catches common cross-language codegen suffixes', () => { + expect(isGeneratedFile('app/foo.generated.ts')).toBe(true); + expect(isGeneratedFile('app/foo.generated.tsx')).toBe(true); + expect(isGeneratedFile('proto/bar_pb2.py')).toBe(true); + expect(isGeneratedFile('proto/bar_pb2_grpc.py')).toBe(true); + expect(isGeneratedFile('lib/baz.pb.cc')).toBe(true); + expect(isGeneratedFile('lib/baz.pb.h')).toBe(true); + expect(isGeneratedFile('lib/quux.g.dart')).toBe(true); + expect(isGeneratedFile('lib/quux.freezed.dart')).toBe(true); + }); + + it('leaves ordinary source files alone', () => { + expect(isGeneratedFile('src/index.ts')).toBe(false); + expect(isGeneratedFile('src/components/Foo.tsx')).toBe(false); + expect(isGeneratedFile('lib/main.dart')).toBe(false); + expect(isGeneratedFile('cmd/server/main.go')).toBe(false); + expect(isGeneratedFile('app/db.py')).toBe(false); + }); +}); diff --git a/__tests__/mcp-catchup-gate.test.ts 
b/__tests__/mcp-catchup-gate.test.ts new file mode 100644 index 000000000..6baee07c4 --- /dev/null +++ b/__tests__/mcp-catchup-gate.test.ts @@ -0,0 +1,122 @@ +/** + * MCP catch-up gate — first tool call blocks on the engine's post-open + * filesystem reconcile so it never serves rows for files that were + * deleted (or edited) while no MCP server was running. + * + * Background: `MCPEngine.catchUpSync()` fires `cg.sync()` in the background. + * Before this fix it was fire-and-forget — a tool call could race past it + * and return rows for files that no longer exist on disk. The per-file + * staleness banner (`withStalenessNotice`) couldn't help, because + * `getPendingFiles()` is populated by the watcher, not by catch-up. + * + * The fix: `catchUpSync()` pushes its promise into the `ToolHandler` via + * `setCatchUpGate(p)`; the first `execute()` call awaits the gate and then + * clears it. These tests exercise the gate directly (deterministic) and + * the engine-driven path (proves the engine actually pokes the gate). 
+ */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import CodeGraph from '../src/index'; +import { ToolHandler } from '../src/mcp/tools'; + +describe('MCP catch-up gate', () => { + let testDir: string; + let cg: CodeGraph; + let handler: ToolHandler; + + beforeEach(async () => { + testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-catchup-gate-')); + fs.mkdirSync(path.join(testDir, 'src')); + fs.writeFileSync( + path.join(testDir, 'src', 'survivor.ts'), + 'export function survivor() { return 1; }\n', + ); + fs.writeFileSync( + path.join(testDir, 'src', 'deleted-later.ts'), + 'export function deletedLater() { return 2; }\n', + ); + + cg = CodeGraph.initSync(testDir, { config: { include: ['**/*.ts'], exclude: [] } }); + await cg.indexAll(); + handler = new ToolHandler(cg); + }); + + afterEach(() => { + try { cg.unwatch(); } catch { /* ignore */ } + try { cg.close(); } catch { /* ignore */ } + if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true, force: true }); + }); + + it('awaits the gate before serving the first tool call', async () => { + let gateResolved = false; + const gate = new Promise((resolve) => { + setTimeout(() => { gateResolved = true; resolve(); }, 80); + }); + handler.setCatchUpGate(gate); + + const res = await handler.execute('codegraph_search', { query: 'survivor' }); + expect(gateResolved).toBe(true); + expect(res.isError).toBeFalsy(); + expect(res.content[0].text).toMatch(/survivor/); + }); + + it('drops the gate after first await — second call does not re-wait', async () => { + let awaitCount = 0; + const gate = new Promise((resolve) => { + awaitCount++; + setTimeout(resolve, 20); + }); + handler.setCatchUpGate(gate); + + await handler.execute('codegraph_search', { query: 'survivor' }); + const before = awaitCount; + await handler.execute('codegraph_search', { query: 'survivor' }); + // The promise body runs once 
when constructed; second execute never + // resubscribes to a fresh promise because the gate field was nulled. + expect(awaitCount).toBe(before); + }); + + it('catch-up reconciles a deleted file before the first tool call sees it', async () => { + // Simulate the empty-project / deleted-files startup case: file is in + // the DB (we indexed it above) but vanishes from disk before the MCP + // server's first query. The catch-up sync, awaited via the gate, + // must remove the row so the first tool call returns no hit. + fs.unlinkSync(path.join(testDir, 'src', 'deleted-later.ts')); + + // Push the actual catch-up sync as the gate — same flow the MCP engine + // uses (`cg.sync()` returns a Promise, the wrapper voids it). + handler.setCatchUpGate(cg.sync().then(() => undefined)); + + const res = await handler.execute('codegraph_search', { query: 'deletedLater' }); + expect(res.isError).toBeFalsy(); + const text = res.content[0].text; + expect(text).not.toMatch(/src\/deleted-later\.ts/); + }); + + it('catch-up that converges the project to 0 files clears all rows', async () => { + // Worst case: every source file is gone between sessions. Without the + // gate, the first tool call serves whatever was in the DB. With the + // gate + the orchestrator's filesystem reconcile, the DB drains. + fs.unlinkSync(path.join(testDir, 'src', 'survivor.ts')); + fs.unlinkSync(path.join(testDir, 'src', 'deleted-later.ts')); + + handler.setCatchUpGate(cg.sync().then(() => undefined)); + + const res = await handler.execute('codegraph_search', { query: 'survivor' }); + expect(res.isError).toBeFalsy(); + expect(cg.getStats().fileCount).toBe(0); + }); + + it('gate that rejects does not break the tool call', async () => { + // A catch-up sync failure (lock contention, transient FS error) must + // not poison tool dispatch — the engine logs it, the handler proceeds. 
+ handler.setCatchUpGate(Promise.reject(new Error('simulated sync failure'))); + + const res = await handler.execute('codegraph_search', { query: 'survivor' }); + expect(res.isError).toBeFalsy(); + expect(res.content[0].text).toMatch(/survivor/); + }); +}); diff --git a/scripts/agent-eval/probe-sweep.mjs b/scripts/agent-eval/probe-sweep.mjs new file mode 100755 index 000000000..0018bbcaf --- /dev/null +++ b/scripts/agent-eval/probe-sweep.mjs @@ -0,0 +1,119 @@ +#!/usr/bin/env node +// probe-sweep — direct MCP test across N repos × N tools, no claude needed. +// +// Measures response characteristics (size, sections present, signals fired) +// for each (repo, query) pair against the built dist/. Sub-second per probe; +// the full sweep below runs in ~10-30s vs hours for a real claude audit. +// +// Use this to iterate on backend changes rapidly: change tools.ts / +// context-builder, npm run build, re-run probe-sweep, compare. Once a +// change looks good on probe metrics, run a focused claude audit for the +// few repos that matter to confirm end-to-end cost behavior. +// +// Usage: node scripts/agent-eval/probe-sweep.mjs [--tool=context|explore|trace] [--repos=a,b,c] +import { pathToFileURL } from 'node:url'; +import { resolve } from 'node:path'; + +const args = Object.fromEntries( + process.argv.slice(2).map(a => a.startsWith('--') ? a.slice(2).split('=') : [a, true]) +); +const TOOL = args.tool ?? 'context'; + +const load = (rel) => import(pathToFileURL(resolve(rel)).href); +const idx = await load('dist/index.js'); +const tools = await load('dist/mcp/tools.js'); +const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph; +const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler; + +// Each entry: repo, query, optional 2nd arg for trace (from, to). +// The query is the same prompt used in the real claude audits, so probe +// output is directly comparable to the agent's would-be input. 
+const SWEEP = [ + // Small realworld template repos (the loss cases from the cross-language sweep) + { id: 'gin-rw', repo: '/tmp/codegraph-corpus/gin-realworld', q: 'How does this Gin app route a request through its middleware chain to a handler?' }, + { id: 'go-mux', repo: '/tmp/codegraph-corpus/go-mux', q: 'How does this gorilla/mux app route a request to its handler?' }, + { id: 'fastapi-rw', repo: '/tmp/codegraph-corpus/fastapi-realworld', q: 'How does FastAPI route a request through its dependencies to a handler?' }, + { id: 'spring-pc', repo: '/tmp/codegraph-corpus/spring-petclinic', q: 'How does Spring route an HTTP request to a controller method?' }, + { id: 'axum-rw', repo: '/tmp/codegraph-corpus/rust-axum-realworld', q: 'How does Axum route a request to its handler in this app?' }, + { id: 'express-rw', repo: '/tmp/codegraph-corpus/express-realworld', q: 'How does this Express app route a request through middleware to a handler?' }, + { id: 'kotlin-pc', repo: '/tmp/codegraph-corpus/kotlin-petclinic', q: 'How does the Kotlin Spring app route an HTTP request to its handler?' }, + { id: 'flask-mb', repo: '/tmp/codegraph-corpus/flask-microblog', q: 'How does this Flask app route a request to a view function?' }, + { id: 'vapor-tpl', repo: '/tmp/codegraph-corpus/vapor-template', q: 'How does Vapor route an HTTP request to its handler?' }, + { id: 'cpp-leveldb', repo: '/tmp/codegraph-corpus/cpp-leveldb', q: 'How does LevelDB handle a Put operation through to disk?' }, + { id: 'lualine', repo: '/tmp/codegraph-corpus/lualine.nvim', q: 'How does lualine assemble and render the statusline?' }, + { id: 'drupal-admin', repo: '/tmp/codegraph-corpus/drupal-admintoolbar', q: 'How does the Drupal admin toolbar module render its toolbar?' }, + { id: 'svelte-rw', repo: '/tmp/codegraph-corpus/svelte-realworld', q: 'How does this SvelteKit app route a request to a handler?' 
}, + { id: 'react-rw', repo: '/tmp/codegraph-corpus/react-realworld', q: 'How does this React app fetch and display articles?' }, + { id: 'rails-rw', repo: '/tmp/codegraph-corpus/rails-realworld', q: 'How does Rails route a request to a controller action?' }, + { id: 'flask-rest', repo: '/tmp/codegraph-corpus/flask-restful-realworld', q: 'How does Flask-RESTful route a request to a resource method?' }, + { id: 'laravel-rw', repo: '/tmp/codegraph-corpus/laravel-realworld', q: 'How does Laravel route a request to the controller method?' }, + { id: 'aspnet-rw', repo: '/tmp/codegraph-corpus/aspnet-realworld', q: 'How does ASP.NET route a request to the controller action?' }, + // The iter7 wins/ties (to make sure we don't regress) + { id: 'cobra', repo: '/tmp/codegraph-corpus/cobra', q: 'How does cobra parse commands and flags?' }, + { id: 'sinatra', repo: '/tmp/codegraph-corpus/sinatra', q: 'How does sinatra route a request to its handler?' }, + { id: 'slim', repo: '/tmp/codegraph-corpus/slim', q: 'How does slim route a request and apply middleware?' }, +]; + +// Detect signals in response text — these are the levers we've added that +// otherwise only show up via "agent ran X more tool calls" downstream. +const detect = (text) => ({ + hasEntryPoints: /^### Entry Points/m.test(text), + hasRelatedSymbols: /^### Related Symbols/m.test(text), + hasFlowTrace: /^## Inline flow trace/m.test(text), + hasRouteManifest: /^## Routing manifest/m.test(text), + hasTopHandler: /^### Top handler file/m.test(text), + hasSmallRepoTail: /This project is small/.test(text), +}); + +const filterRepos = args.repos ? 
new Set(String(args.repos).split(',')) : null; +const subjects = SWEEP.filter(s => !filterRepos || filterRepos.has(s.id)); + +const t0 = Date.now(); +const rows = []; +for (const s of subjects) { + try { + const cg = CodeGraph.openSync(s.repo); + const handler = new ToolHandler(cg); + const t1 = Date.now(); + const res = await handler.execute('codegraph_' + TOOL, + TOOL === 'context' ? { task: s.q } : + TOOL === 'explore' ? { query: s.q } : { from: 'main', to: 'main' }); + const text = res.content?.[0]?.text ?? ''; + const signals = detect(text); + rows.push({ + id: s.id, + ms: Date.now() - t1, + chars: text.length, + lines: text.split('\n').length, + ...signals, + }); + try { cg.close?.(); } catch {} + } catch (e) { + rows.push({ id: s.id, error: String(e).slice(0, 80) }); + } +} + +// Pretty-print as a compact table. +const fmt = (r) => + r.error + ? ` ${r.id.padEnd(13)} ERROR: ${r.error}` + : ` ${r.id.padEnd(13)} ${String(r.chars).padStart(6)}c ${String(r.lines).padStart(4)}L ${String(r.ms).padStart(4)}ms` + + ` ${r.hasEntryPoints ? 'EP ' : ' '}` + + `${r.hasFlowTrace ? 'TRC ' : ' '}` + + `${r.hasRouteManifest ? 'MAN ' : ' '}` + + `${r.hasTopHandler ? 'HND ' : ' '}` + + `${r.hasSmallRepoTail ? 
'TAIL' : ' '}`; +console.log(`=== probe-sweep tool=${TOOL} n=${subjects.length} (${Date.now() - t0}ms total) ===`); +console.log(' id chars lines ms signals'); +console.log(' ' + '-'.repeat(56)); +for (const r of rows) console.log(fmt(r)); + +// Sum + medians for the size pillar +const sizes = rows.filter(r => !r.error).map(r => r.chars); +sizes.sort((a, b) => a - b); +const median = sizes[Math.floor(sizes.length / 2)]; +const sum = sizes.reduce((a, b) => a + b, 0); +console.log(` ${'-'.repeat(64)}`); +console.log(` median=${median}c total=${sum}c ` + + `manifest=${rows.filter(r => r.hasRouteManifest).length}/${rows.filter(r => !r.error).length} ` + + `top-handler=${rows.filter(r => r.hasTopHandler).length}/${rows.filter(r => !r.error).length}`); diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index 3c3a082ff..86a59b2ab 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -843,11 +843,21 @@ program const cg = await CodeGraph.open(projectPath); const limit = parseInt(options.limit || '10', 10); - const results = cg.searchNodes(search, { + const rawResults = cg.searchNodes(search, { limit, kinds: options.kind ? [options.kind as any] : undefined, }); + // Mirror the MCP search down-rank so the CLI also surfaces the + // hand-written implementation before protobuf/gRPC scaffolding + // when both share a name. See extraction/generated-detection.ts. + const { isGeneratedFile } = await import('../extraction/generated-detection'); + const results = [...rawResults].sort((a, b) => { + const aGen = isGeneratedFile(a.node.filePath) ? 1 : 0; + const bGen = isGeneratedFile(b.node.filePath) ? 
1 : 0; + return aGen - bGen; + }); + if (options.json) { console.log(JSON.stringify(results, null, 2)); } else { diff --git a/src/context/formatter.ts b/src/context/formatter.ts index 37a08ee84..748d17201 100644 --- a/src/context/formatter.ts +++ b/src/context/formatter.ts @@ -5,6 +5,7 @@ */ import { Node, Edge, TaskContext, Subgraph } from '../types'; +import { isGeneratedFile } from '../extraction/generated-detection'; /** * Format context as markdown @@ -21,10 +22,17 @@ export function formatContextAsMarkdown(context: TaskContext): string { lines.push('## Code Context\n'); lines.push(`**Query:** ${context.query}\n`); - // Entry points - compact format - if (context.entryPoints.length > 0) { + // Entry points - compact format. Re-sort so generated files (.pb.go, + // .pulsar.go, mocks, …) rank LAST — a flow query should lead with the + // hand-written implementation, not protobuf scaffolding. + const orderedEntries = [...context.entryPoints].sort((a, b) => { + const aGen = isGeneratedFile(a.filePath) ? 1 : 0; + const bGen = isGeneratedFile(b.filePath) ? 1 : 0; + return aGen - bGen; + }); + if (orderedEntries.length > 0) { lines.push('### Entry Points\n'); - for (const node of context.entryPoints) { + for (const node of orderedEntries) { const location = node.startLine ? `:${node.startLine}` : ''; lines.push(`- **${node.name}** (${node.kind}) - ${node.filePath}${location}`); if (node.signature) { @@ -34,9 +42,14 @@ export function formatContextAsMarkdown(context: TaskContext): string { lines.push(''); } - // Related symbols - compact list (skip verbose structure tree) + // Related symbols - compact list (skip verbose structure tree). Drop nodes + // in generated source files (`.pb.go` / `.pulsar.go` / mocks / …) — agents + // chasing a flow never want to land on protobuf scaffolding (cosmos-Q3 used + // to list `gov.pulsar.go::GetExpeditedThreshold` and `1.pulsar.go::Get` in + // Related Symbols, pure noise that displaced real-flow entries). 
const otherSymbols = Array.from(context.subgraph.nodes.values()) .filter(n => !context.entryPoints.some(e => e.id === n.id)) + .filter(n => !isGeneratedFile(n.filePath)) .slice(0, 10); // Limit to 10 related symbols if (otherSymbols.length > 0) { @@ -55,10 +68,16 @@ export function formatContextAsMarkdown(context: TaskContext): string { lines.push(''); } - // Code blocks - only for key entry points + // Code blocks - only for key entry points. Re-sort so non-generated blocks + // show first (consistent with Entry Points reordering above). if (context.codeBlocks.length > 0) { + const orderedBlocks = [...context.codeBlocks].sort((a, b) => { + const aGen = isGeneratedFile(a.filePath) ? 1 : 0; + const bGen = isGeneratedFile(b.filePath) ? 1 : 0; + return aGen - bGen; + }); lines.push('### Code\n'); - for (const block of context.codeBlocks) { + for (const block of orderedBlocks) { const nodeName = block.node?.name ?? 'Unknown'; lines.push(`#### ${nodeName} (${block.filePath}:${block.startLine})\n`); lines.push('```' + block.language); diff --git a/src/context/index.ts b/src/context/index.ts index da4c0bf05..7e6619e8b 100644 --- a/src/context/index.ts +++ b/src/context/index.ts @@ -587,6 +587,37 @@ export class ContextBuilder { } } + // Iter7 — Core-directory boost. On projects with one file that holds + // the dense majority of internal call edges (e.g. sinatra's + // `lib/sinatra/base.rb` at 85% of all in-file edges), the agent's + // task usually asks about the framework's core. Without this boost, + // ranking favors small focused extension files (e.g. text search + // picks `sinatra-contrib/lib/sinatra/multi_route.rb`'s 10-line + // `route` method over `base.rb`'s `route!` because the extension + // file's `route` matches the query verbatim AND the file is small, + // dwarfing the longer name `route!` in a 1500-line file). 
Boost + // results that share a directory prefix with the dominant file's + // directory so the core file's siblings outrank sibling-package + // extensions. + try { + const dominant = this.queries.getDominantFile?.(); + if (dominant && dominant.edgeCount >= 3 * dominant.nextEdgeCount) { + // Take the directory of the dominant file (everything up to the + // last slash). For `lib/sinatra/base.rb` → `lib/sinatra/`. + const slash = dominant.filePath.lastIndexOf('/'); + if (slash > 0) { + const coreDir = dominant.filePath.slice(0, slash + 1); + for (const result of searchResults) { + if (result.node.filePath.startsWith(coreDir)) { + result.score += 25; + } + } + } + } + } catch { + // SQL failure — fall through, scoring works without the boost + } + // Step 5a: Multi-term co-occurrence re-ranking (applied BEFORE truncation). // For multi-word queries like "search execution from request to shard", // nodes matching 2+ query terms in their name or path are far more relevant diff --git a/src/db/queries.ts b/src/db/queries.ts index 11f5bc34c..a0ac31eea 100644 --- a/src/db/queries.ts +++ b/src/db/queries.ts @@ -20,6 +20,32 @@ import { import { safeJsonParse } from '../utils'; import { kindBonus, nameMatchBonus, scorePathRelevance } from '../search/query-utils'; import { parseQuery, boundedEditDistance } from '../search/query-parser'; +import { isGeneratedFile } from '../extraction/generated-detection'; + +/** + * Path-only heuristic for files that should not be candidates for + * "dominant file" detection: test/spec files and tool-generated files. + * Generated files (`*.pb.go`, `*.pulsar.go`, mock outputs, …) often + * have huge in-file edge counts that dwarf the real source — etcd's + * `rpc.pb.go` has 4× the in-file edges of `server.go`. 
+ */ +function isLowValueFile(filePath: string): boolean { + const lp = filePath.toLowerCase(); + return ( + /(?:^|\/)(tests?|__tests?__|spec)\//.test(lp) || + /_test\.go$/.test(lp) || + /(?:^|\/)test_[^/]+\.py$/.test(lp) || + /_test\.py$/.test(lp) || + /_spec\.rb$/.test(lp) || + /_test\.rb$/.test(lp) || + /\.(test|spec)\.[jt]sx?$/.test(lp) || + /(test|spec|tests)\.(java|kt|scala)$/.test(lp) || + /(tests?|spec)\.cs$/.test(lp) || + /tests?\.swift$/.test(lp) || + /_test\.dart$/.test(lp) || + isGeneratedFile(filePath) + ); +} const SQLITE_PARAM_CHUNK_SIZE = 500; @@ -182,6 +208,9 @@ export class QueryBuilder { getUnresolvedBatch?: SqliteStatement; getAllFilePaths?: SqliteStatement; getAllNodeNames?: SqliteStatement; + getDominantFile?: SqliteStatement; + getTopRouteFile?: SqliteStatement; + getRoutingManifest?: SqliteStatement; } = {}; constructor(db: SqliteDatabase) { @@ -489,6 +518,158 @@ export class QueryBuilder { return rows.map(rowToNode); } + /** + * Find the file that holds the densest concentration of the project's + * internal call graph — the "core" file. Used by context-builder to + * boost ranking of symbols in that file's directory (so e.g. sinatra + * queries surface `lib/sinatra/base.rb`'s `route!` instead of + * `sinatra-contrib/lib/sinatra/multi_route.rb`'s `route` extension). + * + * Returns null if no file has a meaningful concentration (e.g. spread + * evenly across many files, or empty index). + * + * "Internal" = source and target are in the same file. Cross-file + * edges aren't useful here — they don't tell us which file is the + * functional center. + * + * Excludes test/spec files from candidacy via path-pattern. The agent's + * typical question is "how does X work", not "how is X tested", so + * boosting a test file's directory would be a misfire. 
+ */ + getDominantFile(): { filePath: string; edgeCount: number; nextEdgeCount: number } | null { + if (!this.stmts.getDominantFile) { + // Pull top 20 candidates; we then filter out test/generated files + // in code (regex-grade matching that SQL LIKE can't express). The + // generated-file filter is critical — without it, etcd's + // `api/etcdserverpb/rpc.pb.go` (1916 in-file edges, generated + // protobuf stub) outranks the real `server/etcdserver/server.go` + // (470 edges) by 4×, and the boost would push the agent toward + // generated code. + this.stmts.getDominantFile = this.db.prepare(` + SELECT n.file_path AS file_path, COUNT(*) AS edge_count + FROM edges e + JOIN nodes n ON e.source = n.id + JOIN nodes m ON e.target = m.id + WHERE n.file_path = m.file_path + GROUP BY n.file_path + ORDER BY edge_count DESC + LIMIT 20 + `); + } + const rows = this.stmts.getDominantFile.all() as Array<{ file_path: string; edge_count: number }>; + const filtered = rows.filter(r => !isLowValueFile(r.file_path)); + if (filtered.length === 0 || filtered[0]!.edge_count < 20) return null; + return { + filePath: filtered[0]!.file_path, + edgeCount: filtered[0]!.edge_count, + nextEdgeCount: filtered[1]?.edge_count ?? 0, + }; + } + + /** + * Find the file that holds the densest concentration of the project's + * `route` nodes (framework-emitted: Express/Gin/Flask/Rails/Drupal/etc.). + * Used by handleContext on small repos to inline the project's routing + * config when the agent's query is about request flow — eliminating the + * "Glob + Read routes.rb" pattern that beats codegraph on tiny realworld + * template repos. + * + * Excludes test/generated files from candidacy. Returns null if there + * are fewer than 3 non-test routes total, or if no file holds at least + * 30% of them (diffuse routing → no single answer file). 
+ */ + getTopRouteFile(): { filePath: string; routeCount: number; totalRoutes: number } | null { + if (!this.stmts.getTopRouteFile) { + this.stmts.getTopRouteFile = this.db.prepare(` + SELECT file_path, COUNT(*) AS cnt + FROM nodes + WHERE kind = 'route' + GROUP BY file_path + ORDER BY cnt DESC + LIMIT 20 + `); + } + const rows = this.stmts.getTopRouteFile.all() as Array<{ file_path: string; cnt: number }>; + const filtered = rows.filter(r => !isLowValueFile(r.file_path)); + if (filtered.length === 0) return null; + const totalRoutes = filtered.reduce((sum, r) => sum + r.cnt, 0); + const top = filtered[0]!; + if (totalRoutes < 3 || top.cnt < 3) return null; + if (top.cnt / totalRoutes < 0.30) return null; + return { filePath: top.file_path, routeCount: top.cnt, totalRoutes }; + } + + /** + * Build a URL → handler manifest from the index. Each route node's + * `references` edge points at the function/method that handles the + * request. We join them in one pass; the agent gets the canonical + * routing answer ("POST /users/login → AuthController#login") without + * having to parse the framework's route DSL itself. + * + * Also returns the file with the most handler endpoints — used as the + * "top handler file" to inline source for, so the agent has both the + * mapping AND the handler implementations. + */ + getRoutingManifest(limit: number = 40): { + entries: Array<{ url: string; handler: string; handlerFile: string; handlerLine: number; handlerKind: string }>; + topHandlerFile: string | null; + topHandlerFileCount: number; + totalRoutes: number; + } | null { + if (!this.stmts.getRoutingManifest) { + // Edge kind varies across framework resolvers: Spring/Rails/ + // Laravel/Drupal emit `references`, Express emits `calls`. Accept + // both — the semantic is the same (route → its handler). 
+ this.stmts.getRoutingManifest = this.db.prepare(` + SELECT + r.name AS url, + h.name AS handler, + h.file_path AS handler_file, + h.start_line AS handler_line, + h.kind AS handler_kind + FROM nodes r + JOIN edges e ON e.source = r.id + JOIN nodes h ON e.target = h.id + WHERE r.kind = 'route' + AND e.kind IN ('references', 'calls') + AND h.kind IN ('function', 'method', 'class') + ORDER BY r.file_path, r.start_line + LIMIT ? + `); + } + const rows = this.stmts.getRoutingManifest.all(limit) as Array<{ + url: string; handler: string; handler_file: string; handler_line: number; handler_kind: string; + }>; + // Drop test/generated handlers — same hygiene as elsewhere. + const filtered = rows.filter(r => !isLowValueFile(r.handler_file)); + if (filtered.length < 3) return null; + // Identify the file holding the most handlers (the "primary handler file"). + const fileCounts = new Map(); + for (const r of filtered) { + fileCounts.set(r.handler_file, (fileCounts.get(r.handler_file) ?? 0) + 1); + } + let topHandlerFile: string | null = null; + let topHandlerFileCount = 0; + for (const [file, count] of fileCounts) { + if (count > topHandlerFileCount) { + topHandlerFile = file; + topHandlerFileCount = count; + } + } + return { + entries: filtered.map(r => ({ + url: r.url, + handler: r.handler, + handlerFile: r.handler_file, + handlerLine: r.handler_line, + handlerKind: r.handler_kind, + })), + topHandlerFile, + topHandlerFileCount, + totalRoutes: filtered.length, + }; + } + /** * Get all nodes of a specific kind */ diff --git a/src/extraction/generated-detection.ts b/src/extraction/generated-detection.ts new file mode 100644 index 000000000..bde190725 --- /dev/null +++ b/src/extraction/generated-detection.ts @@ -0,0 +1,78 @@ +/** + * Generated-file detection for symbol-disambiguation down-ranking. 
+ * + * When a query like "Send" matches 17 symbols across protobuf scaffolding, + * test mocks, and the hand-written implementation, the FTS ranker often + * surfaces the generated stubs first because their names are identical + * to the implementation's name (validated empirically on cosmos-sdk — + * see project_go_multi_module_audit memory). Generated stubs frequently + * have no body to trace from, so the agent ends up reading source anyway. + * + * This helper is a pure path-based classifier consulted at disambiguation + * time (findSymbol / findAllSymbols / codegraph_search formatting), NOT + * a hard filter — generated nodes are still in the graph and remain + * reachable; they just rank LAST when there's a real implementation + * with the same name. + * + * Scope: suffix patterns only. Most generated files follow the + * `<name>.<tool>.<ext>` convention (`.pb.go`, `_grpc.pb.go`, + * `.g.dart`, `_pb2.py`), and that covers ~all of what we saw in the + * Go audit. A future addition would be scanning for the canonical + * `// Code generated by` header during extraction, for the rare files + * that defy the suffix convention. + */ + +const GENERATED_PATTERNS: ReadonlyArray<RegExp> = [ + // Go — protobuf / gRPC / pulsar + /\.pb\.go$/, + /\.pulsar\.go$/, + /_grpc\.pb\.go$/, + // Go — mockgen output. Default emits `mock_<name>.go`; many projects + // (cosmos-sdk uses `expected_*_mocks.go`) rename to `*_mock.go` / + // `*_mocks.go`. Matching either suffix catches both conventions + // without false-positive risk on hand-written sources; the `mock_` prefix is anchored to the basename (start of path or after `/`) so mocks in nested directories match too. + /_mock\.go$/, + /_mocks\.go$/, + /(?:^|\/)mock_[^/]+\.go$/, + // TypeScript / JavaScript — common codegen suffixes (Apollo / GraphQL + // codegen, Prisma, Hasura, ts-proto, gRPC-web, swagger-codegen). 
+ /\.generated\.[jt]sx?$/, + /\.gen\.[jt]sx?$/, + /\.pb\.[jt]s$/, + /_pb\.[jt]s$/, + /_grpc_pb\.[jt]s$/, + // Python — protobuf / gRPC / openapi-codegen + /_pb2(_grpc)?\.py$/, + /_pb2\.pyi$/, + // C++ — protobuf + /\.pb\.(cc|h)$/, + // C# — protobuf / gRPC (protoc-gen-csharp puts output under obj/ but + // many projects also commit *.g.cs and *Grpc.cs siblings) + /\.g\.cs$/, + /Grpc\.cs$/, + // Java — protobuf / gRPC: protoc-gen-java emits `*OuterClass.java`, + // protoc-gen-grpc-java emits `*Grpc.java`. The XxxImplBase abstract + // class lives inside Xxx*Grpc.java. + /OuterClass\.java$/, + /Grpc\.java$/, + // Swift — protobuf + /\.pb\.swift$/, + // Dart — build_runner / freezed / json_serializable / chopper + /\.g\.dart$/, + /\.freezed\.dart$/, + /\.pb\.dart$/, + /\.pbgrpc\.dart$/, + /\.chopper\.dart$/, + // Rust — common build.rs OUT_DIR outputs are usually outside the source + // tree, but in-tree generated files often use `*.generated.rs`. + /\.generated\.rs$/, +]; + +/** + * Whether `filePath` looks like a tool-generated source file based on + * its filename. Path-only — does not read content. The result is a + * relevance hint for disambiguation, not a hard claim. + */ +export function isGeneratedFile(filePath: string): boolean { + return GENERATED_PATTERNS.some((p) => p.test(filePath)); +} diff --git a/src/index.ts b/src/index.ts index 14b0fb0a6..ee3bf51fa 100644 --- a/src/index.ts +++ b/src/index.ts @@ -683,6 +683,33 @@ export class CodeGraph { return this.queries.searchNodes(query, options); } + /** + * Find the project's "primary route file" — the file with the densest + * concentration of framework-emitted `route` nodes (≥3 routes, ≥30% + * of all non-test routes). Used to inline the routing config in + * `codegraph_context` responses on small realworld template repos + * (rails-realworld, laravel-realworld, drupal-admintoolbar, …) where + * Glob+Read of `routes.rb`/`urls.py`/etc. otherwise beats codegraph. 
+ */ + getTopRouteFile(): { filePath: string; routeCount: number; totalRoutes: number } | null { + return this.queries.getTopRouteFile(); + } + + /** + * Build a URL → handler routing manifest from the index. Each entry + * pairs a route node (URL + method) with its handler function/method/class + * via the `references` edge that framework resolvers emit (`calls` edges are accepted too). Returns + * null when fewer than 3 valid (non-test) routes exist. + */ + getRoutingManifest(limit?: number): { + entries: Array<{ url: string; handler: string; handlerFile: string; handlerLine: number; handlerKind: string }>; + topHandlerFile: string | null; + topHandlerFileCount: number; + totalRoutes: number; + } | null { + return this.queries.getRoutingManifest(limit); + } + // =========================================================================== // Edge Operations // =========================================================================== diff --git a/src/mcp/engine.ts b/src/mcp/engine.ts index 15439b047..9ba89da1e 100644 --- a/src/mcp/engine.ts +++ b/src/mcp/engine.ts @@ -222,12 +222,17 @@ export class MCPEngine { /** * Reconcile the index with the current filesystem once, right after open — * catches edits, adds, deletes, and `git pull`/`checkout` changes made while - * no watcher was running. Background, never awaited. + * no watcher was running. Runs in the background, but the returned promise + * is pushed into the ToolHandler as a one-shot gate so the *first* tool + * call awaits completion before serving (without this, a tool call that + * races past sync returns rows for files that no longer exist on disk — + * and the per-file staleness banner can't help because `getPendingFiles()` + * is populated by the watcher, not by catch-up).
*/ private catchUpSync(): void { const cg = this.cg; if (!cg) return; - void cg + const p = cg .sync() .then((result) => { const changed = result.filesAdded + result.filesModified + result.filesRemoved; @@ -239,6 +244,7 @@ export class MCPEngine { const msg = err instanceof Error ? err.message : String(err); process.stderr.write(`[CodeGraph MCP] Catch-up sync failed: ${msg}\n`); }); + this.toolHandler.setCatchUpGate(p); } } diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 5ed057af3..09d1831d9 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -21,10 +21,13 @@ import { lstatSync, openSync, readFileSync, + statSync, writeSync, } from 'fs'; import { clamp, validatePathWithinRoot, validateProjectPath } from '../utils'; +import { isGeneratedFile } from '../extraction/generated-detection'; import { tmpdir } from 'os'; +import * as pathModule from 'path'; import { join, resolve as resolvePath } from 'path'; /** Maximum output length to prevent context bloat (characters) */ @@ -123,21 +126,52 @@ export interface ExploreOutputBudget { includeCompletenessSignal: boolean; /** Include the explore-budget reminder at the end. */ includeBudgetNote: boolean; + /** + * Hard-drop test/spec/icon/i18n files from the relevant-file set unless + * the query itself mentions tests. Today they're only deprioritized in + * the sort, which on tiny repos still lets one slip into the top N (e.g. + * cobra's `command_test.go` displaced `args.go` and contributed ~10KB of + * pure noise to "How does cobra parse commands?"). Off by default; on + * for the very-tiny tier where one slip dominates the budget. + */ + excludeLowValueFiles: boolean; } export function getExploreOutputBudget(fileCount: number): ExploreOutputBudget { + if (fileCount < 150) { + return { + // ITER3: revert iter2's aggressive body shrink (forced Read fallback — + // the per-file 2.5K cap pushed the agent to Read instead of node). + // Back to the iter1 shape (13K/4/3.8K) but keep the test-file + // hard-exclude. 
The cost lever for this tier lives in handleContext + // (steering the agent to stop after 1-2 calls), not in this budget. + maxOutputChars: 13000, + defaultMaxFiles: 4, + maxCharsPerFile: 3800, + gapThreshold: 7, + maxSymbolsInFileHeader: 5, + maxEdgesPerRelationshipKind: 4, + includeRelationships: false, + includeAdditionalFiles: false, + includeCompletenessSignal: false, + includeBudgetNote: false, + excludeLowValueFiles: true, + }; + } if (fileCount < 500) { return { + // ITER3: same revert/keep-filter pattern as <150. maxOutputChars: 18000, defaultMaxFiles: 5, maxCharsPerFile: 3800, gapThreshold: 8, maxSymbolsInFileHeader: 6, maxEdgesPerRelationshipKind: 6, - includeRelationships: true, + includeRelationships: false, includeAdditionalFiles: false, includeCompletenessSignal: false, includeBudgetNote: false, + excludeLowValueFiles: true, }; } if (fileCount < 5000) { @@ -157,6 +191,7 @@ export function getExploreOutputBudget(fileCount: number): ExploreOutputBudget { includeAdditionalFiles: true, includeCompletenessSignal: true, includeBudgetNote: true, + excludeLowValueFiles: false, }; } if (fileCount < 15000) { @@ -171,6 +206,7 @@ export function getExploreOutputBudget(fileCount: number): ExploreOutputBudget { includeAdditionalFiles: true, includeCompletenessSignal: true, includeBudgetNote: true, + excludeLowValueFiles: false, }; } return { @@ -184,6 +220,7 @@ export function getExploreOutputBudget(fileCount: number): ExploreOutputBudget { includeAdditionalFiles: true, includeCompletenessSignal: true, includeBudgetNote: true, + excludeLowValueFiles: false, }; } @@ -382,7 +419,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_context', - description: 'PRIMARY TOOL — call this FIRST for any "how does X work", architecture, feature, or bug-context question. Composes search + node + callers + callees and returns entry points, related symbols, and key code in ONE call — usually enough to answer with no further search/Read/Grep. 
Prefer this over chaining codegraph_search + codegraph_node, and over codegraph_explore. NOTE: provides CODE context, not product requirements; for new features still clarify UX/edge cases with the user.', + description: 'PRIMARY TOOL — call FIRST for any "how does X work"/architecture/bug question. Returns entry points + related symbols + key code in one call; usually answers without further search/Read/Grep. Provides CODE context, not product requirements.', inputSchema: { type: 'object', properties: { @@ -407,7 +444,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_callers', - description: 'Find all functions/methods that call a specific symbol. Useful for understanding usage patterns and impact of changes.', + description: 'List functions that call . For deep flow use codegraph_trace.', inputSchema: { type: 'object', properties: { @@ -427,7 +464,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_callees', - description: 'Find all functions/methods that a specific symbol calls. Useful for understanding dependencies and code flow.', + description: 'List functions that calls. For deep flow use codegraph_trace.', inputSchema: { type: 'object', properties: { @@ -447,7 +484,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_impact', - description: 'Analyze the impact radius of changing a symbol. Shows what code could be affected by modifications.', + description: 'List symbols affected by changing . Use before a refactor.', inputSchema: { type: 'object', properties: { @@ -467,7 +504,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_node', - description: 'Get ONE symbol\'s details (location, signature, docstring) PLUS its TRAIL — what it calls and what calls it, each with file:line. Pass includeCode=true for source (functions return their body; containers return a member outline). 
Use this to WALK the call graph hop-by-hop — node a symbol, then node one of its trail entries — the structural, no-Read way to follow "what calls/triggers/handles X" across files. For a broad first overview of many symbols at once use codegraph_explore; use node to drill along a specific path from there. (If a trail is empty on a non-leaf, that hop is likely dynamic dispatch — read just that line.) Source returned with includeCode is the verbatim live file content — identical to Read.', + description: 'One symbol\'s location, signature, callers/callees trail. includeCode=true returns the verbatim body. Use codegraph_trace for full paths instead of chaining nodes.', inputSchema: { type: 'object', properties: { @@ -487,7 +524,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_explore', - description: 'Returns source for SEVERAL related symbols grouped by file, plus a relationship map, in ONE capped call. This is the efficient way to inspect many related symbols at once — strongly prefer it over a series of codegraph_node or Read calls (each separate call re-reads the whole context, so 8 node calls cost far more than 1 explore). Use it after codegraph_context when you need to see the actual source of several symbols. Query with specific symbol/file/code terms, NOT natural-language sentences — run codegraph_search first to find names. Bad: "how are agent prompts loaded and passed to the CLI". Good: "renderStaticScene drawElementOnCanvas ShapeCache renderElement.ts". The code it returns is the VERBATIM live file source (byte-for-byte identical to Read), line-numbered — not a summary; treat files it shows as already Read, no need to re-open them.', + description: 'Source of SEVERAL related symbols grouped by file, in one capped call. Query is a bag of symbol/file names (not a question). Returned source is verbatim Read-equivalent — do not re-open shown files. 
Prefer over chained codegraph_node.', inputSchema: { type: 'object', properties: { @@ -507,7 +544,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_status', - description: 'Get the status of the CodeGraph index, including statistics about indexed files, nodes, and edges.', + description: 'Index health check (files / nodes / edges). Skip unless debugging.', inputSchema: { type: 'object', properties: { @@ -517,7 +554,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_files', - description: 'REQUIRED for file/folder exploration. Get the project file structure from the CodeGraph index. Returns a tree view of all indexed files with metadata (language, symbol count). Much faster than Glob/filesystem scanning. Use this FIRST when exploring project structure, finding files, or understanding codebase organization.', + description: 'Indexed file tree with language + symbol counts. Faster than Glob for project layout.', inputSchema: { type: 'object', properties: { @@ -550,7 +587,7 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_trace', - description: 'Trace the CALL PATH between two symbols — "how does reach/become ?" Returns the chain of functions from one to the other (each hop with file:line and its body inlined, plus the outgoing calls of the destination itself) in ONE call. This is something grep/Read structurally cannot do: there is no text pattern for "the path from A to B". Ideal for flow questions — how an update triggers a render, how a request reaches a handler, how a QuerySet becomes SQL. If no static path exists the chain likely breaks at dynamic dispatch (callbacks/descriptors/metaclasses); the tool says where and points you to codegraph_node to bridge it.', + description: 'Call path between two symbols — "how does reach ?" Returns the chain with each hop\'s body inlined plus the destination\'s callees, in ONE call. Ideal for flow questions (update→render, request→handler, QuerySet→SQL). 
If no static path exists the chain broke at dynamic dispatch — the failure response inlines both endpoints + their TO-file siblings.', inputSchema: { type: 'object', properties: { @@ -587,6 +624,14 @@ export class ToolHandler { // once and every later tool call reuses the result — never shelling out to // git on the hot path. `undefined` = not computed yet; `null` = no mismatch. private worktreeMismatchCache: Map = new Map(); + // Gate that the MCP engine pokes after `cg.open()` so the first tool call + // blocks on the post-open filesystem reconcile (catch-up sync). Without + // this, a tool call that races past `catchUpSync()` serves rows for files + // that were deleted (or edited) while no MCP server was running — and the + // per-file staleness banner can't help, because `getPendingFiles()` is + // populated by the watcher, not by catch-up. Cleared on first await so + // subsequent calls don't pay any cost. + private catchUpGate: Promise | null = null; constructor(private cg: CodeGraph | null) {} @@ -597,6 +642,17 @@ export class ToolHandler { this.cg = cg; } + /** + * Engine-only: register the catch-up sync promise so the next `execute()` + * call awaits it before serving. The handler swallows rejections (the + * engine logs them) so a sync failure never propagates as a tool error; + * we still want to serve a best-effort result over the same potentially- + * stale data, which is what would have happened without the gate. + */ + setCatchUpGate(p: Promise | null): void { + this.catchUpGate = p; + } + /** * Record the directory the server tried to resolve the default project from. * Used only to make the "no default project" error actionable. @@ -642,7 +698,7 @@ export class ToolHandler { */ getTools(): ToolDefinition[] { const allow = this.toolAllowlist(); - const visible = allow + let visible = allow ? 
tools.filter(t => allow.has(t.name.replace(/^codegraph_/, ''))) : tools; if (!this.cg) return visible; @@ -651,6 +707,40 @@ export class ToolHandler { const stats = this.cg.getStats(); const budget = getExploreBudget(stats.fileCount); + // Tiny-repo tool gating: on projects under TINY_REPO_FILE_THRESHOLD + // files, only expose the 5 core tools (search, context, node, + // explore, trace). The 5 omitted tools (callers, callees, impact, + // status, files) reduce to one grep at this scale. + // + // n=2 audits ruled out cutting below 5 tools: + // - 3-tool gate (search + context + trace): cost regressed on + // cobra/ky/sinatra. The agent fell back to raw Reads to cover + // what codegraph_node + codegraph_explore would have answered. + // - 1-tool gate (search only): catastrophic regression — express + // went from -43% WIN to +107% LOSS. With only search, the agent + // can't navigate the call graph structurally and reads everything. + // + // 5 is the empirical lower bound. Tools beyond search/context/ + // node/explore/trace pay overhead that the agent doesn't recoup + // on tiny-repo flow questions. + // ITER4: raise threshold 150 → 500 so single-file frameworks + // (sinatra at 159, slim_framework around 200) also get the + // 5-tool surface. The empirical 5-tool floor was set on <150 + // probes; iter3 measurement showed sinatra is structurally the + // SAME problem as cobra (single-file WITHOUT-arm Read wins), + // so it deserves the same gating. 
+ const TINY_REPO_FILE_THRESHOLD = 500; + const TINY_REPO_CORE_TOOLS = new Set([ + 'codegraph_search', + 'codegraph_context', + 'codegraph_node', + 'codegraph_explore', + 'codegraph_trace', + ]); + if (stats.fileCount < TINY_REPO_FILE_THRESHOLD) { + visible = visible.filter(t => TINY_REPO_CORE_TOOLS.has(t.name)); + } + return visible.map(tool => { if (tool.name === 'codegraph_explore') { return { @@ -928,6 +1018,16 @@ export class ToolHandler { */ async execute(toolName: string, args: Record): Promise { try { + // Block the first tool call on the engine's post-open reconcile so we + // never serve rows for files deleted/edited while no MCP server was + // running. The gate is cleared after first await — subsequent calls + // pay nothing. Catch-up failures are logged by the engine; we + // proceed regardless so a transient sync error never breaks tools. + if (this.catchUpGate) { + const gate = this.catchUpGate; + this.catchUpGate = null; + try { await gate; } catch { /* engine already logged */ } + } // Honor the optional tool allowlist (CODEGRAPH_MCP_TOOLS): a trimmed // surface rejects ablated tools defensively even if a client cached them. if (!this.isToolAllowed(toolName)) { @@ -1014,7 +1114,16 @@ export class ToolHandler { return this.textResult(`No results found for "${query}"`); } - const formatted = this.formatSearchResults(results); + // Down-rank generated files within the FTS-returned set so a search + // for "Send" surfaces the hand-written keeper before .pb.go stubs + // that share the name. Stable: only reorders generated vs. not. + const ranked = [...results].sort((a, b) => { + const aGen = isGeneratedFile(a.node.filePath) ? 1 : 0; + const bGen = isGeneratedFile(b.node.filePath) ? 
1 : 0; + return aGen - bGen; + }); + + const formatted = this.formatSearchResults(ranked); return this.textResult(this.truncateOutput(formatted)); } @@ -1032,7 +1141,21 @@ export class ToolHandler { } const cg = this.getCodeGraph(args.projectPath as string | undefined); - const maxNodes = (args.maxNodes as number) || 20; + // On tiny repos (<150 files), trim maxNodes hard — the entire repo + // is grep-able in a turn so a 20-node context is wasted budget. + // 8 covers the typical 1-3 entry-point + their immediate neighbors + // without dragging in the rest of the small codebase. + let defaultMaxNodes = 20; + let isTinyRepo = false; + let isSmallRepo = false; + try { + const stats = cg.getStats(); + if (stats.fileCount < 150) { defaultMaxNodes = 8; isTinyRepo = true; } + else if (stats.fileCount < 500) { isSmallRepo = true; } + } catch { + // stats failure — fall back to the standard default + } + const maxNodes = (args.maxNodes as number) || defaultMaxNodes; const includeCode = args.includeCode !== false; const context = await cg.buildContext(task, { @@ -1047,13 +1170,190 @@ export class ToolHandler { ? '\n\n⚠️ **Ask user:** UX preferences, edge cases, acceptance criteria' : ''; + // Auto-trace for flow queries: when the task is asking "how does X + // reach/flow/propagate from A to B", run the trace internally and + // append its body to the context response. Saves the agent the + // follow-up codegraph_trace call that was the #2 cost driver on + // multi-module flow questions (Q3 / etcd Q2 in the audit). + const flowTrace = await this.maybeInlineFlowTrace(task, cg); + + // Iter3 — sufficiency steering on small repos. + // + // Measured economics on tiny (<150) and small (<500) projects: every + // additional MCP tool call costs ~$0.02-0.05 in cache-write tokens + // (5K-15K per response at $3.75/1M). 
The agent reflexively follows + // codegraph_context with explore/node even when the context response + // is already sufficient — that pattern drove the cost gap that + // smaller bodies (iter2) failed to close (smaller bodies just shifted + // the agent to Read instead). Direct directive on small-repo + // responses: tell the agent the context call IS the comprehensive + // pass for a project of this size and that follow-ups should be + // narrow (trace from→to, node single-symbol) — not another broad + // explore that re-bundles the same content. + // ITER4: unified strong directive for both tiny (<150) and small + // (<500) tiers — measured iter3 result was that the soft <500 + // wording was IGNORED on sinatra (5 tool calls, +92% loss) while + // the strong <150 wording was followed on cobra/slim (3 calls, + // -21%/-22% wins). The single-file-framework problem (sinatra) + // is structurally the same as cobra's; both deserve the same + // sufficiency steering. + let smallRepoTail = ''; + let smallRepoRouteInline = ''; + if (isTinyRepo || isSmallRepo) { + // Iter12: backend-computed routing manifest for routing queries. + // Builds a URL → handler map directly from the graph (each route + // node has a `references` edge to its handler), then inlines the + // top handler file's source. The agent gets the canonical + // routing answer in one MCP call — no need to parse framework + // DSL or grep for handlers. + // + // Replaces iter10's raw route-file inline. The manifest is more + // information-dense (parsed URL→handler map vs raw config DSL) + // and we still inline the top handler file's source so the agent + // has the implementation bodies inline too. + const isRouteQuery = /\b(route|routes|routing|request|handler|endpoint|api|controller|middleware|dispatch|invok)/i.test(task); + if (isRouteQuery) { + try { + const manifest = cg.getRoutingManifest(40); + if (manifest) { + // 1) Compact URL→handler list (~30-60 lines, ~1-2KB). 
+ const lines: string[] = [ + `\n\n## Routing manifest (${manifest.totalRoutes} routes, top handler file holds ${manifest.topHandlerFileCount})`, + '', + '| URL | Handler | Location |', + '|---|---|---|', + ]; + for (const e of manifest.entries) { + lines.push(`| \`${e.url}\` | \`${e.handler}\` | ${e.handlerFile}:${e.handlerLine} |`); + } + // 2) Inline the top handler file's source. + if (manifest.topHandlerFile && manifest.topHandlerFileCount >= 2) { + try { + const fullPath = pathModule.join(cg.getProjectRoot(), manifest.topHandlerFile); + const stat = statSync(fullPath); + if (stat.size > 0 && stat.size <= 16000) { + const source = readFileSync(fullPath, 'utf-8'); + const capped = source.length > 7000 ? source.slice(0, 7000) + '\n... (truncated)' : source; + const ext = (manifest.topHandlerFile.match(/\.([a-z]+)$/i)?.[1] || '').toLowerCase(); + const lang = + ext === 'rb' ? 'ruby' : ext === 'py' ? 'python' : + ext === 'go' ? 'go' : ext === 'rs' ? 'rust' : + ext === 'js' || ext === 'jsx' ? 'javascript' : + ext === 'ts' || ext === 'tsx' ? 'typescript' : + ext === 'java' ? 'java' : ext === 'kt' ? 'kotlin' : + ext === 'cs' ? 'csharp' : ext === 'php' ? 'php' : + ext === 'swift' ? 'swift' : ext === 'yml' || ext === 'yaml' ? 'yaml' : ''; + lines.push(''); + lines.push(`### Top handler file (\`${manifest.topHandlerFile}\` — ${manifest.topHandlerFileCount}/${manifest.totalRoutes} routes, ${capped === source ? 'full source inlined — do NOT Read' : 'first 7000 chars inlined — Read the file for the rest'})`); // only claim "full source" when nothing was cut + lines.push(''); + lines.push('```' + lang); + lines.push(capped); + lines.push('```'); + } + } catch { /* file read failed, skip the source inline */ } + } + smallRepoRouteInline = lines.join('\n'); + } + } catch { + // Manifest build failed — drop silently + } + } + const sizeQualifier = isTinyRepo ? 'under 150' : 'under 500'; + const routingClause = smallRepoRouteInline + ? ' The URL→handler manifest and top handler file are also inlined above — answer routing questions from them.'
+ : ''; + smallRepoTail = `\n\n---\n> **This project is small** (${sizeQualifier} indexed files). The entry points and code above cover the relevant surface — **do NOT call codegraph_explore as a follow-up; its content will largely duplicate this response**. If you need a specific flow, call \`codegraph_trace from→to\`. If you need one specific symbol's body, call \`codegraph_node \`.${routingClause} Otherwise, answer from what is above.`; + } + // buildContext returns string when format is 'markdown' if (typeof context === 'string') { - return this.textResult(this.truncateOutput(context + reminder)); + return this.textResult(this.truncateOutput(context + flowTrace + reminder + smallRepoRouteInline + smallRepoTail)); } // If it returns TaskContext, format it - return this.textResult(this.truncateOutput(this.formatTaskContext(context) + reminder)); + return this.textResult(this.truncateOutput(this.formatTaskContext(context) + flowTrace + reminder + smallRepoRouteInline + smallRepoTail)); + } + + /** + * Detect a flow-style task ("how does X reach Y", "trace the path from A to B") + * and pre-run trace between the most likely endpoints, returning the trace + * body to splice into the context response. Returns '' for non-flow queries + * or when no plausible endpoint pair can be extracted. + * + * Conservative by design: only fires when the task has both a clear flow + * keyword AND at least two distinct PascalCase / camelCase identifiers. + * False positives waste a graph query; false negatives just fall back to + * the agent calling trace itself (existing path-proximity wiring handles + * disambiguation either way). 
+ */ + private async maybeInlineFlowTrace(task: string, cg: CodeGraph): Promise<string> { + const lower = task.toLowerCase(); + const FLOW_KEYWORDS = [ + 'trace ', + 'from ', + 'reach ', + 'flow ', + 'propagat', + 'how does ', + 'how do ', + ]; + if (!FLOW_KEYWORDS.some((k) => lower.includes(k))) return ''; + + // Extract candidate symbols — PascalCase or camelCase identifiers ≥3 chars. + // Filter out common non-symbol words and the flow keywords themselves. + const STOP_WORDS = new Set([ + 'how', 'does', 'the', 'and', 'from', 'through', 'reach', 'reaches', + 'flow', 'path', 'trace', 'cross', 'module', 'modules', 'where', + 'update', 'updates', 'updated', 'when', 'what', 'this', 'that', + ]); + const ids: string[] = []; + const seen = new Set<string>(); + const re = /\b([A-Z][a-z]+(?:[A-Z][a-z]*)+|[a-z]+[A-Z][a-z]*(?:[A-Z][a-z]*)*)\b/g; + let m: RegExpExecArray | null; + while ((m = re.exec(task)) !== null) { + const sym = m[1]!; + if (sym.length < 3) continue; + const key = sym.toLowerCase(); + if (STOP_WORDS.has(key) || seen.has(key)) continue; + seen.add(key); + ids.push(sym); + } + + if (ids.length < 2) return ''; + + // The first two distinct symbols, in order of appearance, are the most + // likely from/to endpoints — "from X ... through to Y" naturally places + // them in that order in the prose. If the trace fails to connect, it + // still returns the inlined endpoint bodies (the trace-failure rewrite). + const fromSym = ids[0]!; + const toSym = ids[1]!; + + let traceResult: ToolResult; + try { + traceResult = await this.handleTrace({ + from: fromSym, + to: toSym, + projectPath: cg.getProjectRoot(), + } as Record<string, unknown>); + } catch { + return ''; + } + // Extract the textual body. Defensive: handleTrace's contract is the + // standard tool-result shape used elsewhere in this file. + const body = traceResult.content + ?.map((c) => (c.type === 'text' ?
c.text : '')) + .filter(Boolean) + .join('\n') + .trim(); + if (!body) return ''; + return [ + '', + '## Inline flow trace', + '', + `Auto-traced \`${fromSym}\` → \`${toSym}\` because the query looks like a flow question. No follow-up codegraph_trace is needed for this pair.`, + '', + body, + ].join('\n'); } /** @@ -1232,41 +1532,185 @@ export class ToolHandler { // (which, on real code, means the flow breaks at dynamic dispatch). const edgeKinds: Edge['kind'][] = ['calls']; const MAX_HOPS = 7; - const fromTry = fromMatches.nodes.slice(0, 3); - const toTry = toMatches.nodes.slice(0, 3); + // Path-proximity pairing: in a multi-module repo a symbol name like + // `EndBlocker` exists in 20+ modules. FTS picks one almost arbitrarily; + // the WRONG pair (e.g. simapp's wrapper EndBlocker paired with gov's Tally) + // has no static path, falls through to the dynamic-dispatch failure branch, + // and surfaces unrelated bodies — exactly the cosmos-Q3 trace failure mode. + // Score every from×to combo by shared file-path prefix length; try the + // most-co-located pair first (e.g. `x/gov/abci.go::EndBlocker` × + // `x/gov/keeper/tally.go::Tally` share `x/gov/`). + // + // Consider the FULL candidate set, not just the FTS top-5: the right + // EndBlocker for a gov-module flow may rank 8th in FTS but share the + // entire `x/gov/` prefix with the destination. Path-proximity supersedes + // FTS for this disambiguation. Findpath trials are still capped by + // FINDPATH_PAIR_BUDGET below to bound graph traversal cost. 
+ const sharedDirPrefixLen = (a: string, b: string): number => { + const aDir = a.replace(/[^/]+$/, ''); + const bDir = b.replace(/[^/]+$/, ''); + let i = 0; + while (i < aDir.length && i < bDir.length && aDir[i] === bDir[i]) i++; + return aDir.slice(0, i).lastIndexOf('/') + 1; // count whole shared directory segments only, so `x/gov/` vs `x/group/` scores `x/` (2), not `x/g` (3) + }; + // Cosmos-Q3 surfaced a second-order failure: `enterprise/group/x/group/` + // SHARES MORE of its path with `enterprise/group/x/group/keeper/tally.go` + // (24 chars) than `x/gov/abci.go` shares with `x/gov/keeper/tally.go` + // (6 chars), so pure shared-prefix prefers the side-experiment module + // over the canonical one — even though the user's question is clearly + // about the main gov module. Penalize candidates living under prefixes + // that conventionally hold extensions / experiments / vendored code, so + // the canonical-path pair wins even when its shared prefix is short. + const isLessCanonicalPath = (p: string): boolean => + /^(enterprise|contrib|examples?|sample|playground|vendor|third[_-]?party|deprecated|legacy)\//i.test(p); + const LESS_CANONICAL_PENALTY = 100; // any canonical candidate beats any less-canonical one + const scorePair = (a: string, b: string): number => + sharedDirPrefixLen(a, b) + - (isLessCanonicalPath(a) ? LESS_CANONICAL_PENALTY : 0) + - (isLessCanonicalPath(b) ? LESS_CANONICAL_PENALTY : 0); + const fromCands = fromMatches.nodes; + const toCands = toMatches.nodes; + const pairs: Array<{ f: Node; t: Node; score: number }> = []; + for (const f of fromCands) { + for (const t of toCands) { + pairs.push({ f, t, score: scorePair(f.filePath, t.filePath) }); + } + } + // Sort by shared prefix desc, then by FTS order (already encoded in the + // pairs' insertion order — both for f and t). The tiebreaker preserves + // findAllSymbols' generated-file-last ranking.
+ pairs.sort((a, b) => b.score - a.score); + // Cap how many graph-path probes we attempt so a 50×50 cross-product + // doesn't blow up on a god-named symbol like `Get` (well-named flows have + // their good pair near the top of the sort anyway). + const FINDPATH_PAIR_BUDGET = 20; + const fromTry = fromCands; + const toTry = toCands; let path: Array<{ node: Node; edge: Edge | null }> | null = null; let overCap: Array<{ node: Node; edge: Edge | null }> | null = null; - for (const f of fromTry) { - for (const t of toTry) { - const p = cg.findPath(f.id, t.id, edgeKinds); - if (!p || p.length <= 1) continue; - if (p.length <= MAX_HOPS) { path = p; break; } - if (!overCap || p.length < overCap.length) overCap = p; - } + let bestPair: { f: Node; t: Node } | null = null; + let triedPairs = 0; + for (const { f, t } of pairs) { if (path) break; + if (triedPairs >= FINDPATH_PAIR_BUDGET) break; + triedPairs++; + const p = cg.findPath(f.id, t.id, edgeKinds); + if (p && p.length > 1) { + if (p.length <= MAX_HOPS) { path = p; bestPair = { f, t }; break; } + if (!overCap || p.length < overCap.length) { overCap = p; bestPair = { f, t }; } + } else if (!bestPair) { + // No path yet — remember the top-scored pair so the failure branch + // surfaces the most-co-located candidates' bodies, not whatever FTS + // happened to put first. + bestPair = { f, t }; + } } if (!path) { - // No static path — almost always a dynamic-dispatch break. Surface the - // start symbol's outgoing calls so the agent can bridge the gap. - const start = fromTry[0]!; - const callees = cg.getCallees(start.id).slice(0, 10) - .map(c => `${c.node.name} (${c.node.filePath}:${c.node.startLine})`); + // No static path — almost always a dynamic-dispatch break. INSTEAD of + // telling the agent to chase the gap with codegraph_node/callers/callees + // (which fans out into 3-4 follow-up tool calls + a Read), inline the + // material those would have returned right here. 
Measured on cosmos-Q3: + // the failed-trace + subsequent fan-out used to cost ~2× a single + // sufficient trace call; this branch closes that gap. + // Prefer the path-proximity-best pair we identified above (e.g. gov's + // EndBlocker × gov's Tally) over the FTS top-pick (simapp's wrapper). + const start = bestPair?.f ?? fromTry[0]!; + const end = bestPair?.t ?? toTry[0]!; + const fileCache = new Map(); const lines = [ - `No direct call path from "${from}" to "${to}".`, + `No direct static call path from "${from}" to "${to}" — the chain almost certainly breaks at dynamic dispatch (a callback / interface dispatch / framework hook / metaclass). Both endpoint bodies + their immediate neighbors are inlined below; answer from them — a follow-up codegraph_node/callers/callees on these would just return what is already here.`, '', - (overCap - ? `(Only a ${overCap.length}-hop indirect chain connects them — almost certainly a BFS wander through unrelated code, not the real flow.) ` - : '') + - 'The direct chain most likely breaks at **dynamic dispatch** (a callback, descriptor, ' + - 'metaclass, or attribute-as-callable) that static parsing cannot resolve into an edge. ' + - `Inspect \`${start.name}\` (${start.filePath}:${start.startLine}) with codegraph_node ` + - '(includeCode=true) — its body usually shows the dynamic call to follow next.', ]; - if (callees.length > 0) { - lines.push('', `**${start.name} statically calls:** ${callees.join(', ')}`); + if (overCap) { + lines.push( + `> Indirect chain of ${overCap.length} hops exists but is over the ${MAX_HOPS}-hop cap (usually a BFS wander through unrelated code, not the real execution flow).`, + '', + ); } - return this.textResult(lines.join('\n') + fromMatches.note + toMatches.note); + + // Track which node IDs we've already inlined a body for so we don't + // double-emit when a callee of FROM is also surfaced separately. 
+ const inlinedBodies = new Set(); + const inlineBody = (n: Node, lineCap: number, charCap: number): boolean => { + if (inlinedBodies.has(n.id)) return false; + inlinedBodies.add(n.id); + const body = this.sourceRangeAt(cg, n.filePath, n.startLine, n.endLine, fileCache, lineCap, charCap); + if (body) { lines.push(body); return true; } + return false; + }; + + const inlineEndpoint = ( + label: 'FROM' | 'TO', + node: Node, + ) => { + lines.push(`### ${label}: \`${node.name}\` (${node.filePath}:${node.startLine}-${node.endLine})`); + inlineBody(node, 120, 3600); + const callers = cg.getCallers(node.id).slice(0, 6); + if (callers.length > 0) { + lines.push(`**Callers of \`${node.name}\`:** ` + + callers.map(c => `${c.node.name} (${c.node.filePath}:${c.node.startLine})`).join(', ')); + } + const callees = cg.getCallees(node.id).slice(0, 8); + if (callees.length > 0) { + lines.push(`**\`${node.name}\` calls:** ` + + callees.map(c => `${c.node.name} (${c.node.filePath}:${c.node.startLine})`).join(', ')); + } + lines.push(''); + }; + inlineEndpoint('FROM', start); + if (end.id !== start.id) inlineEndpoint('TO', end); + + // Inline the OTHER top-level functions/methods in TO's file — that's + // where the missing dynamic-dispatch flow usually lives. Concrete + // measurement from cosmos-Q1: `msgServer.Send` statically calls only + // utility functions (`StringToBytes`, `Wrapf`); its real next-hop + // `SendCoins` is invoked via an embedded-interface call (`k.Keeper.SendCoins`) + // that static parsing CAN'T see. The flow IS in the same file as the + // destination (`x/bank/keeper/send.go`: SendCoins → subUnlockedCoins → + // addCoins → setBalance). Pre-inlining those file-mates is what + // replaces the agent's "trace fail → search SendCoins → node SendCoins + // → trace again" fan-out. 
+ const NEIGHBOR_LINES = 40; + const NEIGHBOR_CHARS = 1200; + const NEIGHBOR_K = 5; + const fileSiblings = (anchor: Node): Node[] => { + // Functions and methods in the same file as the anchor, excluding + // the anchor itself and anything we've already inlined. Sort by + // distance from the anchor's startLine so the closest symbols come + // first (the flow is usually adjacent in the file). + const sameFile = cg + .getNodesByKind('function') + .filter((n) => n.filePath === anchor.filePath) + .concat( + cg.getNodesByKind('method').filter((n) => n.filePath === anchor.filePath), + ); + return sameFile + .filter((n) => n.id !== anchor.id && !inlinedBodies.has(n.id)) + .sort((a, b) => + Math.abs(a.startLine - anchor.startLine) - Math.abs(b.startLine - anchor.startLine), + ) + .slice(0, NEIGHBOR_K); + }; + const renderSiblings = (label: string, siblings: Node[]) => { + if (siblings.length === 0) return; + lines.push(`### ${label}`); + for (const sib of siblings) { + lines.push(''); + lines.push(`- \`${sib.name}\` (${sib.filePath}:${sib.startLine}-${sib.endLine})`); + inlineBody(sib, NEIGHBOR_LINES, NEIGHBOR_CHARS); + } + lines.push(''); + }; + renderSiblings( + `Other functions in \`${end.filePath}\` (the flow that the dynamic-dispatch hop reaches — bodies inlined)`, + fileSiblings(end), + ); + + lines.push( + '> Endpoint bodies + the other functions in the destination\'s file are inlined above. Together they typically cover the missing dynamic-dispatch boundary (interface-method calls like `k.Keeper.SendCoins` that static parsing can\'t follow). 
**No further codegraph_node / codegraph_callers / codegraph_callees / Read / Grep is needed for any symbol already shown here** — call them again only if you need to walk DEEPER than what is inlined.', + ); + return this.textResult(this.truncateOutput(lines.join('\n') + fromMatches.note + toMatches.note)); } const lines: string[] = [ @@ -1649,11 +2093,52 @@ export class ToolHandler { } // Only include files that have entry points or nodes directly connected to entry points - const relevantFiles = [...fileGroups.entries()].filter(([, group]) => group.score >= 3); + let relevantFiles = [...fileGroups.entries()].filter(([, group]) => group.score >= 3); // Extract query terms for relevance checking const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length >= 3); + // Test/spec/icon/i18n file detector — used both for the pre-sort hard + // filter (tiny tier) and the comparator deprioritization (all tiers). + const isLowValue = (p: string) => { + const lp = p.toLowerCase(); + return ( + /\/(tests?|__tests?__|spec)\//.test(lp) || + /_test\.go$/.test(lp) || + /(?:^|\/)test_[^/]+\.py$/.test(lp) || + /_test\.py$/.test(lp) || + /_spec\.rb$/.test(lp) || + /_test\.rb$/.test(lp) || + /\.(test|spec)\.[jt]sx?$/.test(lp) || + /(test|spec|tests)\.(java|kt|scala)$/.test(lp) || + /(tests?|spec)\.cs$/.test(lp) || + /tests?\.swift$/.test(lp) || + /_test\.dart$/.test(lp) || + /\bicons?\b/.test(lp) || + /\bi18n\b/.test(lp) + ); + }; + + // Tiny-tier hard-exclude: on small projects (`excludeLowValueFiles` + // budget flag), one slipped test/spec file dominates the per-file budget + // (cobra's `command_test.go` displaced `args.go` and contributed ~10KB of + // pure noise to "How does cobra parse commands?"). The sort-step + // deprioritization isn't enough at small N. Skip the hard-exclude when + // the query itself is about tests — that's the legitimate "explore the + // tests" case where the agent does want them. 
+ if (budget.excludeLowValueFiles) { + const queryMentionsTests = /\b(test|tests|testing|spec|verify|verifies)\b/i.test(query); + if (!queryMentionsTests) { + const nonLow = relevantFiles.filter(([p]) => !isLowValue(p)); + // Only apply the hard-filter if we still have at least 2 non-test + // candidates after the cut — otherwise the agent is asking about an + // area where tests are the only signal, and we should not strip them. + if (nonLow.length >= 2) { + relevantFiles = nonLow; + } + } + } + // Sort files: highest relevance first, deprioritize low-value files const sortedFiles = relevantFiles.sort((a, b) => { const aPath = a[0].toLowerCase(); @@ -1670,15 +2155,20 @@ export class ToolHandler { const bRelevant = hasQueryRelevance(bPath, b[1].nodes); if (aRelevant !== bRelevant) return aRelevant ? -1 : 1; - // Deprioritize test files, icon files, and i18n files - const isLowValue = (p: string) => - /\/(tests?|__tests?__|spec)\//i.test(p) || - /\bicons?\b/i.test(p) || - /\bi18n\b/i.test(p); const aLow = isLowValue(aPath); const bLow = isLowValue(bPath); if (aLow !== bLow) return aLow ? 1 : -1; + // Deprioritize generated source (.pb.go / .pulsar.go / _mocks.go / …) — + // the agent rarely needs to see the protobuf scaffold or gomock output + // when asking about the actual flow, and dumping their bodies inflates + // the response (the cosmos Q3 explore otherwise leads with + // `expected_keepers_mocks.go`, displacing the real `tally.go` content + // and forcing the agent to Read tally.go anyway). + const aGen = isGeneratedFile(a[0]); + const bGen = isGeneratedFile(b[0]); + if (aGen !== bGen) return aGen ? 
1 : -1; + if (a[1].score !== b[1].score) return b[1].score - a[1].score; return b[1].nodes.length - a[1].nodes.length; }); @@ -2519,12 +3009,21 @@ export class ToolHandler { } if (exactMatches.length > 1) { + // Down-rank generated files (.pb.go, .pulsar.go, _grpc.pb.go, …) + // so a query like "Send" prefers the keeper implementation over + // the protobuf-generated interface stub. Stable sort preserves + // FTS order within each group. See generated-detection.ts. + const ranked = [...exactMatches].sort((a, b) => { + const aGen = isGeneratedFile(a.node.filePath) ? 1 : 0; + const bGen = isGeneratedFile(b.node.filePath) ? 1 : 0; + return aGen - bGen; + }); // Multiple exact matches - pick first, note the others - const picked = exactMatches[0]!.node; - const others = exactMatches.slice(1).map(r => + const picked = ranked[0]!.node; + const others = ranked.slice(1).map(r => `${r.node.name} (${r.node.kind}) at ${r.node.filePath}:${r.node.startLine}` ); - const note = `\n\n> **Note:** ${exactMatches.length} symbols named "${symbol}". Showing results for \`${picked.filePath}:${picked.startLine}\`. Others: ${others.join(', ')}`; + const note = `\n\n> **Note:** ${ranked.length} symbols named "${symbol}". Showing results for \`${picked.filePath}:${picked.startLine}\`. Others: ${others.join(', ')}`; return { node: picked, note }; } @@ -2562,11 +3061,20 @@ export class ToolHandler { return { nodes: [node], note: '' }; } - const locations = exactMatches.map(r => + // Same generated-file down-rank as findSymbol — keeps callers/callees + // /impact aggregation aligned (a query against "Send" returns the + // hand-written implementations before the protobuf scaffold). + const ranked = [...exactMatches].sort((a, b) => { + const aGen = isGeneratedFile(a.node.filePath) ? 1 : 0; + const bGen = isGeneratedFile(b.node.filePath) ? 
1 : 0; + return aGen - bGen; + }); + + const locations = ranked.map(r => `${r.node.kind} at ${r.node.filePath}:${r.node.startLine}` ); - const note = `\n\n> **Note:** Aggregated results across ${exactMatches.length} symbols named "${symbol}": ${locations.join(', ')}`; - return { nodes: exactMatches.map(r => r.node), note }; + const note = `\n\n> **Note:** Aggregated results across ${ranked.length} symbols named "${symbol}": ${locations.join(', ')}`; + return { nodes: ranked.map(r => r.node), note }; } /** diff --git a/src/resolution/callback-synthesizer.ts b/src/resolution/callback-synthesizer.ts index c3047569e..def7ff6fe 100644 --- a/src/resolution/callback-synthesizer.ts +++ b/src/resolution/callback-synthesizer.ts @@ -24,6 +24,7 @@ import type { Edge, Node } from '../types'; import type { QueryBuilder } from '../db/queries'; import type { ResolutionContext } from './types'; +import { isGeneratedFile } from '../extraction/generated-detection'; const REGISTRAR_NAME = /^(on[A-Z]\w*|subscribe|addListener|addEventListener|register|watch|listen|addCallback)$/; const DISPATCHER_NAME = /(emit|trigger|notify|dispatch|fire|publish|flush)/i; @@ -337,7 +338,16 @@ function cppOverrideEdges(queries: QueryBuilder): Edge[] { * trace/callees reach the implementation. Over-approximation accepted * (reachability-correct); capped per class, gated to JVM languages. */ -const IFACE_OVERRIDE_LANGS = new Set(['java', 'kotlin']); +// Languages whose static `implements`/`extends` edges should bridge an +// interface (or abstract base) method to the matching concrete-class method. +// The set is "languages with explicit nominal subtyping and a single class +// kind that holds methods" — i.e. the shape this loop expects. Swift and +// Scala fit shape-wise (Swift `protocol`/`class`, Scala `trait`/`class`) +// and are added below; their concrete-side nodes can be a `struct` (Swift) +// or an `object` (Scala) so the loop also iterates those kinds. 
+const IFACE_OVERRIDE_LANGS = new Set([ + 'java', 'kotlin', 'csharp', 'typescript', 'javascript', 'swift', 'scala', +]); function interfaceOverrideEdges(queries: QueryBuilder): Edge[] { const edges: Edge[] = []; const seen = new Set(); @@ -346,7 +356,12 @@ function interfaceOverrideEdges(queries: QueryBuilder): Edge[] { .getOutgoingEdges(classId, ['contains']) .map((e) => queries.getNodeById(e.target)) .filter((n): n is Node => !!n && n.kind === 'method'); - for (const cls of queries.getNodesByKind('class')) { + // Concrete-side kinds vary by language: `class` covers Java / Kotlin / + // C# / TS / Swift-classes / Scala-classes; `struct` covers Swift value + // types that conform to protocols. Iterate both. + const concreteKinds = ['class', 'struct'] as const; + for (const kind of concreteKinds) { + for (const cls of queries.getNodesByKind(kind)) { const implMethods = methodsOf(cls.id).filter((n) => IFACE_OVERRIDE_LANGS.has(n.language)); if (implMethods.length === 0) continue; for (const sup of queries.getOutgoingEdges(cls.id, ['implements', 'extends'])) { @@ -383,6 +398,116 @@ function interfaceOverrideEdges(queries: QueryBuilder): Edge[] { } } } + } + return edges; +} + +/** + * Go gRPC stub → impl bridge. The protoc-gen-go-grpc codegen emits an + * `UnimplementedXxxServer` struct in `*_grpc.pb.go` carrying one method + * per service RPC; the real handler is a hand-written struct in another + * file (`x/bank/keeper/msg_server.go::msgServer.Send` in cosmos-sdk). + * Go's structural typing means no `implements` edge exists for our + * resolver to follow, so `trace("Send","SendCoins")` lands on the + * empty stub and reports "no path" (validated empirically — the cosmos + * Q1 r1 trace failure that drove this work). + * + * Bridge: for each `UnimplementedXxxServer` whose RPC-method names are + * a SUBSET of some other Go struct's method names, emit `calls` edges + * `stub.method → impl.method` (paired by name). 
Excludes the gRPC + * internal markers `mustEmbedUnimplementedXxxServer` and + * `testEmbeddedByValue`, and skips candidate impls that themselves + * live in a generated file (their `xxxClient` / sibling stubs would + * otherwise look like impls). + * + * Multiple candidates are allowed and capped at MAX_CALLBACKS_PER_CHANNEL — + * a service often has both a production impl and one or more test + * mocks; linking to all preserves trace utility without false-favoring. + * + * Provenance: `heuristic`, `synthesizedBy: 'go-grpc-stub-impl'`. The + * stub's source line is the wiring site shown in the trace trail. + */ +function goGrpcStubImplEdges(queries: QueryBuilder): Edge[] { + const edges: Edge[] = []; + const seen = new Set(); + + const STUB_RE = /^Unimplemented.*Server$/; + // gRPC internal-helper methods that appear on every Unimplemented*Server; + // not part of the service contract, so exclude when computing the RPC-method + // signature used to match impls. + const isInternalMarker = (n: string) => n.startsWith('mustEmbed') || n === 'testEmbeddedByValue'; + + // Methods directly contained by each Go struct, name-only. Built once. + const methodNamesByStruct = new Map>(); + const methodNodesByStruct = new Map(); + const goStructs: Node[] = []; + for (const s of queries.getNodesByKind('struct')) { + if (s.language !== 'go') continue; + goStructs.push(s); + const ms = queries + .getOutgoingEdges(s.id, ['contains']) + .map((e) => queries.getNodeById(e.target)) + .filter((n): n is Node => !!n && n.kind === 'method'); + methodNodesByStruct.set(s.id, ms); + methodNamesByStruct.set(s.id, new Set(ms.map((m) => m.name))); + } + + for (const stub of goStructs) { + if (!STUB_RE.test(stub.name)) continue; + // The stub MUST live in a generated file — that's what tells us this is + // a protoc-emitted scaffold rather than someone naming a struct + // `UnimplementedXxxServer` by hand. 
Without this gate we'd also bridge + // such hand-written structs and create misleading edges. + if (!isGeneratedFile(stub.filePath)) continue; + + const stubMethods = (methodNodesByStruct.get(stub.id) ?? []).filter( + (m) => !isInternalMarker(m.name), + ); + if (stubMethods.length === 0) continue; + const stubMethodNames = stubMethods.map((m) => m.name); + + for (const cand of goStructs) { + if (cand.id === stub.id) continue; + // Skip generated-file candidates — they're siblings (msgClient, + // UnsafeMsgServer, …) whose method sets coincidentally match. + if (isGeneratedFile(cand.filePath)) continue; + + const candNames = methodNamesByStruct.get(cand.id); + if (!candNames) continue; + // Subset: every RPC method must exist on the candidate by name. + // Signature-level match would tighten this further, but name-match + // alone already gives one-to-one pairing in real codebases because + // gRPC method-name sets are highly distinctive (Send + MultiSend + + // UpdateParams + SetSendEnabled is unique to bank's MsgServer). + if (!stubMethodNames.every((n) => candNames.has(n))) continue; + + const candMethods = methodNodesByStruct.get(cand.id) ?? 
[]; + let added = 0; + for (const sm of stubMethods) { + if (added >= MAX_CALLBACKS_PER_CHANNEL) break; + for (const cm of candMethods) { + if (added >= MAX_CALLBACKS_PER_CHANNEL) break; + if (cm.name !== sm.name) continue; + const key = `${sm.id}>${cm.id}`; + if (seen.has(key)) continue; + seen.add(key); + edges.push({ + source: sm.id, + target: cm.id, + kind: 'calls', + line: sm.startLine, + provenance: 'heuristic', + metadata: { + synthesizedBy: 'go-grpc-stub-impl', + via: cm.name, + registeredAt: `${cm.filePath}:${cm.startLine}`, + }, + }); + added++; + } + } + } + } return edges; } @@ -856,6 +981,7 @@ export function synthesizeCallbackEdges(queries: QueryBuilder, ctx: ResolutionCo const flutterEdges = flutterBuildEdges(queries, ctx); const cppEdges = cppOverrideEdges(queries); const ifaceEdges = interfaceOverrideEdges(queries); + const goGrpcEdges = goGrpcStubImplEdges(queries); const rnEventEdgesList = rnEventEdges(ctx); const fabricNativeEdges = fabricNativeImplEdges(ctx); const mybatisEdges = mybatisJavaXmlEdges(queries); @@ -871,6 +997,7 @@ export function synthesizeCallbackEdges(queries: QueryBuilder, ctx: ResolutionCo ...flutterEdges, ...cppEdges, ...ifaceEdges, + ...goGrpcEdges, ...rnEventEdgesList, ...fabricNativeEdges, ...mybatisEdges, From cea78ceb1b989f7f1bf5b4c20b50d67601cc29f3 Mon Sep 17 00:00:00 2001 From: csw-chen <66931558+csw-chen@users.noreply.github.com> Date: Fri, 29 May 2026 02:12:54 +0800 Subject: [PATCH 05/10] fix(windows): suppress console popup on child_process calls (#498) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows, v0.9.5's detached shared daemon (#411) has no inherited console, so any console-subsystem child it spawns gets a fresh visible console window unless the spawn passes `windowsHide: true`. The fix adds the flag to all ten `spawnSync` / `execFileSync` / `execSync` call sites across extraction, sync, installer, and the WASM-flags relaunch. 
macOS/Linux ignore the option, so this is a no-op elsewhere. Fixes #485, #510, #530. Co-authored work: - #498 (csw-chen) — full sweep across extraction, sync, installer, and wasm-runtime. **This is the change being merged.** - #505 (yushengruohui) — independently identified and fixed the 7 git execFileSync sites. Superseded by #498's broader sweep; same diagnosis. - #521 (JirA44) — independently identified and fixed the WASM-runtime spawnSync re-exec. Superseded by #498's broader sweep; same diagnosis. Validated on Windows 11 ARM64 (Parallels): a detached parent's 15 git spawns produce 15 visible black flash-windows without the fix and 0 with it. --- CHANGELOG.md | 14 ++++++++++++++ src/extraction/index.ts | 8 ++++---- src/extraction/wasm-runtime-flags.ts | 1 + src/installer/index.ts | 2 +- src/installer/targets/antigravity.ts | 1 + src/mcp/engine.ts | 3 +-- src/sync/git-hooks.ts | 2 ++ src/sync/worktree.ts | 1 + 8 files changed, 25 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ecf14e00..8d3bb14d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -167,6 +167,20 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). calls pay nothing. Most visible on the "deleted everything between sessions" case, where MCP now returns the correct empty index instead of stale rows. Validated end-to-end on a 10,640-file VS Code index. +- **Windows: black console windows no longer flash on every file save / MCP + reconnect (#485, #510, #530).** v0.9.5 moved the MCP server to a detached + shared daemon (#411). 
Detached processes have no inherited console on + Windows, so any console-subsystem child they spawn (the daemon's `git` + invocations during auto-sync, the WASM-runtime `node` re-exec, the + installer's `npm` shell-out) is created with a fresh console window + visible to the user unless the spawn passes `windowsHide: true` (which + libuv translates to `STARTF_USESHOWWINDOW | SW_HIDE`, so the window is + created hidden and never flashes). All ten `spawnSync` / `execFileSync` / + `execSync` call sites across extraction, sync, installer, and the + WASM-flags relaunch now pass `windowsHide: true`. macOS/Linux ignore the + option, so this is a no-op elsewhere. The daemon launcher itself + (`src/mcp/index.ts`) already passed the flag — these children had been + missed. - **`codegraph index` / `init -i` summary now reports the true edge count.** The per-file counter in the orchestrator only saw extraction-phase edges, so resolution and synthesizer edges (often >50% of the graph on diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 95e47e85f..db69d25ad 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -191,7 +191,7 @@ export function buildDefaultIgnore(rootDir: string): Ignore { * (See issue #193.) */ function collectGitFiles(repoDir: string, prefix: string, files: Set): void { - const gitOpts = { cwd: repoDir, encoding: 'utf-8' as const, timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] as ['pipe', 'pipe', 'pipe'] }; + const gitOpts = { cwd: repoDir, encoding: 'utf-8' as const, timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] as ['pipe', 'pipe', 'pipe'], windowsHide: true }; // Tracked files. --recurse-submodules pulls in files from active submodules, // which the index would otherwise represent only as a commit pointer. 
@@ -241,7 +241,7 @@ function getGitVisibleFiles(rootDir: string): Set | null { const gitRoot = execFileSync( 'git', ['rev-parse', '--show-toplevel'], - { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] } + { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true } ).trim(); if (path.resolve(gitRoot) !== path.resolve(rootDir)) { @@ -250,7 +250,7 @@ function getGitVisibleFiles(rootDir: string): Set | null { execFileSync( 'git', ['check-ignore', '-q', path.resolve(rootDir)], - { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] } + { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true } ); // Directory is gitignored by parent repo — fall back to filesystem walk return null; @@ -291,7 +291,7 @@ function getGitChangedFiles(rootDir: string): GitChanges | null { const output = execFileSync( 'git', ['status', '--porcelain', '--no-renames'], - { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] } + { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true } ); const modified: string[] = []; diff --git a/src/extraction/wasm-runtime-flags.ts b/src/extraction/wasm-runtime-flags.ts index e44c84d8d..c1b30a63e 100644 --- a/src/extraction/wasm-runtime-flags.ts +++ b/src/extraction/wasm-runtime-flags.ts @@ -98,6 +98,7 @@ export function relaunchWithWasmRuntimeFlagsIfNeeded(scriptPath: string): void { const result = spawnSync(process.execPath, argv, { stdio: 'inherit', env: { ...process.env, [RELAUNCH_GUARD_ENV]: '1', [HOST_PPID_ENV]: String(process.ppid) }, + windowsHide: true, }); if (result.error) { diff --git a/src/installer/index.ts b/src/installer/index.ts index ce102aa2e..5893f7b6e 100644 --- a/src/installer/index.ts +++ b/src/installer/index.ts @@ -119,7 +119,7 @@ export async function runInstallerWithOptions(opts: RunInstallerOptions): Promis const s 
= clack.spinner(); s.start('Installing codegraph CLI...'); try { - execSync('npm install -g @colbymchenry/codegraph', { stdio: 'pipe' }); + execSync('npm install -g @colbymchenry/codegraph', { stdio: 'pipe', windowsHide: true }); s.stop('Installed codegraph CLI on PATH'); } catch { s.stop('Could not install (permission denied)'); diff --git a/src/installer/targets/antigravity.ts b/src/installer/targets/antigravity.ts index 9ecc4bc8c..1c128491a 100644 --- a/src/installer/targets/antigravity.ts +++ b/src/installer/targets/antigravity.ts @@ -124,6 +124,7 @@ function resolveCodegraphCommand(): string { encoding: 'utf-8', stdio: ['ignore', 'pipe', 'ignore'], shell: '/bin/bash', + windowsHide: true, }).trim(); if (resolved && fs.existsSync(resolved)) return resolved; } catch { diff --git a/src/mcp/engine.ts b/src/mcp/engine.ts index 9ba89da1e..193f2bbd0 100644 --- a/src/mcp/engine.ts +++ b/src/mcp/engine.ts @@ -147,8 +147,7 @@ export class MCPEngine { const resolvedRoot = findNearestCodeGraphRoot(searchFrom); if (!resolvedRoot) { - // No .codegraph/ above searchFrom — that's not an error, sessions may - // still discover one later via roots/list. + // No .codegraph/ above searchFrom. Sessions may still discover one later via roots/list this.projectPath = searchFrom; return; } diff --git a/src/sync/git-hooks.ts b/src/sync/git-hooks.ts index 3344c5ff9..a657d7545 100644 --- a/src/sync/git-hooks.ts +++ b/src/sync/git-hooks.ts @@ -44,6 +44,7 @@ export function isGitRepo(projectRoot: string): boolean { cwd: projectRoot, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'], + windowsHide: true, }).trim(); return out === 'true'; } catch { @@ -61,6 +62,7 @@ function gitHooksDir(projectRoot: string): string | null { cwd: projectRoot, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'], + windowsHide: true, }).trim(); if (!out) return null; return path.isAbsolute(out) ? 
out : path.resolve(projectRoot, out); diff --git a/src/sync/worktree.ts b/src/sync/worktree.ts index 27bfca546..bf370b178 100644 --- a/src/sync/worktree.ts +++ b/src/sync/worktree.ts @@ -35,6 +35,7 @@ export function gitWorktreeRoot(dir: string): string | null { cwd: dir, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'], + windowsHide: true, }).trim(); return out ? realpath(out) : null; } catch { From a9c9e76d8c77b114886bdc2e4c5283df546baa20 Mon Sep 17 00:00:00 2001 From: Colby Mchenry Date: Thu, 28 May 2026 15:13:23 -0500 Subject: [PATCH 06/10] fix(installer): stop duplicating agent instructions; MCP server is the single source of truth (#529) (#538) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The installer wrote a `## CodeGraph` usage block into each agent's instructions file (CLAUDE.md / AGENTS.md / GEMINI.md / .cursor/rules / Kiro steering) that duplicated, almost verbatim, the guidance the MCP server already emits in its `initialize` response — so agents that surface MCP instructions (Claude Code) read the same playbook twice every turn. All 6 instruction-writing targets (claude, cursor, codex, opencode, gemini, kiro) now stop writing the block. install self-heals by stripping a block a previous version wrote (uninstall already did), so the next `codegraph install`/`uninstall` cleans up existing installs; upgrading the package alone does not (the leftover block is harmless). server-instructions.ts is now the single source of truth — the two steers unique to the old template ("trust codegraph, don't re-verify with grep" and the not-initialized -> `init -i` hint) are ported there. Removes the now-dead INSTRUCTIONS_TEMPLATE / CLAUDE_MD_TEMPLATE, claude-md-template.ts, writeClaudeMd / hasClaudeMdSection, and the Cursor-only wireProjectSurfaces bootstrap. The install log learned a "Removed" verb. Tests rewritten to the new contract + self-heal coverage (140/140 installer tests pass). 
Co-authored-by: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 18 +++ CLAUDE.md | 9 +- README.md | 44 ++---- __tests__/installer-targets.test.ts | 198 +++++++++++++++++-------- __tests__/installer.test.ts | 124 +--------------- src/bin/codegraph.ts | 23 --- src/installer/claude-md-template.ts | 19 --- src/installer/config-writer.ts | 33 +---- src/installer/index.ts | 40 +---- src/installer/instructions-template.ts | 68 ++------- src/installer/targets/claude.ts | 73 +++------ src/installer/targets/codex.ts | 36 ++--- src/installer/targets/cursor.ts | 59 +------- src/installer/targets/gemini.ts | 37 ++--- src/installer/targets/kiro.ts | 40 ++--- src/installer/targets/opencode.ts | 37 ++--- src/installer/targets/types.ts | 15 -- src/mcp/server-instructions.ts | 2 + 18 files changed, 282 insertions(+), 593 deletions(-) delete mode 100644 src/installer/claude-md-template.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d3bb14d7..9535c9a96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -154,6 +154,24 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). bodies). An agent investigating `Splitter.SplittingIterator.separatorStart` now sees the four anonymous overrides in its trail without a Read. +### Changed +- **The installer no longer writes a `## CodeGraph` instructions block into + your agent's instructions file** (`CLAUDE.md`, `AGENTS.md`, `GEMINI.md`, + Cursor's `.cursor/rules/codegraph.mdc`, or Kiro's steering doc). That block + duplicated, almost verbatim, the usage guidance the MCP server already + emits in its `initialize` response — so every agent that surfaces MCP + instructions (Claude Code does) read the same playbook twice each turn + (#529). The MCP server instructions are now the single source of truth. 
+ `codegraph install` stops writing the block, and **the next time you run + `codegraph install` (or `codegraph uninstall`) it strips a block a previous + version wrote**, preserving everything else in the file (and deleting Cursor + `.mdc` / Kiro steering files that were ours outright). Note: simply upgrading + the npm package does not remove an existing block — re-run the installer to + clean it up. The leftover block is harmless meanwhile (just redundant with + the MCP instructions). If you'd added your own notes inside the + `<!-- CODEGRAPH_START -->`/`<!-- CODEGRAPH_END -->` markers, move them outside + the markers first — only the marked block is removed. + ### Fixed - **MCP tools no longer return rows for files deleted while no server was running.** The post-open catch-up sync that reconciles the index against diff --git a/CLAUDE.md b/CLAUDE.md index 6636bf606..a3c93d29b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -74,12 +74,11 @@ Defined in `src/types.ts`. Both extractors and resolvers must use these exact st `src/installer/` is the entry point for `codegraph install` (and the bare `codegraph`/`npx @colbymchenry/codegraph` invocation). Architecture: - `targets/registry.ts` lists every supported agent. -- `targets/types.ts` defines the `AgentTarget` interface — adding a 5th agent (Continue, Zed, Windsurf…) is **one new file in `targets/` + one entry in `registry.ts`**. Each target owns its config-file location, MCP-server JSON/TOML/JSONC writing, and instructions-file path. +- `targets/types.ts` defines the `AgentTarget` interface — adding a 5th agent (Continue, Zed, Windsurf…) is **one new file in `targets/` + one entry in `registry.ts`**. Each target owns its config-file location and MCP-server JSON/TOML/JSONC writing. (Targets no longer write an instructions file — see below.) - Current targets: `claude.ts`, `cursor.ts`, `codex.ts`, `opencode.ts`. - `targets/toml.ts` is a hand-rolled TOML serializer scoped to `[mcp_servers.codegraph]` (used by Codex). 
Sibling tables and `[[array_of_tables]]` are preserved verbatim. No new dependency. - opencode reads `opencode.jsonc` by default; the installer prefers existing `.jsonc`, falls back to `.json`, and creates `.jsonc` for greenfield installs. Edits are surgical via `jsonc-parser` so user comments and formatting survive install/re-install/uninstall round-trips. -- `instructions-template.ts` is the agent-agnostic instructions file written to each target (e.g. `CLAUDE.md`, `.cursor/rules/codegraph.mdc`, `~/.codex/AGENTS.md`, `~/.config/opencode/AGENTS.md`). It explicitly says "trust codegraph results, don't re-verify with grep" — earlier versions prescribed Claude-specific "spawn an Explore agent" and confused other agents. -- `claude-md-template.ts` is the legacy Claude-only template, retained for compatibility paths. +- `instructions-template.ts` no longer holds an instructions body — it exports only the `<!-- CODEGRAPH_START -->`/`<!-- CODEGRAPH_END -->` markers. The installer **stopped writing** a `## CodeGraph` block into each agent's instructions file (`CLAUDE.md` / `~/.codex/AGENTS.md` / `~/.config/opencode/AGENTS.md` / `~/.gemini/GEMINI.md` / `.cursor/rules/codegraph.mdc` / Kiro steering doc) because it duplicated the MCP `initialize` instructions verbatim (issue #529). Each target's `install` (self-heal on upgrade) and `uninstall` use the markers to **strip** a block a previous install left behind. `server-instructions.ts` is the single source of truth for agent-facing guidance. - All installer changes need matching coverage in `__tests__/installer-targets.test.ts` — there are ~47 parameterized contract tests covering install idempotency, sibling preservation, uninstall reverses install, byte-equal re-runs returning `unchanged`, and partial-state recovery for Codex. 
### Cursor MCP working-directory quirk @@ -88,7 +87,7 @@ Cursor launches MCP subprocesses with the wrong cwd and doesn't pass `rootUri` i ### MCP server instructions -`src/mcp/server-instructions.ts` is sent back to the agent in the MCP `initialize` response. This is the *first* thing every agent sees about how to use the tools — treat it as the authoritative tool guidance and keep it in sync with `instructions-template.ts` and `.cursor/rules/codegraph.mdc`. +`src/mcp/server-instructions.ts` is sent back to the agent in the MCP `initialize` response. This is the *first* thing every agent sees about how to use the tools, and as of issue #529 it is the **single source of truth** for agent-facing tool guidance — the installer no longer writes a duplicate `## CodeGraph` instructions block into `CLAUDE.md` / `AGENTS.md` / `.cursor/rules/codegraph.mdc`. Edit tool guidance here and nowhere else. ## Retrieval performance & dynamic-dispatch coverage (do not regress) @@ -254,7 +253,7 @@ publish actions on shared state. Write the files, hand the user the commands. ## House rules - The `0.7.x` line is in active multi-agent rollout. Any change to `src/installer/` (especially `targets/`) needs corresponding test coverage and a CHANGELOG entry — installer regressions break every new install silently. -- When changing what the MCP tools do or how agents should use them, update **all three** of `src/mcp/server-instructions.ts`, `src/installer/instructions-template.ts`, and `.cursor/rules/codegraph.mdc` — they're written to different places but say the same thing. +- When changing what the MCP tools do or how agents should use them, edit `src/mcp/server-instructions.ts` — it is the **single source of truth** for agent-facing tool guidance (issue #529). The installer no longer writes a duplicate instructions block into `CLAUDE.md` / `AGENTS.md` / `GEMINI.md` / `.cursor/rules/codegraph.mdc` / Kiro steering, so there's nothing to keep in sync anymore. 
(The repo's own checked-in `.cursor/rules/codegraph.mdc` is dogfooding config — update it too if you use Cursor on this repo, but it ships nowhere.) - CodeGraph provides **code context**, not product requirements. For new features, ask the user about UX, edge cases, and acceptance criteria — the graph won't tell you. - **When the user references issues, PR comments, or external reports, anchor them to a date and version before drawing conclusions.** Check the comment's `createdAt` against: - The **last released version** — `grep -m1 '^## \[' CHANGELOG.md` shows the top-of-file version (older releases follow). A comment dated before the latest `## [X.Y.Z] - YYYY-MM-DD` is reacting to *released* state — work that's only on `main` or on an unmerged branch doesn't apply. diff --git a/README.md b/README.md index 1db026097..08f2838ce 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,7 @@ The installer will: - Ask which agent(s) to configure — auto-detects installed ones from: **Claude Code**, **Cursor**, **Codex CLI**, **opencode**, **Hermes Agent**, **Gemini CLI**, **Antigravity IDE**, **Kiro** - Prompt to install `codegraph` on your PATH (so agents can launch the MCP server) - Ask whether configs apply to all your projects or just this one -- Write each chosen agent's MCP server config + an instructions file (e.g. `CLAUDE.md`, `.cursor/rules/codegraph.mdc`, `~/.codex/AGENTS.md`, `~/.gemini/GEMINI.md`) +- Write each chosen agent's MCP server config (the codegraph usage guide is delivered by the MCP server itself, so no instructions file is added to `CLAUDE.md` / `AGENTS.md` / etc.) - Set up auto-allow permissions when Claude Code is one of the targets - Initialize your current project (local installs only) @@ -268,7 +268,7 @@ cd your-project codegraph init -i ``` -Builds the per-project knowledge graph index. Also wires up any project-local agent surfaces (e.g. 
Cursor's `.cursor/rules/codegraph.mdc`) so a single global `codegraph install` works in every project you open — no need to re-run the installer per project. +Builds the per-project knowledge graph index. A single global `codegraph install` works in every project you open — no need to re-run the installer per project. That's it — your agent will use CodeGraph tools automatically when a `.codegraph/` directory exists. @@ -314,39 +314,16 @@ npm install -g @colbymchenry/codegraph
-Global Instructions Reference +Agent Tool Guidance -The installer automatically adds these instructions to `~/.claude/CLAUDE.md`: +CodeGraph's MCP server delivers its usage guidance to your agent **automatically**, in the MCP `initialize` response — there's no instructions file to manage and nothing is added to your `CLAUDE.md` / `AGENTS.md` / `GEMINI.md`. In short, it tells the agent to: -```markdown -## CodeGraph +- **Answer structural questions directly with CodeGraph** — it *is* the pre-built index, so a grep/read loop just repeats work it already did. Treat the returned source as already read. +- **Pick the tool by intent:** `codegraph_context` to map an area, `codegraph_trace` for "how does X reach Y", `codegraph_explore` to survey several symbols, `codegraph_search` to find a symbol, `codegraph_callers`/`codegraph_callees` to walk call flow, `codegraph_impact` before editing, `codegraph_node` for one symbol's source. +- **Trust the results — don't re-verify with grep**, and check the staleness banner after edits. +- If `.codegraph/` doesn't exist yet, offer to run `codegraph init -i`. -CodeGraph builds a semantic knowledge graph of codebases for faster, smarter code exploration. - -### If `.codegraph/` exists in the project - -**Answer directly with CodeGraph — don't delegate exploration to a file-reading sub-agent or a grep/read loop.** CodeGraph *is* the pre-built search index; re-deriving its answers with grep + Read repeats work it already did and costs more for the same result. For "how does X work?", architecture, trace, or where-is-X questions, answer in a handful of CodeGraph calls and stop — typically with **zero file reads**. The returned source is complete and authoritative: treat it as already read and do not re-open those files. Reach for raw Read/Grep only to confirm a specific detail CodeGraph didn't cover. 
- -**Tool selection by intent:** - -| Tool | Use For | -|------|---------| -| `codegraph_context` | Map a task / feature / area first — composes search + node + callers + callees in one call | -| `codegraph_trace` | "How does X reach Y" — the call path, each hop's body inline (follows dynamic-dispatch hops grep can't) | -| `codegraph_explore` | Survey several related symbols' source in ONE budget-capped call | -| `codegraph_search` | Find a symbol by name | -| `codegraph_callers` / `codegraph_callees` | Walk call flow one hop at a time | -| `codegraph_impact` | Check what's affected before editing | -| `codegraph_node` | Get a single symbol's source / signature | - -A direct CodeGraph answer is a handful of calls; a grep/read exploration is dozens. - -### If `.codegraph/` does NOT exist - -At the start of a session, ask the user if they'd like to initialize CodeGraph: - -"I notice this project doesn't have CodeGraph initialized. Would you like me to run `codegraph init -i` to build a code knowledge graph?" -``` +The exact text is `src/mcp/server-instructions.ts` — the single source of truth.
@@ -517,7 +494,8 @@ See [Get Started](#get-started) for the one-line install commands. ## Supported Agents The interactive installer auto-detects and configures each of these — wiring up -the MCP server and writing its instructions file: +the MCP server (which delivers its own usage guidance, so no instructions file +is written): - **Claude Code** - **Cursor** diff --git a/__tests__/installer-targets.test.ts b/__tests__/installer-targets.test.ts index 697f8e976..27fcbd6e8 100644 --- a/__tests__/installer-targets.test.ts +++ b/__tests__/installer-targets.test.ts @@ -55,6 +55,18 @@ function setHome(dir: string): { restore: () => void } { }; } +// A marker-delimited CodeGraph block exactly as a previous installer +// wrote it. Issue #529: the installer no longer writes an instructions +// file, but install (self-heal on upgrade) and uninstall both still +// strip a block a prior install left, so we plant this to exercise it. +const LEGACY_BLOCK = [ + '', + '## CodeGraph', + '', + 'Prefer `codegraph_search` / `codegraph_callers` over grep.', + '', +].join('\n'); + describe('Installer targets — contract', () => { let tmpHome: string; let tmpCwd: string; @@ -180,23 +192,35 @@ describe('Installer targets — partial-state idempotency', () => { fs.rmSync(tmpCwd, { recursive: true, force: true }); }); - it('codex: install after only config.toml exists — second pass is fully unchanged', () => { + it('codex: install writes config.toml but never an AGENTS.md instructions file (#529)', () => { const codex = getTarget('codex')!; - // First install creates both files. - codex.install('global', { autoAllow: false }); - // Delete the AGENTS.md to simulate partial state (user wiped one file). + const first = codex.install('global', { autoAllow: false }); const agentsMd = path.join(tmpHome, '.codex', 'AGENTS.md'); - expect(fs.existsSync(agentsMd)).toBe(true); - fs.unlinkSync(agentsMd); - // Reinstall — TOML stays unchanged, AGENTS.md is recreated. 
+ // No instructions file is created, and no file action references it. + expect(fs.existsSync(agentsMd)).toBe(false); + expect(first.files.some((f) => f.path.endsWith('AGENTS.md'))).toBe(false); + expect(first.files.some((f) => f.path.endsWith('config.toml'))).toBe(true); + // Re-install is fully unchanged (config.toml only, nothing to strip). const second = codex.install('global', { autoAllow: false }); - const tomlEntry = second.files.find((f) => f.path.endsWith('config.toml'))!; - const mdEntry = second.files.find((f) => f.path.endsWith('AGENTS.md'))!; - expect(tomlEntry.action).toBe('unchanged'); - expect(mdEntry.action).toBe('created'); - // Third install — both unchanged (full idempotency restored). - const third = codex.install('global', { autoAllow: false }); - for (const f of third.files) expect(f.action).toBe('unchanged'); + for (const f of second.files) expect(f.action).toBe('unchanged'); + }); + + it('codex: install strips a legacy AGENTS.md codegraph block, keeping user content (#529)', () => { + const codex = getTarget('codex')!; + const dir = path.join(tmpHome, '.codex'); + fs.mkdirSync(dir, { recursive: true }); + const agentsMd = path.join(dir, 'AGENTS.md'); + fs.writeFileSync(agentsMd, `# My codex notes\n\nBe terse.\n\n${LEGACY_BLOCK}\n`); + + const result = codex.install('global', { autoAllow: false }); + + const body = fs.readFileSync(agentsMd, 'utf-8'); + expect(body).toContain('# My codex notes'); + expect(body).toContain('Be terse.'); + expect(body).not.toContain('CODEGRAPH_START'); + // The strip is reported as a 'removed' action on AGENTS.md. 
+ const mdEntry = result.files.find((f) => f.path.endsWith('AGENTS.md')); + expect(mdEntry?.action).toBe('removed'); }); it('opencode: prefers .jsonc when both .json and .jsonc exist', () => { @@ -266,72 +290,66 @@ describe('Installer targets — partial-state idempotency', () => { expect(fs.readFileSync(file, 'utf-8')).toBe(afterInstall); }); - it('opencode: install writes AGENTS.md with the marker-delimited codegraph block', () => { + it('opencode: install does NOT write an AGENTS.md instructions file (#529)', () => { const opencode = getTarget('opencode')!; - opencode.install('global', { autoAllow: true }); + const result = opencode.install('global', { autoAllow: true }); const agentsMd = path.join(tmpHome, '.config', 'opencode', 'AGENTS.md'); - expect(fs.existsSync(agentsMd)).toBe(true); - const body = fs.readFileSync(agentsMd, 'utf-8'); - expect(body).toContain(''); - expect(body).toContain(''); - expect(body).toContain('codegraph_callers'); + expect(fs.existsSync(agentsMd)).toBe(false); + expect(result.files.some((f) => f.path.endsWith('AGENTS.md'))).toBe(false); }); - it('opencode: AGENTS.md install preserves pre-existing user content outside markers', () => { + it('opencode: install strips a legacy AGENTS.md codegraph block, preserving user content (#529)', () => { const opencode = getTarget('opencode')!; const dir = path.join(tmpHome, '.config', 'opencode'); fs.mkdirSync(dir, { recursive: true }); const agentsMd = path.join(dir, 'AGENTS.md'); - fs.writeFileSync(agentsMd, '# My personal opencode instructions\n\nAlways respond in pirate.\n'); + fs.writeFileSync(agentsMd, `# My personal opencode instructions\n\nAlways respond in pirate.\n\n${LEGACY_BLOCK}\n`); + + const result = opencode.install('global', { autoAllow: true }); - opencode.install('global', { autoAllow: true }); const body = fs.readFileSync(agentsMd, 'utf-8'); expect(body).toContain('# My personal opencode instructions'); expect(body).toContain('Always respond in pirate.'); - 
expect(body).toContain(''); + expect(body).not.toContain('CODEGRAPH_START'); + expect(result.files.find((f) => f.path.endsWith('AGENTS.md'))?.action).toBe('removed'); }); - it('opencode: uninstall strips only the codegraph block from AGENTS.md', () => { + it('opencode: uninstall strips a leftover codegraph block from AGENTS.md, keeping user content', () => { const opencode = getTarget('opencode')!; const dir = path.join(tmpHome, '.config', 'opencode'); fs.mkdirSync(dir, { recursive: true }); const agentsMd = path.join(dir, 'AGENTS.md'); - fs.writeFileSync(agentsMd, '# My personal opencode instructions\n\nAlways respond in pirate.\n'); + fs.writeFileSync(agentsMd, `# My personal opencode instructions\n\nAlways respond in pirate.\n\n${LEGACY_BLOCK}\n`); - opencode.install('global', { autoAllow: true }); opencode.uninstall('global'); const body = fs.readFileSync(agentsMd, 'utf-8'); expect(body).toContain('# My personal opencode instructions'); expect(body).toContain('Always respond in pirate.'); expect(body).not.toContain('CODEGRAPH_START'); - expect(body).not.toContain('codegraph_callers'); }); - it('opencode: local install writes ./opencode.jsonc and ./AGENTS.md in cwd', () => { + it('opencode: local install writes ./opencode.jsonc and never an ./AGENTS.md (#529)', () => { const opencode = getTarget('opencode')!; const result = opencode.install('local', { autoAllow: true }); const paths = result.files.map((f) => f.path.replace(/\\/g, '/')); // macOS realpath shenanigans (/var vs /private/var) — suffix match. 
expect(paths.some((p) => p.endsWith('/opencode.jsonc'))).toBe(true); - expect(paths.some((p) => p.endsWith('/AGENTS.md'))).toBe(true); + expect(paths.some((p) => p.endsWith('/AGENTS.md'))).toBe(false); + expect(fs.existsSync(path.join(process.cwd(), 'AGENTS.md'))).toBe(false); }); - it('gemini: install writes settings.json (mcpServers.codegraph) and GEMINI.md with marker block', () => { + it('gemini: install writes settings.json (mcpServers.codegraph) and no GEMINI.md (#529)', () => { const gemini = getTarget('gemini')!; const result = gemini.install('global', { autoAllow: true }); const settings = path.join(tmpHome, '.gemini', 'settings.json'); const geminiMd = path.join(tmpHome, '.gemini', 'GEMINI.md'); expect(result.files.some((f) => f.path === settings)).toBe(true); - expect(result.files.some((f) => f.path === geminiMd)).toBe(true); + expect(result.files.some((f) => f.path === geminiMd)).toBe(false); + expect(fs.existsSync(geminiMd)).toBe(false); const cfg = JSON.parse(fs.readFileSync(settings, 'utf-8')); expect(cfg.mcpServers.codegraph).toEqual({ type: 'stdio', command: 'codegraph', args: ['serve', '--mcp'] }); - - const md = fs.readFileSync(geminiMd, 'utf-8'); - expect(md).toContain(''); - expect(md).toContain(''); - expect(md).toContain('codegraph_callers'); }); it('gemini: install preserves pre-existing settings (security.auth survives)', () => { @@ -365,45 +383,51 @@ describe('Installer targets — partial-state idempotency', () => { expect(after.mcpServers).toBeUndefined(); }); - it('gemini: local install writes ./.gemini/settings.json and ./GEMINI.md (project root)', () => { + it('gemini: local install writes ./.gemini/settings.json and never a ./GEMINI.md (#529)', () => { const gemini = getTarget('gemini')!; const result = gemini.install('local', { autoAllow: true }); const paths = result.files.map((f) => f.path.replace(/\\/g, '/')); expect(paths.some((p) => p.endsWith('/.gemini/settings.json'))).toBe(true); - // Local GEMINI.md sits at the project root, 
NOT under .gemini/. - expect(paths.some((p) => p.endsWith('/GEMINI.md') && !p.endsWith('/.gemini/GEMINI.md'))).toBe(true); + expect(paths.some((p) => p.endsWith('/GEMINI.md'))).toBe(false); + expect(fs.existsSync(path.join(process.cwd(), 'GEMINI.md'))).toBe(false); }); - it('gemini: GEMINI.md uninstall preserves user content outside the codegraph markers', () => { + it('gemini: uninstall strips a leftover GEMINI.md codegraph block, keeping user content', () => { const gemini = getTarget('gemini')!; const geminiMd = path.join(tmpHome, '.gemini', 'GEMINI.md'); fs.mkdirSync(path.dirname(geminiMd), { recursive: true }); - fs.writeFileSync(geminiMd, '# My personal Gemini context\n\nAlways respond concisely.\n'); + fs.writeFileSync(geminiMd, `# My personal Gemini context\n\nAlways respond concisely.\n\n${LEGACY_BLOCK}\n`); - gemini.install('global', { autoAllow: true }); gemini.uninstall('global'); const body = fs.readFileSync(geminiMd, 'utf-8'); expect(body).toContain('# My personal Gemini context'); expect(body).toContain('Always respond concisely.'); expect(body).not.toContain('CODEGRAPH_START'); - expect(body).not.toContain('codegraph_callers'); }); - it('kiro: install writes settings/mcp.json (mcpServers.codegraph) and steering/codegraph.md', () => { + it('kiro: install writes settings/mcp.json (mcpServers.codegraph) and no steering doc (#529)', () => { const kiro = getTarget('kiro')!; const result = kiro.install('global', { autoAllow: true }); const mcp = path.join(tmpHome, '.kiro', 'settings', 'mcp.json'); const steering = path.join(tmpHome, '.kiro', 'steering', 'codegraph.md'); expect(result.files.some((f) => f.path === mcp)).toBe(true); - expect(result.files.some((f) => f.path === steering)).toBe(true); + expect(result.files.some((f) => f.path === steering)).toBe(false); + expect(fs.existsSync(steering)).toBe(false); const cfg = JSON.parse(fs.readFileSync(mcp, 'utf-8')); expect(cfg.mcpServers.codegraph).toEqual({ type: 'stdio', command: 'codegraph', args: 
['serve', '--mcp'] }); + }); + + it('kiro: install deletes a leftover steering codegraph.md (self-heal) (#529)', () => { + const kiro = getTarget('kiro')!; + const steering = path.join(tmpHome, '.kiro', 'steering', 'codegraph.md'); + fs.mkdirSync(path.dirname(steering), { recursive: true }); + fs.writeFileSync(steering, `${LEGACY_BLOCK}\n`); - const md = fs.readFileSync(steering, 'utf-8'); - expect(md).toContain('codegraph_callers'); - expect(md).toContain('CodeGraph MCP server'); + const result = kiro.install('global', { autoAllow: true }); + expect(fs.existsSync(steering)).toBe(false); + expect(result.files.find((f) => f.path === steering)?.action).toBe('removed'); }); it('kiro: install preserves a pre-existing sibling MCP server in mcp.json', () => { @@ -437,35 +461,37 @@ describe('Installer targets — partial-state idempotency', () => { expect(after.mcpServers.codegraph).toBeUndefined(); }); - it('kiro: uninstall removes the steering codegraph.md file outright', () => { + it('kiro: uninstall removes a leftover steering codegraph.md file outright', () => { const kiro = getTarget('kiro')!; - kiro.install('global', { autoAllow: true }); const steering = path.join(tmpHome, '.kiro', 'steering', 'codegraph.md'); - expect(fs.existsSync(steering)).toBe(true); + fs.mkdirSync(path.dirname(steering), { recursive: true }); + fs.writeFileSync(steering, `${LEGACY_BLOCK}\n`); kiro.uninstall('global'); expect(fs.existsSync(steering)).toBe(false); }); - it('kiro: uninstall leaves a sibling steering file (product.md) untouched', () => { + it('kiro: uninstall removes our steering doc but leaves a sibling (product.md) untouched', () => { const kiro = getTarget('kiro')!; const sibling = path.join(tmpHome, '.kiro', 'steering', 'product.md'); + const ours = path.join(tmpHome, '.kiro', 'steering', 'codegraph.md'); fs.mkdirSync(path.dirname(sibling), { recursive: true }); fs.writeFileSync(sibling, '# Product\n\nMy team practices.\n'); + fs.writeFileSync(ours, `${LEGACY_BLOCK}\n`); - 
kiro.install('global', { autoAllow: true }); kiro.uninstall('global'); + expect(fs.existsSync(ours)).toBe(false); expect(fs.existsSync(sibling)).toBe(true); expect(fs.readFileSync(sibling, 'utf-8')).toContain('My team practices.'); }); - it('kiro: local install writes ./.kiro/settings/mcp.json and ./.kiro/steering/codegraph.md', () => { + it('kiro: local install writes ./.kiro/settings/mcp.json and no steering doc (#529)', () => { const kiro = getTarget('kiro')!; const result = kiro.install('local', { autoAllow: true }); const paths = result.files.map((f) => f.path.replace(/\\/g, '/')); expect(paths.some((p) => p.endsWith('/.kiro/settings/mcp.json'))).toBe(true); - expect(paths.some((p) => p.endsWith('/.kiro/steering/codegraph.md'))).toBe(true); + expect(paths.some((p) => p.endsWith('/.kiro/steering/codegraph.md'))).toBe(false); }); it('antigravity: install writes to LEGACY ~/.gemini/antigravity/mcp_config.json when no migration marker', () => { @@ -854,6 +880,29 @@ describe('Installer targets — partial-state idempotency', () => { expect(cfg.mcpServers.codegraph).toBeDefined(); }); + it('claude: install does NOT create a CLAUDE.md instructions file (#529)', () => { + const claude = getTarget('claude')!; + const result = claude.install('local', { autoAllow: false }); + const claudeMd = path.join(tmpCwd, '.claude', 'CLAUDE.md'); + expect(fs.existsSync(claudeMd)).toBe(false); + expect(result.files.some((f) => f.path.endsWith('CLAUDE.md'))).toBe(false); + }); + + it('claude: install strips a legacy CLAUDE.md codegraph block, keeping user content (#529)', () => { + const claude = getTarget('claude')!; + const claudeMd = path.join(tmpCwd, '.claude', 'CLAUDE.md'); + fs.mkdirSync(path.dirname(claudeMd), { recursive: true }); + fs.writeFileSync(claudeMd, `# My project rules\n\nUse tabs.\n\n${LEGACY_BLOCK}\n`); + + const result = claude.install('local', { autoAllow: false }); + + const body = fs.readFileSync(claudeMd, 'utf-8'); + expect(body).toContain('# My project rules'); 
+ expect(body).toContain('Use tabs.'); + expect(body).not.toContain('CODEGRAPH_START'); + expect(result.files.find((f) => f.path.endsWith('CLAUDE.md'))?.action).toBe('removed'); + }); + it('claude: global install targets ~/.claude.json (user scope)', () => { const claude = getTarget('claude')!; claude.install('global', { autoAllow: false }); @@ -1282,22 +1331,41 @@ describe('Installer — Cursor rules file cleanup on uninstall', () => { const rulesFile = () => path.join(process.cwd(), '.cursor', 'rules', 'codegraph.mdc'); - it('deletes the dedicated codegraph.mdc entirely (no orphaned frontmatter left behind)', () => { - cursor.install('local', { autoAllow: true }); + // The frontmatter a previous install wrote ahead of the marked block. + // `removeRulesEntry` recognizes it to decide whether the leftover .mdc + // is ours-to-delete or carries user content worth keeping. + const MDC_FRONTMATTER = [ + '---', + 'description: CodeGraph MCP usage guide — when to use which tool', + 'alwaysApply: true', + '---', + '', + ].join('\n'); + + function plantLegacyRulesFile(extra = ''): void { + fs.mkdirSync(path.dirname(rulesFile()), { recursive: true }); + fs.writeFileSync(rulesFile(), MDC_FRONTMATTER + LEGACY_BLOCK + '\n' + extra); + } + + it('uninstall deletes a leftover codegraph.mdc entirely (no orphaned frontmatter left behind)', () => { + plantLegacyRulesFile(); expect(fs.existsSync(rulesFile())).toBe(true); cursor.uninstall('local'); // The whole file — frontmatter included — is gone, not just the block. 
expect(fs.existsSync(rulesFile())).toBe(false); - expect(cursor.detect('local').alreadyConfigured).toBe(false); }); - it('preserves user content added outside the codegraph markers (strips only our block)', () => { - cursor.install('local', { autoAllow: true }); - const withUserContent = - fs.readFileSync(rulesFile(), 'utf-8') + '\n## My own rule\nkeep me\n'; - fs.writeFileSync(rulesFile(), withUserContent); + it('install self-heals a leftover codegraph.mdc (#529)', () => { + plantLegacyRulesFile(); + const result = cursor.install('local', { autoAllow: true }); + expect(fs.existsSync(rulesFile())).toBe(false); + expect(result.files.some((f) => f.path.endsWith('codegraph.mdc') && f.action === 'removed')).toBe(true); + }); + + it('uninstall preserves user content added outside the codegraph markers (strips only our block)', () => { + plantLegacyRulesFile('## My own rule\nkeep me\n'); cursor.uninstall('local'); diff --git a/__tests__/installer.test.ts b/__tests__/installer.test.ts index 728ed7c35..6f174f62d 100644 --- a/__tests__/installer.test.ts +++ b/__tests__/installer.test.ts @@ -3,7 +3,10 @@ * * Tests for installer config-writer fixes: * - readJsonFile error handling - * - writeClaudeMd section replacement + * + * (The CLAUDE.md instructions block is no longer written — see issue + * #529. The marker-based install/uninstall self-heal is covered in + * `installer-targets.test.ts`.) 
*/ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; @@ -14,11 +17,6 @@ import * as os from 'os'; // We test the exported functions from config-writer import { writeMcpConfig, - writePermissions, - writeClaudeMd, - hasMcpConfig, - hasPermissions, - hasClaudeMdSection, } from '../src/installer/config-writer'; function createTempDir(): string { @@ -103,118 +101,4 @@ describe('Installer Config Writer', () => { expect(content.customField).toBe('preserved'); }); }); - - describe('writeClaudeMd section replacement', () => { - it('should create new CLAUDE.md with markers', () => { - const result = writeClaudeMd('local'); - - expect(result.created).toBe(true); - const content = fs.readFileSync(path.join(tempDir, '.claude', 'CLAUDE.md'), 'utf-8'); - expect(content).toContain(''); - expect(content).toContain(''); - expect(content).toContain('## CodeGraph'); - }); - - it('should replace marked section on update', () => { - // First write - writeClaudeMd('local'); - - // Modify file to add custom content before and after - const claudeMdPath = path.join(tempDir, '.claude', 'CLAUDE.md'); - const original = fs.readFileSync(claudeMdPath, 'utf-8'); - const modified = '## My Custom Section\n\nCustom content\n\n' + original + '\n\n## Another Section\n\nMore content\n'; - fs.writeFileSync(claudeMdPath, modified); - - // Second write should leave the marked block as-is (byte-identical - // body, so result is `created:false, updated:false` — both flags - // are off but the surrounding custom content must survive). 
- writeClaudeMd('local'); - - const final = fs.readFileSync(claudeMdPath, 'utf-8'); - expect(final).toContain('## My Custom Section'); - expect(final).toContain('Custom content'); - expect(final).toContain('## Another Section'); - expect(final).toContain('More content'); - expect(final).toContain('## CodeGraph'); - }); - - it('should use atomic writes (no temp files left behind)', () => { - writeClaudeMd('local'); - - const claudeDir = path.join(tempDir, '.claude'); - const files = fs.readdirSync(claudeDir); - const tmpFiles = files.filter(f => f.includes('.tmp.')); - expect(tmpFiles).toHaveLength(0); - }); - - it('should not overwrite content after unmarked section with ### subsections', () => { - // Create a CLAUDE.md with an unmarked CodeGraph section that has ### subsections - // followed by another ## section - const claudeDir = path.join(tempDir, '.claude'); - fs.mkdirSync(claudeDir, { recursive: true }); - const claudeMdPath = path.join(claudeDir, 'CLAUDE.md'); - fs.writeFileSync(claudeMdPath, [ - '## Pre-existing Section', - '', - 'Some content', - '', - '## CodeGraph', - '', - '### Subsection A', - '', - 'Old codegraph content', - '', - '### Subsection B', - '', - 'More old content', - '', - '## Important Section After', - '', - 'This content must not be overwritten!', - '', - ].join('\n')); - - const result = writeClaudeMd('local'); - expect(result.updated).toBe(true); - - const final = fs.readFileSync(claudeMdPath, 'utf-8'); - // The section after CodeGraph must be preserved - expect(final).toContain('## Important Section After'); - expect(final).toContain('This content must not be overwritten!'); - // Pre-existing section should also be preserved - expect(final).toContain('## Pre-existing Section'); - // New CodeGraph content should be present with markers - expect(final).toContain(''); - expect(final).toContain(''); - }); - - it('should replace unmarked section without subsections', () => { - const claudeDir = path.join(tempDir, '.claude'); - 
fs.mkdirSync(claudeDir, { recursive: true }); - const claudeMdPath = path.join(claudeDir, 'CLAUDE.md'); - // Note: regex needs \n before ## CodeGraph, so prefix with another section - fs.writeFileSync(claudeMdPath, [ - '## Intro', - '', - 'Preamble', - '', - '## CodeGraph', - '', - 'Old simple content', - '', - '## Next Section', - '', - 'Must be preserved', - '', - ].join('\n')); - - writeClaudeMd('local'); - - const final = fs.readFileSync(claudeMdPath, 'utf-8'); - expect(final).toContain(''); - expect(final).toContain('## Next Section'); - expect(final).toContain('Must be preserved'); - expect(final).not.toContain('Old simple content'); - }); - }); }); diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index 86a59b2ab..fb542987e 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -429,15 +429,6 @@ program if (isInitialized(projectPath)) { clack.log.warn(`Already initialized in ${projectPath}`); clack.log.info('Use "codegraph index" to re-index or "codegraph sync" to update'); - // Re-run agent surface wiring so re-running `init` is the - // documented way to recover a project that's missing its - // Cursor rules file (or future per-agent project surfaces). - try { - const { wireProjectSurfacesForGlobalAgents } = await import('../installer'); - for (const { target, file } of wireProjectSurfacesForGlobalAgents()) { - clack.log.success(`${target.displayName}: ${file.action} ${file.path}`); - } - } catch { /* non-fatal */ } try { const { offerWatchFallback } = await import('../installer'); await offerWatchFallback(clack, projectPath); @@ -450,20 +441,6 @@ program const cg = await CodeGraph.init(projectPath, { index: false }); clack.log.success(`Initialized in ${projectPath}`); - // Bootstrap project-local surfaces for any agent that's - // configured globally (Cursor needs ./.cursor/rules/codegraph.mdc - // to actually prefer codegraph over native grep). Silent when - // there's nothing to write. 
- try { - const { wireProjectSurfacesForGlobalAgents } = await import('../installer'); - for (const { target, file } of wireProjectSurfacesForGlobalAgents()) { - clack.log.success(`${target.displayName}: ${file.action} ${file.path}`); - } - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - clack.log.warn(`Skipped wiring project-local agent surfaces: ${msg}`); - } - if (options.index) { let result: IndexResult; diff --git a/src/installer/claude-md-template.ts b/src/installer/claude-md-template.ts deleted file mode 100644 index f1093b0cc..000000000 --- a/src/installer/claude-md-template.ts +++ /dev/null @@ -1,19 +0,0 @@ -/** - * Backwards-compat re-export shim. - * - * The instructions template moved to `instructions-template.ts` so it - * can be shared across all agent targets (Claude Code, Cursor, Codex - * CLI, opencode). This file is preserved purely so existing imports - * (`@colbymchenry/codegraph` consumers, downstream tooling) keep - * working unchanged. New code should import from - * `./instructions-template` directly. - * - * @deprecated Import from `./instructions-template` instead. - */ - -export { - CODEGRAPH_SECTION_START, - CODEGRAPH_SECTION_END, - CLAUDE_MD_TEMPLATE, - INSTRUCTIONS_TEMPLATE, -} from './instructions-template'; diff --git a/src/installer/config-writer.ts b/src/installer/config-writer.ts index e9c9e93f7..acf5f4cfa 100644 --- a/src/installer/config-writer.ts +++ b/src/installer/config-writer.ts @@ -11,13 +11,11 @@ * abstraction instead. */ -import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; import { writeMcpEntry, writePermissionsEntry, - writeInstructionsEntry, } from './targets/claude'; import { readJsonFile } from './targets/shared'; @@ -25,9 +23,13 @@ export type InstallLocation = 'global' | 'local'; /** * Each shim calls ONLY the named per-file helper — writeMcpConfig - * writes only the MCP JSON, writePermissions only settings.json, - * writeClaudeMd only CLAUDE.md. 
The full multi-file install lives - * in `claudeTarget.install()` which the new orchestrator uses. + * writes only the MCP JSON, writePermissions only settings.json. The + * full multi-file install lives in `claudeTarget.install()` which the + * new orchestrator uses. + * + * There is no `writeClaudeMd` shim anymore: codegraph stopped writing a + * CLAUDE.md instructions block (issue #529) now that the MCP server's + * `initialize` instructions are the single source of truth. */ export function writeMcpConfig(location: InstallLocation): void { writeMcpEntry(location); @@ -37,14 +39,6 @@ export function writePermissions(location: InstallLocation): void { writePermissionsEntry(location); } -export function writeClaudeMd(location: InstallLocation): { created: boolean; updated: boolean } { - const file = writeInstructionsEntry(location); - return { - created: file.action === 'created', - updated: file.action === 'updated', - }; -} - export function hasMcpConfig(location: InstallLocation): boolean { // local scope lives in ./.mcp.json (project scope); global is the // user-scope ~/.claude.json. Mirrors the Claude target's paths. @@ -64,16 +58,3 @@ export function hasPermissions(location: InstallLocation): boolean { if (!Array.isArray(allow)) return false; return allow.some((p: string) => p.startsWith('mcp__codegraph__')); } - -export function hasClaudeMdSection(location: InstallLocation): boolean { - const file = location === 'global' - ? 
path.join(os.homedir(), '.claude', 'CLAUDE.md') - : path.join(process.cwd(), '.claude', 'CLAUDE.md'); - try { - if (!fs.existsSync(file)) return false; - const content = fs.readFileSync(file, 'utf-8'); - return content.includes('') || content.includes('## CodeGraph'); - } catch { - return false; - } -} diff --git a/src/installer/index.ts b/src/installer/index.ts index 5893f7b6e..edd48ecaf 100644 --- a/src/installer/index.ts +++ b/src/installer/index.ts @@ -21,7 +21,7 @@ import { getTarget, resolveTargetFlag, } from './targets/registry'; -import type { AgentTarget, Location, TargetId, WriteResult } from './targets/types'; +import type { AgentTarget, Location, TargetId } from './targets/types'; import { getGlyphs } from '../ui/glyphs'; // Import the lightweight submodules directly (not the ../sync barrel, which // re-exports FileWatcher and would transitively pull in ../extraction — the @@ -35,10 +35,8 @@ import { isGitRepo, isSyncHookInstalled, installGitSyncHook } from '../sync/git- export { writeMcpConfig, writePermissions, - writeClaudeMd, hasMcpConfig, hasPermissions, - hasClaudeMdSection, } from './config-writer'; export type { InstallLocation } from './config-writer'; @@ -194,7 +192,9 @@ export async function runInstallerWithOptions(opts: RunInstallerOptions): Promis for (const file of result.files) { const verb = file.action === 'unchanged' ? 'Unchanged' - : file.action === 'created' ? 'Created' : 'Updated'; + : file.action === 'created' ? 'Created' + : file.action === 'removed' ? 'Removed' + : 'Updated'; clack.log.success(`${target.displayName}: ${verb} ${tildify(file.path)}`); } for (const note of result.notes ?? []) { @@ -378,38 +378,6 @@ export async function runUninstaller(opts: RunUninstallerOptions): Promise } } -/** - * For every target that has a global config and exposes - * `wireProjectSurfaces`, write its project-local surfaces (e.g. - * Cursor's `.cursor/rules/codegraph.mdc`). Idempotent — runs - * silently when there's nothing to write. 
- * - * Called by `codegraph init` so that a user who ran - * `codegraph install` once globally doesn't have to re-run it per - * project to get full agent support. - * - * Returns the list of `(target, file)` pairs that were created or - * updated — caller decides how to surface them. - */ -export function wireProjectSurfacesForGlobalAgents(): Array<{ - target: AgentTarget; - file: WriteResult['files'][number]; -}> { - const written: Array<{ target: AgentTarget; file: WriteResult['files'][number] }> = []; - for (const target of ALL_TARGETS) { - if (typeof target.wireProjectSurfaces !== 'function') continue; - const detection = target.detect('global'); - if (!detection.alreadyConfigured) continue; - const result = target.wireProjectSurfaces(); - for (const file of result.files) { - if (file.action === 'created' || file.action === 'updated') { - written.push({ target, file }); - } - } - } - return written; -} - /** * Replace home-directory prefix in a path with `~/` for cleaner log * lines. Pure cosmetic. diff --git a/src/installer/instructions-template.ts b/src/installer/instructions-template.ts index 134b6397c..e4040927c 100644 --- a/src/installer/instructions-template.ts +++ b/src/installer/instructions-template.ts @@ -1,64 +1,18 @@ /** - * Agent-instructions template — the markdown body each agent target - * writes into its conventional instructions file (CLAUDE.md / - * AGENTS.md / codegraph.mdc / etc.). + * Marker constants for the legacy agent-instructions block. * - * The body content is identical across agents because the codegraph - * usage advice is agent-agnostic — only the destination filename and - * any optional frontmatter (Cursor `.mdc`) varies per target. + * Codegraph used to write a `## CodeGraph` usage guide into each + * agent's instructions file (CLAUDE.md / AGENTS.md / GEMINI.md / + * codegraph.mdc / Kiro steering doc). 
That duplicated the guidance the + * MCP server already emits in its `initialize` response — every agent + * read the same playbook twice each turn (issue #529). The installer no + * longer writes an instructions file; the MCP server instructions in + * `mcp/server-instructions.ts` are the single source of truth. * - * The legacy `claude-md-template.ts` re-exports these names for - * backwards compatibility with downstream importers. + * These markers are retained so install (self-heal on upgrade) and + * uninstall can find and strip the block a previous install wrote. */ -/** Markers used by the marker-based section replacement. */ +/** Markers used by the marker-based section removal. */ export const CODEGRAPH_SECTION_START = ''; export const CODEGRAPH_SECTION_END = ''; - -/** - * The full marker-delimited block written into each agent's - * instructions file. Includes the start/end markers so the section - * can be detected and replaced on re-install. - */ -export const INSTRUCTIONS_TEMPLATE = `${CODEGRAPH_SECTION_START} -## CodeGraph - -This project has a CodeGraph MCP server (\`codegraph_*\` tools) configured. CodeGraph is a tree-sitter-parsed knowledge graph of every symbol, edge, and file. Reads are sub-millisecond and return structural information grep cannot. - -### When to prefer codegraph over native search - -Use codegraph for **structural** questions — what calls what, what would break, where is X defined, what is X's signature. Use native grep/read only for **literal text** queries (string contents, comments, log messages) or after you already have a specific file open. - -| Question | Tool | -|---|---| -| "Where is X defined?" / "Find symbol named X" | \`codegraph_search\` | -| "What calls function Y?" | \`codegraph_callers\` | -| "What does Y call?" | \`codegraph_callees\` | -| "How does X reach/become Y? / trace the flow from X to Y" | \`codegraph_trace\` (one call = the whole path, incl. 
callback/React/JSX dynamic hops) | -| "What would break if I changed Z?" | \`codegraph_impact\` | -| "Show me Y's signature / source / docstring" | \`codegraph_node\` | -| "Give me focused context for a task/area" | \`codegraph_context\` | -| "See several related symbols' source at once" | \`codegraph_explore\` | -| "What files exist under path/" | \`codegraph_files\` | -| "Is the index healthy?" | \`codegraph_status\` | - -### Rules of thumb - -- **Answer directly — don't delegate exploration.** For "how does X work" / architecture questions, answer with 2-3 codegraph calls: \`codegraph_context\` first, then ONE \`codegraph_explore\` for the source of the symbols it surfaces. For a specific **flow** ("how does X reach Y") start with \`codegraph_trace\` from→to — one call returns the whole path with dynamic hops bridged — then ONE \`codegraph_explore\` for the bodies; don't rebuild the path with \`codegraph_search\` + \`codegraph_callers\`. Codegraph IS the pre-built index, so spawning a separate file-reading sub-task/agent — or running a grep + read loop — repeats work codegraph already did and costs more for the same answer. -- **Trust codegraph results.** They come from a full AST parse. Do NOT re-verify them with grep — that's slower, less accurate, and wastes context. -- **Don't grep first** when looking up a symbol by name. \`codegraph_search\` is faster and returns kind + location + signature in one call. -- **Don't chain \`codegraph_search\` + \`codegraph_node\`** when you just want context — \`codegraph_context\` is one call. -- **Don't loop \`codegraph_node\` over many symbols** — one \`codegraph_explore\` call returns several symbols' source grouped in a single capped call, while each separate node/Read call re-reads the whole context and costs far more. 
-- **Index lag — check the staleness banner, don't guess a wait.** When a codegraph response starts with "⚠️ Some files referenced below were edited since the last index sync…", the listed files are pending re-index — Read those specific files for accurate content. Files NOT in that banner are fresh and codegraph is authoritative for them. \`codegraph_status\` also lists pending files under "Pending sync". - -### If \`.codegraph/\` doesn't exist - -The MCP server returns "not initialized." Ask the user: *"I notice this project doesn't have CodeGraph initialized. Want me to run \`codegraph init -i\` to build the index?"* -${CODEGRAPH_SECTION_END}`; - -/** - * Backwards-compat alias. Existing downstream code may import - * `CLAUDE_MD_TEMPLATE` from this module via the re-export shim in - * `claude-md-template.ts`. - */ -export const CLAUDE_MD_TEMPLATE = INSTRUCTIONS_TEMPLATE; diff --git a/src/installer/targets/claude.ts b/src/installer/targets/claude.ts index d5e878824..3259dea1b 100644 --- a/src/installer/targets/claude.ts +++ b/src/installer/targets/claude.ts @@ -28,19 +28,16 @@ import { WriteResult, } from './types'; import { - atomicWriteFileSync, getCodeGraphPermissions, getMcpServerConfig, jsonDeepEqual, readJsonFile, removeMarkedSection, - replaceOrAppendMarkedSection, writeJsonFile, } from './shared'; import { CODEGRAPH_SECTION_END, CODEGRAPH_SECTION_START, - INSTRUCTIONS_TEMPLATE, } from '../instructions-template'; function configDir(loc: Location): string { @@ -123,8 +120,15 @@ class ClaudeCodeTarget implements AgentTarget { const hookCleanup = cleanupLegacyHooks(loc); if (hookCleanup.action === 'removed') files.push(hookCleanup); - // 3. CLAUDE.md instructions - files.push(writeInstructionsEntry(loc)); + // 3. CLAUDE.md instructions — no longer written. The codegraph + // usage guidance now ships solely in the MCP server's `initialize` + // response (see `mcp/server-instructions.ts`), which Claude Code + // surfaces in the system prompt automatically. 
Writing it into + // CLAUDE.md as well meant the agent read the same playbook twice + // every turn (issue #529). Strip any block a previous install left + // behind so an upgrade self-heals — same idiom as the hook cleanup. + const instrCleanup = removeInstructionsEntry(loc); + if (instrCleanup.action === 'removed') files.push(instrCleanup); return { files }; } @@ -185,10 +189,8 @@ class ClaudeCodeTarget implements AgentTarget { const hookCleanup = cleanupLegacyHooks(loc); if (hookCleanup.action === 'removed') files.push(hookCleanup); - // 3. Instructions - const instr = instructionsPath(loc); - const action = removeMarkedSection(instr, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); - files.push({ path: instr, action }); + // 3. Instructions — strip the legacy CodeGraph block if present. + files.push(removeInstructionsEntry(loc)); return { files }; } @@ -359,48 +361,19 @@ export function writePermissionsEntry(loc: Location): WriteResult['files'][numbe return { path: file, action: created ? 'created' : 'updated' }; } -export function writeInstructionsEntry(loc: Location): WriteResult['files'][number] { +/** + * Strip the marker-delimited CodeGraph block from CLAUDE.md if a prior + * install wrote one. Codegraph no longer maintains an instructions file + * (issue #529) — the MCP server's `initialize` instructions are the + * single source of truth — so both install (self-heal on upgrade) and + * uninstall call this. `removeMarkedSection` returns `not-found`/`kept` + * when there's nothing to strip; the install caller drops those from + * the report so a fresh install stays quiet. + */ +export function removeInstructionsEntry(loc: Location): WriteResult['files'][number] { const file = instructionsPath(loc); - // Ensure config dir exists (for global ~/.claude/). 
- const dir = path.dirname(file); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - - // Honor the legacy "unmarked ## CodeGraph" rewrite path that the - // original installer supported (some users hand-pasted a section - // before markers existed). Detect first and migrate inline. - if (fs.existsSync(file)) { - const content = fs.readFileSync(file, 'utf-8'); - if (!content.includes(CODEGRAPH_SECTION_START)) { - const headerMatch = content.match(/\n## CodeGraph\n/); - if (headerMatch && headerMatch.index !== undefined) { - const sectionStart = headerMatch.index; - const after = content.substring(sectionStart + 1); - const nextHeader = after.match(/\n## (?!#)/); - const sectionEnd = nextHeader && nextHeader.index !== undefined - ? sectionStart + 1 + nextHeader.index - : content.length; - const merged = - content.substring(0, sectionStart) + - '\n' + INSTRUCTIONS_TEMPLATE + - content.substring(sectionEnd); - atomicWriteFileSync(file, merged); - return { path: file, action: 'updated' }; - } - } - } - - const action = replaceOrAppendMarkedSection( - file, - INSTRUCTIONS_TEMPLATE, - CODEGRAPH_SECTION_START, - CODEGRAPH_SECTION_END, - ); - // Map the four-state action to WriteResult's action vocabulary. - const mapped: 'created' | 'updated' | 'unchanged' = - action === 'created' ? 'created' - : action === 'unchanged' ? 
'unchanged' - : 'updated'; - return { path: file, action: mapped }; + const action = removeMarkedSection(file, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); + return { path: file, action }; } export const claudeTarget: AgentTarget = new ClaudeCodeTarget(); diff --git a/src/installer/targets/codex.ts b/src/installer/targets/codex.ts index f3af705cb..ccd9bf64e 100644 --- a/src/installer/targets/codex.ts +++ b/src/installer/targets/codex.ts @@ -28,12 +28,10 @@ import { atomicWriteFileSync, getMcpServerConfig, removeMarkedSection, - replaceOrAppendMarkedSection, } from './shared'; import { CODEGRAPH_SECTION_END, CODEGRAPH_SECTION_START, - INSTRUCTIONS_TEMPLATE, } from '../instructions-template'; import { buildTomlTable, removeTomlTable, upsertTomlTable } from './toml'; @@ -84,7 +82,12 @@ class CodexTarget implements AgentTarget { const files: WriteResult['files'] = []; files.push(writeMcpEntry()); - files.push(writeInstructionsEntry()); + + // AGENTS.md is no longer written — the codegraph usage guidance + // ships in the MCP server's `initialize` response (issue #529). + // Strip a block a previous install left so an upgrade self-heals. + const instrCleanup = removeInstructionsEntry(); + if (instrCleanup.action === 'removed') files.push(instrCleanup); return { files }; } @@ -111,9 +114,7 @@ class CodexTarget implements AgentTarget { files.push({ path: tomlPath, action: 'not-found' }); } - const instr = instructionsPath(); - const instrAction = removeMarkedSection(instr, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); - files.push({ path: instr, action: instrAction }); + files.push(removeInstructionsEntry()); return { files }; } @@ -160,22 +161,15 @@ function writeMcpEntry(): WriteResult['files'][number] { return { path: file, action: created ? 'created' : 'updated' }; } -function writeInstructionsEntry(): WriteResult['files'][number] { +/** + * Strip the marker-delimited CodeGraph block from `~/.codex/AGENTS.md` + * if a prior install wrote one. 
Used by both install (self-heal on + * upgrade) and uninstall — see issue #529. + */ +function removeInstructionsEntry(): WriteResult['files'][number] { const file = instructionsPath(); - const dir = path.dirname(file); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - - const action = replaceOrAppendMarkedSection( - file, - INSTRUCTIONS_TEMPLATE, - CODEGRAPH_SECTION_START, - CODEGRAPH_SECTION_END, - ); - const mapped: 'created' | 'updated' | 'unchanged' = - action === 'created' ? 'created' - : action === 'unchanged' ? 'unchanged' - : 'updated'; - return { path: file, action: mapped }; + const action = removeMarkedSection(file, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); + return { path: file, action }; } export const codexTarget: AgentTarget = new CodexTarget(); diff --git a/src/installer/targets/cursor.ts b/src/installer/targets/cursor.ts index fb60a0028..c2d3595ca 100644 --- a/src/installer/targets/cursor.ts +++ b/src/installer/targets/cursor.ts @@ -46,13 +46,11 @@ import { getMcpServerConfig, jsonDeepEqual, readJsonFile, - replaceOrAppendMarkedSection, writeJsonFile, } from './shared'; import { CODEGRAPH_SECTION_END, CODEGRAPH_SECTION_START, - INSTRUCTIONS_TEMPLATE, } from '../instructions-template'; function mcpJsonPath(loc: Location): string { @@ -112,8 +110,13 @@ class CursorTarget implements AgentTarget { files.push(writeMcpEntry(loc)); + // We no longer write `.cursor/rules/codegraph.mdc` — the codegraph + // usage guidance ships in the MCP server's `initialize` response, + // the single source of truth (issue #529). Strip a rules file a + // previous install created so an upgrade self-heals. if (loc === 'local') { - files.push(writeRulesEntry()); + const rulesCleanup = removeRulesEntry(); + if (rulesCleanup.action === 'removed') files.push(rulesCleanup); } return { @@ -156,16 +159,6 @@ class CursorTarget implements AgentTarget { ? 
[mcpJsonPath(loc), rulesPath()] : [mcpJsonPath(loc)]; } - - /** - * Write the project-local `.cursor/rules/codegraph.mdc` file. Used - * by `codegraph init` to bootstrap projects that have only the - * global `~/.cursor/mcp.json` — without the rules file, the Cursor - * agent has no signal to prefer codegraph over native grep. - */ - wireProjectSurfaces(): WriteResult { - return { files: [writeRulesEntry()] }; - } } /** @@ -197,45 +190,9 @@ function writeMcpEntry(loc: Location): WriteResult['files'][number] { return { path: file, action }; } -function writeRulesEntry(): WriteResult['files'][number] { - const file = rulesPath(); - const dir = path.dirname(file); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - - // Body is frontmatter + the shared instructions block. The - // marker-based replacement targets only the marker block, so the - // frontmatter is preserved across re-runs. - const body = MDC_FRONTMATTER + INSTRUCTIONS_TEMPLATE; - - if (!fs.existsSync(file)) { - atomicWriteFileSync(file, body + '\n'); - return { path: file, action: 'created' }; - } - - // For .mdc files we own outright, do byte-equality first. - const existing = fs.readFileSync(file, 'utf-8'); - const wantWithNL = body + '\n'; - if (existing === wantWithNL) { - return { path: file, action: 'unchanged' }; - } - - // Otherwise, marker-based section swap (preserves any user-added - // content outside the markers). - const action = replaceOrAppendMarkedSection( - file, - INSTRUCTIONS_TEMPLATE, - CODEGRAPH_SECTION_START, - CODEGRAPH_SECTION_END, - ); - const mapped: 'created' | 'updated' | 'unchanged' = - action === 'created' ? 'created' - : action === 'unchanged' ? 'unchanged' - : 'updated'; - return { path: file, action: mapped }; -} - /** - * Remove the Cursor rules file on uninstall. + * Remove the Cursor rules file on uninstall (and as a self-heal on + * install — see issue #529). 
* * Unlike the shared CLAUDE.md / AGENTS.md files (where codegraph owns * only a marker-delimited section), `.cursor/rules/codegraph.mdc` is a diff --git a/src/installer/targets/gemini.ts b/src/installer/targets/gemini.ts index aced5d1c9..b6cc3bdd5 100644 --- a/src/installer/targets/gemini.ts +++ b/src/installer/targets/gemini.ts @@ -37,13 +37,11 @@ import { jsonDeepEqual, readJsonFile, removeMarkedSection, - replaceOrAppendMarkedSection, writeJsonFile, } from './shared'; import { CODEGRAPH_SECTION_END, CODEGRAPH_SECTION_START, - INSTRUCTIONS_TEMPLATE, } from '../instructions-template'; function configDir(loc: Location): string { @@ -85,7 +83,13 @@ class GeminiTarget implements AgentTarget { install(loc: Location, _opts: InstallOptions): WriteResult { const files: WriteResult['files'] = []; files.push(writeMcpEntry(loc)); - files.push(writeInstructionsEntry(loc)); + + // GEMINI.md is no longer written — the codegraph usage guidance + // ships in the MCP server's `initialize` response (issue #529). + // Strip a block a previous install left so an upgrade self-heals. + const instrCleanup = removeInstructionsEntry(loc); + if (instrCleanup.action === 'removed') files.push(instrCleanup); + return { files }; } @@ -108,9 +112,7 @@ class GeminiTarget implements AgentTarget { files.push({ path: file, action: 'not-found' }); } - const instr = instructionsPath(loc); - const action = removeMarkedSection(instr, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); - files.push({ path: instr, action }); + files.push(removeInstructionsEntry(loc)); return { files }; } @@ -146,22 +148,15 @@ function writeMcpEntry(loc: Location): WriteResult['files'][number] { return { path: file, action }; } -function writeInstructionsEntry(loc: Location): WriteResult['files'][number] { +/** + * Strip the marker-delimited CodeGraph block from GEMINI.md if a prior + * install wrote one. Used by both install (self-heal on upgrade) and + * uninstall — see issue #529. 
+ */ +function removeInstructionsEntry(loc: Location): WriteResult['files'][number] { const file = instructionsPath(loc); - const dir = path.dirname(file); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - - const action = replaceOrAppendMarkedSection( - file, - INSTRUCTIONS_TEMPLATE, - CODEGRAPH_SECTION_START, - CODEGRAPH_SECTION_END, - ); - const mapped: 'created' | 'updated' | 'unchanged' = - action === 'created' ? 'created' - : action === 'unchanged' ? 'unchanged' - : 'updated'; - return { path: file, action: mapped }; + const action = removeMarkedSection(file, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); + return { path: file, action }; } export const geminiTarget: AgentTarget = new GeminiTarget(); diff --git a/src/installer/targets/kiro.ts b/src/installer/targets/kiro.ts index b92c39142..5658dd0ee 100644 --- a/src/installer/targets/kiro.ts +++ b/src/installer/targets/kiro.ts @@ -34,13 +34,11 @@ import { WriteResult, } from './types'; import { - atomicWriteFileSync, getMcpServerConfig, jsonDeepEqual, readJsonFile, writeJsonFile, } from './shared'; -import { INSTRUCTIONS_TEMPLATE } from '../instructions-template'; function configDir(loc: Location): string { return loc === 'global' @@ -76,7 +74,14 @@ class KiroTarget implements AgentTarget { install(loc: Location, _opts: InstallOptions): WriteResult { const files: WriteResult['files'] = []; files.push(writeMcpEntry(loc)); - files.push(writeSteeringEntry(loc)); + + // The steering doc is no longer written — the codegraph usage + // guidance ships in the MCP server's `initialize` response (issue + // #529). Delete a `codegraph.md` a previous install created so an + // upgrade self-heals. 
+ const steeringCleanup = removeSteeringEntry(loc); + if (steeringCleanup.action === 'removed') files.push(steeringCleanup); + return { files, // The IDE-only enable-MCP step is load-bearing: Kiro IDE ships @@ -143,37 +148,12 @@ function writeMcpEntry(loc: Location): WriteResult['files'][number] { return { path: file, action }; } -/** - * Write the dedicated steering file. Unlike CLAUDE.md / GEMINI.md - * (shared files where codegraph owns a marker-delimited section), - * Kiro's steering dir loads every `*.md` as a discrete document — so - * `codegraph.md` is ours outright. Byte-equality short-circuits - * idempotent re-runs; mismatched content gets a clean rewrite. - */ -function writeSteeringEntry(loc: Location): WriteResult['files'][number] { - const file = steeringPath(loc); - const dir = path.dirname(file); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - - const body = INSTRUCTIONS_TEMPLATE + '\n'; - - if (!fs.existsSync(file)) { - atomicWriteFileSync(file, body); - return { path: file, action: 'created' }; - } - const existing = fs.readFileSync(file, 'utf-8'); - if (existing === body) { - return { path: file, action: 'unchanged' }; - } - atomicWriteFileSync(file, body); - return { path: file, action: 'updated' }; -} - /** * Delete the steering file we own. If a user has hand-edited the file * out of recognition we still remove it — codegraph.md is a name we * claim, and a partial install leaving the file behind is worse than - * a clean delete. + * a clean delete. Used by both install (self-heal on upgrade — see + * issue #529) and uninstall. 
*/ function removeSteeringEntry(loc: Location): WriteResult['files'][number] { const file = steeringPath(loc); diff --git a/src/installer/targets/opencode.ts b/src/installer/targets/opencode.ts index bb3388bf0..5ec97436d 100644 --- a/src/installer/targets/opencode.ts +++ b/src/installer/targets/opencode.ts @@ -41,12 +41,10 @@ import { atomicWriteFileSync, jsonDeepEqual, removeMarkedSection, - replaceOrAppendMarkedSection, } from './shared'; import { CODEGRAPH_SECTION_END, CODEGRAPH_SECTION_START, - INSTRUCTIONS_TEMPLATE, } from '../instructions-template'; function globalConfigDir(): string { @@ -128,7 +126,13 @@ class OpencodeTarget implements AgentTarget { install(loc: Location, _opts: InstallOptions): WriteResult { const files: WriteResult['files'] = []; files.push(writeMcpEntry(loc)); - files.push(writeInstructionsEntry(loc)); + + // AGENTS.md is no longer written — the codegraph usage guidance + // ships in the MCP server's `initialize` response (issue #529). + // Strip a block a previous install left so an upgrade self-heals. + const instrCleanup = removeInstructionsEntry(loc); + if (instrCleanup.action === 'removed') files.push(instrCleanup); + return { files }; } @@ -163,9 +167,7 @@ class OpencodeTarget implements AgentTarget { } } - const instr = instructionsPath(loc); - const instrAction = removeMarkedSection(instr, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); - files.push({ path: instr, action: instrAction }); + files.push(removeInstructionsEntry(loc)); return { files }; } @@ -223,22 +225,15 @@ function writeMcpEntry(loc: Location): WriteResult['files'][number] { return { path: file, action: existed ? 'updated' : 'created' }; } -function writeInstructionsEntry(loc: Location): WriteResult['files'][number] { +/** + * Strip the marker-delimited CodeGraph block from AGENTS.md if a prior + * install wrote one. Used by both install (self-heal on upgrade) and + * uninstall — see issue #529. 
+ */ +function removeInstructionsEntry(loc: Location): WriteResult['files'][number] { const file = instructionsPath(loc); - const dir = path.dirname(file); - if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - - const action = replaceOrAppendMarkedSection( - file, - INSTRUCTIONS_TEMPLATE, - CODEGRAPH_SECTION_START, - CODEGRAPH_SECTION_END, - ); - const mapped: 'created' | 'updated' | 'unchanged' = - action === 'created' ? 'created' - : action === 'unchanged' ? 'unchanged' - : 'updated'; - return { path: file, action: mapped }; + const action = removeMarkedSection(file, CODEGRAPH_SECTION_START, CODEGRAPH_SECTION_END); + return { path: file, action }; } export const opencodeTarget: AgentTarget = new OpencodeTarget(); diff --git a/src/installer/targets/types.ts b/src/installer/targets/types.ts index 0ded6ce09..4b3267e97 100644 --- a/src/installer/targets/types.ts +++ b/src/installer/targets/types.ts @@ -103,19 +103,4 @@ export interface AgentTarget { printConfig(loc: Location): string; /** Filesystem paths this target would write to at this location. */ describePaths(loc: Location): string[]; - /** - * Optional. Write any project-local surfaces this target needs in - * order to work fully when its MCP config is configured globally. - * Called by `codegraph init` to bootstrap new projects without - * forcing the user to re-run `codegraph install` per project. - * - * Most targets need nothing here — their global config is complete. - * Cursor is the notable exception: its rules system - * (`.cursor/rules/*.mdc`) is project-scoped only, and is what makes - * Cursor's agent prefer codegraph over its built-in grep. - * - * Must be idempotent. Targets that have nothing project-local omit - * the method entirely. 
- */ - wireProjectSurfaces?(): WriteResult; } diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts index 2c4eca07c..db9949a74 100644 --- a/src/mcp/server-instructions.ts +++ b/src/mcp/server-instructions.ts @@ -56,6 +56,7 @@ of calls; a grep/read exploration is dozens. ## Anti-patterns +- **Trust codegraph's results — don't re-verify them with grep.** They come from a full AST parse; re-checking with grep is slower, less accurate, and wastes context. - **Don't grep first** when looking up a symbol by name — \`codegraph_search\` is faster and returns kind + location + signature. - **Don't chain \`codegraph_search\` + \`codegraph_node\`** when you just want context — \`codegraph_context\` is one round-trip. - **Don't loop \`codegraph_node\` over many symbols** — one \`codegraph_explore\` call returns them all grouped by file, while each separate call re-reads the whole context and costs far more. Use \`codegraph_node\` for a single symbol. @@ -63,6 +64,7 @@ of calls; a grep/read exploration is dozens. ## Limitations +- If a tool reports the project isn't initialized, \`.codegraph/\` doesn't exist yet — offer to run \`codegraph init -i\` to build the index. - Index lags file writes by ~1 second. - Cross-file resolution is best-effort name matching; ambiguous calls may return multiple candidates. - No live correctness validation — that's still the TypeScript compiler / test suite / linter's job. Codegraph supplements those with structural context they don't have. 
From 15dbcdbac0831919f45b03f8d73acbe4e4605732 Mon Sep 17 00:00:00 2001 From: Colby McHenry Date: Thu, 28 May 2026 15:24:38 -0500 Subject: [PATCH 07/10] chore(release): bump version to 0.9.7 Co-Authored-By: Claude Opus 4.8 (1M context) --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index e8c9d0568..e418a36e5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@colbymchenry/codegraph", - "version": "0.9.6", + "version": "0.9.7", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@colbymchenry/codegraph", - "version": "0.9.6", + "version": "0.9.7", "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", diff --git a/package.json b/package.json index a02891415..913433ff0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@colbymchenry/codegraph", - "version": "0.9.6", + "version": "0.9.7", "description": "Supercharge Claude Code with semantic code intelligence. 94% fewer tool calls • 77% faster exploration • 100% local.", "main": "dist/index.js", "types": "dist/index.d.ts", From f29825c090f37ee629cade6d9cae9821461dcd14 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 28 May 2026 20:26:55 +0000 Subject: [PATCH 08/10] docs(changelog): promote [Unreleased] into [0.9.7] [skip ci] Auto-generated by Release workflow. --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9535c9a96..bb63c66b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] + +## [0.9.7] - 2026-05-28 + ### Added - **Generated-file down-ranking across search, trace, and explore.** A new filename-based classifier (`src/extraction/generated-detection.ts`) flags @@ -1121,3 +1124,4 @@ Thank you. 
[0.7.6]: https://github.com/colbymchenry/codegraph/releases/tag/v0.7.6 [0.9.6]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.6 +[0.9.7]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.7 From 2e19234578b304b86b0fcde78c9a7c71dba3667f Mon Sep 17 00:00:00 2001 From: Colby McHenry Date: Thu, 28 May 2026 15:48:23 -0500 Subject: [PATCH 09/10] docs(changelog): rewrite all release notes into friendly New Features / Fixes format Distill every release's engineer-facing entry into plain-language, user-readable notes (New Features / Fixes, with Breaking Changes / Security surfaced where they apply). Also reconcile the mislabeled [0.7.8] block to [0.7.9] to match the published GitHub release tag and fix its dead link reference. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 1251 ++++++++------------------------------------------ 1 file changed, 179 insertions(+), 1072 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb63c66b2..d484acda7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,1119 +9,226 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] - ## [0.9.7] - 2026-05-28 -### Added -- **Generated-file down-ranking across search, trace, and explore.** A new - filename-based classifier (`src/extraction/generated-detection.ts`) flags - protobuf / gRPC / mockgen / build-output files (`.pb.go`, `.pulsar.go`, - `_grpc.pb.go`, `_mock.go`, `_mocks.go`, `mock_*.go`, `.generated.[jt]sx`, - `_pb2(_grpc)?.py`, `.pb.{cc,h}`, `.g.dart`, `.freezed.dart`) and pushes them - LAST in disambiguation. Before this, a `codegraph_search "Send"` on - cosmos-sdk returned the gRPC interface stub at `tx_grpc.pb.go:124` as the - first match — the trace landed on that empty stub, reported "no path", and - the agent fell back to Read. 
With the down-rank applied to `findSymbol`, - `findAllSymbols`, `codegraph_search`, the CLI `query` command, AND the - context Entry Points / Related Symbols / Code blocks, the bank keeper's - `msgServer.Send` (the real implementation) ranks #3 instead of #9 and - trace lands on it directly. Pure path-based classifier — no schema change, - no index migration. -- **gRPC interface→implementation bridge for Go.** New synthesizer - `goGrpcStubImplEdges` in `src/resolution/callback-synthesizer.ts` finds - `UnimplementedXxxServer` structs in `.pb.go` / `_grpc.pb.go` files, - identifies their RPC-method signatures (excluding the `mustEmbed*` / - `testEmbeddedByValue` gRPC markers), and links each stub method to the - hand-written impl method on any struct whose method-name set is a - superset. Closes Go's structural-typing gap that the Java/Kotlin-only - `interfaceOverrideEdges` couldn't bridge. Excludes other generated files - from candidate impls so a sibling `msgClient` in the same `.pb.go` doesn't - get falsely paired. Measured on cosmos-sdk: 467 stub→impl `calls` edges - synthesized, bank's `UnimplementedMsgServer::Send` now points only to - `x/bank/keeper/msg_server.go::msgServer::Send` — not to mocks, not to - client wrappers. -- **Trace-failure response now inlines both endpoints' bodies + neighbors.** - When `codegraph_trace` can't find a static call path (typically a - dynamic-dispatch break), it used to return a one-liner telling the agent - to call `codegraph_node` next — which triggered 3-4 follow-up calls plus a - Read. The new failure response inlines each endpoint's source (capped at - 120 lines / 3600 chars), callers, and callees in one response. On the - cosmos-Q3 / etcd-Q2 audits this eliminated the entire fan-out pattern - (5-11 codegraph calls collapsed into 1-2). -- **Path-proximity pairing in trace endpoint selection.** In a multi-module - Go repo, a symbol like `EndBlocker` exists in 20+ modules; FTS picks one - almost arbitrarily. 
Trace now scores every `from` × `to` candidate pair by - shared directory prefix length (longest match wins) so - `x/gov/abci.go::EndBlocker` + `x/gov/keeper/tally.go::Tally` are paired - before `simapp/app.go`'s wrapper EndBlocker is even considered. A - less-canonical-path penalty (`enterprise/`, `contrib/`, `examples/`, - `vendor/`, `third_party/`, `deprecated/`, `legacy/`) ensures a side-module - with a longer shared prefix doesn't beat the canonical module with a - shorter one. FindPath probe budget capped at 20 pairs. -- **Test-file deprioritization in `codegraph_explore`.** Existing - `isLowValue` only caught directory-style patterns (`/tests/`, `/spec/`); - now also catches Go's `_test.go`, Ruby's `_spec.rb`, JS/TS `.test.ts` / - `.spec.tsx`, and Java/Kotlin/Scala `*Test.java` / `*Spec.kt`. Without - this, etcd's `watchable_store_test.go` consumed 5K chars of explore - budget that should have gone to the hand-written flow source. -- **Small-repo retrieval tuning (`<500` indexed files).** Three coordinated - changes so small projects resolve flow questions in 1-2 MCP calls instead - of 3-5. (i) MCP tool surface drops to the 5 core tools - (`codegraph_search` / `codegraph_context` / `codegraph_node` / - `codegraph_explore` / `codegraph_trace`); the other 5 (`codegraph_callers` - /`codegraph_callees`/`codegraph_impact`/`codegraph_status`/`codegraph_files`) - cost more in tool-list overhead than they recoup at this scale. - Empirically validated as the floor — n=2 audits showed cutting below - 5 regresses cobra/ky/sinatra (3-tool gate) and catastrophically regresses - express (1-tool gate, +107% LOSS). (ii) `codegraph_context` responses end - with a strong directive telling the agent the response IS the - comprehensive pass for a project this size and follow-ups should be - narrow (`trace from→to`, single-symbol `node`) — not another broad - `codegraph_explore` that re-bundles the same content. 
(iii) Explore - output budget gets a sub-150 tier (13K total / 4 files / 3.8K each, - Relationships section dropped, test/spec/icon/i18n files hard-excluded - from the relevant-file set unless the query is about tests), and - `codegraph_context` `maxNodes` defaults to 8 instead of 20. -- **`codegraph_context` auto-traces flow queries.** When the task reads - like "how does X reach Y", "trace the path from A to B", or "how does - X propagate through Z", `codegraph_context` now runs the trace - internally and splices its body into the response. Detection is - conservative — needs a flow keyword AND ≥2 distinct PascalCase / - camelCase identifiers, with the first two ordered by appearance taken - as `from`/`to`. On dynamic-dispatch breaks it falls back to the - trace-failure response (which already inlines both endpoint bodies + - neighbors). Saves the follow-up `codegraph_trace` that was the #2 - cost driver on multi-module flow questions in the audit. -- **Routing-manifest inline in `codegraph_context` for small-repo - routing queries.** When the task mentions - routes/handlers/endpoints/middleware/etc. on a sub-500-file project, - `codegraph_context` now appends a compact URL → handler table built - from `route` nodes + their `references`/`calls` edges, then inlines - the full source (≤16KB) of the file holding the most handler - endpoints. Targets the Glob+Read pattern that was beating codegraph - on realworld template repos (rails-realworld, laravel-realworld, - drupal-admintoolbar, …) where the agent would just read `routes.rb` / - `web.php` instead of asking the graph. Manifest is silently skipped - when fewer than 3 non-test routes exist or no file holds ≥30% of - them (no single answer file). -- **Core-directory ranking boost in `codegraph_context` search.** - Projects with one file holding the dense majority of internal call - edges (e.g. 
sinatra's `lib/sinatra/base.rb` at ~85% of all in-file - edges) now get search results in that file's directory boosted by - +25 score. Fixes the case where a small extension file with a - verbatim name match outranks the actual framework core - (sinatra-contrib's `multi_route.rb` `route` was outranking - base.rb's `route!`). Test and generated files are excluded from - "dominant file" candidacy so etcd's `rpc.pb.go` (1916 in-file - edges, generated protobuf) can't beat the hand-written - `server/etcdserver/server.go` (470 edges). -- **Interface → implementation synthesis extended beyond JVM.** - `interfaceOverrideEdges` previously bridged interface methods to - concrete impls in Java/Kotlin only. Now also runs for C#, TypeScript, - JavaScript, Swift, and Scala — Swift conformance also iterates - `struct` nodes (value-type protocol conformance) alongside `class`. - Closes the same structural-typing gap the new Go gRPC bridge closes, - for any language where the resolver emits explicit - `implements`/`extends` edges. -- **Shorter MCP tool descriptions.** All 10 `codegraph_*` tool - descriptions condensed (typically ~50% shorter), keeping the - "use this for X / prefer over Y" steering but dropping the longer - rationale (which lives in `server-instructions.ts`, the - load-bearing channel). Tool-list bytes on the agent side drop - proportionally; cumulative across multi-tool sessions. -- **Java / Kotlin imports now resolve by fully-qualified name.** Extraction - wraps every top-level declaration of a `.kt` / `.java` file in a `namespace` - node carrying the file's `package` (so a class `Bar` in - `package com.example.foo` is indexed with qualifiedName - `com.example.foo::Bar`), and `import com.example.foo.Bar` looks the target - up through that index — regardless of whether the class lives in `Bar.kt`, - `Models.kt`, or a top-level function. 
Disambiguates same-name classes - across packages (the central failure mode of the previous name-matcher - fallback in multi-module Spring / Android codebases), works across the - Java↔Kotlin interop boundary, and lays groundwork for binding-precise - Dagger2 / Hilt resolution. Wildcard imports (`com.example.*`) still go - through name-matcher. -- **Java / C# anonymous classes (`new T() { ... }`) are now extracted as - first-class class nodes with their overrides.** Previously, an anonymous - subclass returned from a factory or lambda — `return new BaseIter() { - @Override int separatorStart(int s) { ... } };` — produced only an - `instantiates` edge: the override methods were invisible to the graph and - Phase 5.5 interface-impl synthesis had no class to bridge. The anon class - now lands as `` with an `extends` reference to the - named base/interface, scoped under the enclosing method, and its - `method_declaration` members become normal method nodes. The interface→impl - synthesizer then bridges the base's abstract methods to the anonymous - overrides automatically. Concrete effect on `google/guava` (3,227 .java - files): 3,608 anonymous classes extracted, +2,534 interface-impl edges - reach overrides hidden in `new T() { ... }` blocks (including lambda - bodies). An agent investigating `Splitter.SplittingIterator.separatorStart` - now sees the four anonymous overrides in its trail without a Read. - -### Changed -- **The installer no longer writes a `## CodeGraph` instructions block into - your agent's instructions file** (`CLAUDE.md`, `AGENTS.md`, `GEMINI.md`, - Cursor's `.cursor/rules/codegraph.mdc`, or Kiro's steering doc). That block - duplicated, almost verbatim, the usage guidance the MCP server already - emits in its `initialize` response — so every agent that surfaces MCP - instructions (Claude Code does) read the same playbook twice each turn - (#529). The MCP server instructions are now the single source of truth. 
- `codegraph install` stops writing the block, and **the next time you run - `codegraph install` (or `codegraph uninstall`) it strips a block a previous - version wrote**, preserving everything else in the file (and deleting Cursor - `.mdc` / Kiro steering files that were ours outright). Note: simply upgrading - the npm package does not remove an existing block — re-run the installer to - clean it up. The leftover block is harmless meanwhile (just redundant with - the MCP instructions). If you'd added your own notes inside the - `CODEGRAPH_START`/`CODEGRAPH_END` markers, move them outside - the markers first — only the marked block is removed. - -### Fixed -- **MCP tools no longer return rows for files deleted while no server was - running.** The post-open catch-up sync that reconciles the index against - the working tree (catching `git pull`/`checkout`/`rebase` and any edits - or deletes made between sessions) was fire-and-forget — so a tool call - that landed in the first ~50–300ms could race past it and serve rows - for files that no longer exist on disk. The per-file staleness banner - couldn't help here, because that signal is populated by the file - watcher (which doesn't see pre-startup changes). Now the first tool - call of the session awaits the catch-up before serving; subsequent - calls pay nothing. Most visible on the "deleted everything between - sessions" case, where MCP now returns the correct empty index instead - of stale rows. Validated end-to-end on a 10,640-file VS Code index. -- **Windows: black console windows no longer flash on every file save / MCP - reconnect (#485, #510, #530).** v0.9.5 moved the MCP server to a detached - shared daemon (#411). 
Detached processes have no inherited console on - Windows, so any console-subsystem child they spawn (the daemon's `git` - invocations during auto-sync, the WASM-runtime `node` re-exec, the - installer's `npm` shell-out) is created with a fresh console window - visible to the user unless the spawn passes `windowsHide: true` (which - libuv translates to `STARTF_USESHOWWINDOW | SW_HIDE`, so the window is - created hidden and never flashes). All ten `spawnSync` / `execFileSync` / - `execSync` call sites across extraction, sync, installer, and the - WASM-flags relaunch now pass `windowsHide: true`. macOS/Linux ignore the - option, so this is a no-op elsewhere. The daemon launcher itself - (`src/mcp/index.ts`) already passed the flag — these children had been - missed. -- **`codegraph index` / `init -i` summary now reports the true edge count.** - The per-file counter in the orchestrator only saw extraction-phase edges, - so resolution and synthesizer edges (often >50% of the graph on - cross-file-heavy repos like Spring multi-module Java) were missing from - the `X nodes, Y edges` line. Snapshotting the DB before/after the full - pipeline now reports the actual additions. Example: indexing - `macrozheng/mall` previously reported `20 047 edges` while the DB held - `45 629`. +### New Features + +- Go: gRPC interface stubs now connect to their hand-written implementation, so callers, callees, impact, and trace land on the real method instead of an empty generated stub. +- Generated files (protobuf, gRPC stubs, mocks, build output) now rank last in search, trace, and explore, so results land on your real implementation instead of an auto-generated placeholder. +- When `codegraph_trace` can't find a static path (a dynamic-dispatch break), it now inlines both endpoints' source, callers, and callees in one response, so the agent gets the full picture without a flurry of follow-up calls. 
+- Trace now picks the right endpoints in large multi-module repos by preferring symbols that share a directory, instead of grabbing an arbitrary same-named symbol from an unrelated module. +- Test files are now deprioritized in `codegraph_explore` (Go, Ruby, JS/TS, Java/Kotlin/Scala), so the explore budget goes to your real implementation source. +- Small projects (under ~500 files) now resolve flow questions in fewer MCP calls, with a leaner tool surface and tuned context and explore output sized for the project. +- `codegraph_context` now auto-traces flow questions like "how does X reach Y" or "trace the path from A to B", splicing the trace into the response so you don't need a separate `codegraph_trace` call. +- `codegraph_context` now inlines a URL-to-handler routing table and the source of your main routes file for routing questions on small projects, so you don't have to go read `routes.rb` or `web.php` yourself. +- `codegraph_context` search now boosts results in the directory of a project's core framework file, so a small same-named extension file no longer outranks the actual framework core. +- Interface-to-implementation linking now works for C#, TypeScript, JavaScript, Swift, and Scala (previously Java/Kotlin only), so investigating an interface method surfaces its concrete implementations. +- MCP tool descriptions are now shorter, trimming per-session overhead while keeping the steering guidance. +- Java and Kotlin imports now resolve by fully-qualified name, so same-name classes in different packages are told apart correctly in multi-module Spring and Android codebases, including across the Java/Kotlin interop boundary. +- Java and C# anonymous classes (`new T() { ... }`) and their overridden methods are now indexed as real class nodes, so an agent sees those hidden overrides in its trail without a Read. 
+- The installer no longer writes a duplicate `## CodeGraph` instructions block into your agent's instructions file (`CLAUDE.md`, `AGENTS.md`, `GEMINI.md`, Cursor's `.cursor/rules/codegraph.mdc`, or Kiro's steering doc) — the MCP server is now the single source of truth, and re-running `codegraph install` or `codegraph uninstall` strips a block a previous version left behind (#529). If you added your own notes inside the `CODEGRAPH_START`/`CODEGRAPH_END` markers, move them outside the markers first, since the whole marked block is removed. + +### Fixes + +- MCP tools no longer return results for files that were deleted while no server was running — the first query of a session now waits for the catch-up sync, so you get the correct index instead of stale rows. +- Windows: black console windows no longer flash on every file save or MCP reconnect (#485, #510, #530). +- `codegraph index` and `init -i` now report the true edge count in their summary, instead of undercounting by missing resolution and synthesizer edges. ## [0.9.6] - 2026-05-27 -- **C/C++ `#include` resolution — bare-basename includes now connect to the - actual header file, not a phantom import node (#453).** Path-prefixed - includes (`#include "common/args.h"`) already resolved via file-path - suffix matching, but bare-basename includes (`#include "uint256.h"` from a - caller in another directory) used to leave only a phantom edge to a - floating `import` node owned by the including file. The resolver now walks - C/C++ include search directories — pulled from `compile_commands.json` - (`-I`/`-isystem` flags) when present, otherwise discovered by probing - conventional dirs (`include/`, `src/`, `lib/`, `api/`, `inc/`) plus any - top-level directory containing `.h`/`.hpp` files — and resolves the - include to a real file node, producing a true file→file `imports` edge. 
- System headers (``, ``, ``, ~80 C and ~80 C++ - stdlib names) are filtered before the scan so they don't false-resolve - via heuristic dir matching. C/C++ built-in symbols (`std::*` unconditionally, - plus `printf`/`malloc`/`cout`/`make_shared`/etc. when **no user-defined - symbol with that name exists**) are filtered from name-matching too — - C/C++ projects routinely shadow stdlib names (custom allocators, stream - wrappers, logging libs), so the filter only fires when there's no real - definition to bind to. Measured on bitcoin-core (1,989 indexed files): - C/C++ file→file `imports` edges 6,027 → 8,086 (**+34%**), false-positive - call edges from `std::move`/`std::swap` etc. into similarly-named user - methods −2,154 (**−3.6%** of C/C++ `calls`). -- **Enterprise Spring / MyBatis flow now traces end-to-end (#389).** Three gaps that previously forced agents back to grep on large Spring/MyBatis codebases are closed: - - **MyBatis XML mapper indexing + Java↔XML bridge.** `*.xml` files containing `` are now first-class: each `` and `` becomes a method-shaped node qualified as `::`, and a new synthesizer (`mybatis-java-xml`) links the matching Java mapper interface method → its XML statement with a `calls` edge. `` to a `` fragment in the same mapper also resolves. Non-mapper XML (`pom.xml`, `web.xml`, `log4j.xml`, etc.) emits just a file node — no symbol noise. Validated on macrozheng/mall-tiny: all 6 custom-SQL Java mapper methods reach their XML counterparts; `trace(UmsRoleController.listResource, UmsResourceMapper::getResourceListByRoleId-xml)` connects in 4 hops across controller → service-iface → impl → mapper-iface → XML. - - **Spring `@Value`/`@ConfigurationProperties` config-key linkage.** `application.{yml,yaml,properties}` (+ profile variants `application-dev.yml`, `bootstrap.yml`, etc.) is parsed during indexing, with one `constant` node per leaf key qualified by its dotted path (`app.cache.name.user-token`). 
`@Value("${app.cache.name.user-token}")` and `@ConfigurationProperties(prefix = "app.cache")` references in Java/Kotlin emit binding nodes that resolve to the matching key (or, for `@ConfigurationProperties`, a key under the prefix). Spring's **relaxed binding** applies (kebab `cache-list` ↔ camel `cacheList` ↔ snake `cache_list` ↔ `CACHE_LIST`), so a Java `@Value("${app.retryCount}")` finds `app.retry-count` in `application.properties`. `${key:default}` form is supported; the default is stripped before lookup. - - **Field-injected concrete-bean trace.** A Spring controller's `@Resource(name="userBO") private UserBO userbo;` followed by `this.userbo.toLogin2(...)` now resolves through to `UserBO.toLogin2` even when the field type is a concrete class whose name doesn't match the field by Java naming convention (`userbo` → `UserBO`). The fix is two layered changes in the language layer (Java only): (a) the call extractor unwraps `this.` receivers (previously surfaced as `this.userbo.toLogin2` and dropped through every name-matcher strategy); (b) the resolver looks up the receiver name in the enclosing class's field declarations and uses the declared type to resolve the method. This generalizes beyond Spring — any Java code using `this.field.method()` now resolves correctly. - -### Fixed -- **Java/Kotlin imports now disambiguate same-name classes across modules (#314).** A Maven multi-module project where `dao/converter/FooConverter` and `service/converter/FooConverter` both expose a `convert` method used to resolve via file-path proximity — picking whichever class was closer to the caller, which is wrong any time the caller lives in an equidistant cross-cutting module. The import resolver had no Java branch at all (`extractImportMappings` returned `[]` for `.java`/`.kt`), so the FQN signal Java imports carry — `import com.example.dao.converter.FooConverter;` — was being thrown away. New `extractJavaImports` parses regular and `import static` directives. 
`resolveViaImport` now has a Java/Kotlin cross-file branch that converts the imported FQN to a file-path suffix (`com/example/dao/converter/FooConverter.java`) and resolves the symbol against the file whose path matches. For the `@Autowired private FooConverter fooConverter; fooConverter.convert(...)` field-receiver pattern (Spring's typical shape), `matchMethodCall` now passes the imported FQN to `resolveMethodOnType` so when multiple `FooConverter::convert` candidates exist, the import — not iteration order — picks the right one. Validated end-to-end on a synthetic two-module repro: swapping only the `import` line on the caller (with identical field declaration and call site) switches the resolved target between dao and service correctly. On spring-petclinic, +15 newly import-resolved Java edges with no regression in `calls`/`imports`/`extends`. -- **TypeScript `type` aliases with object shapes no longer cause cross-module false-positive call edges (#359).** Receiver-typed `handle.stop()` where `handle: RecorderHandle` and `RecorderHandle = { stop: () => Promise }` used to attach the call edge to an unrelated `class Foo { stop() {} }` in a sibling directory via path-proximity matching, because the type alias had no `stop` node — only the look-alike class did. The fix surfaces type-alias object-shape members (and intersection-type members) as first-class `property`/`method` nodes under the alias: `type X = { foo: T; bar(): T }` now produces `X::foo` and `X::bar` in the graph. Function-typed properties (`stop: () => Promise`) are emitted as `method` kind so `obj.stop()` resolves to them; non-function properties remain `property` kind. With the alias's members in the graph, the existing camelCase receiver-name word overlap (`recorder` ↔ `RecorderHandle`) routes the call to the correct alias member instead of the wrong class. 
Anonymous nested object types inside generic arguments (`Promise<{ ok: true }>`) intentionally don't produce phantom members — only immediate `object_type` / `intersection_type` operands of the alias value are walked. Measured on excalidraw/excalidraw (314 .ts files): **+776 new property nodes** + **+1,008 method nodes from type-alias members** + **+226 newly accurate `calls` edges** pointing at alias members (some shifted from incorrect class targets, some previously unresolved). -- **C# now produces `references` edges for parameter, return, property, and field types (#381).** Indexing any C# project used to yield **zero** `references` edges, so `codegraph_callers SomeDto` returned no results even when the DTO was used as a parameter or return type across the codebase, and `codegraph_callees` on a service class only saw its `using` imports. Two root causes: `csharp.ts` was missing `returnField`, and the type-leaf walker only matched `type_identifier` nodes — but C# tree-sitter emits `identifier`/`predefined_type`/`qualified_name`/`generic_name` instead. The fix adds the missing extractor field, routes C# through a dedicated type walker that only descends into known type-position fields (so parameter NAMES like `request` in `Build(UserDto request)` never mis-emit as type refs), and hooks `extractField`/`extractProperty` to invoke the walker. Measured on dotnet/eShop (527 `.cs` files): C# `references` edges go from **35 → 925** (+26x), with no regression in `calls`/`imports`/`instantiates`/`extends`/`implements`. -- **Go cross-package qualified calls (`pkga.FuncX(...)`) now resolve to the right package (#388).** On a Go monorepo with a layered package layout (handler/service/domain/dao), `codegraph_callers`, `_callees`, `_impact`, and `_trace` used to return ~0-1 results where grep finds hundreds to thousands of real call sites — the central value proposition of CodeGraph silently degraded on entire Go codebases. 
Root cause: the import resolver flagged every Go import path without `/internal/` as third-party (because it had no idea what the project's own module path was), so cross-package calls fell through to name-matching with path-proximity scoring, which on real codebases picks ~one accidental candidate per call site. The Go branch now reads the project's `go.mod`, treats `/...` imports as in-module, and looks up the qualified symbol in the imported package's directory; same-name functions in *different* packages no longer collide. As a side fix, Go nodes now correctly carry `is_exported=1` for capitalized identifiers (the resolver needs this to filter candidates). Measured on gRPC-Go (1,031 `.go` files, layered packages): cross-package `calls` edges go from 10,880 → 19,929 (**+83%**), total `calls` from 23,803 → 34,105 (**+43%**), with no false-positive resolution of stdlib calls (`fmt.Println` etc. stay external). -- **`codegraph_files` now returns the whole project when an agent passes `path="/"`, `"."`, `"./"`, `""`, or a Windows-style `"\\"` — instead of "No files found matching the criteria."** Indexed file paths are stored as project-relative POSIX (e.g. `src/foo.ts`), but the path filter used a plain `startsWith`, so a leading slash or any of the other root-ish shapes an agent might guess matched nothing and pushed the agent back to Read/Glob — the exact opencode + Gemini Flash regression reported on Windows 11. Subdirectory filters are now equally forgiving: `"/src"`, `"./src"`, `"src/"`, `"src\\components"`, etc. all resolve correctly. Sibling-prefix bleed (`"src"` was previously matching `src-utils/...`) is also fixed — the filter now requires either an exact match or a `/` boundary. Closes #426. 
-- **File watcher no longer marks edited files as fresh when another process holds the index lock.** When a second writer (concurrent `codegraph index`, a git hook, another MCP daemon) held `.codegraph/codegraph.lock`, `CodeGraph.sync()` returned a zero-shape no-op instead of throwing. The file watcher took that as a successful sync and cleared `pendingFiles` — so the per-file staleness signal MCP tools surface to agents (issue #403) dropped immediately, even though the edit was never indexed. `CodeGraph.watch()` now converts that no-op into a typed `LockUnavailableError` thrown into the watcher; the existing retry path preserves `pendingFiles` and reschedules until the lock becomes available. The error is logged at debug only (no `onSyncError` callback) so a long-running external indexer doesn't spam stderr every debounce cycle. Closes #449. -- **TS/JS top-level initializer calls and inline-object-method calls are no longer dropped.** Calls inside a top-level variable initializer (`const token = getTokenMp()`) and inside methods of an inline object literal (`{ methods: { save() { getTokenMp() } } }`) were never walked by the variable / method-definition extractors, so `getTokenMp` showed up nowhere in `codegraph_callers`. The variable extractor now walks any non-object initializer value for calls; the method-definition extractor still avoids creating synthetic nodes for inline-object methods (the noise reason is unchanged) but now walks their bodies so the calls inside aren't lost. Surfaces in plain `.ts`/`.js` files (top-level `const x = foo()`) and in Vue SFCs (`