From 0e2789ab712bd570f775602955d9f1329133662e Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 10:25:40 -0400
Subject: [PATCH 01/51] docs(agent-eval): nested MCP attach is startup-latency,
 not a hard block (#735)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Corrects the "run non-nested only" conclusion from #734. The codegraph server
is healthy (handshake ~165ms); the flakiness is that on a multi-step
implementation task the agent dives into Read/grep before codegraph finishes
its ~2-3s startup (worse under nested CPU contention), so it runs with no
codegraph. Fix: pre-warm a persistent daemon (high idle timeout) + skip the
startup re-exec (CODEGRAPH_WASM_RELAUNCHED=1) so claude connects before the
agent's first turn. claude's init snapshot can show status:"pending" even when
it then connects — judge by actual codegraph usage, not the init line.

ab-new-vs-baseline.sh now bakes in the pre-warm + skip-re-exec. Validated: a
clean A/B showed the new build's agent used codegraph 2x / 5 Reads vs the
baseline's 0 / 8 on the same fully-implemented task.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                                |  2 +-
 scripts/agent-eval/ab-new-vs-baseline.sh | 50 +++++++++++++++---------
 2 files changed, 33 insertions(+), 19 deletions(-)
diff --git a/CLAUDE.md b/CLAUDE.md
index e2f40832c..bad199f20 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -137,7 +137,7 @@ For each **language × framework**, validate on **small, medium, and large** rea
 1. **Pick the canonical flow** for the framework ("how does X reach Y": state→render, request→handler→view, query→SQL, action→reducer→store…).
 2. **Deterministic probes** (`scripts/agent-eval/probe-{node,explore}.mjs` against the built `dist/`): `codegraph_explore` with the flow's symbol names connects from→to end-to-end with no break (its Flow section shows the path); **no node explosion** (`select count(*) from nodes` stable before/after re-index); synthesized-edge **precision** spot-check (`select … where provenance='heuristic'`).
 3. **Agent A/B** (`scripts/agent-eval/run-all.sh <repo> "<Q>"`): with vs without codegraph, **≥2 runs/arm** (run-to-run variance is large — never conclude from n=1). Record **duration, total tool calls, Read, Grep**. Optional forced-Read-0 sufficiency proof via the block-read hook (`scripts/agent-eval/hook-settings.json`).
-   - **Run agent-evals in a REAL terminal — NEVER nested inside a Claude Code session** (don't spawn `claude -p` from a Bash tool call). The codegraph MCP server is healthy (full `initialize`→`tools/list` handshake ~165ms, daemon and in-process modes both fine), but a nested `claude -p` marks it `status:"pending"` / 0 tools under CPU/timing contention and the agent silently runs with no codegraph — it can connect early in a session, then degrade to consistent failure as nested spawns pile up. `CODEGRAPH_NO_DAEMON=1` and `< /dev/null` do NOT fix it (it's the nested client, not the server). Confirm via `parse-run.mjs` (`codegraph tools exposed: 0` = void run). To isolate a change — **new-build vs baseline-build, both codegraph-on** (vs run-all.sh's with-vs-without) — use `scripts/agent-eval/ab-new-vs-baseline.sh <indexed-repo> "<task>" [baseline-ref]`.
+   - **MCP attach is a startup-latency issue, not a hard block.** On a multi-step task the agent dives into Read/grep before codegraph finishes its ~2-3s startup (worse when the eval is itself run nested inside a Claude session, under CPU contention), so it runs with no codegraph. Fix: **pre-warm a persistent daemon** for the target (`CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS` high; spawn `serve --mcp --path <target> </dev/null &`; wait for `.codegraph/daemon.sock`) **and skip the startup re-exec** (`CODEGRAPH_WASM_RELAUNCHED=1`) so claude connects before the agent's first turn. Don't trust claude's `init` snapshot — it can read `status:"pending"` / 0 tools even when it then connects; judge by actual codegraph usage in `parse-run.mjs`'s `by type`. To isolate a change — **new-build vs baseline-build, both codegraph-on** (vs run-all.sh's with-vs-without) — use `scripts/agent-eval/ab-new-vs-baseline.sh <indexed-repo> "<task>" [baseline-ref]` (it bakes in the pre-warm).
 4. **Pass bar:** a normal flow question reaches **~0 Read/Grep within the repo's explore-call budget**, runs **faster** than without-codegraph, and shows **no regression on a control repo**. Record the numbers in `docs/design/dynamic-dispatch-coverage-playbook.md` (the coverage matrix).
 
 Full playbook + per-mechanism design: `docs/design/dynamic-dispatch-coverage-playbook.md` and `docs/design/callback-edge-synthesis.md`.
diff --git a/scripts/agent-eval/ab-new-vs-baseline.sh b/scripts/agent-eval/ab-new-vs-baseline.sh
index 7f5d58d1d..7e5cc84e5 100755
--- a/scripts/agent-eval/ab-new-vs-baseline.sh
+++ b/scripts/agent-eval/ab-new-vs-baseline.sh
@@ -2,18 +2,20 @@
 # A/B a codegraph retrieval/steering change: the NEW build (current HEAD) vs a
 # BASELINE build (a git ref) — BOTH with codegraph attached — on the same
 # implementation task, measuring how many Read vs codegraph calls the agent
-# makes. This ISOLATES the change (unlike run-all.sh, which is with-vs-without
-# codegraph). The agent works on a throwaway copy of the target, so its edits
-# never touch your repos.
+# makes. ISOLATES the change (unlike run-all.sh's with-vs-without). The agent
+# works on a throwaway copy of the target, so your repos are never touched.
 #
-# *** RUN THIS IN A REAL TERMINAL — NOT nested inside a Claude Code session. ***
-# A `claude -p` spawned from within another Claude session (e.g. from a Bash
-# tool call) cannot reliably attach the codegraph MCP server: the server is
-# healthy (full handshake ~165ms) but the nested client marks it
-# status:"pending" / 0 tools under CPU/timing contention, and degrades to
-# consistent failure over a long session. NO_DAEMON + `< /dev/null` do NOT fix
-# it — it's the nested client, not the server. See codegraph/CLAUDE.md
-# ("Running agent-evals — do NOT nest").
+# Reliable attach (works even when this is itself run nested inside a Claude
+# session): each arm PRE-WARMS a persistent codegraph daemon for its target so
+# claude connects to an already-bound, index-loaded daemon instantly — before
+# the agent's first turn — and SKIPS codegraph's startup re-exec via
+# CODEGRAPH_WASM_RELAUNCHED=1. Without this, on a multi-step task the agent
+# dives into Read/grep before codegraph finishes its ~2-3s startup (worse under
+# the CPU contention of a nested run) and runs with NO codegraph.
+#
+# Gotcha: claude's `system/init` snapshot can read status:"pending" / 0 tools
+# even when the server then connects fine — judge by ACTUAL codegraph usage in
+# parse-run.mjs's "by type", not the init line.
 #
 # Usage: ab-new-vs-baseline.sh <indexed-repo> "<task>" [baseline-ref]
 #   <indexed-repo>  a repo with a .codegraph index (copied per arm)
@@ -38,9 +40,13 @@ fi
 CHANGED=$(git -C "$ENGINE" diff --name-only "$BASE_REF" HEAD -- src 2>/dev/null)
 [ -n "$CHANGED" ] || { echo "no src/ changes between $BASE_REF and HEAD — nothing to A/B"; exit 1; }
 
-# Always restore the engine to HEAD on exit, even if interrupted mid-arm.
-restore() { git -C "$ENGINE" checkout HEAD -- $CHANGED 2>/dev/null; ( cd "$ENGINE" && npm run build >/dev/null 2>&1 ); }
-trap restore EXIT
+# On exit: kill any eval daemons + restore the engine to HEAD.
+cleanup() {
+  pkill -9 -f "serve --mcp --path $OUT/" 2>/dev/null
+  git -C "$ENGINE" checkout HEAD -- $CHANGED 2>/dev/null
+  ( cd "$ENGINE" && npm run build >/dev/null 2>&1 )
+}
+trap cleanup EXIT
 
 mkdir -p "$OUT"
 echo "###### engine=$ENGINE  baseline=$BASE_REF"
@@ -54,17 +60,25 @@ rm -rf "$OUT/t-new" "$OUT/t-base"
 rsync -a --exclude node_modules --exclude .git --exclude dist --exclude .codegraph "$TARGET/" "$OUT/t-new/"
 cp -R "$OUT/t-new" "$OUT/t-base"
 
-cfg() { printf '{"mcpServers":{"codegraph":{"command":"%s","args":["serve","--mcp","--path","%s"]}}}' "$BIN" "$1" > "$2"; }
+prewarm() { # target — spawn a persistent daemon (current $BIN) and wait for its socket
+  pkill -9 -f "serve --mcp --path $1" 2>/dev/null
+  CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 node "$BIN" serve --mcp --path "$1" </dev/null >/dev/null 2>&1 &
+  node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$1" \
+    && echo "  daemon warm: $1" || echo "  WARN: daemon never bound for $1 (arm may run without codegraph)"
+}
 
 run_arm() { # label, target-copy
   local label="$1" tgt="$2" c="$OUT/mcp-$1.json"
-  cfg "$tgt" "$c"
+  # Connect to the pre-warmed daemon; skip the startup re-exec for a fast attach.
+  printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$tgt" > "$c"
+  prewarm "$tgt"
   echo "############## ARM [$label] ##############"
   ( cd "$tgt" && claude -p "$TASK" \
       --output-format stream-json --verbose --permission-mode bypassPermissions \
       --model opus --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" \
-      < /dev/null > "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
-  node "$PARSE" "$OUT/run-$label.jsonl" 2>&1 | grep -E "tools exposed|by type|Result" || echo "  (parse failed — see $OUT/run-$label.jsonl)"
+      </dev/null > "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
+  node "$PARSE" "$OUT/run-$label.jsonl" 2>&1 | grep -E "by type|Result" || echo "  (parse failed — see $OUT/run-$label.jsonl)"
+  pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
   echo
 }
 

From 1983590533a51c950b055937051b152e8473a30b Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 13:48:42 -0400
Subject: [PATCH 02/51] =?UTF-8?q?feat(mcp):=20codegraph=5Fnode=20reads=20f?=
 =?UTF-8?q?iles=20like=20the=20Read=20tool=20=E2=80=94=20offset/limit,=20b?=
 =?UTF-8?q?yte-parity=20(#738)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Makes codegraph_node a drop-in faster Read for indexed source files (file-read mode: <n>\t<line> like Read, offset/limit, + blast-radius header; symbolsOnly for the map). Fixes the old file-view dropping imports/line-numbers. #383/#527 preserved. Validated by A/B: explore/node already return source + line numbers, so Read=0 when used. Includes the A/B eval harness scripts. Full suite green (1270).
---
 CHANGELOG.md                             |   2 +-
 __tests__/node-file-view.test.ts         |  83 +++++++---
 docs/design/agent-codegraph-adoption.md  | 136 +++++++++++++++++
 scripts/agent-eval/ab-adoption.sh        |  91 +++++++++++
 scripts/agent-eval/ab-hook.sh            |  86 +++++++++++
 scripts/agent-eval/ab-impl.sh            |  78 ++++++++++
 scripts/agent-eval/ab-sufficiency.sh     |  78 ++++++++++
 scripts/agent-eval/redirect-read-hook.sh |  38 +++++
 src/mcp/server-instructions.ts           |   5 +-
 src/mcp/tools.ts                         | 187 +++++++++++++++--------
 10 files changed, 702 insertions(+), 82 deletions(-)
 create mode 100644 docs/design/agent-codegraph-adoption.md
 create mode 100644 scripts/agent-eval/ab-adoption.sh
 create mode 100644 scripts/agent-eval/ab-hook.sh
 create mode 100644 scripts/agent-eval/ab-impl.sh
 create mode 100644 scripts/agent-eval/ab-sufficiency.sh
 create mode 100755 scripts/agent-eval/redirect-read-hook.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 255b192a7..a94dadb45 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,7 +16,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
-- The `codegraph_node` MCP tool now accepts a file path on its own (no symbol) and returns that file's symbols plus which files depend on it — and the full source with `includeCode`. It's a drop-in upgrade for reading a source file: the same content, plus the file's blast radius, in one call. The agent-facing guidance was also retuned so assistants reach for codegraph while *implementing* a change (not only when answering questions), since one codegraph call returns more accurate context for fewer tokens than re-reading files.
+- The `codegraph_node` MCP tool can now **read a whole source file like the built-in Read tool — only faster, served from the index**. Pass a file path with no symbol and it returns that file's current source with line numbers (the same `<n>⇥<line>` shape Read produces, so an assistant can edit straight from it), narrowable with `offset`/`limit` exactly like Read, plus a one-line note of which files depend on it (the file's blast radius). Use it anywhere you'd reach for Read on an indexed source file. Pass `symbolsOnly: true` for just the file's structure. Configuration/data files (`.yml` / `.properties`) are summarized by key only, never dumped, so secrets in them are never surfaced. The agent-facing guidance was also retuned so assistants reach for codegraph while *implementing* a change (not only when answering questions), since one codegraph call returns the same bytes plus the blast radius, faster than re-reading the file.
 - New `codegraph upgrade` command updates CodeGraph to the latest release in place — it detects how you installed (the standalone `install.sh` / `install.ps1` bundle, npm, or npx) and does the right thing for each, on macOS, Linux, and Windows. Use `codegraph upgrade --check` to see whether an update is available without installing, or `codegraph upgrade <version>` to move to a specific version. After upgrading it reminds you to re-index your projects so they pick up the newer engine's improvements. (#679)
 - `codegraph status` now flags when a project's index was built by an older engine than the one you're running and recommends re-indexing (also surfaced in `codegraph status --json`), so you know when a `codegraph index -f` or `codegraph sync` will add coverage a newer release introduced.
 - Cross-file impact and blast-radius coverage now spans **all 22 supported languages and 14 web frameworks**, each validated on a real-world repo — see the new coverage table in the README. This release ships the cross-file resolution behind it, including Lua and Luau `require`, Shopify OS 2.0 Liquid section templates, Delphi form code-behind, Rust cross-module calls and Rocket route macros, Swift Fluent relationships, and the SvelteKit / Nuxt / Vapor / Axum route conventions. The residual everywhere is genuine static-analysis frontiers (runtime dispatch, reflection / DI, framework-convention entry points), never hidden.
diff --git a/__tests__/node-file-view.test.ts b/__tests__/node-file-view.test.ts
index 316ed555d..7d2a5703c 100644
--- a/__tests__/node-file-view.test.ts
+++ b/__tests__/node-file-view.test.ts
@@ -1,7 +1,9 @@
 /**
- * codegraph_node FILE-VIEW mode: a bare `file` (no `symbol`) returns that file's
- * symbol map + graph role (dependents), and verbatim bodies with includeCode —
- * a Read replacement for a source file that also surfaces the blast radius.
+ * codegraph_node FILE READ mode: a `file` with no `symbol` reads that file like
+ * the Read tool — current source with `<n>\t<line>` numbering (byte-for-byte
+ * Read's shape), narrowable with offset/limit — plus a one-line blast-radius
+ * header. `symbolsOnly` returns the structural map instead. Config/data files
+ * are summarized by key, never dumped (#383).
  */
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 import * as fs from 'fs';
@@ -24,9 +26,23 @@ describe('codegraph_node file-view (Read replacement)', () => {
     );
     fs.writeFileSync(
       path.join(dir, 'src', 'b.ts'),
-      "import { helper } from './a';\nexport function useHelper() { return helper(2); }\n",
+      "import { helper } from './a';\n\n// a comment between symbols\nconst SETTING = 7;\nexport function useHelper() { return helper(2) + SETTING; }\n",
     );
-    cg = CodeGraph.initSync(dir, { config: { include: ['**/*.ts'], exclude: [] } });
+    // A config/data file (#383): its values may be secrets and must never be
+    // dumped verbatim by the file-view.
+    fs.writeFileSync(
+      path.join(dir, 'src', 'application.properties'),
+      'spring.datasource.password=SUPERSECRET123\nserver.port=8080\n',
+    );
+    // A large file: exceeds the file-view line budget, so it must be windowed
+    // honestly (not silently truncated).
+    fs.writeFileSync(
+      path.join(dir, 'src', 'big.ts'),
+      'export function big() {\n' +
+        Array.from({ length: 2000 }, (_, i) => `  const v${i} = ${i};`).join('\n') +
+        '\n  return 0;\n}\n',
+    );
+    cg = CodeGraph.initSync(dir, { config: { include: ['**/*.ts', '**/*.properties'], exclude: [] } });
     await cg.indexAll();
     h = new ToolHandler(cg);
   });
@@ -39,21 +55,54 @@ describe('codegraph_node file-view (Read replacement)', () => {
   const text = async (args: Record<string, unknown>): Promise<string> =>
     (await h.execute('codegraph_node', args)).content.map((c) => c.text).join('\n');
 
-  it("a bare file (no symbol) returns the file's symbols + dependents", async () => {
+  it('reads a whole file like Read by default — `<n>\\t<line>` lines (no pad), imports + gaps included', async () => {
+    const out = await text({ file: 'b.ts' }); // no includeCode needed — content is the default
+    // Byte-for-byte Read shape: line 1 is "1<TAB>import …", NOT space-padded.
+    expect(out).toMatch(/^1\timport \{ helper \} from '\.\/a';$/m);
+    expect(out).toContain('// a comment between symbols'); // inter-symbol gap (Read has it; old reconstruction dropped it)
+    expect(out).toContain('const SETTING = 7'); // top-level statement
+    expect(out).toContain('useHelper'); // the symbol body too
+    expect(out).not.toContain('```'); // Read has no code fence; neither do we
+  });
+
+  it('leads with a one-line blast-radius header (the value-add over Read)', async () => {
     const out = await text({ file: 'a.ts' });
-    expect(out).toContain('src/a.ts');
-    expect(out).toContain('helper');
-    expect(out).toContain('Widget');
-    expect(out).toMatch(/depended on by 1 file/i);
-    expect(out).toContain('src/b.ts'); // the dependent file (blast radius)
+    expect(out).toMatch(/used by 1 file: src\/b\.ts/); // a.ts is imported by b.ts
+    expect(out).toContain('return x + 1'); // still returns the source
+  });
+
+  it('offset/limit narrow the window exactly like Read', async () => {
+    const out = await text({ file: 'big.ts', offset: 1000, limit: 3 });
+    // Window starts at the requested line, numbered exactly: "1000<TAB>  const v998 = 998;"
+    expect(out).toMatch(/^1000\t {2}const v998 = 998;$/m);
+    expect(out).not.toMatch(/^1\t/m); // line 1 is NOT shown
+    expect(out).toMatch(/lines 1000[–-]1002 of \d+/); // honest pagination note
+  });
+
+  it('an offset past EOF is reported, not a crash', async () => {
+    const out = await text({ file: 'a.ts', offset: 9999 });
+    expect(out).toMatch(/past the end/i);
+  });
+
+  it('paginates a large file honestly by default — "lines 1–N of TOTAL", never a silent truncate', async () => {
+    const out = await text({ file: 'big.ts' });
+    expect(out).toMatch(/lines 1[–-]\d+ of \d+/); // explicit window note
+    expect(out).not.toContain('(output truncated)'); // not the generic 15k chop
+    expect(out).toMatch(/^1\texport function big/m); // the head of the window is real source
   });
 
-  it('resolves by basename and returns verbatim bodies with includeCode', async () => {
-    const out = await text({ file: 'a.ts', includeCode: true });
-    expect(out).toContain('return x + 1'); // helper body
-    expect(out).toContain('class Widget'); // class body, verbatim
-    // It must NOT steer the agent back to Read — it is the Read replacement.
-    expect(out.toLowerCase()).not.toContain('read `src/a.ts`');
+  it('does NOT dump a config/data file (yaml/properties) — #383 secret safety', async () => {
+    const out = await text({ file: 'application.properties' });
+    expect(out).not.toContain('SUPERSECRET123'); // the value never reaches the agent
+    expect(out.toLowerCase()).toMatch(/config|values withheld/);
+  });
+
+  it('symbolsOnly returns the structural map, not the source', async () => {
+    const out = await text({ file: 'a.ts', symbolsOnly: true });
+    expect(out).toContain('### Symbols');
+    expect(out).toContain('helper');
+    expect(out).toContain('Widget');
+    expect(out).not.toContain('return x + 1'); // bodies are NOT included in the map
   });
 
   it('still works as a normal symbol lookup (no regression)', async () => {
diff --git a/docs/design/agent-codegraph-adoption.md b/docs/design/agent-codegraph-adoption.md
new file mode 100644
index 000000000..8b6c1c061
--- /dev/null
+++ b/docs/design/agent-codegraph-adoption.md
@@ -0,0 +1,136 @@
+# Getting agents to actually use codegraph (not Read) — design notes & handoff
+
+> Working doc for a fresh session. Two problems to crack:
+> **(P1)** agents still reach for `Read`/`grep` during implementation instead of codegraph;
+> **(P2)** on startup the codegraph MCP server can be `pending` when the agent's first turn fires, so the agent runs with *no* codegraph at all.
+>
+> Read `codegraph/CLAUDE.md` → "Retrieval performance & dynamic-dispatch coverage" first — it's the doctrine these ideas must respect.
+
+---
+
+## Context — what already shipped (so you don't repeat it)
+
+- **#733 (`7175dc4`)** — reframed the agent-facing steering (`src/mcp/server-instructions.ts` + the `codegraph_node`/`codegraph_explore` descriptions in `src/mcp/tools.ts`) to cover *implementation*, not just Q&A; and added **file-view mode**: `codegraph_node` now accepts a bare `file` (no `symbol`) → returns that file's symbol map + its dependents (blast radius) + verbatim bodies (`includeCode`). `handleFileView` in `src/mcp/tools.ts`.
+- **Clean A/B result** (new build vs baseline build, both codegraph-connected, same fully-implemented task — `kindExclude` added to `codegraph_search`):
+  - **baseline:** 0 codegraph calls, 8 Reads (agent *ignored* available codegraph).
+  - **new:** 2 `codegraph_explore` calls, 5 Reads.
+  - So the reframe *did* move tool-choice — but the agent used `codegraph_explore`, **never the file-view**, and still Read 5×. n=1/arm.
+- **Eval harness fix** (`#735`): nested attach is a *startup-latency* problem, not a hard block. `scripts/agent-eval/ab-new-vs-baseline.sh` now pre-warms a daemon + skips the re-exec; use it (run non-nested for cleanest results).
+
+**Doctrine constraints (from CLAUDE.md — do not relitigate):**
+- *Adapt the tool to the agent.* Changing tool descriptions / `server-instructions.ts` is **low-salience** and has *regressed* wall-clock before. Wording alone won't reliably move tool-choice.
+- *New tools fare worse than extending an existing one* (the agent under-picks even `trace`; `codegraph_context` was removed).
+- The real levers that landed historically: **coverage** (more flows connect statically → `explore` surfaces them) and **sufficiency** (output complete enough that the agent *stops* reading).
+- The optimization target is **wall-clock + tool-call count + Read=0**, not token cost (cost is lower as a side effect).
+
+---
+
+## P1 — Agents under-use codegraph during implementation
+
+### STATUS — 2026-06-08 (RESOLVED via Read-parity, not a hook)
+
+**The fix: make `codegraph_node` read a file *exactly like the Read tool*, only
+faster — so the agent reaches for it naturally. No forcing.** The owner's steer
+settled the direction: *"codegraph should be able to Read just like the Read
+tool… make it as good as Read. Read is slow and old; querying the index is fast.
+You keep diverging away from using codegraph rather than pursuing the fix."*
+
+**DONE — `handleFileView` (`src/mcp/tools.ts`) is now full Read parity:**
+- A `file` with no `symbol` returns the file's current source numbered
+  **byte-for-byte the way Read does — `<n>\t<line>`, no padding, trailing empty
+  line kept** (verified by reading the same file with both and diffing). The only
+  addition is a **one-line blast-radius header** (`used by N files: …`).
+- **`offset` / `limit` mean exactly what they do on Read** (1-based start; max
+  lines; default whole file capped at 2000 lines like Read). Large files paginate
+  honestly (`(lines X–Y of N — pass offset/limit…)`), never the 15k `truncateOutput` chop.
+- Content is the **default** (no `includeCode` needed); `symbolsOnly: true` returns
+  the cheap structural map instead. Security preserved: `yaml`/`properties`
+  summarized by key, never dumped (#383); reads via `validatePathWithinRoot` (#527).
+- Tests: `__tests__/node-file-view.test.ts` (9 tests, incl. strict format parity
+  `^1000\t  const v998 = 998;` and unpadded `^1\timport …`). Full suite green
+  (1270). Descriptions / `server-instructions.ts` / CHANGELOG reframed: "read a
+  source file with codegraph_node instead of Read — same bytes, faster."
+
+**The hook (idea 1) — A/B'd and REJECTED. Do not ship.** Kept only as an eval
+artifact (`scripts/agent-eval/redirect-read-hook.sh` + `ab-hook.sh`).
+- Clean A/B (2 runs/arm, devpit "add `dp ping`, build it"; both arms codegraph-attached):
+  - **nohook:** 0 codegraph calls, 1 Read, **5–7 tool calls, 6–8 turns, 55–77s.** (Reproduces P1: agent ignores codegraph — but read-once-and-edit is *efficient* here.)
+  - **hook (deny-redirect):** 0 *successful* Reads + 1 file-view call (parity worked, edit compiled), but **8–9 tool calls, 9–10 turns, 200–239s**, and the agent **fought the deny** — `ToolSearch` to find the tool, reflexive re-Read (denied), then **`Bash python3` to read the file around the block.**
+  - Verdict: a blanket Read-deny **regresses the target metrics (~2× tool calls, more turns) on a simple edit** and the agent routes around it. Forcing is the wrong lever; making the tool genuinely better than Read is the right one.
+- If routing is ever revisited: not a blanket hook. Either a narrow trigger (large
+  files only / after-N-reads) **with a clean A/B on a Read-heavy multi-file task**
+  (the hook's best case, untested), or just keep widening coverage + sufficiency.
+
+---
+
+**Symptom:** even with codegraph attached + the new steering, the agent reflexively `Read`s/`grep`s mid-implementation, and never reaches for the file-view. Descriptions can't fix this (low-salience wall).
+
+### Ideas, ranked by expected leverage
+
+1. **PreToolUse(Read/Grep) hook that redirects to codegraph** — *highest leverage; the only channel that actually changes behavior.*
+   - Claude Code **hooks** can intercept a tool call and inject context or block it — unlike descriptions, this is *not* low-salience. We already have `scripts/agent-eval/block-read-hook.sh` + `hook-settings.json` (used to force Read=0 in evals).
+   - Ship a **recommended (opt-in) hook**: on `Read` (or `Grep`) of a path that's *indexed*, inject "this file is indexed — `codegraph_node {file}` returns it + its blast radius for fewer tokens; treat its output as already-Read." Soft nudge (don't hard-block, or it'll frustrate users on configs/docs codegraph doesn't index).
+   - The installer (`src/installer/targets/claude.ts`) could offer to add this hook (opt-in, like the auto-allow permissions).
+   - **Validate** with `ab-new-vs-baseline.sh` (Read count, with vs without the hook). This is the experiment most likely to move the needle.
+   - Open Qs: how to know a path is indexed from inside a hook (query `codegraph files`/`status`, or a fast local check against `.codegraph`); avoiding noise on non-indexed files; per-language false positives.
+
+2. **Sufficiency: make the file-view the obvious Read replacement so the agent *wants* it.**
+   - The A/B showed the agent never passed a `file` to `codegraph_node`. Why? It doesn't think "Read this file" → "codegraph_node file=X". Investigate: is the file-view's value (symbols + dependents + bodies) actually *better than Read* for the agent's next step (an `Edit`)? It returns bodies — but does it return enough surrounding context to `Edit` confidently? If not, the agent Reads anyway.
+   - Consider: when the agent *does* Read an indexed file, is there a way to make codegraph's prior `explore`/`node` output have *already* given it what it needed? (i.e. fix the upstream sufficiency, not the Read itself.)
+
+3. **Coverage — the durable lever.** Every flow that connects statically is one the agent doesn't Read to reconstruct. Keep closing dynamic-dispatch gaps (`src/resolution/`). Less about "stop Reading," more about "never need to."
+
+4. **Naming / affordance experiments (low confidence, cheap).** The file-view is buried inside `codegraph_node`. A dedicated, obviously-named affordance might get picked more — *but* "new tools fare worse," so this likely loses. If tried, A/B it; don't assume.
+
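+**Recommendation (original — superseded by the STATUS section above):** prototype **idea 1 (the Read-redirect hook)** and A/B it. It was judged the one lever with a real chance of moving behavior, with everything else incremental. Outcome: the hook was A/B'd and rejected; idea 2 (making the file-view a true Read replacement) is what shipped.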
+
+---
+
+## P2 — Agent runs without codegraph because the server is `pending` at startup
+
+**Symptom:** `serve --mcp` isn't ready when the agent's first turn fires (the host marks the MCP server `status:"pending"` / 0 tools), so the agent starts Read/grep and never uses codegraph. We saw this most severely in nested evals (codegraph's ~2-3s startup loses the race against the agent's first turn); **real users hit a milder version** — the first query of a session may run without codegraph.
+
+### Root cause
+`serve --mcp` does a `--liftoff-only` **re-exec** (for a node memory flag) **and** spawns/binds a detached **daemon** before tools are usable. Under load that exceeds the host's MCP-startup window. (`CODEGRAPH_WASM_RELAUNCHED=1` skips the re-exec; pre-warming a daemon removes the bind latency — both proven in `ab-new-vs-baseline.sh`. But a real user can't pre-warm.)
+
+### Ideas, ranked
+
+1. **CODEGRAPH-SIDE — expose the static tool list INSTANTLY, decoupled from the daemon. *Biggest shippable win; helps every user.***
+   - Hypothesis: the host marks codegraph `pending` because `tools/list` (tool exposure) waits on the daemon connect. The local handshake already answers `initialize` fast (~107ms; `runLocalHandshakeProxy` in `src/mcp/proxy.ts`, `getStaticTools` is imported there). **Investigate: does `serve --mcp` answer `tools/list` *locally and instantly* from `getStaticTools`, or does it forward it to the still-connecting daemon?** If the latter, decouple it: advertise the static tools the moment the client asks, mark connected, and resolve the daemon in the background for actual tool *calls*.
+   - Verify with: `printf '<initialize>\n<initialized>\n<tools/list>\n' | node dist/bin/codegraph.js serve --mcp --path <repo>` and time the `tools/list` response, daemon-mode vs in-process. In-process answered in ~165ms; daemon-mode is the suspect.
+   - If this lands, `pending`-at-startup largely disappears without any host change.
+
+2. **CODEGRAPH-SIDE — speed/skip the re-exec on the MCP serve path.** The re-exec exists for a V8 memory flag (`src/extraction/wasm-runtime-flags.ts`, `RELAUNCH_GUARD_ENV = CODEGRAPH_WASM_RELAUNCHED`). For MCP serving on a normal repo the flag may be unnecessary, or settable without a full process re-exec. Removing one process spawn from the cold path shaves the startup window.
+
+3. **CODEGRAPH-SIDE — a SessionStart hook that pre-warms the daemon.** Ship an opt-in Claude Code `SessionStart` hook (installer-added) that spawns/warms the daemon for the project at session start, so it's bound before the first query. Mitigation if (1) is hard.
+
+4. **HOST-SIDE — "wait/retry on pending" — this is what you asked about, but it's a Claude Code (MCP client) behavior, not codegraph's to fix.** codegraph can't make the agent retry. Options: (a) raise it with Anthropic as an MCP-client improvement (don't let the agent's first turn proceed until configured MCP servers finish connecting, or retry `pending` servers); (b) note `MCP_TIMEOUT` exists but did **not** help here, because the problem is *tool exposure timing*, not a connection timeout. Frame this as a request, and lean on (1)–(3) for what we control.
+
+**Recommendation:** chase **idea 1** (decouple `tools/list` from the daemon). It's the fix that makes codegraph "connected" instantly for everyone. Ship **idea 3** (pre-warm SessionStart hook) as a cheap mitigation in parallel. File the host-side request (4) but don't depend on it.
+
+---
+
+## Key files / pointers
+
+- **Steering / tools:** `src/mcp/server-instructions.ts` (the `initialize` instructions — single source of truth), `src/mcp/tools.ts` (tool descriptions + handlers; `handleNode`/`handleFileView`/`handleSearch`, `getStaticTools`).
+- **Startup / daemon / proxy:** `src/mcp/proxy.ts` (`runProxy`, `connectWithHello`, `runLocalHandshakeProxy`, PPID watchdog), `src/mcp/index.ts` (`runProxyWithLocalHandshake`, `spawnDetachedDaemon`), `src/mcp/daemon.ts`.
+- **Runtime flags:** `src/extraction/wasm-runtime-flags.ts` (`RELAUNCH_GUARD_ENV=CODEGRAPH_WASM_RELAUNCHED`, `HOST_PPID_ENV=CODEGRAPH_HOST_PPID`).
+- **Hooks (existing):** `scripts/agent-eval/block-read-hook.sh`, `scripts/agent-eval/hook-settings.json` (the eval's force-Read-0 hook — basis for the P1 redirect hook).
+- **Installer (where to add a recommended hook):** `src/installer/targets/claude.ts`.
+- **Eval harness:** `scripts/agent-eval/ab-new-vs-baseline.sh` (new-vs-baseline, pre-warm baked in), `run-all.sh` (with-vs-without), `parse-run.mjs` (tool-by-type counts; `codegraph tools exposed: 0` together with 0 codegraph calls means the agent ran without codegraph).
+- **Doctrine:** `CLAUDE.md` → "Retrieval performance & dynamic-dispatch coverage" + the agent-eval note under "Validation methodology".
+
+## How to validate anything here
+- **P1 (Read displacement):** `bash scripts/agent-eval/ab-new-vs-baseline.sh <indexed-repo> "<implementation task>" [baseline-ref]` — compare `Read` vs `mcp__codegraph__*` counts. ≥2 runs/arm (n=1 is noisy). Run non-nested for cleanest results. Use a *genuinely new* feature task (verify it doesn't already exist — the first A/B attempt wasted a run on an already-implemented `--quiet`).
+- **P2 (startup):** time `tools/list` from `serve --mcp` (above); and count cold-start runs where `init` shows `connected` + tools > 0. Don't trust a single `pending` init snapshot — confirm by whether the agent actually called codegraph.
+
+## Constraints / gotchas to remember
+- Descriptions/instructions are low-salience — **A/B every behavioral claim**, don't ship wording on faith.
+- New tools < extending existing ones.
+- The host's `init` snapshot can say `pending` even when the server then connects — judge by actual usage.
+- Don't run evals nested for "clean" numbers unless pre-warmed; even then, a real terminal is better.
+
+## Suggested start order for the fresh session
+1. **P2 idea 1** — verify whether `serve --mcp` answers `tools/list` locally/instantly; if not, decouple it from the daemon. (Highest-value, shippable, helps all users, no behavioral guesswork.)
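+2. **P1 idea 1** — prototype the PreToolUse(Read) redirect hook; A/B it. (Highest-value behavioral lever.) *Done: A/B'd and rejected — see the P1 STATUS section; do not re-run unless the trigger is narrowed.*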
+3. Ship the P2 SessionStart pre-warm hook as a mitigation; file the host-side wait/retry request.
diff --git a/scripts/agent-eval/ab-adoption.sh b/scripts/agent-eval/ab-adoption.sh
new file mode 100644
index 000000000..eabf802c3
--- /dev/null
+++ b/scripts/agent-eval/ab-adoption.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+# Does the agent PICK codegraph_node to read a file, vs the built-in Read tool?
+# Build A/B: NEW build (HEAD, codegraph_node has Read parity) vs BASELINE build
+# (a ref where it doesn't), BOTH codegraph-attached + pre-warmed, same task. The
+# metric is tool CHOICE: Read calls vs codegraph_node[file] calls per run.
+#
+# Usage: ab-adoption.sh <indexed-repo> "<task>" [runs-per-arm] [baseline-ref]
+# Env: AGENT_EVAL_OUT (default: /tmp/ab-adoption)
+set -uo pipefail
+TARGET="${1:?usage: ab-adoption.sh <indexed-repo> \"<task>\" [runs] [baseline-ref]}"
+TASK="${2:?task required}"
+RUNS="${3:-2}"
+BASE_REF="${4:-HEAD~1}"
+ENGINE="$(cd "$(dirname "$0")/../.." && pwd)"
+BIN="$ENGINE/dist/bin/codegraph.js"
+OUT="${AGENT_EVAL_OUT:-/tmp/ab-adoption}"
+
+command -v claude >/dev/null || { echo "claude CLI not on PATH"; exit 1; }
+[ -d "$TARGET/.codegraph" ] || { echo "target not indexed: run 'codegraph init $TARGET' first"; exit 1; }
+git -C "$ENGINE" diff --quiet && git -C "$ENGINE" diff --cached --quiet || { echo "engine has uncommitted changes — commit/stash first"; exit 1; }
+CHANGED=$(git -C "$ENGINE" diff --name-only "$BASE_REF" HEAD -- src 2>/dev/null)
+[ -n "$CHANGED" ] || { echo "no src/ changes between $BASE_REF and HEAD"; exit 1; }
+
+cleanup() {  # EXIT trap: stop eval daemons, then put the engine's changed sources back to HEAD and rebuild
+  pkill -9 -f "serve --mcp --path $OUT/" 2>/dev/null  # any daemon serving a target under $OUT
+  git -C "$ENGINE" checkout HEAD -- $CHANGED 2>/dev/null  # undo the baseline checkout of the changed src files
+  ( cd "$ENGINE" && npm run build >/dev/null 2>&1 )  # rebuild from the restored sources
+}
+trap cleanup EXIT
+mkdir -p "$OUT"
+echo "###### target=$TARGET  runs/arm=$RUNS  baseline=$BASE_REF"
+echo "###### changed: $(echo "$CHANGED" | tr '\n' ' ')"
+echo "###### task=$TASK"; echo
+
+prewarm() { # target — (re)start a daemon for $1 with a 30-min idle timeout, then wait up to ~15s for its socket
+  pkill -9 -f "serve --mcp --path $1" 2>/dev/null  # drop any stale daemon already serving this path
+  CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 node "$BIN" serve --mcp --path "$1" </dev/null >/dev/null 2>&1 &
+  node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$1" >/dev/null 2>&1 || echo "  WARN: daemon never bound for $1"  # surface a cold run instead of hiding it
+}
+
+# Per-run tool-choice counts: Read vs codegraph_node[file] vs [symbol].
+count() {
+  node -e '
+    const fs=require("fs");
+    const lines=fs.readFileSync(process.argv[1],"utf8").split("\n").filter(Boolean);
+    let read=0,cgFile=0,cgSym=0,cgOther=0,exposed="?";
+    for(const l of lines){try{const o=JSON.parse(l);
+      if(o.type==="system"&&o.subtype==="init"){exposed=(o.tools||[]).filter(t=>/codegraph/.test(t)).length;}
+      const blocks=o.message?.content||[];
+      for(const b of (Array.isArray(blocks)?blocks:[])){
+        if(b.type!=="tool_use")continue;
+        if(b.name==="Read")read++;
+        else if(b.name==="mcp__codegraph__codegraph_node"){ if(b.input&&b.input.symbol)cgSym++; else cgFile++; }
+        else if(/mcp__codegraph__/.test(b.name))cgOther++;
+      }
+    }catch{}}
+    console.log(`    Read=${read}  codegraph_node[file]=${cgFile}  codegraph_node[symbol]=${cgSym}  other_cg=${cgOther}  (cg exposed=${exposed})`);
+  ' "$1"
+}
+
+run_arm() { # label, N
+  local label="$1" n="$2"
+  local c="$OUT/mcp-$label.json"
+  for i in $(seq 1 "$n"); do
+    local tgt="$OUT/t-$label-$i"
+    rm -rf "$tgt"
+    rsync -a --exclude node_modules --exclude .git --exclude dist --exclude .codegraph "$TARGET/" "$tgt/"
+    node "$BIN" init "$tgt" >/dev/null 2>&1
+    printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$tgt" > "$c"
+    prewarm "$tgt"
+    echo "----- [$label] run $i -----"
+    ( cd "$tgt" && claude -p "$TASK" \
+        --output-format stream-json --verbose --permission-mode bypassPermissions \
+        --model opus --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" \
+        </dev/null > "$OUT/run-$label-$i.jsonl" 2>"$OUT/run-$label-$i.err" )
+    count "$OUT/run-$label-$i.jsonl"
+    pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
+  done
+  echo
+}
+
+echo "== NEW build (HEAD: codegraph_node has Read parity) =="
+( cd "$ENGINE" && npm run build >/dev/null 2>&1 ) && echo "built" || { echo "build failed at HEAD — aborting"; exit 1; }  # never eval a stale dist/
+run_arm new "$RUNS"
+
+echo "== BASELINE build ($BASE_REF) =="
+git -C "$ENGINE" checkout "$BASE_REF" -- $CHANGED || { echo "could not check out the $BASE_REF versions of the changed files — aborting (the baseline arm would silently run HEAD)"; exit 1; }  # $CHANGED is deliberately unquoted: each path becomes its own pathspec
+( cd "$ENGINE" && npm run build >/dev/null 2>&1 ) && echo "built" || { echo "build failed at $BASE_REF — aborting"; exit 1; }  # the EXIT trap restores HEAD and rebuilds
+run_arm baseline "$RUNS"
+
+echo "###### DONE — compare [new] vs [baseline]: does codegraph_node[file] rise / Read fall? Logs: $OUT"
diff --git a/scripts/agent-eval/ab-hook.sh b/scripts/agent-eval/ab-hook.sh
new file mode 100644
index 000000000..8c1af32a3
--- /dev/null
+++ b/scripts/agent-eval/ab-hook.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+# A/B the PreToolUse(Read) REDIRECT hook (P1): does steering Read → codegraph_node
+# file-view actually move the agent off Read during implementation? BOTH arms use
+# the CURRENT build with codegraph attached and pre-warmed; the only difference is
+# the hook. Isolates the hook's behavioral effect from the build/file-view change
+# (use ab-new-vs-baseline.sh for the build A/B).
+#
+#   arm [nohook] — codegraph on, no hook   (does the better file-view get picked on its own?)
+#   arm [hook]   — codegraph on, + redirect hook   (does routing close it?)
+#
+# Reliable attach (works nested): each arm pre-warms a persistent daemon and skips
+# the startup re-exec (CODEGRAPH_WASM_RELAUNCHED=1), so claude connects before the
+# agent's first turn. Judge by ACTUAL codegraph usage in parse-run.mjs's "by type",
+# not claude's init snapshot (which can read pending even when it then connects).
+#
+# Usage: ab-hook.sh <indexed-repo> "<implementation task>" [runs-per-arm]
+#   <indexed-repo>  a repo with a .codegraph index (copied per arm; never mutated)
+#   "<task>"        a GENUINELY-NEW implementation task (verify it isn't already done)
+#   [runs-per-arm]  default 2 (n=1 is noisy — the doctrine says >=2)
+# Env: AGENT_EVAL_OUT (default: /tmp/ab-hook)
+set -uo pipefail
+
+TARGET="${1:?usage: ab-hook.sh <indexed-repo> \"<task>\" [runs-per-arm]}"
+TASK="${2:?task required}"
+RUNS="${3:-2}"
+ENGINE="$(cd "$(dirname "$0")/../.." && pwd)"
+BIN="$ENGINE/dist/bin/codegraph.js"
+HOOK="$ENGINE/scripts/agent-eval/redirect-read-hook.sh"
+OUT="${AGENT_EVAL_OUT:-/tmp/ab-hook}"
+PARSE="$ENGINE/scripts/agent-eval/parse-run.mjs"
+
+command -v claude >/dev/null || { echo "claude CLI not on PATH"; exit 1; }
+command -v jq >/dev/null || { echo "jq not on PATH (the hook needs it)"; exit 1; }
+[ -d "$TARGET/.codegraph" ] || { echo "target not indexed: run 'codegraph init $TARGET' first"; exit 1; }
+chmod +x "$HOOK"
+
+cleanup() { pkill -9 -f "serve --mcp --path $OUT/" 2>/dev/null; }
+trap cleanup EXIT
+
+mkdir -p "$OUT"
+echo "###### engine=$ENGINE"
+echo "###### target=$TARGET   runs/arm=$RUNS"
+echo "###### task=$TASK"
+echo
+
+( cd "$ENGINE" && npm run build >/dev/null 2>&1 ) && echo "built"
+
+# A settings file carrying ONLY the PreToolUse(Read) redirect hook.
+HOOK_SETTINGS="$OUT/hook-settings.json"
+jq -n --arg cmd "bash $HOOK" \
+  '{hooks:{PreToolUse:[{matcher:"Read",hooks:[{type:"command",command:$cmd}]}]}}' > "$HOOK_SETTINGS"
+
+prewarm() { # target — spawn a persistent daemon and wait for its socket
+  pkill -9 -f "serve --mcp --path $1" 2>/dev/null
+  CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 node "$BIN" serve --mcp --path "$1" </dev/null >/dev/null 2>&1 &
+  node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$1" \
+    && echo "  daemon warm: $1" || echo "  WARN: daemon never bound for $1"
+}
+
+run_one() { # arm-label, run-index, use-hook(0|1)
+  local label="$1" idx="$2" hook="$3"
+  local tgt="$OUT/t-$label-$idx" c="$OUT/mcp-$label.json"
+  rm -rf "$tgt"
+  rsync -a --exclude node_modules --exclude .git --exclude dist --exclude .codegraph "$TARGET/" "$tgt/"
+  node "$BIN" init "$tgt" >/dev/null 2>&1
+  printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$tgt" > "$c"
+  prewarm "$tgt"
+  local extra=()
+  [ "$hook" = "1" ] && extra=(--settings "$HOOK_SETTINGS")
+  echo "----- [$label] run $idx -----"
+  # ${extra[@]+...} guard: bash 3.2 (macOS) under `set -u` errors on an empty
+  # array expansion otherwise, which would skip the no-hook arm's claude run.
+  ( cd "$tgt" && claude -p "$TASK" \
+      --output-format stream-json --verbose --permission-mode bypassPermissions \
+      --model opus --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" ${extra[@]+"${extra[@]}"} \
+      </dev/null > "$OUT/run-$label-$idx.jsonl" 2>"$OUT/run-$label-$idx.err" )
+  node "$PARSE" "$OUT/run-$label-$idx.jsonl" 2>&1 | grep -E "by type|Result" || echo "  (parse failed — see $OUT/run-$label-$idx.jsonl)"
+  pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
+  echo
+}
+
+for i in $(seq 1 "$RUNS"); do run_one nohook "$i" 0; done
+for i in $(seq 1 "$RUNS"); do run_one hook   "$i" 1; done
+
+echo "###### DONE. Compare [nohook] vs [hook] 'by type' — Read should fall and"
+echo "###### mcp__codegraph__codegraph_node should rise in the [hook] arm. Logs: $OUT"
diff --git a/scripts/agent-eval/ab-impl.sh b/scripts/agent-eval/ab-impl.sh
new file mode 100644
index 000000000..c5c23b58e
--- /dev/null
+++ b/scripts/agent-eval/ab-impl.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# Sufficiency A/B for an IMPLEMENTATION task (the agent edits): when it uses
+# codegraph (explore/node) to understand before editing, does it still Read? Like
+# ab-sufficiency.sh but copies+indexes a FRESH target per run (the agent mutates
+# it), so runs don't see each other's edits.
+#
+# WITH codegraph (pre-warmed) vs WITHOUT (empty MCP), N runs each. Reports
+# explore/node vs Read/Grep + the files Read, and whether the build still passes.
+#
+# Usage: ab-impl.sh <indexed-repo> "<task>" [runs] [build-cmd]
+# Env: AGENT_EVAL_OUT (default: /tmp/ab-impl)
+set -uo pipefail
+REPO="${1:?usage: ab-impl.sh <indexed-repo> \"<task>\" [runs] [build-cmd]}"
+Q="${2:?task required}"
+RUNS="${3:-2}"
+BUILD_CMD="${4:-}"
+ENGINE="$(cd "$(dirname "$0")/../.." && pwd)"
+BIN="$ENGINE/dist/bin/codegraph.js"
+OUT="${AGENT_EVAL_OUT:-/tmp/ab-impl}"
+command -v claude >/dev/null || { echo "claude CLI not on PATH"; exit 1; }
+[ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO"; exit 1; }
+cleanup(){ pkill -9 -f "serve --mcp --path $OUT/" 2>/dev/null; }
+trap cleanup EXIT
+mkdir -p "$OUT"
+( cd "$ENGINE" && npm run build >/dev/null 2>&1 ) && echo "built engine"
+echo "###### repo=$REPO  runs/arm=$RUNS"
+echo "###### task=$Q"; echo
+echo '{"mcpServers":{}}' > "$OUT/mcp-empty.json"
+
+prewarm(){ # target — (re)start a daemon for $1 with a 30-min idle timeout, then wait up to ~15s for its socket
+  pkill -9 -f "serve --mcp --path $1" 2>/dev/null  # drop any stale daemon already serving this path
+  CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 node "$BIN" serve --mcp --path "$1" </dev/null >/dev/null 2>&1 &
+  node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$1" >/dev/null 2>&1 || echo "  WARN: daemon never bound for $1"  # surface a cold run instead of hiding it
+}
+
+analyze(){
+  node -e '
+    const fs=require("fs");
+    const L=fs.readFileSync(process.argv[1],"utf8").split("\n").filter(Boolean);
+    let ex=0,nf=0,ns=0,oc=0,gr=0,ed=0,exposed="?";const reads=[];
+    for(const l of L){try{const o=JSON.parse(l);
+      if(o.type==="system"&&o.subtype==="init")exposed=(o.tools||[]).filter(t=>/codegraph/.test(t)).length;
+      for(const b of (o.message?.content||[])){if(b.type!=="tool_use")continue;
+        if(b.name==="mcp__codegraph__codegraph_explore")ex++;
+        else if(b.name==="mcp__codegraph__codegraph_node"){if(b.input&&b.input.symbol)ns++;else nf++;}
+        else if(/mcp__codegraph__/.test(b.name))oc++;
+        else if(b.name==="Read")reads.push((b.input?.file_path||"").split("/").pop());
+        else if(b.name==="Grep")gr++;
+        else if(b.name==="Edit"||b.name==="Write")ed++;
+      }}catch{}}
+    console.log(`    explore=${ex} node[sym]=${ns} node[file]=${nf} other_cg=${oc} | Read=${reads.length}${reads.length?" ("+reads.join(", ")+")":""} Grep=${gr} Edit=${ed}  [cg exposed=${exposed}]`);
+  ' "$1"
+}
+
+run(){ # label, withCodegraph(0/1)
+  local label="$1" wcg="$2"
+  for i in $(seq 1 "$RUNS"); do
+    local tgt="$OUT/t-$label-$i" cfg="$OUT/mcp-$label.json"
+    rm -rf "$tgt"
+    rsync -a --exclude node_modules --exclude .git --exclude dist --exclude .codegraph "$REPO/" "$tgt/"
+    node "$BIN" init "$tgt" >/dev/null 2>&1
+    if [ "$wcg" = "1" ]; then
+      printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$tgt" > "$cfg"
+      prewarm "$tgt"
+    else cp "$OUT/mcp-empty.json" "$cfg"; fi
+    ( cd "$tgt" && claude -p "$Q" --output-format stream-json --verbose \
+        --permission-mode bypassPermissions --model opus --max-budget-usd 4 \
+        --strict-mcp-config --mcp-config "$cfg" </dev/null > "$OUT/$label-$i.jsonl" 2>"$OUT/$label-$i.err" )
+    echo "[$label] run $i:"; analyze "$OUT/$label-$i.jsonl"
+    if [ -n "$BUILD_CMD" ]; then ( cd "$tgt" && eval "$BUILD_CMD" >/dev/null 2>&1 && echo "      build: PASS" || echo "      build: FAIL" ); fi
+    pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
+  done
+  echo
+}
+
+echo "== WITH codegraph =="; run with 1
+echo "== WITHOUT (Read/Grep only) =="; run without 0
+echo "###### DONE: $OUT"
diff --git a/scripts/agent-eval/ab-sufficiency.sh b/scripts/agent-eval/ab-sufficiency.sh
new file mode 100644
index 000000000..066253657
--- /dev/null
+++ b/scripts/agent-eval/ab-sufficiency.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# Sufficiency A/B: on a real understanding/flow question, WHEN the agent uses
+# codegraph (explore/node), does it still Read? Premise under test: explore/node
+# return source WITH line numbers, so a Read should not be needed.
+#
+# WITH codegraph (pre-warmed daemon, reliable nested attach) vs WITHOUT (empty
+# MCP, Read/Grep only), N runs each, on a throwaway copy of the repo. Reports
+# explore/node vs Read/Grep, and LISTS the files Read in the WITH arm so a true
+# sufficiency gap (an indexed source file) is distinguishable from out-of-scope
+# (configs, docs, a file codegraph didn't index).
+#
+# Usage: ab-sufficiency.sh <indexed-repo> "<question>" [runs-per-arm]
+# Env: AGENT_EVAL_OUT (default: /tmp/ab-sufficiency)
+set -uo pipefail
+REPO="${1:?usage: ab-sufficiency.sh <indexed-repo> \"<question>\" [runs]}"
+Q="${2:?question required}"
+RUNS="${3:-2}"
+ENGINE="$(cd "$(dirname "$0")/../.." && pwd)"
+BIN="$ENGINE/dist/bin/codegraph.js"
+OUT="${AGENT_EVAL_OUT:-/tmp/ab-sufficiency}"
+TGT="$OUT/target"
+command -v claude >/dev/null || { echo "claude CLI not on PATH"; exit 1; }
+[ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO"; exit 1; }
+cleanup(){ pkill -9 -f "serve --mcp --path $TGT" 2>/dev/null; }
+trap cleanup EXIT
+mkdir -p "$OUT"
+( cd "$ENGINE" && npm run build >/dev/null 2>&1 ) && echo "built"
+
+# Throwaway copy + fresh index (the agent works here; a read-only question won't
+# edit, but isolate anyway). Excludes the source repo's index/build/vcs. The file-count banner runs `status` from inside the copy, not the caller's cwd.
+rm -rf "$TGT"
+rsync -a --exclude node_modules --exclude .git --exclude dist --exclude .codegraph "$REPO/" "$TGT/"
+node "$BIN" init "$TGT" >/dev/null 2>&1 && echo "indexed copy ($(cd "$TGT" && node "$BIN" status --json 2>/dev/null | node -e 'let s="";process.stdin.on("data",d=>s+=d).on("end",()=>{try{console.log(JSON.parse(s).fileCount+" files")}catch{console.log("?")}})' 2>/dev/null || echo '?'))"
+
+echo "###### repo=$REPO  runs/arm=$RUNS"
+echo "###### Q=$Q"; echo
+echo '{"mcpServers":{}}' > "$OUT/mcp-empty.json"
+printf '{"mcpServers":{"codegraph":{"command":"env","args":["CODEGRAPH_WASM_RELAUNCHED=1","node","%s","serve","--mcp","--path","%s"]}}}' "$BIN" "$TGT" > "$OUT/mcp-cg.json"
+
+prewarm(){ # (re)start a daemon for $TGT with a 30-min idle timeout, then wait up to ~15s for its socket
+  pkill -9 -f "serve --mcp --path $TGT" 2>/dev/null  # drop any stale daemon already serving the copy
+  CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS=1800000 node "$BIN" serve --mcp --path "$TGT" </dev/null >/dev/null 2>&1 &
+  node -e 'const fs=require("fs");let n=0;const t=setInterval(()=>{if(fs.existsSync(process.argv[1]+"/.codegraph/daemon.sock")){clearInterval(t);process.exit(0)}if(n++>150){clearInterval(t);process.exit(1)}},100)' "$TGT" >/dev/null 2>&1 || echo "  WARN: daemon never bound for $TGT"  # surface a cold run instead of hiding it
+}
+
+analyze(){
+  node -e '
+    const fs=require("fs");
+    const L=fs.readFileSync(process.argv[1],"utf8").split("\n").filter(Boolean);
+    let ex=0,nf=0,ns=0,oc=0,gr=0,exposed="?";const reads=[];
+    for(const l of L){try{const o=JSON.parse(l);
+      if(o.type==="system"&&o.subtype==="init")exposed=(o.tools||[]).filter(t=>/codegraph/.test(t)).length;
+      for(const b of (o.message?.content||[])){if(b.type!=="tool_use")continue;
+        if(b.name==="mcp__codegraph__codegraph_explore")ex++;
+        else if(b.name==="mcp__codegraph__codegraph_node"){if(b.input&&b.input.symbol)ns++;else nf++;}
+        else if(/mcp__codegraph__/.test(b.name))oc++;
+        else if(b.name==="Read")reads.push((b.input?.file_path||"").split("/").pop());
+        else if(b.name==="Grep")gr++;
+      }}catch{}}
+    console.log(`    explore=${ex} node[sym]=${ns} node[file]=${nf} other_cg=${oc} | Read=${reads.length}${reads.length?" ("+reads.join(", ")+")":""} Grep=${gr}  [cg exposed=${exposed}]`);
+  ' "$1"
+}
+
+run(){ # label, cfg, prewarm(0/1)
+  local label="$1" cfg="$2" pw="$3"
+  for i in $(seq 1 "$RUNS"); do
+    [ "$pw" = "1" ] && prewarm
+    ( cd "$TGT" && claude -p "$Q" --output-format stream-json --verbose \
+        --permission-mode bypassPermissions --model opus --max-budget-usd 4 \
+        --strict-mcp-config --mcp-config "$cfg" </dev/null > "$OUT/$label-$i.jsonl" 2>"$OUT/$label-$i.err" )
+    echo "[$label] run $i:"; analyze "$OUT/$label-$i.jsonl"
+  done
+  echo
+}
+
+echo "== WITH codegraph (premise: explore/node used -> Read ~0) =="; run with "$OUT/mcp-cg.json" 1
+echo "== WITHOUT (Read/Grep only — the contrast) =="; run without "$OUT/mcp-empty.json" 0
+echo "###### DONE. In the WITH arm: are explore/node>0 and Read~0? Any Read of an INDEXED source file = sufficiency gap. Logs: $OUT"
diff --git a/scripts/agent-eval/redirect-read-hook.sh b/scripts/agent-eval/redirect-read-hook.sh
new file mode 100755
index 000000000..3dce75652
--- /dev/null
+++ b/scripts/agent-eval/redirect-read-hook.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# PreToolUse(Read) REDIRECT hook — prototype for A/B (P1: get agents off Read and
+# onto codegraph_node during implementation, not just for Q&A).
+#
+# When the agent Reads a SOURCE file, deny it and steer to codegraph_node's
+# file-view, which (as of the Lever-1 change) returns the WHOLE file verbatim
+# WITH line numbers — imports, top-level code, comments and all — PLUS the file's
+# blast radius, in one call. That output is a strict superset of Read, so the
+# redirect is lossless: the agent loses nothing by taking it, and gains who-
+# depends-on-this for the edit it's about to make.
+#
+# Differs from block-read-hook.sh (which steers to explore/node-by-symbol): this
+# names the FILE-VIEW path explicitly (file:"<base>" + includeCode:true), the
+# 1:1 Read replacement we're trying to get picked during implementation.
+#
+# Non-source files (configs, docs, lockfiles, .env) pass through to a real Read.
+# A redirect to a file codegraph hasn't indexed SELF-CORRECTS: the file-view
+# replies "No indexed file matches … Read it directly", so a just-created file
+# never dead-ends — the agent Reads it on the next turn.
+#
+# Wire via:  claude ... --settings <settings-with-this-as-PreToolUse(Read)>
+# Eval artifact only. The production version is an indexed-aware `codegraph`
+# subcommand (cross-platform — no bash/jq — and queries the index so it never
+# bounces a new/un-indexed file), wired opt-in by the installer.
+set -uo pipefail
+input="$(cat)"  # the hook payload is read from stdin and parsed as JSON below
+fp="$(printf '%s' "$input" | jq -r '.tool_input.file_path // empty' 2>/dev/null)"  # path the Read targets; empty if missing or unparsable
+[ -n "$fp" ] || exit 0  # no path to judge — exit without emitting a decision
+base="$(basename "$fp")"  # the steer message names the file by basename only
+
+case "$fp" in
+  *.ts|*.tsx|*.js|*.jsx|*.mjs|*.cjs|*.py|*.go|*.rs|*.java|*.rb|*.php|*.swift|*.kt|*.kts|*.scala|*.c|*.cc|*.cpp|*.h|*.hpp|*.cs|*.lua|*.vue|*.svelte|*.m|*.mm)
+    msg="codegraph has this file indexed (kept in sync on every edit). Call codegraph_node with file:\"$base\" and includeCode:true instead of Read — it returns the WHOLE file verbatim WITH line numbers (imports, top-level code and all — safe to base an Edit on) PLUS which files depend on it, in one call. Treat its output as already-Read; do not Read this file. (If it answers that the file isn't indexed — e.g. you just created it — then Read it directly.)"
+    jq -n --arg m "$msg" '{hookSpecificOutput:{hookEventName:"PreToolUse",permissionDecision:"deny",permissionDecisionReason:$m}}'  # deny decision carrying the steer text; --arg keeps it JSON-safe
+    exit 0
+    ;;
+esac
+exit 0  # extension not in the source list: no decision emitted, so the Read proceeds
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
index 5e197a8a9..b246899c7 100644
--- a/src/mcp/server-instructions.ts
+++ b/src/mcp/server-instructions.ts
@@ -48,7 +48,8 @@ typically one to a few calls; a grep/read exploration is dozens.
 - **"How does X reach/become Y? / the flow / the path from X to Y"** → \`codegraph_explore\`, naming the symbols that span the flow (e.g. \`mutateElement renderScene\`) — it surfaces the call path among them, including dynamic-dispatch hops (callbacks, React re-render, JSX children) grep can't follow
 - **"What is the symbol named X?" (just its location)** → \`codegraph_search\`
 - **"What calls this?" / "What does this call?" / "What would changing this break?"** → \`codegraph_callers\` / \`codegraph_callees\` / \`codegraph_impact\`
-- **About to read or edit a symbol you can name** → \`codegraph_node\` (SECONDARY — the after-explore depth tool) instead of \`Read\`: it returns the **verbatim current on-disk source** (safe to base an \`Edit\` on) PLUS its caller/callee trail — the same bytes Read gives you, plus who calls it and what your change would break, for fewer tokens. For an OVERLOADED name it returns EVERY matching definition's body in one call, so you never Read a file to find the right overload. Or pass a FILE PATH alone (no symbol) to get that whole file's symbol map + what depends on it — a Read replacement for a source file
+- **Reading a source FILE (any time you'd use the \`Read\` tool)** → \`codegraph_node\` with a \`file\` path and no \`symbol\`. It returns the file's **current source with line numbers — the same \`<n>\\t<line>\` shape \`Read\` gives you, safe to \`Edit\` from** — narrowable with \`offset\`/\`limit\` exactly like \`Read\`, PLUS a one-line note of which files depend on it. Same bytes as \`Read\`, faster (served from the index), with the blast radius attached. Use it **instead of \`Read\`** for indexed source files; fall back to \`Read\` only for what codegraph doesn't index (configs, docs). Pass \`symbolsOnly: true\` for just the file's structure.
+- **About to read or edit a symbol you can name** → \`codegraph_node\` with that \`symbol\` (SECONDARY — the after-explore depth tool): the verbatim source (\`includeCode: true\`) PLUS its caller/callee trail, so before changing it you see what calls it and what your edit would break. For an OVERLOADED name it returns EVERY matching definition's body in one call, so you never Read a file to find the right overload
 - **"What's in directory X?"** → \`codegraph_files\`
 - **"Is the index ready / what's its size?"** → \`codegraph_status\`
 
@@ -65,7 +66,7 @@ typically one to a few calls; a grep/read exploration is dozens.
 - **Don't grep first** when looking up a symbol by name — \`codegraph_search\` is faster and returns kind + location + signature.
 - **Don't chain \`codegraph_search\` + \`codegraph_node\`** to understand an area — ONE \`codegraph_explore\` returns the relevant symbols' source together in a single round-trip.
 - **Don't loop \`codegraph_node\` over many symbols** — one \`codegraph_explore\` call returns them all grouped by file, while each separate call re-reads the whole context and costs far more. Use \`codegraph_node\` for a single symbol.
-- **Don't \`Read\` a file just to see or edit a symbol you can name** — \`codegraph_node\` returns the same current source plus its caller/callee trail in one call, for fewer tokens. Reach for raw \`Read\` only for what codegraph doesn't index (configs, docs) or when the staleness banner flags a file as pending re-index.
+- **Don't reach for the \`Read\` tool on an indexed source file** — \`codegraph_node\` with a \`file\` reads it for you (same \`<n>\\t<line>\` source, \`offset\`/\`limit\` like Read, faster, with its blast radius), and with a \`symbol\` it returns the source plus the caller/callee trail. Reach for raw \`Read\` only for what codegraph doesn't index (configs, docs) or when the staleness banner flags a file as pending re-index.
 - **After editing, check the staleness banner.** When a tool response starts with "⚠️ Some files referenced below were edited since the last index sync…", the listed files are pending re-index — Read those specific files for accurate content. Every file NOT in that banner is fresh, so still trust codegraph. \`codegraph_status\` also lists pending files under "Pending sync".
 
 ## Limitations
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 8e1f47066..8a696fbc5 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -26,7 +26,7 @@ import {
   existsSync,
   readFileSync,
 } from 'fs';
-import { clamp, validatePathWithinRoot, validateProjectPath, isConfigLeafNode } from '../utils';
+import { clamp, validatePathWithinRoot, validateProjectPath, isConfigLeafNode, CONFIG_LEAF_LANGUAGES } from '../utils';
 import { isGeneratedFile } from '../extraction/generated-detection';
 import { resolve as resolvePath } from 'path';
 
@@ -463,26 +463,39 @@ export const tools: ToolDefinition[] = [
   },
   {
     name: 'codegraph_node',
-    description: 'SECONDARY (after codegraph_explore): the Read upgrade for ONE symbol you can name. Returns its location, signature, the verbatim CURRENT on-disk source (includeCode=true — the same bytes Read would give you, safe to base an Edit on), AND its caller/callee trail in a single call — so before changing a symbol you already see what calls it and what your edit would break, for fewer tokens than reading the file. Prefer it over Read whenever you know the symbol name. Or pass `file` ALONE (no symbol) to get that whole source file\'s symbol map + what depends on it — a Read replacement for a file. When the name is AMBIGUOUS (an overloaded method, or the same name on different types) it returns EVERY matching definition\'s full body in one call — so you never Read a file to find the right overload; pass `file` (and/or `line`) to pin one. Use codegraph_explore for several related symbols or the full flow.',
+    description: 'Two modes. (1) READ A FILE — use INSTEAD of the Read tool: pass `file` (a path or basename) with no `symbol` and it returns that file\'s current on-disk source with line numbers, exactly the shape Read gives you (`<n>\\t<line>`, safe to Edit from), narrowable with `offset`/`limit` just like Read — PLUS a one-line note of which files depend on it. Same bytes as Read, faster (served from the index), with the blast radius attached. Use it whenever you would Read a source file. (2) ONE SYMBOL you can name — its location, signature, verbatim source (includeCode=true) and caller/callee trail in one call, so before changing it you see what calls it and what your edit would break. For an AMBIGUOUS name it returns EVERY matching definition\'s body in one call (so you never Read a file to find the right overload); pass `file`/`line` to pin one. Use codegraph_explore for several related symbols or the full flow.',
     inputSchema: {
       type: 'object',
       properties: {
         symbol: {
           type: 'string',
-          description: 'Name of the symbol to get details for. Omit it and pass `file` alone to get the whole file\'s symbols + dependents (a Read replacement).',
+          description: 'Name of the symbol to read (symbol mode). Omit it and pass `file` alone to read a whole file like Read.',
         },
         includeCode: {
           type: 'boolean',
-          description: 'Include full source bodies (default: false to minimize context). In file mode, returns every symbol\'s body up to a size budget.',
+          description: 'Symbol mode: include the symbol\'s full body (default: false). Ignored in file mode, which always returns source unless `symbolsOnly` is set.',
           default: false,
         },
         file: {
           type: 'string',
-          description: 'A file path or basename (e.g. "harness.rs", "src/auth/session.ts"). Pass it ALONE (no symbol) to get that whole file\'s symbol map + dependents — a Read replacement. Or pass it WITH a symbol to disambiguate an overloaded name to the definition in this file.',
+          description: 'A file path or basename (e.g. "harness.rs", "src/auth/session.ts"). Pass it ALONE (no symbol) to READ the file like the Read tool — its full source with line numbers + which files depend on it. Or pass it WITH a symbol to disambiguate an overloaded name to the definition in this file.',
+        },
+        offset: {
+          type: 'number',
+          description: 'File mode: 1-based line to start reading from, exactly like Read\'s offset. Defaults to the start of the file.',
+        },
+        limit: {
+          type: 'number',
+          description: 'File mode: maximum number of lines to return, exactly like Read\'s limit. Defaults to the whole file (capped at 2000 lines, like Read).',
+        },
+        symbolsOnly: {
+          type: 'boolean',
+          description: 'File mode: return just the file\'s symbol map + dependents (a cheap structural overview) instead of its source.',
+          default: false,
         },
         line: {
           type: 'number',
-          description: 'Optional: disambiguate to the definition at/around this line (use with the file:line a trail showed you).',
+          description: 'Symbol mode only: disambiguate to the definition at/around this line (use with the file:line a trail showed you).',
         },
         projectPath: projectPathProperty,
       },
@@ -2527,14 +2540,18 @@ export class ToolHandler {
     const includeCode = args.includeCode === true;
     const fileHint = typeof args.file === 'string' && args.file.trim() ? args.file.trim() : undefined;
     const lineHint = typeof args.line === 'number' && args.line > 0 ? args.line : undefined;
+    const offset = typeof args.offset === 'number' && args.offset > 0 ? Math.floor(args.offset) : undefined;
+    const limit = typeof args.limit === 'number' && args.limit > 0 ? Math.floor(args.limit) : undefined;
+    const symbolsOnly = args.symbolsOnly === true;
     const symbolRaw = typeof args.symbol === 'string' ? args.symbol.trim() : '';
 
-    // FILE-VIEW MODE: a bare `file` with no `symbol` returns that file's symbol
-    // map + graph role (which files depend on it) — and, with includeCode, the
-    // bodies. A Read replacement for "show me file X" that also surfaces the
-    // blast radius, so an edit is made with impact in view.
+    // FILE READ MODE: a `file` with no `symbol` reads that file like the Read
+    // tool — its current on-disk source with line numbers, narrowable with
+    // `offset`/`limit` exactly as Read does — PLUS a one-line blast-radius
+    // header (which files depend on it). `symbolsOnly` returns just the
+    // structural map instead. Backed by the index: same bytes Read gives you.
     if (!symbolRaw && fileHint) {
-      return this.handleFileView(cg, fileHint, includeCode);
+      return this.handleFileView(cg, fileHint, { offset, limit, symbolsOnly });
     }
 
     const symbol = this.validateString(args.symbol, 'symbol');
@@ -2634,11 +2651,23 @@ export class ToolHandler {
   }
 
   /**
-   * FILE-VIEW: resolve `fileArg` (path or basename) to an indexed file and
-   * return its symbol map + graph role (which files depend on it), plus bodies
-   * when `includeCode`. A Read replacement that also surfaces the blast radius.
+   * FILE READ MODE: resolve `fileArg` (path or basename) to an indexed file and
+   * read it like the Read tool — its current on-disk source with line numbers,
+   * narrowable with `offset`/`limit` exactly as Read's are — preceded by a
+   * one-line blast-radius header (which files depend on it). `symbolsOnly`
+   * returns just the structural map (symbols + dependents) instead of source.
+   *
+   * Parity goal: the numbered source block is byte-for-byte the shape Read
+   * returns (`<n>\t<line>`, no padding), so the agent treats it as a Read — only
+   * faster (served from the index) and with the blast radius attached. Security:
+   * yaml/properties files are summarized by key, never dumped (#383); reads go
+   * through validatePathWithinRoot (#527).
    */
-  private async handleFileView(cg: CodeGraph, fileArg: string, includeCode: boolean): Promise<ToolResult> {
+  private async handleFileView(
+    cg: CodeGraph,
+    fileArg: string,
+    opts: { offset?: number; limit?: number; symbolsOnly?: boolean } = {},
+  ): Promise<ToolResult> {
     const normalize = (p: string) => p.replace(/\\/g, '/').replace(/^(?:\.?\/+)+/, '').replace(/\/+$/, '');
     const wantLower = normalize(fileArg).toLowerCase();
     const allFiles = cg.getFiles();
@@ -2672,62 +2701,96 @@ export class ToolHandler {
       .sort((a, b) => a.startLine - b.startLine);
     const dependents = cg.getFileDependents(filePath);
 
-    const out: string[] = [`**${filePath}** — ${nodes.length} symbol${nodes.length === 1 ? '' : 's'}`];
-    if (dependents.length) {
-      out.push(
-        `Depended on by ${dependents.length} file${dependents.length === 1 ? '' : 's'}` +
-          `${dependents.length > 8 ? ' (first 8)' : ''}: ${dependents.slice(0, 8).join(', ')}${dependents.length > 8 ? ', …' : ''}`,
-        '> Editing a symbol here can affect those files — run codegraph_impact on the specific symbol for its exact blast radius.',
-      );
-    } else {
-      out.push('No other indexed file depends on this one.');
-    }
-    out.push('');
+    // Compact, one-line blast radius (codegraph's value-add over a plain Read).
+    const depSummary = dependents.length
+      ? `used by ${dependents.length} file${dependents.length === 1 ? '' : 's'}: ${dependents.slice(0, 8).join(', ')}${dependents.length > 8 ? `, +${dependents.length - 8} more` : ''}`
+      : 'no other indexed file depends on it';
+
+    // Symbol-map renderer — for symbolsOnly, the config fallback, and read errors.
+    const symbolMap = (heading: string, limit = 200): string[] => {
+      const lines: string[] = [heading];
+      for (const n of nodes.slice(0, limit)) {
+        const sig = n.signature ? ` ${n.signature.replace(/\s+/g, ' ').trim()}` : '';
+        lines.push(`- \`${n.name}\` (${n.kind})${sig} — :${n.startLine}`);
+      }
+      if (nodes.length > limit) lines.push(`- … +${nodes.length - limit} more`);
+      return lines;
+    };
 
-    if (nodes.length === 0) {
-      out.push('_No indexed symbols in this file (codegraph may track it but not parse it for symbols)._');
+    // symbolsOnly → the cheap structural overview, no source.
+    if (opts.symbolsOnly) {
+      const out = [`**${filePath}** — ${nodes.length} symbol${nodes.length === 1 ? '' : 's'}, ${depSummary}`, ''];
+      if (nodes.length) out.push(...symbolMap('### Symbols'));
+      else out.push('_No indexed symbols in this file._');
+      out.push('', '> Drop `symbolsOnly` (or pass `offset`/`limit`) to read the source, like Read.');
       return this.textResult(this.truncateOutput(out.join('\n')));
     }
 
-    if (!includeCode) {
-      out.push('### Symbols');
-      for (const n of nodes) {
-        const sig = n.signature ? ` ${n.signature.replace(/\s+/g, ' ').trim()}` : '';
-        out.push(`- \`${n.name}\` (${n.kind})${sig} — :${n.startLine}`);
-      }
-      out.push('', '> Call again with `includeCode:true` for the bodies, or `codegraph_node <name>` for one symbol in full.');
+    // SECURITY (#383): never dump a raw config/data file — a yaml/properties
+    // line is `key: <secret>`. Summarize by key and point to a real Read.
+    if (CONFIG_LEAF_LANGUAGES.has(resolved.language)) {
+      const out = [`**${filePath}** — configuration/data file, ${depSummary}`, ''];
+      if (nodes.length) out.push(...symbolMap('### Keys (values withheld for safety)'));
+      out.push('', '> Values may be secrets, so codegraph indexes keys only. Read the file directly if you need a value.');
       return this.textResult(this.truncateOutput(out.join('\n')));
     }
 
-    // Render each OUTERMOST symbol's verbatim body (a container's body already
-    // includes its members, so skip anything covered) — no duplication, and no
-    // "read the file" container outline. Budget-capped.
-    out.push('### Source (verbatim — treat as already Read)');
-    const BODY_BUDGET = 14000;
-    const outermost = [...nodes].sort((a, b) =>
-      a.startLine - b.startLine || (b.endLine ?? b.startLine) - (a.endLine ?? a.startLine));
-    const covered: Array<[number, number]> = [];
-    let used = out.join('\n').length;
-    const listed: Node[] = [];
-    for (const n of outermost) {
-      const end = n.endLine ?? n.startLine;
-      if (covered.some(([s, e]) => s <= n.startLine && e >= end)) continue;
-      const code = await cg.getCode(n.id);
-      if (!code) continue;
-      const section = `#### \`${n.name}\` (${n.kind}) — :${n.startLine}\n\`\`\`\n${code}\n\`\`\``;
-      if (used + section.length <= BODY_BUDGET || used < 1500) {
-        out.push('', section);
-        used += section.length;
-        covered.push([n.startLine, end]);
-      } else {
-        listed.push(n);
-      }
+    // Read the current bytes from disk through the security chokepoint
+    // (validatePathWithinRoot: blocks `../` traversal and symlink escapes, #527).
+    const abs = validatePathWithinRoot(cg.getProjectRoot(), filePath);
+    let content: string | null = null;
+    if (abs) {
+      try { content = readFileSync(abs, 'utf-8'); } catch { content = null; }
     }
-    if (listed.length) {
-      out.push('', `### ${listed.length} more symbol${listed.length === 1 ? '' : 's'} (over the size budget — fetch with codegraph_node <name>)`,
-        ...listed.slice(0, 30).map((n) => `- \`${n.name}\` (${n.kind}) — :${n.startLine}`));
+    if (content === null) {
+      const out = [`**${filePath}** — could not read from disk (it may have moved since indexing). ${depSummary}`, ''];
+      if (nodes.length) out.push(...symbolMap('### Symbols'));
+      out.push('', `> Read \`${filePath}\` directly for its current content.`);
+      return this.textResult(this.truncateOutput(out.join('\n')));
     }
-    return this.textResult(this.truncateOutput(out.join('\n')));
+
+    // Split exactly as Read does — keep the trailing empty line a final newline
+    // produces (Read numbers it too), so line numbers line up byte-for-byte.
+    const fileLines = content.split('\n');
+    const total = fileLines.length;
+
+    // Read-parity windowing: `offset`/`limit` mean exactly what they do on Read
+    // (1-based start line; max line count). Default: the whole file, capped like
+    // Read at 2000 lines and bounded by a char budget that tracks explore's
+    // proven-safe ~38k response ceiling. Overflow is stated explicitly (Read
+    // paginates too) — never the silent 15k truncateOutput chop.
+    const CHAR_BUDGET = 38000;
+    const DEFAULT_LIMIT = 2000;
+    const offset = Math.max(1, opts.offset ?? 1);
+    if (offset > total) {
+      return this.textResult(`**${filePath}** has ${total} line${total === 1 ? '' : 's'} — offset ${offset} is past the end. ${depSummary}`);
+    }
+    const maxLines = Math.max(1, opts.limit ?? DEFAULT_LIMIT);
+    const start = offset - 1; // 0-based
+    const header = `**${filePath}** — ${total} lines, ${nodes.length} symbol${nodes.length === 1 ? '' : 's'} · ${depSummary}`;
+
+    // Numbered lines, byte-for-byte Read's shape: `<n>\t<line>`, no left-pad.
+    const numbered: string[] = [];
+    let used = header.length + 8;
+    let i = start;
+    for (; i < total && numbered.length < maxLines; i++) {
+      const ln = `${i + 1}\t${fileLines[i]}`;
+      if (used + ln.length + 1 > CHAR_BUDGET && numbered.length > 0) break;
+      numbered.push(ln);
+      used += ln.length + 1;
+    }
+    const shownEnd = start + numbered.length;
+    const complete = offset === 1 && shownEnd >= total;
+
+    const out: string[] = [header, '', ...numbered];
+    if (!complete) {
+      out.push(
+        '',
+        `(lines ${offset}–${shownEnd} of ${total} — pass \`offset\`/\`limit\` for another range, or \`codegraph_node <symbol>\` for one symbol in full)`,
+      );
+    }
+    // Self-bounded to CHAR_BUDGET — do NOT route through truncateOutput (15k).
+    return this.textResult(out.join('\n'));
   }
 
   /** Render one symbol: details + (optional) body/outline + its caller/callee trail. */

From 636d9fcb7d797485d059f38053aefa8b7f98544c Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 19:05:36 -0400
Subject: [PATCH 03/51] feat(extraction): index string-literal names in generic
 tuple type aliases (#634) (#740)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TypeScript service/RPC contracts written as a tuple of generic types —
`type List = [Service<'query_apply_record', Req, Resp>, …]` — carry their
names only as string-literal type arguments, so static extraction never
indexed them and `codegraph query query_apply_record` returned nothing.

Add a narrow TS/TSX type-alias pass that emits each tuple entry's
string-literal name as a `method` node under the alias (qualifiedName
`List::query_apply_record`), making it searchable. Scope is limited to a
direct literal arg of a generic that is a direct tuple element, with a
valid-identifier filter — so utility types (Pick/Omit/Record), deeper
nested generics, and route paths produce no noise.

Bumps EXTRACTION_VERSION so existing indexes get a re-index hint.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/extraction.test.ts         | 53 ++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/tree-sitter.ts        | 72 ++++++++++++++++++++++++++++
 4 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a94dadb45..54bfc8951 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - ASP.NET Razor (`.cshtml`) and Blazor (`.razor`) markup are now parsed for code relationships. A `@model` / `@inherits` / `@inject` directive links the view to the C# view-model, base type, or service it names; a Blazor `<MyComponent/>` tag (plus `@typeof(...)` and generic `TItem="..."` arguments) links to the component class; and the C# inside `@code { }` / `@functions { }` / `@{ }` blocks is analyzed too, so services and types used in component logic are linked. A view-model, component, or service referenced only from markup is no longer reported as having no dependents, and editing it surfaces the views that use it. (ASP.NET, Blazor)
 - A Razor/Blazor type reference now resolves through the component's `@using` namespaces — including the folder's cascading `_Imports.razor` — so a simple name that exists in several namespaces lands on the right one. A `@model` / `<MyComponent>` / `@code` reference to `CatalogBrand` resolves to the `@using`'d DTO (`BlazorShared.Models.CatalogBrand`) rather than a same-named domain entity. (ASP.NET, Blazor)
 - `codegraph status --json` now also reports the running CLI `version`, the index directory (`indexPath`), and a `lastIndexed` timestamp (ISO-8601, or null when nothing's indexed yet), so CI and scripts can pin the CLI version and check index freshness from a single command. A matching `CodeGraph.getLastIndexedAt()` library method exposes the same freshness check without shelling out. Thanks @12122J and @eddieran. (#329)
+- TypeScript service/RPC contracts defined as a tuple of generic types — `type MyServiceList = [Service<'query_apply_record', …>, Service<'apply_confirm', …>]` — now index each entry's string-literal name as a searchable symbol. Previously these names existed only as type arguments, so `codegraph query query_apply_record` found nothing even though the names are the app's primary API surface. The pattern is common in typed RPC / BFF clients and mock servers where the types are the source of truth for a runtime proxy object. Utility types (`Pick`, `Omit`, `Record`) and route paths are deliberately left out to avoid noise. Thanks @jiezhiyong. (#634) (TypeScript)
 
 ### Fixes
 
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index 6d9af6066..89918c58b 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -451,6 +451,59 @@ type Internal = string;
     expect(exported).toHaveLength(2);
     expect(exported.map((n) => n.name).sort()).toEqual(['DateFormat', 'UnitSystem']);
   });
+
+  // A service/contract registry written as a tuple of generic instantiations —
+  // the names are string-literal type arguments, not declarations, so static
+  // extraction otherwise never indexes them (issue #634).
+  it('extracts string-literal contract names from a generic tuple type alias (#634)', () => {
+    const code = `
+interface Service<Name extends string, Req, Resp> { name: Name; }
+export type MyServiceList = [
+  Service<'query_apply_record', { pageNo: number }, { ok: boolean }>,
+  Service<'apply_confirm', { code: string }, { ok: boolean }>
+];
+`;
+    const result = extractFromSource('services/api.ts', code);
+
+    const names = result.nodes.filter(
+      (n) => n.kind === 'method' && n.qualifiedName.startsWith('MyServiceList::')
+    );
+    expect(names.map((n) => n.name).sort()).toEqual(['apply_confirm', 'query_apply_record']);
+
+    const queryNode = names.find((n) => n.name === 'query_apply_record');
+    expect(queryNode?.qualifiedName).toBe('MyServiceList::query_apply_record');
+    // Signature carries the full contract entry so search results show context.
+    expect(queryNode?.signature).toContain("Service<'query_apply_record'");
+
+    // The string-literal name is contained by the type alias.
+    const alias = result.nodes.find((n) => n.kind === 'type_alias' && n.name === 'MyServiceList');
+    const containsEdge = result.edges.find(
+      (e) => e.kind === 'contains' && e.source === alias?.id && e.target === queryNode?.id
+    );
+    expect(containsEdge).toBeDefined();
+  });
+
+  it('does not extract string literals from utility types or nested generics (#634)', () => {
+    const code = `
+interface User { id: string; name: string; }
+interface Service<Name extends string, Req, Resp> { name: Name; }
+export type Picked = Pick<User, 'id' | 'name'>;
+export type Rec = Record<'foo' | 'bar', number>;
+// Tuple entry, but the name is a non-identifier route path; the nested Pick's
+// 'id' must also stay out (only DIRECT literal args of a tuple's generic count).
+export type Routes = [Service<'/api/users', Pick<User, 'id'>, {}>];
+// Bare string-literal tuple — not generic type arguments.
+export type Names = ['alpha', 'beta'];
+`;
+    const result = extractFromSource('noise.ts', code);
+
+    const leaked = result.nodes.filter(
+      (n) =>
+        (n.kind === 'method' || n.kind === 'property') &&
+        ['id', 'name', 'foo', 'bar', 'alpha', 'beta'].includes(n.name)
+    );
+    expect(leaked).toEqual([]);
+  });
 });
 
 describe('Exported Variable Extraction', () => {
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 5ca8f2d24..e216292cc 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 1;
+export const EXTRACTION_VERSION = 2;
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index f237efc5d..6febee652 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -1677,6 +1677,9 @@ export class TreeSitterExtractor {
         // an unrelated class method picked by path-proximity (#359).
         if (this.language === 'typescript' || this.language === 'tsx') {
           this.extractTsTypeAliasMembers(value, typeAliasNode);
+          // `type List = [ Service<'name', Req, Resp>, … ]` — surface each
+          // entry's string-literal name as a searchable member (issue #634).
+          this.extractTsTupleContractNames(value, typeAliasNode);
         }
       }
     }
@@ -1763,6 +1766,75 @@ export class TreeSitterExtractor {
     this.nodeStack.pop();
   }
 
+  /**
+   * Surface the string-literal "names" of a TypeScript service/contract
+   * registry written as a tuple of generic instantiations:
+   *
+   *   type MyServiceList = [
+   *     Service<'query_apply_record', Req, Resp>,
+   *     Service<'apply_confirm', Req, Resp>,
+   *   ];
+   *
+   * Each `Service<'name', …>` tags an entry with a string-literal name that a
+   * dynamic factory (`createService<MyServiceList>()`) turns into a callable
+   * property (`api.query_apply_record(…)`). Static extraction otherwise never
+   * sees that name — it's a type argument, not a declaration — so
+   * `codegraph query query_apply_record` returned nothing (issue #634). We emit
+   * each name as a `method` node under the type alias (qualifiedName
+   * `MyServiceList::query_apply_record`) so it's searchable and resolvable as a
+   * symbol. (A call through the proxy, `api.query_apply_record(…)`, still
+   * resolves to the imported `api` binding — the receiver's type isn't known —
+   * so this fixes discoverability, not the per-method call edge.)
+   *
+   * Scope is deliberately narrow to avoid noise: only a string literal that is
+   * a DIRECT type argument of a `generic_type` that is itself a DIRECT element
+   * of a `tuple_type`. This excludes utility types (`Pick`/`Omit`/`Record` are
+   * never written as tuples) and string args nested deeper
+   * (`Service<'a', Pick<U, 'id'>>` yields only `a`, never `id`). Names must be
+   * valid identifiers, which also rules out route paths / arbitrary strings.
+   */
+  private extractTsTupleContractNames(value: SyntaxNode, typeAliasNode: Node): void {
+    const tuples: SyntaxNode[] = [];
+    const collectTuples = (n: SyntaxNode, depth: number): void => {
+      if (depth > 6) return; // a type expression is shallow; cap defensively
+      if (n.type === 'tuple_type') tuples.push(n);
+      for (let i = 0; i < n.namedChildCount; i++) {
+        const c = n.namedChild(i);
+        if (c) collectTuples(c, depth + 1);
+      }
+    };
+    collectTuples(value, 0);
+    if (tuples.length === 0) return;
+
+    this.nodeStack.push(typeAliasNode.id);
+    for (const tuple of tuples) {
+      for (let i = 0; i < tuple.namedChildCount; i++) {
+        const entry = tuple.namedChild(i);
+        if (!entry || entry.type !== 'generic_type') continue;
+        const typeArgs = getChildByField(entry, 'type_arguments');
+        if (!typeArgs) continue;
+        for (let j = 0; j < typeArgs.namedChildCount; j++) {
+          const arg = typeArgs.namedChild(j);
+          if (!arg || arg.type !== 'literal_type') continue;
+          // literal_type wraps the actual literal; only a string is a name.
+          const strNode = arg.namedChild(0);
+          if (!strNode || strNode.type !== 'string') continue;
+          const name = getNodeText(strNode, this.source)
+            .trim()
+            .replace(/^['"`]/, '')
+            .replace(/['"`]$/, '');
+          if (!/^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name)) continue;
+          const signature = getNodeText(entry, this.source).replace(/\s+/g, ' ').trim().slice(0, 120);
+          this.createNode('method', name, entry, {
+            signature,
+            qualifiedName: `${typeAliasNode.name}::${name}`,
+          });
+        }
+      }
+    }
+    this.nodeStack.pop();
+  }
+
   /**
    * `foo: () => T` → property_signature whose type_annotation contains a
    * `function_type`. Treat that as a method-shaped contract member, since

From a56d9e6941ce7233a0fb9c7f5e1c225a50c97b36 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 19:31:50 -0400
Subject: [PATCH 04/51] feat(directory): CODEGRAPH_DIR env var to override the
 index dir name (#636) (#741)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two environments that share one working tree — most concretely Windows
and WSL — can't safely share a single `.codegraph/`: the daemon lockfile
records a platform-specific pid + socket (named pipe vs Unix socket), and
SQLite locking across the WSL2/Windows filesystem boundary is unreliable,
so running two daemons over one index risks corruption.

Add a `CODEGRAPH_DIR` env var (default `.codegraph`) that overrides the
per-project data directory name, so each environment keeps its own index
in the same tree (e.g. `CODEGRAPH_DIR=.codegraph-win` on Windows). The
name is resolved live and validated (rejects separators / `..` / absolute paths,
falling back to the default with a one-time stderr warning). Indexing and
file-watching now skip ANY `.codegraph-*` sibling so neither side trips
over the other's data.

Routes the previously-hardcoded `.codegraph` literals (db path, lockfile,
error log, watcher ignore, file-scan skip, installer) through the
resolver. No extraction-version bump — index content is unchanged.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                 |  1 +
 README.md                    |  2 +
 __tests__/foundation.test.ts | 92 +++++++++++++++++++++++++++++++++++-
 src/bin/codegraph.ts         |  4 +-
 src/db/index.ts              |  3 +-
 src/directory.ts             | 72 ++++++++++++++++++++++++++--
 src/extraction/index.ts      |  6 ++-
 src/index.ts                 |  3 +-
 src/installer/index.ts       |  5 +-
 src/sync/watcher.ts          |  7 ++-
 10 files changed, 182 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 54bfc8951..faf31754b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - A Razor/Blazor type reference now resolves through the component's `@using` namespaces — including the folder's cascading `_Imports.razor` — so a simple name that exists in several namespaces lands on the right one. A `@model` / `<MyComponent>` / `@code` reference to `CatalogBrand` resolves to the `@using`'d DTO (`BlazorShared.Models.CatalogBrand`) rather than a same-named domain entity. (ASP.NET, Blazor)
 - `codegraph status --json` now also reports the running CLI `version`, the index directory (`indexPath`), and a `lastIndexed` timestamp (ISO-8601, or null when nothing's indexed yet), so CI and scripts can pin the CLI version and check index freshness from a single command. A matching `CodeGraph.getLastIndexedAt()` library method exposes the same freshness check without shelling out. Thanks @12122J and @eddieran. (#329)
 - TypeScript service/RPC contracts defined as a tuple of generic types — `type MyServiceList = [Service<'query_apply_record', …>, Service<'apply_confirm', …>]` — now index each entry's string-literal name as a searchable symbol. Previously these names existed only as type arguments, so `codegraph query query_apply_record` found nothing even though the names are the app's primary API surface. The pattern is common in typed RPC / BFF clients and mock servers where the types are the source of truth for a runtime proxy object. Utility types (`Pick`, `Omit`, `Record`) and route paths are deliberately left out to avoid noise. Thanks @jiezhiyong. (#634) (TypeScript)
+- New `CODEGRAPH_DIR` environment variable sets the per-project index directory name (default `.codegraph`). This lets one working tree hold two independent indexes — most usefully when you open the same checkout from both **Windows** and **WSL**, which can't safely share a single `.codegraph/`: the background-server lock and the SQLite database are tied to the OS that wrote them, and SQLite locking across the WSL2/Windows filesystem boundary is unreliable. Set `CODEGRAPH_DIR=.codegraph-win` on the Windows side, leave WSL on the default, and each keeps its own index in the same folder without clobbering the other. CodeGraph also skips any sibling `.codegraph-*` directory when indexing and watching, so neither environment trips over the other's data. Thanks @rrtt2323. (#636)
 
 ### Fixes
 
diff --git a/README.md b/README.md
index 7c7b84a9e..ab147b548 100644
--- a/README.md
+++ b/README.md
@@ -684,6 +684,8 @@ Framework routing is validated the same way, on a canonical app per framework: E
 
 **Missing symbols** — The MCP server auto-syncs on save (wait a couple seconds). Run `codegraph sync` manually if needed. Check that the file's language is supported and isn't inside a `.gitignore`d or default-excluded directory (e.g. `node_modules`, `dist`).
 
+**Sharing one checkout between Windows and WSL** — Don't point both at the same `.codegraph/`: the background-server lock and the SQLite index are tied to the OS that wrote them, and SQLite locking across the WSL2/Windows filesystem boundary is unreliable. Give each side its own index in the same tree by setting `CODEGRAPH_DIR` to a distinct name on one of them — e.g. `CODEGRAPH_DIR=.codegraph-win` on Windows, leaving WSL on the default `.codegraph`. CodeGraph skips any sibling `.codegraph-*` directory when indexing and watching, so the two never trip over each other.
+
 ## Star History
 
 <a href="https://www.star-history.com/?repos=colbymchenry%2Fcodegraph&type=date&legend=top-left">
diff --git a/__tests__/foundation.test.ts b/__tests__/foundation.test.ts
index 9933cf8c5..12b11c21f 100644
--- a/__tests__/foundation.test.ts
+++ b/__tests__/foundation.test.ts
@@ -10,7 +10,7 @@ import * as path from 'path';
 import * as os from 'os';
 import { CodeGraph } from '../src';
 import { Node, Edge } from '../src/types';
-import { isInitialized, getCodeGraphDir, validateDirectory } from '../src/directory';
+import { isInitialized, getCodeGraphDir, validateDirectory, codeGraphDirName, isCodeGraphDataDir } from '../src/directory';
 import { DatabaseConnection, getDatabasePath } from '../src/db';
 
 // Create a temporary directory for each test
@@ -306,3 +306,93 @@ describe('Query Builder', () => {
     expect(files).toEqual([]);
   });
 });
+
+// Two environments that share one working tree (Windows-native + WSL) must not
+// share one `.codegraph/`. CODEGRAPH_DIR overrides the data directory name so
+// each side keeps its own index in the same tree (issue #636).
+describe('CODEGRAPH_DIR override (#636)', () => {
+  const saved = process.env.CODEGRAPH_DIR;
+  let tempDir: string;
+
+  beforeEach(() => {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-dirname-'));
+  });
+  afterEach(() => {
+    if (saved === undefined) delete process.env.CODEGRAPH_DIR;
+    else process.env.CODEGRAPH_DIR = saved;
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  });
+
+  describe('codeGraphDirName()', () => {
+    it('defaults to .codegraph when unset', () => {
+      delete process.env.CODEGRAPH_DIR;
+      expect(codeGraphDirName()).toBe('.codegraph');
+    });
+
+    it('honors a valid override', () => {
+      process.env.CODEGRAPH_DIR = '.codegraph-win';
+      expect(codeGraphDirName()).toBe('.codegraph-win');
+    });
+
+    // Anything that isn't a plain segment could escape the project root or
+    // clobber it, so it's ignored in favor of the default.
+    it.each(['foo/bar', 'a\\b', '..', '../x', '.', '/abs/path', '   ', ''])(
+      'falls back to .codegraph for invalid value %j',
+      (bad) => {
+        process.env.CODEGRAPH_DIR = bad;
+        expect(codeGraphDirName()).toBe('.codegraph');
+      }
+    );
+  });
+
+  describe('isCodeGraphDataDir()', () => {
+    it('matches the default, the active override, and .codegraph-* siblings', () => {
+      process.env.CODEGRAPH_DIR = '.codegraph-win';
+      expect(isCodeGraphDataDir('.codegraph')).toBe(true);       // the other env's dir
+      expect(isCodeGraphDataDir('.codegraph-win')).toBe(true);   // active override
+      expect(isCodeGraphDataDir('.codegraph-wsl')).toBe(true);   // any sibling
+    });
+
+    it('does not match unrelated directories', () => {
+      delete process.env.CODEGRAPH_DIR;
+      for (const name of ['src', 'node_modules', '.git', 'codegraph', '.codegraphextra']) {
+        expect(isCodeGraphDataDir(name)).toBe(false);
+      }
+    });
+  });
+
+  it('init writes the index under the overridden directory, not .codegraph', () => {
+    process.env.CODEGRAPH_DIR = '.codegraph-win';
+    const cg = CodeGraph.initSync(tempDir);
+    try {
+      expect(fs.existsSync(path.join(tempDir, '.codegraph-win', 'codegraph.db'))).toBe(true);
+      expect(fs.existsSync(path.join(tempDir, '.codegraph'))).toBe(false);
+      expect(getCodeGraphDir(tempDir)).toBe(path.join(tempDir, '.codegraph-win'));
+      expect(CodeGraph.isInitialized(tempDir)).toBe(true);
+    } finally {
+      cg.close();
+    }
+  });
+
+  it('two index dirs coexist in one tree and the override side skips the sibling', async () => {
+    // WSL side: default `.codegraph`, with a source file.
+    delete process.env.CODEGRAPH_DIR;
+    fs.writeFileSync(path.join(tempDir, 'app.ts'), 'export function onlyReal() {}\n');
+    const wsl = await CodeGraph.init(tempDir, { index: true });
+    wsl.close();
+
+    // Windows side: override dir, same tree. Plant a decoy source file INSIDE
+    // the WSL data dir — the override-side index must not pick it up.
+    process.env.CODEGRAPH_DIR = '.codegraph-win';
+    fs.writeFileSync(path.join(tempDir, '.codegraph', 'decoy.ts'), 'export function decoyLeak() {}\n');
+    const win = await CodeGraph.init(tempDir, { index: true });
+    try {
+      expect(fs.existsSync(path.join(tempDir, '.codegraph', 'codegraph.db'))).toBe(true);
+      expect(fs.existsSync(path.join(tempDir, '.codegraph-win', 'codegraph.db'))).toBe(true);
+      expect(win.searchNodes('onlyReal').length).toBeGreaterThan(0);
+      expect(win.searchNodes('decoyLeak')).toEqual([]); // sibling data dir not indexed
+    } finally {
+      win.close();
+    }
+  });
+});
diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts
index bd667738b..879dbb078 100644
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -356,7 +356,7 @@ function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexR
       clack.log.info(`The index is fully usable ${getGlyphs().dash} only the failed files are missing.`);
     }
   } else if (projectPath) {
-    const logPath = path.join(projectPath, '.codegraph', 'errors.log');
+    const logPath = path.join(getCodeGraphDir(projectPath), 'errors.log');
     if (fs.existsSync(logPath)) {
       fs.unlinkSync(logPath);
     }
@@ -367,7 +367,7 @@ function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexR
  * Write detailed error log to .codegraph/errors.log
  */
 function writeErrorLog(projectPath: string, errors: Array<{ message: string; filePath?: string; severity: string; code?: string }>): void {
-  const cgDir = path.join(projectPath, '.codegraph');
+  const cgDir = getCodeGraphDir(projectPath);
   if (!fs.existsSync(cgDir)) return;
 
   const logPath = path.join(cgDir, 'errors.log');
diff --git a/src/db/index.ts b/src/db/index.ts
index cbc08b8f0..e6d52d47a 100644
--- a/src/db/index.ts
+++ b/src/db/index.ts
@@ -9,6 +9,7 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { SchemaVersion } from '../types';
 import { runMigrations, getCurrentVersion, CURRENT_SCHEMA_VERSION } from './migrations';
+import { getCodeGraphDir } from '../directory';
 
 export { SqliteDatabase, SqliteBackend } from './sqlite-adapter';
 
@@ -240,5 +241,5 @@ export const DATABASE_FILENAME = 'codegraph.db';
  * Get the default database path for a project
  */
 export function getDatabasePath(projectRoot: string): string {
-  return path.join(projectRoot, '.codegraph', DATABASE_FILENAME);
+  return path.join(getCodeGraphDir(projectRoot), DATABASE_FILENAME);
 }
diff --git a/src/directory.ts b/src/directory.ts
index 3a5c91d93..8f5abb092 100644
--- a/src/directory.ts
+++ b/src/directory.ts
@@ -7,16 +7,82 @@
 import * as fs from 'fs';
 import * as path from 'path';
 
+/** The default per-project data directory name. */
+const DEFAULT_CODEGRAPH_DIR = '.codegraph';
+
+let warnedBadDirName = false;
+
+/**
+ * Resolve the per-project data directory name, honoring the `CODEGRAPH_DIR`
+ * environment override (default `.codegraph`). The override is a single path
+ * segment that lives in the project root.
+ *
+ * Why this exists: two environments that share one working tree must NOT share
+ * one `.codegraph/` — most concretely Windows-native and WSL (issue #636). The
+ * daemon lockfile (`.codegraph/daemon.pid`) records a platform-specific pid and
+ * socket path (a Windows named pipe vs a WSL Unix socket), and SQLite file
+ * locking across the WSL2 ↔ Windows filesystem boundary is unreliable, so two
+ * daemons sharing one index risks corruption. Setting `CODEGRAPH_DIR=.codegraph-win`
+ * on one side gives each environment its own index in the same tree.
+ *
+ * Read live (not captured at load) so it is both process-accurate and testable.
+ * An override that isn't a plain directory name — empty, containing a path
+ * separator, `.`, `..`/traversal, or absolute — is ignored (we keep the
+ * default) rather than risk writing the index outside the project or into the
+ * project root itself; we warn once to stderr so the misconfiguration is seen.
+ */
+export function codeGraphDirName(): string {
+  const raw = process.env.CODEGRAPH_DIR?.trim();
+  if (!raw) return DEFAULT_CODEGRAPH_DIR;
+  const invalid =
+    raw === '.' ||
+    raw.includes('..') ||
+    raw.includes('/') ||
+    raw.includes('\\') ||
+    path.isAbsolute(raw);
+  if (invalid) {
+    if (!warnedBadDirName) {
+      warnedBadDirName = true;
+      // stderr only — stdout is the MCP protocol channel.
+      console.warn(
+        `[codegraph] Ignoring invalid CODEGRAPH_DIR="${raw}" — it must be a plain ` +
+          `directory name (no path separators, no "..", not absolute). Using "${DEFAULT_CODEGRAPH_DIR}".`
+      );
+    }
+    return DEFAULT_CODEGRAPH_DIR;
+  }
+  return raw;
+}
+
 /**
- * CodeGraph directory name
+ * CodeGraph directory name — a load-time snapshot of {@link codeGraphDirName}.
+ * It matches the live value unless `process.env.CODEGRAPH_DIR` is changed after
+ * this module loads (as tests do); it's kept as a stable string export for
+ * backward compatibility. Internal code resolves the name through
+ * {@link codeGraphDirName} / {@link getCodeGraphDir} so the override always applies.
  */
-export const CODEGRAPH_DIR = '.codegraph';
+export const CODEGRAPH_DIR = codeGraphDirName();
+
+/**
+ * Is `name` (a single path segment) a CodeGraph data directory? Matches the
+ * default `.codegraph`, the active `CODEGRAPH_DIR` override, and any
+ * `.codegraph-*` sibling. File-watching and the indexer skip ALL of these, so
+ * when two environments share one working tree (Windows + WSL, issue #636)
+ * neither indexes or watches the other's index directory.
+ */
+export function isCodeGraphDataDir(name: string): boolean {
+  return (
+    name === DEFAULT_CODEGRAPH_DIR ||
+    name === codeGraphDirName() ||
+    name.startsWith(DEFAULT_CODEGRAPH_DIR + '-')
+  );
+}
 
 /**
  * Get the .codegraph directory path for a project
  */
 export function getCodeGraphDir(projectRoot: string): string {
-  return path.join(projectRoot, CODEGRAPH_DIR);
+  return path.join(projectRoot, codeGraphDirName());
 }
 
 /**
diff --git a/src/extraction/index.ts b/src/extraction/index.ts
index 36569258e..9f1831f87 100644
--- a/src/extraction/index.ts
+++ b/src/extraction/index.ts
@@ -18,6 +18,7 @@ import {
 import { QueryBuilder } from '../db/queries';
 import { extractFromSource } from './tree-sitter';
 import { detectLanguage, isSourceFile, isLanguageSupported, isFileLevelOnlyLanguage, initGrammars, loadGrammarsForLanguages } from './grammars';
+import { isCodeGraphDataDir } from '../directory';
 import { logDebug, logWarn } from '../errors';
 import { validatePathWithinRoot, normalizePath } from '../utils';
 import ignore, { Ignore } from 'ignore';
@@ -454,8 +455,9 @@ function scanDirectoryWalk(
     }
 
     for (const entry of entries) {
-      // Never descend into git internals or our own data directory.
-      if (entry.name === '.git' || entry.name === '.codegraph') continue;
+      // Never descend into git internals or any CodeGraph data directory
+      // (the active one or a sibling another environment created — #636).
+      if (entry.name === '.git' || isCodeGraphDataDir(entry.name)) continue;
 
       const fullPath = path.join(dir, entry.name);
       const relativePath = normalizePath(path.relative(rootDir, fullPath));
diff --git a/src/index.ts b/src/index.ts
index 55ef12e64..1b2642dbd 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -48,6 +48,7 @@ import { ContextBuilder, createContextBuilder } from './context';
 import { Mutex, FileLock } from './utils';
 import { FileWatcher, WatchOptions, PendingFile, LockUnavailableError } from './sync';
 import { EXTRACTION_VERSION } from './extraction/extraction-version';
+import { getCodeGraphDir } from './directory';
 import { CodeGraphPackageVersion } from './mcp/version';
 
 // Re-export types for consumers
@@ -154,7 +155,7 @@ export class CodeGraph {
     this.queries = queries;
     this.projectRoot = projectRoot;
     this.fileLock = new FileLock(
-      path.join(projectRoot, '.codegraph', 'codegraph.lock')
+      path.join(getCodeGraphDir(projectRoot), 'codegraph.lock')
     );
     this.orchestrator = new ExtractionOrchestrator(projectRoot, queries);
     this.resolver = createResolver(projectRoot, queries);
diff --git a/src/installer/index.ts b/src/installer/index.ts
index edd48ecaf..a9be118be 100644
--- a/src/installer/index.ts
+++ b/src/installer/index.ts
@@ -28,6 +28,7 @@ import { getGlyphs } from '../ui/glyphs';
 // installer must stay importable even when native modules can't load).
 import { watchDisabledReason } from '../sync/watch-policy';
 import { isGitRepo, isSyncHookInstalled, installGitSyncHook } from '../sync/git-hooks';
+import { getCodeGraphDir, codeGraphDirName } from '../directory';
 
 // Backwards-compat: keep these named exports — downstream code may
 // import them. The shim in `config-writer.ts` continues to re-export
@@ -362,8 +363,8 @@ export async function runUninstaller(opts: RunUninstallerOptions): Promise<void>
 
   // Step 4: for local uninstall, the index dir is separate — point at
   // `uninit` so the user knows it's still there (and how to remove it).
-  if (location === 'local' && fs.existsSync(path.join(process.cwd(), '.codegraph'))) {
-    clack.log.info('The .codegraph/ index for this project is still here. Run `codegraph uninit` to delete it.');
+  if (location === 'local' && fs.existsSync(getCodeGraphDir(process.cwd()))) {
+    clack.log.info(`The ${codeGraphDirName()}/ index for this project is still here. Run \`codegraph uninit\` to delete it.`);
   }
 
   // Step 5: summary.
diff --git a/src/sync/watcher.ts b/src/sync/watcher.ts
index 8635a26cc..9bc654708 100644
--- a/src/sync/watcher.ts
+++ b/src/sync/watcher.ts
@@ -37,6 +37,7 @@ import type { Ignore } from 'ignore';
 import { isSourceFile, buildDefaultIgnore } from '../extraction';
 import { logDebug, logWarn } from '../errors';
 import { normalizePath } from '../utils';
+import { isCodeGraphDataDir } from '../directory';
 import { watchDisabledReason } from './watch-policy';
 
 /**
@@ -425,8 +426,12 @@ export class FileWatcher {
 
   /** Our own dirs are always ignored, regardless of .gitignore. */
   private isAlwaysIgnored(rel: string): boolean {
+    // First path segment. Ignore any CodeGraph data dir — the active one AND a
+    // sibling like `.codegraph-win` a second environment (Windows/WSL) created
+    // in the same tree, so neither side watches the other's index (#636).
+    const top = rel.split('/')[0] ?? rel;
     return (
-      rel === '.codegraph' || rel.startsWith('.codegraph/') ||
+      isCodeGraphDataDir(top) ||
       rel === '.git' || rel.startsWith('.git/')
     );
   }

From fd03f31b2cdfe3148e953a0231ca247d03dbb5f7 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 20:18:17 -0400
Subject: [PATCH 05/51] fix(cpp): resolve calls through
 singletons/factories/chained getters (#645) (#742)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A C++ method call whose receiver is another call's result — `Foo::instance().bar()`,
`WidgetFactory::create().draw()`, `openSession()->run()`, or the same stored in an
`auto` local first — lost the receiver's type during extraction. The callee degraded
to a bare method name, so when two classes shared a method name the call silently
resolved to whichever was indexed first (or not at all), corrupting callers / impact /
trace with a plausible-but-wrong edge.

Three parts:
- Capture C++ return types (new nodes.return_type column, schema v5): the
  function_definition's `type` field, normalized — smart-pointer pointee unwrapped,
  void/primitives dropped.
- Preserve the inner-call receiver in extraction: a C/C++ field_expression whose
  receiver is itself a call is encoded `inner().method` instead of dropping to the
  bare name. Other languages keep the existing behavior.
- New resolution strategy (matchCppCallChain): infer the receiver's class from the
  inner call's return type, then resolve AND validate the method on it. Handles
  singletons/accessors, factories returning a different type, free-function
  factories, make_unique/make_shared/new/direct construction, single-level member
  chains, and namespace-qualified inner calls. A wrong inference yields no edge,
  never a wrong one.

EXTRACTION_VERSION 2->3 (re-index to populate return types).

Validated on the issue repro + spdlog: node count stable (no explosion),
deterministic, and ~100 pre-existing wrong `.size()`-style edges removed.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |   1 +
 __tests__/extraction.test.ts         |  35 ++++++
 __tests__/foundation.test.ts         |   2 +-
 __tests__/pr19-improvements.test.ts  |   2 +-
 __tests__/resolution.test.ts         | 108 +++++++++++++++++
 src/db/migrations.ts                 |  12 +-
 src/db/queries.ts                    |   9 +-
 src/db/schema.sql                    |   1 +
 src/extraction/extraction-version.ts |   2 +-
 src/extraction/languages/c-cpp.ts    |  52 +++++++++
 src/extraction/tree-sitter-types.ts  |   9 ++
 src/extraction/tree-sitter.ts        |  21 ++++
 src/resolution/name-matcher.ts       | 166 ++++++++++++++++++++++++++-
 src/types.ts                         |   9 ++
 14 files changed, 421 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index faf31754b..08e6d4f80 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- C++ method calls made through a singleton, factory, or chained getter now resolve to the correct class. A call like `Foo::instance().bar()`, `WidgetFactory::create().draw()`, `openSession()->run()`, or the same stored in an `auto` local first, used to lose the receiver's type — so when two classes had a same-named method the call silently attached to whichever was indexed first (or didn't resolve at all), corrupting callers, impact, and trace. CodeGraph now infers the receiver's type from what the inner call returns (capturing C++ return types for the first time) and creates the edge only when that class genuinely has the method, so a wrong guess produces no edge instead of a misleading one. Covers singletons and self-returning accessors, factories that return a different type, free-function factories, `make_unique` / `make_shared` / `new` / direct construction, and single-level member chains. Existing C/C++ indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @stabey. (#645) (C/C++)
 - The shared background server no longer logs a scary-looking `[error] … undefined` line on every session start. Attaching to the shared daemon is normal, healthy behavior, but the informational message was being surfaced by MCP hosts (Claude Code and others) as an error; it's now silent by default — set `CODEGRAPH_MCP_LOG_ATTACH=1` to surface it when debugging daemon attach. Thanks @mturac. (#618)
 - On Windows, CodeGraph's background processes no longer pile up without bound and saturate CPU over a long session. When the editor or agent that launched CodeGraph exited, its helper process couldn't tell its parent had gone — Windows reports process lineage differently than macOS and Linux — so the helper kept running, the shared background server never saw the client disconnect, and its idle timer never fired to shut it down. CodeGraph now detects parent-process exit directly on Windows, so helpers and the idle background server wind down promptly, the same as they already did on macOS and Linux. (#692, #576, #680)
 - The shared background server has two further safeguards against ever lingering: it now drops a client the moment it detects that client's process is gone (even if the disconnect arrived uncleanly — a force-quit or a dropped connection that never closed the socket), and it won't stay running indefinitely with clients attached but no activity. Together these guarantee it always winds down, on every platform. (#692)
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index 89918c58b..7bbaaea48 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -2369,6 +2369,41 @@ end
     });
   });
 
+  describe('C/C++ return type capture (#645)', () => {
+    it('captures the normalized return type of a C++ method/function', () => {
+      const code = `
+struct Widget { void draw(); };
+class Factory { public: static Widget create(); };
+Widget Factory::create() { return Widget(); }
+void doNothing() {}
+`;
+      const result = extractFromSource('f.cpp', code);
+
+      const create = result.nodes.find(
+        (n) => n.name === 'create' && (n.kind === 'method' || n.kind === 'function')
+      );
+      expect(create?.returnType).toBe('Widget');
+
+      // A `void` return records no type, so resolution never tries to resolve a
+      // method on it.
+      const doNothing = result.nodes.find((n) => n.name === 'doNothing');
+      expect(doNothing).toBeDefined();
+      expect(doNothing?.returnType).toBeUndefined();
+    });
+
+    it('unwraps a smart-pointer return type to its pointee', () => {
+      const code = `
+#include <memory>
+struct Widget {};
+std::unique_ptr<Widget> makeWidget() { return nullptr; }
+`;
+      const result = extractFromSource('f.cpp', code);
+
+      const make = result.nodes.find((n) => n.name === 'makeWidget');
+      expect(make?.returnType).toBe('Widget');
+    });
+  });
+
   describe('C/C++ imports', () => {
     it('should extract system include', () => {
       const code = `#include <iostream>`;
diff --git a/__tests__/foundation.test.ts b/__tests__/foundation.test.ts
index 12b11c21f..405865b2f 100644
--- a/__tests__/foundation.test.ts
+++ b/__tests__/foundation.test.ts
@@ -242,7 +242,7 @@ describe('Database Connection', () => {
 
     const version = db.getSchemaVersion();
     expect(version).not.toBeNull();
-    expect(version?.version).toBe(4);
+    expect(version?.version).toBe(5);
 
     db.close();
   });
diff --git a/__tests__/pr19-improvements.test.ts b/__tests__/pr19-improvements.test.ts
index eb5200919..8e8ca8177 100644
--- a/__tests__/pr19-improvements.test.ts
+++ b/__tests__/pr19-improvements.test.ts
@@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
 describe('Schema v2 Migration', () => {
   it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
     const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
-    expect(CURRENT_SCHEMA_VERSION).toBe(4);
+    expect(CURRENT_SCHEMA_VERSION).toBe(5);
   });
 
   it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 347cb635c..74ad5d7f5 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -1918,4 +1918,112 @@ func main() {
       }
     });
   });
+
+  describe('C++ chained-call receiver resolution (#645)', () => {
+    async function indexCpp(files: Record<string, string>): Promise<void> {
+      for (const [name, content] of Object.entries(files)) {
+        fs.writeFileSync(path.join(tempDir, name), content);
+      }
+      cg = await CodeGraph.init(tempDir, { index: true });
+    }
+
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves singleton chains and auto locals to the right class, never the first-sorted one', async () => {
+      // Two classes share writeLog; Logger sorts first so it wins any name-only
+      // tie. All three call forms target Metrics.
+      await indexCpp({
+        'logger.hpp': `#pragma once
+#include <string>
+class Logger  { public: static Logger&  instance(); void writeLog(const std::string&); };
+class Metrics { public: static Metrics& instance(); void writeLog(const std::string&); };
+`,
+        'impl.cpp': `#include "logger.hpp"
+Logger&  Logger::instance()  { static Logger l;  return l; }
+Metrics& Metrics::instance() { static Metrics m; return m; }
+void Logger::writeLog(const std::string&)  {}
+void Metrics::writeLog(const std::string&) {}
+`,
+        'app.cpp': `#include "logger.hpp"
+void a() { Metrics::instance().writeLog("x"); }              // chained singleton
+void b() { auto& m = Metrics::instance(); m.writeLog("x"); } // stored in auto
+void c() { Metrics& m = Metrics::instance(); m.writeLog("x"); } // explicit type
+`,
+      });
+
+      expect(callerNamesOf('Metrics::writeLog')).toEqual(['a', 'b', 'c']);
+      expect(callerNamesOf('Logger::writeLog')).toEqual([]);
+    });
+
+    it('resolves factories, free-function factories, and member chains via the inner call return type', async () => {
+      await indexCpp({
+        'types.hpp': `#pragma once
+#include <memory>
+struct Widget { void draw(); };
+struct Session { void run(); };
+struct View { void render(); };
+class WidgetFactory { public: static Widget create(); };
+class Manager { public: View view(); };
+Session* openSession();
+// Decoy that sorts first and has all three methods — must never win.
+struct Aaa { void draw(); void run(); void render(); };
+`,
+        'impl.cpp': `#include "types.hpp"
+void Widget::draw() {}
+void Session::run() {}
+void View::render() {}
+void Aaa::draw() {}
+void Aaa::run() {}
+void Aaa::render() {}
+Widget WidgetFactory::create() { return Widget(); }
+View Manager::view() { return View(); }
+Session* openSession() { return nullptr; }
+`,
+        'app.cpp': `#include "types.hpp"
+void factory()     { WidgetFactory::create().draw(); }   // -> Widget::draw
+void freefunc()    { openSession()->run(); }             // -> Session::run
+void member()      { Manager mgr; mgr.view().render(); }  // -> View::render
+void makeUnique()  { auto w = std::make_unique<Widget>(); w->draw(); } // -> Widget::draw
+`,
+      });
+
+      expect(callerNamesOf('Widget::draw')).toEqual(['factory', 'makeUnique']);
+      expect(callerNamesOf('Session::run')).toEqual(['freefunc']);
+      expect(callerNamesOf('View::render')).toEqual(['member']);
+      // The first-sorted decoy never captures any of them.
+      expect(callerNamesOf('Aaa::draw')).toEqual([]);
+      expect(callerNamesOf('Aaa::run')).toEqual([]);
+      expect(callerNamesOf('Aaa::render')).toEqual([]);
+    });
+
+    it('creates NO edge when the inferred type lacks the method (silent miss, not a wrong edge)', async () => {
+      await indexCpp({
+        'types.hpp': `#pragma once
+struct Widget { void draw(); };
+struct Other  { void onlyOther(); };
+class WidgetFactory { public: static Widget create(); };
+`,
+        'impl.cpp': `#include "types.hpp"
+void Widget::draw() {}
+void Other::onlyOther() {}
+Widget WidgetFactory::create() { return Widget(); }
+`,
+        'app.cpp': `#include "types.hpp"
+// Widget has no onlyOther() — must produce NO edge, never a wrong one to Other.
+void wrong() { WidgetFactory::create().onlyOther(); }
+`,
+      });
+
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+  });
 });
diff --git a/src/db/migrations.ts b/src/db/migrations.ts
index 1a8d1c542..bfea9024d 100644
--- a/src/db/migrations.ts
+++ b/src/db/migrations.ts
@@ -9,7 +9,7 @@ import { SqliteDatabase } from './sqlite-adapter';
 /**
  * Current schema version
  */
-export const CURRENT_SCHEMA_VERSION = 4;
+export const CURRENT_SCHEMA_VERSION = 5;
 
 /**
  * Migration definition
@@ -65,6 +65,16 @@ const migrations: Migration[] = [
       `);
     },
   },
+  {
+    version: 5,
+    description:
+      'Add nodes.return_type — normalized return/result type for receiver-type inference (C++ singletons/factories, #645)',
+    up: (db) => {
+      db.exec(`
+        ALTER TABLE nodes ADD COLUMN return_type TEXT;
+      `);
+    },
+  },
 ];
 
 /**
diff --git a/src/db/queries.ts b/src/db/queries.ts
index 3f35c5b04..3e4e6e14a 100644
--- a/src/db/queries.ts
+++ b/src/db/queries.ts
@@ -72,6 +72,7 @@ interface NodeRow {
   is_abstract: number;
   decorators: string | null;
   type_parameters: string | null;
+  return_type: string | null;
   updated_at: number;
 }
 
@@ -133,6 +134,7 @@ function rowToNode(row: NodeRow): Node {
     isAbstract: row.is_abstract === 1,
     decorators: row.decorators ? safeJsonParse(row.decorators, undefined) : undefined,
     typeParameters: row.type_parameters ? safeJsonParse(row.type_parameters, undefined) : undefined,
+    returnType: row.return_type ?? undefined,
     updatedAt: row.updated_at,
   };
 }
@@ -232,13 +234,13 @@ export class QueryBuilder {
           start_line, end_line, start_column, end_column,
           docstring, signature, visibility,
           is_exported, is_async, is_static, is_abstract,
-          decorators, type_parameters, updated_at
+          decorators, type_parameters, return_type, updated_at
         ) VALUES (
           @id, @kind, @name, @qualifiedName, @filePath, @language,
           @startLine, @endLine, @startColumn, @endColumn,
           @docstring, @signature, @visibility,
           @isExported, @isAsync, @isStatic, @isAbstract,
-          @decorators, @typeParameters, @updatedAt
+          @decorators, @typeParameters, @returnType, @updatedAt
         )
       `);
     }
@@ -281,6 +283,7 @@ export class QueryBuilder {
       isAbstract: node.isAbstract ? 1 : 0,
       decorators: node.decorators ? JSON.stringify(node.decorators) : null,
       typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
+      returnType: node.returnType ?? null,
       updatedAt: node.updatedAt ?? Date.now(),
     });
   }
@@ -321,6 +324,7 @@ export class QueryBuilder {
           is_abstract = @isAbstract,
           decorators = @decorators,
           type_parameters = @typeParameters,
+          return_type = @returnType,
           updated_at = @updatedAt
         WHERE id = @id
       `);
@@ -355,6 +359,7 @@ export class QueryBuilder {
       isAbstract: node.isAbstract ? 1 : 0,
       decorators: node.decorators ? JSON.stringify(node.decorators) : null,
       typeParameters: node.typeParameters ? JSON.stringify(node.typeParameters) : null,
+      returnType: node.returnType ?? null,
       updatedAt: node.updatedAt ?? Date.now(),
     });
   }
diff --git a/src/db/schema.sql b/src/db/schema.sql
index b08c34f37..292981c82 100644
--- a/src/db/schema.sql
+++ b/src/db/schema.sql
@@ -37,6 +37,7 @@ CREATE TABLE IF NOT EXISTS nodes (
     is_abstract INTEGER DEFAULT 0,
     decorators TEXT, -- JSON array
     type_parameters TEXT, -- JSON array
+    return_type TEXT, -- normalized return/result type name (e.g. C++ method return, for receiver-type inference)
     updated_at INTEGER NOT NULL
 );
 
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index e216292cc..aa5106f07 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 2;
+export const EXTRACTION_VERSION = 3;
diff --git a/src/extraction/languages/c-cpp.ts b/src/extraction/languages/c-cpp.ts
index 5d13ddc5a..1365cc24c 100644
--- a/src/extraction/languages/c-cpp.ts
+++ b/src/extraction/languages/c-cpp.ts
@@ -45,6 +45,56 @@ function extractCppReceiverType(node: SyntaxNode, source: string): string | unde
   return parts.length > 1 ? parts.slice(0, -1).join('::') : undefined;
 }
 
+/**
+ * Built-in / non-class return types that can never be a method receiver. We
+ * store no `returnType` for these so resolution never tries to resolve a method
+ * on `void` / `int` / etc.
+ */
+const CPP_NON_CLASS_RETURN = new Set([
+  'void', 'bool', 'char', 'short', 'int', 'long', 'float', 'double', 'unsigned',
+  'signed', 'size_t', 'ssize_t', 'auto', 'wchar_t', 'char8_t', 'char16_t',
+  'char32_t', 'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t',
+  'uint32_t', 'uint64_t', 'intptr_t', 'uintptr_t', 'nullptr_t',
+]);
+
+/**
+ * Normalize a C++ return type to the bare class name a method could be called
+ * on. Unwraps smart-pointer / optional wrappers to their element type
+ * (`std::unique_ptr<Widget>` → `Widget`) so a factory's `->method()` resolves on
+ * the pointee. Strips cv-qualifiers, `&`/`*`, namespace qualifiers, and other
+ * template args. Returns undefined for primitives / void / `auto` / empty.
+ */
+export function normalizeCppReturnType(raw: string): string | undefined {
+  let t = raw.trim();
+  if (!t) return undefined;
+  // Unwrap smart pointers / optional to their pointee (the thing you call `->` on).
+  const wrapper = t.match(/\b(?:std\s*::\s*)?(?:unique_ptr|shared_ptr|weak_ptr|optional)\s*<\s*([^,>]+?)\s*>/);
+  if (wrapper && wrapper[1]) t = wrapper[1];
+  t = t
+    .replace(/\b(?:const|volatile|typename|struct|class|enum)\b/g, ' ')
+    .replace(/<[^>]*>/g, ' ')
+    .replace(/[*&]+/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
+  if (!t) return undefined;
+  const last = t.split('::').filter(Boolean).pop();
+  if (!last) return undefined;
+  if (CPP_NON_CLASS_RETURN.has(last)) return undefined;
+  if (!/^[A-Za-z_]\w*$/.test(last)) return undefined;
+  return last;
+}
+
+/**
+ * A function/method's return type lives in the `function_definition`'s `type`
+ * field (`Metrics& Metrics::instance()` → `Metrics`). Constructors, destructors,
+ * and conversion operators have no `type` field → undefined.
+ */
+function extractCppReturnType(node: SyntaxNode, source: string): string | undefined {
+  const typeNode = getChildByField(node, 'type');
+  if (!typeNode) return undefined;
+  return normalizeCppReturnType(getNodeText(typeNode, source));
+}
+
 export const cExtractor: LanguageExtractor = {
   functionTypes: ['function_definition'],
   classTypes: [],
@@ -60,6 +110,7 @@ export const cExtractor: LanguageExtractor = {
   nameField: 'declarator',
   bodyField: 'body',
   paramsField: 'parameters',
+  getReturnType: extractCppReturnType,
   resolveTypeAliasKind: (node, _source) => {
     // C typedef: `typedef enum { ... } name;` or `typedef struct { ... } name;`
     // The inner enum_specifier/struct_specifier is anonymous, but we want the typedef name
@@ -107,6 +158,7 @@ export const cppExtractor: LanguageExtractor = {
   paramsField: 'parameters',
   resolveName: extractCppQualifiedMethodName,
   getReceiverType: extractCppReceiverType,
+  getReturnType: extractCppReturnType,
   getVisibility: (node) => {
     // Check for access specifier in parent
     const parent = node.parent;
diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts
index 8742961ca..cecd54c02 100644
--- a/src/extraction/tree-sitter-types.ts
+++ b/src/extraction/tree-sitter-types.ts
@@ -205,6 +205,15 @@ export interface LanguageExtractor {
    */
   getReceiverType?: (node: SyntaxNode, source: string) => string | undefined;
 
+  /**
+   * Extract a function/method's normalized return type name (bare class name,
+   * smart-pointer pointee unwrapped), stored on the node as `returnType`. Used
+   * by C/C++ so resolution can infer a chained receiver's type from what the
+   * inner call returns (`Foo::instance().bar()` → resolve `bar` on `Foo`,
+   * issue #645). Return undefined for primitives / void / constructors.
+   */
+  getReturnType?: (node: SyntaxNode, source: string) => string | undefined;
+
   /**
    * Resolve the actual node kind for a type alias declaration.
    * Used by Go where `type_spec` is the named declaration wrapper for structs/interfaces:
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 6febee652..ae28a6657 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -811,6 +811,7 @@ export class TreeSitterExtractor {
     const isExported = this.extractor.isExported?.(node, this.source);
     const isAsync = this.extractor.isAsync?.(node);
     const isStatic = this.extractor.isStatic?.(node);
+    const returnType = this.extractor.getReturnType?.(node, this.source);
 
     const funcNode = this.createNode('function', name, node, {
       docstring,
@@ -819,6 +820,7 @@ export class TreeSitterExtractor {
       isExported,
       isAsync,
       isStatic,
+      returnType,
     });
     if (!funcNode) return;
 
@@ -930,12 +932,14 @@ export class TreeSitterExtractor {
     const visibility = this.extractor.getVisibility?.(node);
     const isAsync = this.extractor.isAsync?.(node);
     const isStatic = this.extractor.isStatic?.(node);
+    const returnType = this.extractor.getReturnType?.(node, this.source);
     const extraProps: Partial<Node> = {
       docstring,
       signature,
       visibility,
       isAsync,
       isStatic,
+      returnType,
     };
     if (receiverType) {
       extraProps.qualifiedName = `${receiverType}::${name}`;
@@ -2457,6 +2461,23 @@ export class TreeSitterExtractor {
               } else {
                 calleeName = methodName;
               }
+            } else if (
+              (this.language === 'cpp' || this.language === 'c') &&
+              receiver &&
+              receiver.type === 'call_expression'
+            ) {
+              // C/C++ receiver that is itself a call — `Foo::instance().bar()`,
+              // `openSession()->run()`, `mgr.view().render()`. Keep the inner
+              // call so resolution can infer bar()'s class from what the inner
+              // call RETURNS (#645). Encode as `<innerCallee>().<method>`; the
+              // `().` marker never appears in an ordinary ref, so the C++
+              // resolver can detect and split it. Other languages keep the
+              // bare-name behavior (dropping the receiver) below.
+              const innerFn = getChildByField(receiver, 'function');
+              const innerCallee = innerFn
+                ? getNodeText(innerFn, this.source).replace(/->/g, '.').replace(/\s+/g, '')
+                : '';
+              calleeName = innerCallee ? `${innerCallee}().${methodName}` : methodName;
             } else {
               calleeName = methodName;
             }
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index d6bce5659..f01628c12 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -351,6 +351,7 @@ function inferCppReceiverType(
   receiverName: string,
   ref: UnresolvedRef,
   context: ResolutionContext,
+  depth = 0,
 ): string | null {
   const source = context.readFile(ref.filePath);
   if (!source) return null;
@@ -368,7 +369,15 @@ function inferCppReceiverType(
     const declaratorMatch = line.match(declaratorRegex);
     if (declaratorMatch) {
       const normalized = normalizeCppTypeName(declaratorMatch[1] ?? '');
-      if (normalized) return normalized;
+      if (normalized === 'auto') {
+        // `auto x = Foo::instance();` — the declared type is deduced; recover it
+        // from the initializer (call return type / construction) (#645).
+        const initType = inferCppAutoInitializerType(line, receiverName, ref, context, depth);
+        if (initType) return initType;
+        // No usable initializer on this line — keep scanning earlier ones.
+      } else if (normalized) {
+        return normalized;
+      }
     }
   }
 
@@ -388,13 +397,158 @@ function inferCppReceiverType(
       const declaratorMatch = line.match(declaratorRegex);
       if (!declaratorMatch) continue;
       const normalized = normalizeCppTypeName(declaratorMatch[1] ?? '');
-      if (normalized) return normalized;
+      if (normalized && normalized !== 'auto') return normalized;
     }
   }
 
   return null;
 }
 
+/**
+ * Last non-empty `::` segment of a C++ name (`a::b::C` → `C`); returns `name` unchanged if there is none.
+ */
+function cppLastSegment(name: string): string {
+  const parts = name.split('::').filter(Boolean);
+  return parts[parts.length - 1] ?? name;
+}
+
+/**
+ * Return type recorded at extraction (`returnType`, #645) for `Class::method` or a
+ * free function. An unqualified callee only matches `function` nodes; the first
+ * matching candidate wins. Null when nothing matches (e.g. `void`/primitive return).
+ */
+function lookupCppReturnType(
+  callee: string,
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+): string | null {
+  let method = callee;
+  let cls: string | null = null;
+  if (callee.includes('::')) {
+    const parts = callee.split('::').filter(Boolean);
+    method = parts[parts.length - 1] ?? callee;
+    cls = parts.slice(0, -1).join('::');
+  }
+  const candidates = context.getNodesByName(method).filter(
+    (n) =>
+      (n.kind === 'method' || n.kind === 'function') &&
+      n.language === ref.language &&
+      !!n.returnType,
+  );
+  if (cls) {
+    const want = `${cls}::${method}`;
+    // The call site may name the class with MORE namespace qualification than
+    // the stored node (`details::registry::instance` at the call vs
+    // `registry::instance` on the node — the receiver type only carries the
+    // immediate class), or LESS. Accept an exact match or either being a
+    // `::`-suffix of the other. NOTE(review): a node whose qualifiedName is just
+    // `<method>` also passes the last test — confirm a bare function should match.
+    const m = candidates.find(
+      (n) =>
+        n.qualifiedName === want ||
+        n.qualifiedName.endsWith(`::${want}`) ||
+        want.endsWith(`::${n.qualifiedName}`),
+    );
+    return m?.returnType ?? null;
+  }
+  return candidates.find((n) => n.kind === 'function')?.returnType ?? null;
+}
+
+/** Is a class/struct named after `name`'s last `::` segment indexed for `ref.language`? */
+function cppClassExists(name: string, ref: UnresolvedRef, context: ResolutionContext): boolean {
+  const last = cppLastSegment(name);
+  return context
+    .getNodesByName(last)
+    .some((n) => (n.kind === 'class' || n.kind === 'struct') && n.language === ref.language);
+}
+
+/**
+ * Infer the class produced by a C++ call/construction expression, using return
+ * types captured at extraction (#645). Handles, in order:
+ *   - `make_unique<T>()` / `make_shared<T>()`        → T (last segment of a qualified `ns::T`)
+ *   - single-level member call `recv.method()`       → recv's type, then method's return
+ *   - `Class::method()` / free `func()`              → the callee's recorded return type
+ *   - direct construction `Type()` / `ns::Type()`    → Type
+ * Returns null when undeterminable. Callers MUST still validate the outer method
+ * exists on the result before creating an edge, so a wrong guess stays silent.
+ */
+function resolveCppCallResultType(
+  inner: string,
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+  depth = 0,
+): string | null {
+  if (depth > 3) return null; // guard against pathological mutual recursion
+  const expr = inner.trim();
+
+  const make = expr.match(/(?:^|::)(?:make_unique|make_shared)\s*<\s*([A-Za-z_][\w:]*)/);
+  if (make?.[1]) return cppLastSegment(make[1]); // `make_unique<ns::T>` → `T`, never the namespace `ns`
+
+  // Single-level member call `recv.method` (the `manager.view().render()` shape).
+  const dotIdx = expr.lastIndexOf('.');
+  if (dotIdx > 0) {
+    const recv = expr.slice(0, dotIdx);
+    const method = expr.slice(dotIdx + 1);
+    if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null; // single level only
+    const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
+    if (!recvType) return null;
+    return lookupCppReturnType(`${recvType}::${method}`, ref, context);
+  }
+
+  const ret = lookupCppReturnType(expr, ref, context);
+  if (ret) return ret;
+
+  // Direct construction — the callee itself names a class/struct.
+  if (cppClassExists(expr, ref, context)) return cppLastSegment(expr);
+
+  return null;
+}
+
+/**
+ * Recover the type of an `auto`-declared local from its `=` initializer on the
+ * declaration line — `auto x = Foo::instance();`, `auto w = make_unique<W>();`,
+ * `auto p = new W();`, `auto w = Widget();` (#645). Null for any other initializer shape.
+ */
+function inferCppAutoInitializerType(
+  line: string,
+  receiverName: string,
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+  depth: number,
+): string | null {
+  const escaped = receiverName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const m = line.match(new RegExp(`\\b${escaped}\\b\\s*=\\s*([^;]+)`));
+  if (!m || !m[1]) return null;
+  const init = m[1].trim();
+
+  const neu = init.match(/^new\s+([A-Za-z_][\w:]*)/);
+  if (neu && neu[1]) return cppLastSegment(neu[1]);
+
+  // A call or construction: `Foo(...)`, `A::b(...)`, `make_unique<T>(...)`.
+  const call = init.match(/^([A-Za-z_][\w:]*(?:\s*<[^>;]*>)?)\s*\(/);
+  if (call && call[1]) return resolveCppCallResultType(call[1].replace(/\s+/g, ''), ref, context, depth + 1);
+
+  return null;
+}
+
+/**
+ * Resolve a C++ chained call whose receiver is itself a call — encoded by the
+ * extractor as `<innerCallee>().<method>` (#645); any other ref returns null.
+ * The receiver's type is what the inner call returns; the outer method is then
+ * resolved and VALIDATED on it (resolveMethodOnType requires `cls::method` to
+ * exist), so a wrong inference produces no edge rather than a wrong one.
+ */
+export function matchCppCallChain(
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+): ResolvedRef | null {
+  const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
+  if (!m || !m[1] || !m[2]) return null;
+  const cls = resolveCppCallResultType(m[1], ref, context);
+  if (!cls) return null;
+  return resolveMethodOnType(cls, m[2], ref, context, 0.85, 'instance-method');
+}
+
 /**
  * Java/Kotlin: infer a receiver's declared type by walking field declarations
  * in the class enclosing the call site. The field's `signature` is already in
@@ -809,6 +963,14 @@ export function matchReference(
   result = matchByQualifiedName(ref, context);
   if (result) return result;
 
+  // 1b. C++ chained call whose receiver is another call — `Foo::instance().bar()`
+  // encoded as `Foo::instance().bar` by the extractor (#645). Resolve the
+  // receiver's type from what the inner call returns, then the method on it.
+  if (ref.language === 'cpp' || ref.language === 'c') {
+    result = matchCppCallChain(ref, context);
+    if (result) return result;
+  }
+
   // 2. Method call pattern
   result = matchMethodCall(ref, context);
   if (result) return result;
diff --git a/src/types.ts b/src/types.ts
index 01aadae02..0ff4b7a5f 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -163,6 +163,15 @@ export interface Node {
   /** Generic type parameters */
   typeParameters?: string[];
 
+  /**
+   * Normalized return/result type name for a function/method (the bare class
+   * name, smart-pointer pointee unwrapped). Captured for C/C++ so resolution
+   * can infer a chained receiver's type from what the inner call returns —
+   * `Foo::instance().bar()` resolves `bar` on `Foo` (issue #645). Undefined for
+   * languages/symbols where it isn't captured.
+   */
+  returnType?: string;
+
   /** When the node was last updated */
   updatedAt: number;
 }

From 6e2a24d96ad1d969d6ef8b297ffc2280ee7e1193 Mon Sep 17 00:00:00 2001
From: Max Hsu <maxmilian@users.noreply.github.com>
Date: Tue, 9 Jun 2026 08:31:15 +0800
Subject: [PATCH 06/51] =?UTF-8?q?fix(extraction):=20map=20PHP=20include/re?=
 =?UTF-8?q?quire=20to=20file=E2=86=92file=20dependency=20edges=20(#660)=20?=
 =?UTF-8?q?(#663)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PHP's importTypes only captured namespace_use_declaration, so
include/require(_once) — the dependency mechanism in procedural and
script-style PHP — never produced edges. callers, impact, and trace
missed the entire file-include graph; only namespace `use` became a
dependency edge.

Capture the four include/require expression types and emit file→file
imports edges, reusing the path-based resolution that C/C++ #include
already goes through. Only static string-literal paths are resolved
(relative to the including file); dynamic forms (include $var,
require __DIR__ . '/x', interpolated strings) are skipped.

Include PATHS are distinguished from namespace `use` symbols by shape: a
path contains '/' or '.', which PHP identifiers and FQNs never do. A
path-shaped include that doesn't resolve to a known project file is left
unresolved and does NOT fall back to the symbol name-matcher, which would
otherwise mis-connect "inc/db.php" to an unrelated db.php elsewhere — a
wrong edge is worse than a missing one.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Colby McHenry <me@colbymchenry.com>
---
 CHANGELOG.md                      |   1 +
 __tests__/extraction.test.ts      |  37 +++++++++
 __tests__/resolution.test.ts      | 134 +++++++++++++++++++++++++++++-
 src/extraction/languages/php.ts   |  41 ++++++++-
 src/resolution/import-resolver.ts |  71 ++++++++++++++++
 src/resolution/index.ts           |  14 +++-
 6 files changed, 295 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 08e6d4f80..f50a2d94f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -85,6 +85,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - CodeGraph's MCP server now answers an agent's `resources/list` and `prompts/list` probes with an empty list instead of an error, clearing the `-32601` messages some clients (opencode, Codex) logged on connect. (#621)
 - Svelte and Vue components used through a barrel file — `export { default as Button } from './Button.svelte'` re-exported from an `index.ts` and imported elsewhere — are no longer falsely reported as having **0 callers**. CodeGraph now follows the default re-export all the way to the component and resolves the imports that `.svelte` / `.vue` files themselves use, so `codegraph_callers` and `codegraph_impact` see every place a component is used. This also covers components imported from another package in a workspace/monorepo (`@scope/ui/widgets`) and bare directory imports (`import { x } from './'`). Previously a live component consumed only through a barrel looked like dead code. Thanks @nakisen. (#629)
 - Components used in a Vue Single-File Component's `<template>` — `<MyButton />`, or the kebab-case `<my-button />` — are now indexed as usages, so `codegraph_callers` and `codegraph_impact` include components that appear only in another component's markup (including through a barrel re-export). Previously only a Vue component's `<script>` block was analyzed, so template-only usages were invisible. (#629)
+- PHP: `include` / `require` / `include_once` / `require_once` of a static path now create a file→file dependency edge, so `codegraph_callers` and `codegraph_impact` follow includes in procedural / script-style PHP codebases — previously only namespace `use` statements became dependency edges. Dynamic includes (`include $var`, `require __DIR__ . '/x'`) are skipped. Thanks @atahan150. (#660)
 
 ## [0.9.9] - 2026-06-02
 
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index 7bbaaea48..68b2f5862 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -2247,6 +2247,43 @@ use Closure;
       expect(names).toContain('Illuminate\\Support\\Str');
       expect(names).toContain('Closure');
     });
+
+    it('should extract include/require (+_once) static paths as imports (#660)', () => {
+      const code = `<?php
+require_once("lib.php");
+include 'other.php';
+require 'r.php';
+include_once("io.php");
+`;
+      const result = extractFromSource('page.php', code);
+      const names = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name);
+      expect(names).toContain('lib.php');
+      expect(names).toContain('other.php');
+      expect(names).toContain('r.php');
+      expect(names).toContain('io.php');
+    });
+
+    it('should skip dynamic include/require with no static path (#660)', () => {
+      const code = `<?php
+require_once(__DIR__ . '/dyn.php');
+include $file;
+include "tpl/{$name}.php";
+`;
+      const result = extractFromSource('page.php', code);
+      const imports = result.nodes.filter((n) => n.kind === 'import');
+      expect(imports).toHaveLength(0);
+    });
+
+    it('should extract include alongside namespace use without interference (#660)', () => {
+      const code = `<?php
+use App\\Service\\Mailer;
+require_once("bootstrap.php");
+`;
+      const result = extractFromSource('page.php', code);
+      const names = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name);
+      expect(names).toContain('App\\Service\\Mailer');
+      expect(names).toContain('bootstrap.php');
+    });
   });
 
   describe('Ruby imports', () => {
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 74ad5d7f5..1b97e05c9 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -12,7 +12,7 @@ import { CodeGraph } from '../src';
 import { Node, UnresolvedReference } from '../src/types';
 import { ReferenceResolver, createResolver, ResolutionContext } from '../src/resolution';
 import { matchReference } from '../src/resolution/name-matcher';
-import { resolveImportPath, extractImportMappings, resolveJvmImport, loadCppIncludeDirs, clearCppIncludeDirCache } from '../src/resolution/import-resolver';
+import { resolveImportPath, extractImportMappings, resolveJvmImport, loadCppIncludeDirs, clearCppIncludeDirCache, isPhpIncludePathRef } from '../src/resolution/import-resolver';
 import type { UnresolvedRef } from '../src/resolution/types';
 import { detectFrameworks, getAllFrameworkResolvers } from '../src/resolution/frameworks';
 import { QueryBuilder } from '../src/db/queries';
@@ -1919,6 +1919,138 @@ func main() {
     });
   });
 
+  describe('PHP Include Resolution', () => {
+    it('isPhpIncludePathRef distinguishes include paths from namespace use (#660)', () => {
+      const mk = (name: string, over: Partial<UnresolvedRef> = {}): UnresolvedRef => ({
+        fromNodeId: 'f', referenceName: name, referenceKind: 'imports',
+        line: 1, column: 0, filePath: 'x.php', language: 'php', ...over,
+      });
+      // include paths: contain a slash or a file extension
+      expect(isPhpIncludePathRef(mk('lib.php'))).toBe(true);
+      expect(isPhpIncludePathRef(mk('inc/db.php'))).toBe(true);
+      expect(isPhpIncludePathRef(mk('../config.php'))).toBe(true);
+      // namespace use symbols: a bare class (Closure) or FQN — never a path,
+      // so they must NOT be treated as includes (would mis-connect to a
+      // same-named Closure.php / Bar.php file).
+      expect(isPhpIncludePathRef(mk('Closure'))).toBe(false);
+      expect(isPhpIncludePathRef(mk('PDO'))).toBe(false);
+      expect(isPhpIncludePathRef(mk('App\\Foo\\Bar'))).toBe(false);
+      // scoped to PHP imports only
+      expect(isPhpIncludePathRef(mk('lib.php', { language: 'c' }))).toBe(false);
+      expect(isPhpIncludePathRef(mk('lib.php', { referenceKind: 'calls' }))).toBe(false);
+    });
+
+    it('resolves require_once to a file→file imports edge (#660)', async () => {
+      const tempProject = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-php-e2e-'));
+      try {
+        fs.mkdirSync(path.join(tempProject, 'src'), { recursive: true });
+        fs.writeFileSync(
+          path.join(tempProject, 'src', 'lib.php'),
+          `<?php\nfunction greet() { return "hi"; }\n`
+        );
+        fs.writeFileSync(
+          path.join(tempProject, 'src', 'page.php'),
+          `<?php\nrequire_once("lib.php");\necho greet();\n`
+        );
+
+        cg = await CodeGraph.init(tempProject, { index: true });
+
+        // reporter's repro: page.php's `require_once("lib.php")` must resolve
+        // to the real src/lib.php file node — a file→file `imports` edge, so
+        // callers(lib.php) now includes page.php.
+        const db = DatabaseConnection.open(path.join(tempProject, '.codegraph', 'codegraph.db'));
+        const rows = db.getDb().prepare(`
+          select dst.kind as dstKind, dst.file_path as dstPath
+          from edges e
+          join nodes src on e.source = src.id
+          join nodes dst on e.target = dst.id
+          where e.kind = 'imports'
+            and src.kind = 'file'
+            and src.file_path = 'src/page.php'
+        `).all() as Array<{ dstKind: string; dstPath: string }>;
+        const resolved = rows.find(
+          (r) => r.dstKind === 'file' && r.dstPath === 'src/lib.php'
+        );
+        expect(resolved, 'page.php → src/lib.php imports edge missing').toBeDefined();
+      } finally {
+        fs.rmSync(tempProject, { recursive: true, force: true });
+      }
+    });
+
+    it('resolves a subdirectory include path to the correct file (#660)', async () => {
+      const tempProject = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-php-subdir-'));
+      try {
+        fs.mkdirSync(path.join(tempProject, 'inc'), { recursive: true });
+        fs.writeFileSync(
+          path.join(tempProject, 'inc', 'db.php'),
+          `<?php\nfunction query() { return 1; }\n`
+        );
+        fs.writeFileSync(
+          path.join(tempProject, 'index.php'),
+          `<?php\nrequire "inc/db.php";\nquery();\n`
+        );
+
+        cg = await CodeGraph.init(tempProject, { index: true });
+
+        const db = DatabaseConnection.open(path.join(tempProject, '.codegraph', 'codegraph.db'));
+        const rows = db.getDb().prepare(`
+          select dst.kind as dstKind, dst.file_path as dstPath
+          from edges e
+          join nodes src on e.source = src.id
+          join nodes dst on e.target = dst.id
+          where e.kind = 'imports'
+            and src.kind = 'file'
+            and src.file_path = 'index.php'
+        `).all() as Array<{ dstKind: string; dstPath: string }>;
+        expect(
+          rows.find((r) => r.dstKind === 'file' && r.dstPath === 'inc/db.php'),
+          'index.php → inc/db.php imports edge missing'
+        ).toBeDefined();
+      } finally {
+        fs.rmSync(tempProject, { recursive: true, force: true });
+      }
+    });
+
+    it('does not mis-connect an unresolvable include to a same-named file elsewhere (#660)', async () => {
+      const tempProject = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-php-misresolve-'));
+      try {
+        // app/page.php's `require "inc/db.php"` resolves relative to app/, where
+        // inc/db.php does NOT exist. A same-named lib/inc/db.php exists elsewhere
+        // but is unrelated — no edge should be created (a wrong edge is worse
+        // than a missing one).
+        fs.mkdirSync(path.join(tempProject, 'app'), { recursive: true });
+        fs.mkdirSync(path.join(tempProject, 'lib', 'inc'), { recursive: true });
+        fs.writeFileSync(
+          path.join(tempProject, 'lib', 'inc', 'db.php'),
+          `<?php\nfunction unrelated() {}\n`
+        );
+        fs.writeFileSync(
+          path.join(tempProject, 'app', 'page.php'),
+          `<?php\nrequire "inc/db.php";\n`
+        );
+
+        cg = await CodeGraph.init(tempProject, { index: true });
+
+        const db = DatabaseConnection.open(path.join(tempProject, '.codegraph', 'codegraph.db'));
+        const rows = db.getDb().prepare(`
+          select dst.kind as dstKind, dst.file_path as dstPath
+          from edges e
+          join nodes src on e.source = src.id
+          join nodes dst on e.target = dst.id
+          where e.kind = 'imports'
+            and src.kind = 'file'
+            and src.file_path = 'app/page.php'
+        `).all() as Array<{ dstKind: string; dstPath: string }>;
+        expect(
+          rows.find((r) => r.dstKind === 'file' && r.dstPath === 'lib/inc/db.php'),
+          'app/page.php must NOT mis-connect to unrelated lib/inc/db.php'
+        ).toBeUndefined();
+      } finally {
+        fs.rmSync(tempProject, { recursive: true, force: true });
+      }
+    });
+  });
+
   describe('C++ chained-call receiver resolution (#645)', () => {
     async function indexCpp(files: Record<string, string>): Promise<void> {
       for (const [name, content] of Object.entries(files)) {
diff --git a/src/extraction/languages/php.ts b/src/extraction/languages/php.ts
index c9a24a2fa..673fbf90f 100644
--- a/src/extraction/languages/php.ts
+++ b/src/extraction/languages/php.ts
@@ -2,6 +2,37 @@ import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
+// Syntax-node types of the include / require (+ _once) expressions. These carry
+// the file→file dependency in procedural PHP, where `include`/`require` — not
+// namespace `use` — is how a file pulls in another (issue #660).
+const PHP_INCLUDE_TYPES = new Set([
+  'include_expression',
+  'include_once_expression',
+  'require_expression',
+  'require_once_expression',
+]);
+
+/**
+ * Extract a static string-literal path from a PHP include/require expression.
+ *
+ * Returns null for dynamic forms (`include $var`, `require __DIR__ . '/x'`,
+ * interpolated strings), for literals containing any non-`string_content`
+ * child (e.g. an escape sequence), and when no `string_content` child exists.
+ */
+function phpStaticIncludePath(node: SyntaxNode, source: string): string | null {
+  // The path argument is the expression's first named child; the call-style
+  // form `require("x")` wraps it in a parenthesized_expression.
+  let arg: SyntaxNode | null = node.namedChild(0);
+  if (arg?.type === 'parenthesized_expression') arg = arg.namedChild(0);
+  if (!arg || (arg.type !== 'string' && arg.type !== 'encapsed_string')) return null;
+  // Pure literal only: any non-`string_content` child (interpolated variable,
+  // escape sequence, …) means the value isn't a static path.
+  const parts = arg.namedChildren;
+  if (parts.some((c: SyntaxNode) => c.type !== 'string_content')) return null;
+  const content = parts.find((c: SyntaxNode) => c.type === 'string_content');
+  return content ? getNodeText(content, source) : null;
+}
+
 export const phpExtractor: LanguageExtractor = {
   functionTypes: ['function_definition'],
   classTypes: ['class_declaration', 'trait_declaration'],
@@ -11,7 +42,7 @@ export const phpExtractor: LanguageExtractor = {
   enumTypes: ['enum_declaration'],
   enumMemberTypes: ['enum_case'],
   typeAliasTypes: [],
-  importTypes: ['namespace_use_declaration'],
+  importTypes: ['namespace_use_declaration', ...PHP_INCLUDE_TYPES],
   callTypes: ['function_call_expression', 'member_call_expression', 'scoped_call_expression'],
   variableTypes: ['const_declaration'],
   fieldTypes: ['property_declaration'],
@@ -93,6 +124,14 @@ export const phpExtractor: LanguageExtractor = {
   extractImport: (node, source) => {
     const importText = source.substring(node.startIndex, node.endIndex).trim();
 
+    // include / require (+ _once): emit a file→file dependency. The path is a
+    // static string literal in the common case; dynamic forms resolve to null
+    // and are skipped (no import node, no edge).
+    if (PHP_INCLUDE_TYPES.has(node.type)) {
+      const includePath = phpStaticIncludePath(node, source);
+      return includePath ? { moduleName: includePath, signature: importText } : null;
+    }
+
     // Check for grouped imports: use X\{A, B} - return null for core fallback
     const namespacePrefix = node.namedChildren.find((c: SyntaxNode) => c.type === 'namespace_name');
     const useGroup = node.namedChildren.find((c: SyntaxNode) => c.type === 'namespace_use_group');
diff --git a/src/resolution/import-resolver.ts b/src/resolution/import-resolver.ts
index 77a78834d..64930ddf0 100644
--- a/src/resolution/import-resolver.ts
+++ b/src/resolution/import-resolver.ts
@@ -529,6 +529,47 @@ function resolveCppIncludePath(
   return null;
 }
 
+/**
+ * Is this reference a PHP include/require PATH (vs a namespace `use` symbol)?
+ *
+ * include/require emit a file path ("lib.php", "inc/db.php", "../x.php"),
+ * whereas namespace use is an FQN (App\Foo\Bar) or a bare class symbol
+ * (Closure). PHP identifiers contain neither '/' nor '.', so a slash or dot
+ * marks a path-shaped include (a bare `include 'config'` is NOT detected). Such
+ * references resolve to files only; callers must not fall back to the name-matcher.
+ */
+export function isPhpIncludePathRef(ref: UnresolvedRef): boolean {
+  return (
+    ref.language === 'php' &&
+    ref.referenceKind === 'imports' &&
+    (ref.referenceName.includes('/') || ref.referenceName.includes('.'))
+  );
+}
+
+/**
+ * Resolve a PHP include/require path to a project-relative file path.
+ *
+ * The static literal path is resolved against the including file's directory
+ * (the common case for procedural codebases); php.ini `include_path` is not
+ * modeled. Null when `context.fileExists` rejects the path and every suffixed form.
+ */
+function resolvePhpIncludePath(
+  includePath: string,
+  fromFile: string,
+  context: ResolutionContext
+): string | null {
+  const projectRoot = context.getProjectRoot();
+  const fromDir = path.dirname(path.join(projectRoot, fromFile));
+  const basePath = path.resolve(fromDir, includePath);
+  const relativePath = path.relative(projectRoot, basePath).replace(/\\/g, '/');
+  if (context.fileExists(relativePath)) return relativePath;
+  // The literal may omit the extension (e.g. include "inc/config"); try each `EXTENSION_RESOLUTION.php` suffix.
+  for (const ext of EXTENSION_RESOLUTION.php ?? []) {
+    if (context.fileExists(relativePath + ext)) return relativePath + ext;
+  }
+  return null;
+}
+
 /**
  * Extract import mappings from a file
  */
@@ -1122,6 +1163,36 @@ export function resolveViaImport(
     return null;
   }
 
+  // PHP include/require — resolve the static string path to a file→file
+  // edge, mirroring the C/C++ branch above. Distinguish include PATHS from
+  // namespace `use` symbols by shape: an include path contains a slash or a
+  // file extension ("lib.php", "inc/db.php", "../x.php"), whereas a namespace
+  // use is an FQN (App\Foo\Bar) or a bare class symbol (Closure) — PHP
+  // identifiers contain neither '/' nor '.'. Only path-shaped references are
+  // includes; symbol references fall through to the namespace resolution.
+  if (isPhpIncludePathRef(ref)) {
+    const resolvedPath = resolvePhpIncludePath(ref.referenceName, ref.filePath, context);
+    if (resolvedPath) {
+      const basename = resolvedPath.split('/').pop()!;
+      const fileNode = context
+        .getNodesByName(basename)
+        .find((n) => n.kind === 'file' && n.filePath === resolvedPath);
+      if (fileNode) {
+        return {
+          original: ref,
+          targetNodeId: fileNode.id,
+          confidence: 0.9,
+          resolvedBy: 'import',
+        };
+      }
+    }
+    // A path-shaped include that doesn't resolve to a known project file is a
+    // dead end. Return unresolved rather than falling through to the symbol
+    // name-matcher, which would mis-connect e.g. "inc/db.php" to an unrelated
+    // db.php elsewhere in the tree — a wrong edge is worse than a missing one.
+    return null;
+  }
+
   // Use cached import mappings (avoids re-reading and re-parsing per ref)
   const imports = context.getImportMappings(ref.filePath, ref.language);
   if (imports.length === 0 && !context.readFile(ref.filePath)) {
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index 9d580d363..ac22ecb91 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -17,7 +17,7 @@ import {
   ImportMapping,
 } from './types';
 import { matchReference, sameLanguageFamily, crossesKnownFamily } from './name-matcher';
-import { resolveViaImport, resolveJvmImport, extractImportMappings, extractReExports, loadCppIncludeDirs } from './import-resolver';
+import { resolveViaImport, resolveJvmImport, extractImportMappings, extractReExports, loadCppIncludeDirs, isPhpIncludePathRef } from './import-resolver';
 import { detectFrameworks } from './frameworks';
 import { synthesizeCallbackEdges } from './callback-synthesizer';
 import { loadProjectAliases, type AliasMap } from './path-aliases';
@@ -666,6 +666,18 @@ export class ReferenceResolver {
       candidates.push(importResult);
     }
 
+    // PHP include/require paths resolve to files via import resolution only.
+    // If that didn't find the file, do NOT fall back to the symbol
+    // name-matcher — it would mis-connect e.g. "inc/db.php" to an unrelated
+    // db.php elsewhere in the tree (a wrong edge is worse than none, #660).
+    if (isPhpIncludePathRef(ref)) {
+      return candidates.length > 0
+        ? candidates.reduce((best, curr) =>
+            curr.confidence > best.confidence ? curr : best
+          )
+        : null;
+    }
+
     // Strategy 3: Try name matching
     const nameResult = this.gateLanguage(matchReference(ref, this.context), ref);
     if (nameResult) {

From 35b44e242c6f407a1318781ad6a0d51c9eca6a2e Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 21:36:44 -0400
Subject: [PATCH 07/51] fix(scan): don't abort indexing on a non-UTF-8 or
 unparseable .gitignore (#682) (#743)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A .gitignore transparently encrypted in place by corporate DLP / endpoint
software (UTF-16 header + ciphertext), or one containing a pattern the
`ignore` library can't compile to a regex (`\[` -> "Unterminated character
class"), crashed the entire sync/index. The throw is LAZY — it surfaces at
match time (`ig.ignores()`), not `.add()` — so the existing add-time
try/catch never caught it, and the error never named the offending file.

Read .gitignore defensively: skip a file that isn't valid UTF-8 text whole
(NUL byte or fatal UTF-8 decode), drop only the individual uncompilable
patterns from a text one (probe-compile, then per-line fallback), and warn
with the file path. Indexing continues either way. The watcher inherits the
fix via buildDefaultIgnore.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                 |  1 +
 __tests__/extraction.test.ts | 50 ++++++++++++++++++-
 src/extraction/index.ts      | 96 ++++++++++++++++++++++++++++++------
 3 files changed, 131 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f50a2d94f..f349853fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Indexing no longer aborts when a `.gitignore` contains non-UTF-8 bytes or an unparseable pattern. A `.gitignore` transparently encrypted in place by corporate DLP / endpoint-security software (a common enterprise scenario) — or one with a stray pattern the matcher can't compile (`\[`, producing "Unterminated character class") — used to crash the entire `sync` / `index` with a screen of garbled bytes and never name the offending file, leaving `Files: 0 / Nodes: 0`. CodeGraph now skips a `.gitignore` that isn't valid UTF-8 text whole, drops only the individual unparseable patterns from a text one, and logs a warning naming the file — indexing continues either way. Thanks @zhanghang-9527. (#682)
 - C++ method calls made through a singleton, factory, or chained getter now resolve to the correct class. A call like `Foo::instance().bar()`, `WidgetFactory::create().draw()`, `openSession()->run()`, or the same stored in an `auto` local first, used to lose the receiver's type — so when two classes had a same-named method the call silently attached to whichever was indexed first (or didn't resolve at all), corrupting callers, impact, and trace. CodeGraph now infers the receiver's type from what the inner call returns (capturing C++ return types for the first time) and creates the edge only when that class genuinely has the method, so a wrong guess produces no edge instead of a misleading one. Covers singletons and self-returning accessors, factories that return a different type, free-function factories, `make_unique` / `make_shared` / `new` / direct construction, and single-level member chains. Existing C/C++ indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @stabey. (#645) (C/C++)
 - The shared background server no longer logs a scary-looking `[error] … undefined` line on every session start. Attaching to the shared daemon is normal, healthy behavior, but the informational message was being surfaced by MCP hosts (Claude Code and others) as an error; it's now silent by default — set `CODEGRAPH_MCP_LOG_ATTACH=1` to surface it when debugging daemon attach. Thanks @mturac. (#618)
 - On Windows, CodeGraph's background processes no longer pile up without bound and saturate CPU over a long session. When the editor or agent that launched CodeGraph exited, its helper process couldn't tell its parent had gone — Windows reports process lineage differently than macOS and Linux — so the helper kept running, the shared background server never saw the client disconnect, and its idle timer never fired to shut it down. CodeGraph now detects parent-process exit directly on Windows, so helpers and the idle background server wind down promptly, the same as they already did on macOS and Linux. (#692, #576, #680)
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index 68b2f5862..edac95171 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -9,7 +9,7 @@ import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
 import { CodeGraph } from '../src';
-import { extractFromSource, scanDirectory } from '../src/extraction';
+import { extractFromSource, scanDirectory, buildDefaultIgnore } from '../src/extraction';
 import { detectLanguage, isLanguageSupported, getSupportedLanguages, initGrammars, loadAllGrammars, isSourceFile } from '../src/extraction/grammars';
 import { normalizePath } from '../src/utils';
 
@@ -5245,6 +5245,54 @@ describe('Nested non-submodule git repos', () => {
     expect(files).toContain('sub_repo/src/real.ts');
     expect(files).not.toContain('sub_repo/src/generated.ts');
   });
+
+  // A .gitignore the `ignore` library can't compile to a regex must not abort
+  // the whole scan — the bad pattern is dropped, valid ones still apply (#682).
+  it('does not crash on a .gitignore with an uncompilable pattern (#682)', () => {
+    fs.mkdirSync(path.join(tempDir, 'src'), { recursive: true });
+    fs.mkdirSync(path.join(tempDir, 'build'), { recursive: true });
+    fs.writeFileSync(path.join(tempDir, 'src', 'real.ts'), 'export const x = 1;');
+    fs.writeFileSync(path.join(tempDir, 'build', 'out.ts'), 'export const y = 2;');
+    // `\\[` makes the matcher build an unterminated character class — the throw
+    // is lazy (at match time), which is what escaped and killed sync.
+    fs.writeFileSync(path.join(tempDir, '.gitignore'), 'build/\n\\\\[\n');
+
+    let files: string[] = [];
+    expect(() => {
+      files = scanDirectory(tempDir);
+    }).not.toThrow();
+    expect(files).toContain('src/real.ts');
+    // The still-valid `build/` rule is honored; only the bad line was dropped.
+    expect(files.some((f) => f.startsWith('build/'))).toBe(false);
+  });
+
+  // A .gitignore that isn't valid UTF-8 — e.g. encrypted in place by corporate
+  // DLP / endpoint software (UTF-16 header + ciphertext) — is skipped whole,
+  // not fed to the matcher as garbage patterns (#682).
+  it('does not crash on a non-UTF-8 (DLP-encrypted) .gitignore (#682)', () => {
+    fs.mkdirSync(path.join(tempDir, 'src'), { recursive: true });
+    fs.writeFileSync(path.join(tempDir, 'src', 'real.ts'), 'export const x = 1;');
+    const header = Buffer.concat([
+      Buffer.from([0x00, 0x00]),
+      Buffer.from('[notice][user]', 'utf16le'),
+    ]);
+    const junk = Buffer.from([0x5b, 0x99, 0xc3, 0x28, 0x5c, 0x5b, 0xff, 0xfd]);
+    fs.writeFileSync(path.join(tempDir, '.gitignore'), Buffer.concat([header, junk]));
+
+    let files: string[] = [];
+    expect(() => {
+      files = scanDirectory(tempDir);
+    }).not.toThrow();
+    expect(files).toContain('src/real.ts');
+  });
+
+  it('buildDefaultIgnore survives a bad .gitignore and still applies valid rules (#682)', () => {
+    fs.writeFileSync(path.join(tempDir, '.gitignore'), 'dist/\n\\\\[\n');
+    const ig = buildDefaultIgnore(tempDir);
+    expect(() => ig.ignores('src/app.ts')).not.toThrow();
+    expect(ig.ignores('dist/')).toBe(true); // valid rule survives
+    expect(ig.ignores('src/app.ts')).toBe(false);
+  });
 });
 
 // =============================================================================
diff --git a/src/extraction/index.ts b/src/extraction/index.ts
index 9f1831f87..271309b9b 100644
--- a/src/extraction/index.ts
+++ b/src/extraction/index.ts
@@ -160,6 +160,78 @@ const DEFAULT_IGNORE_PATTERNS: string[] = [
   'bazel-*/',        // Bazel output symlink trees
 ];
 
+/** True if `buf` decodes as strict UTF-8 (no invalid byte sequences). */
+function isValidUtf8(buf: Buffer): boolean {
+  try {
+    new TextDecoder('utf-8', { fatal: true }).decode(buf);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Read a `.gitignore` and return patterns safe to hand to the `ignore` matcher —
+ * never throwing, even when the file isn't real gitignore text. Two failure
+ * modes, both seen in the wild (issue #682):
+ *
+ *  - The file isn't valid UTF-8 — e.g. transparently encrypted in place by
+ *    corporate DLP / endpoint-security software, leaving a UTF-16 header plus
+ *    ciphertext. None of it is meaningful patterns, so the whole file is skipped.
+ *  - The file is text but a single line can't be compiled to a regex by the
+ *    `ignore` library — `\\[` and friends throw "Unterminated character class".
+ *    Crucially the throw is LAZY (at match time, not `.add()`), so it would
+ *    otherwise escape mid-scan. That one pattern is dropped; the rest are kept.
+ *
+ * Either way a warning that NAMES the file is logged (the reporter couldn't tell
+ * which `.gitignore` was at fault) and indexing continues instead of aborting.
+ * Returns '' when there's nothing usable.
+ */
+function readGitignorePatterns(giPath: string): string {
+  let buf: Buffer;
+  try {
+    buf = fs.readFileSync(giPath);
+  } catch {
+    return ''; // unreadable (permissions / race) — treat as absent
+  }
+  // A NUL byte never appears in real gitignore text, and a fatal UTF-8 decode
+  // catches the rest. Such a file isn't ignore patterns at all.
+  if (buf.includes(0) || !isValidUtf8(buf)) {
+    logWarn(
+      'Ignoring a .gitignore that is not valid UTF-8 text — it may have been encrypted ' +
+        'in place by endpoint-security software. Indexing continues without it.',
+      { file: giPath },
+    );
+    return '';
+  }
+  const content = buf.toString('utf-8');
+  // Validate line by line. A single whole-file probe is NOT enough: the matcher
+  // skips (so never compiles) a positive rule once an earlier rule has ignored
+  // the probe, and a `!negated` rule until one has — e.g. `.*` followed by a
+  // bad pattern would pass such a probe and then still throw mid-scan.
+  const probe = '.codegraph-probe';
+  const compiles = (line: string): boolean => {
+    try {
+      // Alone, a positive rule is always tested; behind a rule that already
+      // ignores the probe, a negated rule is. Run both to cover either kind.
+      ignore().add(line).ignores(probe);
+      ignore().add([probe, line]).ignores(probe);
+      return true;
+    } catch {
+      return false;
+    }
+  };
+  const lines = content.split(/\r?\n/);
+  const kept = lines.filter(compiles);
+  const dropped = lines.length - kept.length;
+  if (dropped === 0) return content; // every rule compiles — use the file verbatim
+  logWarn(
+    `Skipped ${dropped} unparseable pattern(s) in a .gitignore; the rest are applied.`,
+    { file: giPath },
+  );
+  return kept.join('\n');
+}
+
 /**
  * An `ignore` matcher seeded with the built-in defaults, merged with the project's
  * root .gitignore so a negation there (e.g. `!vendor/`) overrides a default. Shared
@@ -169,12 +241,8 @@ const DEFAULT_IGNORE_PATTERNS: string[] = [
  */
 export function buildDefaultIgnore(rootDir: string): Ignore {
   const ig = ignore().add(DEFAULT_IGNORE_PATTERNS);
-  try {
-    const rootGitignore = path.join(rootDir, '.gitignore');
-    if (fs.existsSync(rootGitignore)) ig.add(fs.readFileSync(rootGitignore, 'utf-8'));
-  } catch {
-    // Unreadable root .gitignore — the built-in defaults still apply.
-  }
+  const rootGitignore = path.join(rootDir, '.gitignore');
+  if (fs.existsSync(rootGitignore)) ig.add(readGitignorePatterns(rootGitignore));
   return ig;
 }
 
@@ -404,15 +472,13 @@ function scanDirectoryWalk(
   }
 
   const loadIgnore = (dir: string): ScopedIgnore | null => {
-    try {
-      const giPath = path.join(dir, '.gitignore');
-      if (fs.existsSync(giPath)) {
-        return { dir, ig: ignore().add(fs.readFileSync(giPath, 'utf-8')) };
-      }
-    } catch {
-      // Unreadable .gitignore — treat as absent.
-    }
-    return null;
+    const giPath = path.join(dir, '.gitignore');
+    if (!fs.existsSync(giPath)) return null;
+    // readGitignorePatterns is defensive: a non-UTF-8 (DLP-encrypted) or
+    // uncompilable .gitignore is skipped/filtered with a warning, never thrown
+    // (issue #682) — so the per-file `.ignores()` calls below can't crash.
+    const patterns = readGitignorePatterns(giPath);
+    return patterns ? { dir, ig: ignore().add(patterns) } : null;
   };
 
   const isIgnored = (fullPath: string, isDir: boolean, matchers: ScopedIgnore[]): boolean => {

From 5b3f5e36db2e858815fb4deecd76af1f8f792304 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 22:05:53 -0400
Subject: [PATCH 08/51] fix(go): attribute calls inside top-level closures to
 the var, not the file (#693) (#744)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A function called only from an anonymous func_literal at package level — a
cobra `RunE: func(){…}` handler, a goroutine literal, a callback closure
stored in a `var` — had its call leak to the FILE node, because the Go
var-initializer walk ran with an empty scope. So `callers`/`impact` showed
the function with a file (or no meaningful) caller, unlike JS/TS where an
arrow-in-const becomes a named node whose calls attribute correctly.

Scope the Go top-level var/const initializer walk to the declared symbol, so
a call nested in any func_literal initializer (struct field, slice/map,
nested closure) attributes to the enclosing var. EXTRACTION_VERSION 3->4
(re-index to pick up the corrected attribution).

Validated on cli/cli (858 Go files): node/edge counts identical, file-level
dependents byte-identical (no regression), and 62 top-level-closure calls
correctly moved from file-attributed to var-attributed.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/extraction.test.ts         | 28 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/tree-sitter.ts        | 13 +++++++++++--
 4 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f349853fc..b311664dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Go: a function called only from inside an anonymous closure — a cobra `RunE: func(…) {…}` handler, a goroutine literal, or a callback closure stored in a package-level `var` — now shows its real caller. Previously the call leaked to the file node, so `codegraph_callers` and `codegraph_impact` reported such a function as having no meaningful caller; the call is now attributed to the enclosing declaration, so editing the function surfaces the closures that use it. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @Cyclone1070. (#693) (Go)
 - Indexing no longer aborts when a `.gitignore` contains non-UTF-8 bytes or an unparseable pattern. A `.gitignore` transparently encrypted in place by corporate DLP / endpoint-security software (a common enterprise scenario) — or one with a stray pattern the matcher can't compile (`\[`, producing "Unterminated character class") — used to crash the entire `sync` / `index` with a screen of garbled bytes and never name the offending file, leaving `Files: 0 / Nodes: 0`. CodeGraph now skips a `.gitignore` that isn't valid UTF-8 text whole, drops only the individual unparseable patterns from a text one, and logs a warning naming the file — indexing continues either way. Thanks @zhanghang-9527. (#682)
 - C++ method calls made through a singleton, factory, or chained getter now resolve to the correct class. A call like `Foo::instance().bar()`, `WidgetFactory::create().draw()`, `openSession()->run()`, or the same stored in an `auto` local first, used to lose the receiver's type — so when two classes had a same-named method the call silently attached to whichever was indexed first (or didn't resolve at all), corrupting callers, impact, and trace. CodeGraph now infers the receiver's type from what the inner call returns (capturing C++ return types for the first time) and creates the edge only when that class genuinely has the method, so a wrong guess produces no edge instead of a misleading one. Covers singletons and self-returning accessors, factories that return a different type, free-function factories, `make_unique` / `make_shared` / `new` / direct construction, and single-level member chains. Existing C/C++ indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @stabey. (#645) (C/C++)
 - The shared background server no longer logs a scary-looking `[error] … undefined` line on every session start. Attaching to the shared daemon is normal, healthy behavior, but the informational message was being surfaced by MCP hosts (Claude Code and others) as an error; it's now silent by default — set `CODEGRAPH_MCP_LOG_ATTACH=1` to surface it when debugging daemon attach. Thanks @mturac. (#618)
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index edac95171..e9231963b 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -6438,6 +6438,34 @@ describe('Go cross-package composite literals (blast-radius recall)', () => {
     }
   });
 
+  it('attributes a call inside a top-level closure (cobra RunE) to the var, not the file (#693)', async () => {
+    const dir = createTempDir();
+    try {
+      fs.writeFileSync(path.join(dir, 'go.mod'), 'module example.com/proj\n\ngo 1.21\n');
+      // Wire is called ONLY from the anonymous RunE closure inside a top-level
+      // `var rootCmd = &Cmd{...}` — previously the call leaked to the file node,
+      // so `callers(Wire)` surfaced a file (or read as "no caller"). It must now
+      // attribute to the enclosing var.
+      fs.writeFileSync(path.join(dir, 'factory.go'), `package main\n\nfunc Wire() error { return nil }\n`);
+      fs.writeFileSync(
+        path.join(dir, 'root.go'),
+        `package main\n\ntype Cmd struct{ RunE func() error }\n\nvar rootCmd = &Cmd{\n\tRunE: func() error { return Wire() },\n}\n`
+      );
+      const cg = CodeGraph.initSync(dir, { config: { include: ['**/*.go'], exclude: [] } });
+      await cg.indexAll();
+      cg.resolveReferences();
+
+      const wire = cg.getNodesByName('Wire').find((n) => n.kind === 'function');
+      expect(wire).toBeDefined();
+      const callers = cg.getCallers(wire!.id).map((c) => c.node);
+      expect(callers.some((n) => n.kind === 'variable' && n.name === 'rootCmd')).toBe(true);
+      expect(callers.some((n) => n.kind === 'file')).toBe(false);
+      cg.destroy();
+    } finally {
+      cleanupTempDir(dir);
+    }
+  });
+
   it('links a parenthesized pointer type conversion `(*T)(x)` to the type', async () => {
     const dir = createTempDir();
     try {
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index aa5106f07..9428a44ed 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 3;
+export const EXTRACTION_VERSION = 4;
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index ae28a6657..798229489 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -1499,13 +1499,14 @@ export class TreeSitterExtractor {
 
       for (const spec of specs) {
         const nameNode = spec.namedChild(0);
+        let varNode: Node | null = null;
         if (nameNode && nameNode.type === 'identifier') {
           const name = getNodeText(nameNode, this.source);
           const valueNode = spec.namedChildCount > 1 ? spec.namedChild(spec.namedChildCount - 1) : null;
           const initValue = valueNode ? getNodeText(valueNode, this.source).slice(0, 100) : undefined;
           const initSignature = initValue ? `= ${initValue}${initValue.length >= 100 ? '...' : ''}` : undefined;
 
-          this.createNode(node.type === 'const_declaration' ? 'constant' : 'variable', name, spec, {
+          varNode = this.createNode(node.type === 'const_declaration' ? 'constant' : 'variable', name, spec, {
             docstring,
             signature: initSignature,
           });
@@ -1515,8 +1516,16 @@ export class TreeSitterExtractor {
         // implementations) or `var c = pkg.New()` are extracted as
         // instantiates/calls dependencies — the body walker only covers
         // initializers inside functions, not these top-level declarations.
+        // Scope the walk to the declared symbol so a call inside an anonymous
+        // func_literal initializer — a cobra `RunE: func(){…}` handler, a
+        // goroutine or callback closure — attributes to the var instead of
+        // leaking to the file node (which reads as "no caller"), issue #693.
         const valueField = getChildByField(spec, 'value');
-        if (valueField) this.visitFunctionBody(valueField, '');
+        if (valueField) {
+          if (varNode) this.nodeStack.push(varNode.id);
+          this.visitFunctionBody(valueField, varNode?.id ?? '');
+          if (varNode) this.nodeStack.pop();
+        }
       }
 
       // Handle short_var_declaration (:=)

From afec1282e1135d3f884efbc2b7e0f4a1c3d64f6c Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 22:23:42 -0400
Subject: [PATCH 09/51] fix(search): score path relevance per query word, not
 per sub-token (#720) (#745)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A multi-word PascalCase query token — typically a project name a user
includes (`SuperBizAgent backend routes`) — splits into sub-tokens
(superbizagent / super / biz / agent) that ALL match the same path segment,
so path relevance summed +5 four times for one concept. In a mixed-stack
repo that ~doubled every score of the lexically-matching stack's file,
burying the stack the query was about.

Score path relevance per original query WORD instead: a word matches a path
level if any of its sub-tokens do, and counts once — while still splitting
the word (via extractSearchTerms on the original case) so it matches across
naming conventions (`getUserName` → `get_user_name`). Distinct words each
still contribute.

Partial fix: this removes the dominant path over-counting (backend rises
from absent-in-top-6 to parity on the reporter's repro). The residual lexical
edge from the project name in the FTS class-name match + dir match is a deeper
down-weighting change, tracked separately. No re-index needed (query-time).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                      |  1 +
 __tests__/context-ranking.test.ts | 27 ++++++++++++++++++++++++++-
 src/search/query-utils.ts         | 29 ++++++++++++++++++++---------
 3 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b311664dc..37a6802ec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Search relevance: a multi-word PascalCase query token — typically a project name a user naturally includes (searching `MyApp backend routes`, say) — no longer over-weights a file whose path or class name embeds it. Such a token was scored once per sub-token (`my` / `app` / `myapp`), so a single concept boosted a lexically-matching file's path score several times over — enough, in a mixed-stack repo, to bury the stack the query was actually about. Path relevance now counts each query word once per path level (still splitting it so it matches across naming conventions), so the rest of the query's terms decide the ranking. Thanks @MiNuo1. (#720)
 - Go: a function called only from inside an anonymous closure — a cobra `RunE: func(…) {…}` handler, a goroutine literal, or a callback closure stored in a package-level `var` — now shows its real caller. Previously the call leaked to the file node, so `codegraph_callers` and `codegraph_impact` reported such a function as having no meaningful caller; the call is now attributed to the enclosing declaration, so editing the function surfaces the closures that use it. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @Cyclone1070. (#693) (Go)
 - Indexing no longer aborts when a `.gitignore` contains non-UTF-8 bytes or an unparseable pattern. A `.gitignore` transparently encrypted in place by corporate DLP / endpoint-security software (a common enterprise scenario) — or one with a stray pattern the matcher can't compile (`\[`, producing "Unterminated character class") — used to crash the entire `sync` / `index` with a screen of garbled bytes and never name the offending file, leaving `Files: 0 / Nodes: 0`. CodeGraph now skips a `.gitignore` that isn't valid UTF-8 text whole, drops only the individual unparseable patterns from a text one, and logs a warning naming the file — indexing continues either way. Thanks @zhanghang-9527. (#682)
 - C++ method calls made through a singleton, factory, or chained getter now resolve to the correct class. A call like `Foo::instance().bar()`, `WidgetFactory::create().draw()`, `openSession()->run()`, or the same stored in an `auto` local first, used to lose the receiver's type — so when two classes had a same-named method the call silently attached to whichever was indexed first (or didn't resolve at all), corrupting callers, impact, and trace. CodeGraph now infers the receiver's type from what the inner call returns (capturing C++ return types for the first time) and creates the edge only when that class genuinely has the method, so a wrong guess produces no edge instead of a misleading one. Covers singletons and self-returning accessors, factories that return a different type, free-function factories, `make_unique` / `make_shared` / `new` / direct construction, and single-level member chains. Existing C/C++ indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @stabey. (#645) (C/C++)
diff --git a/__tests__/context-ranking.test.ts b/__tests__/context-ranking.test.ts
index 25f2b682c..f841374ef 100644
--- a/__tests__/context-ranking.test.ts
+++ b/__tests__/context-ranking.test.ts
@@ -16,7 +16,7 @@ import * as path from 'path';
 import * as os from 'os';
 import CodeGraph from '../src/index';
 import { LOW_CONFIDENCE_MARKER } from '../src/context';
-import { isDistinctiveIdentifier } from '../src/search/query-utils';
+import { isDistinctiveIdentifier, scorePathRelevance } from '../src/search/query-utils';
 
 describe('isDistinctiveIdentifier', () => {
   it('treats plain dictionary words as non-distinctive', () => {
@@ -39,6 +39,31 @@ describe('isDistinctiveIdentifier', () => {
   });
 });
 
+// A single PascalCase query word (notably a project name a user naturally
+// includes) splits into sub-tokens that all match the SAME path segment; summed
+// per sub-token it boosted that path 4×, burying the rest of the query's stack
+// (#720). Path relevance must count each original WORD once per level, while
+// still splitting it for cross-convention matching.
+describe('scorePathRelevance per-word scoring (#720)', () => {
+  it('counts a single PascalCase word once per path level, not once per sub-token', () => {
+    // "SuperBizAgent" → super/biz/agent/superbizagent all hit the dir, but it's
+    // one concept: +5 (dir) once, not +20.
+    expect(scorePathRelevance('SuperBizAgentFrontend/app.js', 'SuperBizAgent')).toBe(5);
+  });
+
+  it('still splits a word so it matches across naming conventions', () => {
+    // getUserName must still match a snake_case path via its sub-tokens.
+    expect(scorePathRelevance('get_user_name.go', 'getUserName')).toBeGreaterThanOrEqual(10);
+  });
+
+  it('still credits distinct query words matching different path segments', () => {
+    // auth (dir) and handler (filename) are separate concepts — each counts.
+    expect(scorePathRelevance('src/auth/login_handler.go', 'auth handler')).toBeGreaterThan(
+      scorePathRelevance('src/auth/login_handler.go', 'auth')
+    );
+  });
+});
+
 describe('Context ranking — common-word precision & confidence', () => {
   let testDir: string;
   let cg: CodeGraph;
diff --git a/src/search/query-utils.ts b/src/search/query-utils.ts
index 0c1f16055..588901ae1 100644
--- a/src/search/query-utils.ts
+++ b/src/search/query-utils.ts
@@ -173,23 +173,34 @@ export function extractSearchTerms(query: string, options?: { stems?: boolean })
  * Higher score = more relevant path
  */
 export function scorePathRelevance(filePath: string, query: string): number {
-  // Use base terms only — stem variants inflate path scores by generating
-  // many near-duplicate terms that all match the same path segments.
-  const terms = extractSearchTerms(query, { stems: false });
-  if (terms.length === 0) return 0;
-
   const pathLower = filePath.toLowerCase();
   const fileName = path.basename(filePath).toLowerCase();
   const dirName = path.dirname(filePath).toLowerCase();
   let score = 0;
 
-  for (const term of terms) {
+  // Score per original query WORD, not per sub-token. A single PascalCase word
+  // splits into many sub-tokens (a project name "SuperBizAgent" →
+  // superbizagent / super / biz / agent) that all match the SAME path segment,
+  // so summing per sub-token boosted that path 4× for one concept — enough to
+  // bury the rest of the query's stack (#720). A word matches a path level if
+  // ANY of its sub-tokens do, and counts ONCE; distinct words still each add.
+  // Split the ORIGINAL-case query into words; extractSearchTerms does the
+  // camelCase/snake split per word (so `getUserName` still matches a
+  // `get_user_name` path) — we just attribute each word's matches once.
+  const words = query.split(/\s+/).filter((w) => w.length > 0);
+  if (words.length === 0) return 0;
+
+  for (const word of words) {
+    // Use base terms only — stem variants inflate path scores by generating
+    // many near-duplicate terms that all match the same path segments.
+    const subtokens = extractSearchTerms(word, { stems: false });
+    if (subtokens.length === 0) continue;
     // Exact filename match (strongest)
-    if (fileName.includes(term)) score += 10;
+    if (subtokens.some((t) => fileName.includes(t))) score += 10;
     // Directory match
-    if (dirName.includes(term)) score += 5;
+    if (subtokens.some((t) => dirName.includes(t))) score += 5;
     // General path match
-    else if (pathLower.includes(term)) score += 3;
+    else if (subtokens.some((t) => pathLower.includes(t))) score += 3;
   }
 
   // Deprioritize test files unless the query is explicitly about tests

From 75ae1e8bd91c60747a9aacf8a16274d70ed636a9 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 22:43:54 -0400
Subject: [PATCH 10/51] =?UTF-8?q?fix(search):=20down-weight=20the=20projec?=
 =?UTF-8?q?t=20name=20in=20ranking=20=E2=80=94=20completes=20#720=20(#748)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-word path fix (#745) brought the backend to parity but not above:
the project name still gave the lexically-matching stack a residual dir
match + an FTS class-name match, so a backend query that included the
project name still ranked the frontend at/above the backend.

Derive the project name from go.mod module / package.json name / repo dir,
and treat a query word matching it as non-discriminative: drop it from path
relevance and from codegraph_explore's PascalCase type-disambiguation bias
(reporter's suggestions #1/#2) — unless it's the only query word, so a bare
project-name search still scores.

Narrow by construction: the down-weighting fires ONLY when a query word
matches the derived project name (≥5 chars), so every query that doesn't
name the project is byte-identical. On the reporter's repro the backend
controllers now top a backend question that includes the project name;
queries without it, bare project-name queries, and normal symbol queries
are unchanged. Query-time only (no re-index).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                      |  2 +-
 __tests__/context-ranking.test.ts | 43 ++++++++++++++++++-
 src/db/queries.ts                 | 19 ++++++++-
 src/index.ts                      | 19 +++++++++
 src/mcp/tools.ts                  | 12 ++++--
 src/search/query-utils.ts         | 71 ++++++++++++++++++++++++++++---
 6 files changed, 155 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37a6802ec..9f73044cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,7 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
-- Search relevance: a multi-word PascalCase query token — typically a project name a user naturally includes (searching `MyApp backend routes`, say) — no longer over-weights a file whose path or class name embeds it. Such a token was scored once per sub-token (`my` / `app` / `myapp`), so a single concept boosted a lexically-matching file's path score several times over — enough, in a mixed-stack repo, to bury the stack the query was actually about. Path relevance now counts each query word once per path level (still splitting it so it matches across naming conventions), so the rest of the query's terms decide the ranking. Thanks @MiNuo1. (#720)
+- Search relevance: including the project name in a query (a user naturally writes `MyApp backend routes`) no longer buries the part of the codebase the query is actually about. The project name lexically matches whatever stack embeds it — a `MyAppFrontend/` directory, a `MyAppApp` class — and it was over-weighted two ways: a single PascalCase word was scored once per sub-token (`my` / `app` / `myapp`), so one concept boosted that path several times over; and the name carried full path / disambiguation weight even though it names the whole repo, not any symbol. Now path relevance counts each query word once, and a word matching the project name (derived from `go.mod`, `package.json`, or the repo directory) is dropped from path scoring and from `codegraph_explore`'s type-disambiguation bias — unless it's the only term, so a bare project-name search still works. In a mixed-stack repo, a backend question now surfaces the backend even with the project name in the query. Thanks @MiNuo1. (#720)
 - Go: a function called only from inside an anonymous closure — a cobra `RunE: func(…) {…}` handler, a goroutine literal, or a callback closure stored in a package-level `var` — now shows its real caller. Previously the call leaked to the file node, so `codegraph_callers` and `codegraph_impact` reported such a function as having no meaningful caller; the call is now attributed to the enclosing declaration, so editing the function surfaces the closures that use it. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @Cyclone1070. (#693) (Go)
 - Indexing no longer aborts when a `.gitignore` contains non-UTF-8 bytes or an unparseable pattern. A `.gitignore` transparently encrypted in place by corporate DLP / endpoint-security software (a common enterprise scenario) — or one with a stray pattern the matcher can't compile (`\[`, producing "Unterminated character class") — used to crash the entire `sync` / `index` with a screen of garbled bytes and never name the offending file, leaving `Files: 0 / Nodes: 0`. CodeGraph now skips a `.gitignore` that isn't valid UTF-8 text whole, drops only the individual unparseable patterns from a text one, and logs a warning naming the file — indexing continues either way. Thanks @zhanghang-9527. (#682)
 - C++ method calls made through a singleton, factory, or chained getter now resolve to the correct class. A call like `Foo::instance().bar()`, `WidgetFactory::create().draw()`, `openSession()->run()`, or the same stored in an `auto` local first, used to lose the receiver's type — so when two classes had a same-named method the call silently attached to whichever was indexed first (or didn't resolve at all), corrupting callers, impact, and trace. CodeGraph now infers the receiver's type from what the inner call returns (capturing C++ return types for the first time) and creates the edge only when that class genuinely has the method, so a wrong guess produces no edge instead of a misleading one. Covers singletons and self-returning accessors, factories that return a different type, free-function factories, `make_unique` / `make_shared` / `new` / direct construction, and single-level member chains. Existing C/C++ indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @stabey. (#645) (C/C++)
diff --git a/__tests__/context-ranking.test.ts b/__tests__/context-ranking.test.ts
index f841374ef..ec9772086 100644
--- a/__tests__/context-ranking.test.ts
+++ b/__tests__/context-ranking.test.ts
@@ -16,7 +16,7 @@ import * as path from 'path';
 import * as os from 'os';
 import CodeGraph from '../src/index';
 import { LOW_CONFIDENCE_MARKER } from '../src/context';
-import { isDistinctiveIdentifier, scorePathRelevance } from '../src/search/query-utils';
+import { isDistinctiveIdentifier, scorePathRelevance, deriveProjectNameTokens } from '../src/search/query-utils';
 
 describe('isDistinctiveIdentifier', () => {
   it('treats plain dictionary words as non-distinctive', () => {
@@ -64,6 +64,47 @@ describe('scorePathRelevance per-word scoring (#720)', () => {
   });
 });
 
+// The project name is context, not a discriminator: dropping it from path
+// scoring stops every file under a `<ProjectName>…/` tree from winning on the
+// name alone, so the rest of the query decides the ranking (#720).
+describe('project-name down-weighting in path relevance (#720)', () => {
+  it('derives the project name from go.mod / package.json, skipping short names', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-projname-'));
+    try {
+      fs.writeFileSync(path.join(dir, 'go.mod'), 'module example.com/SuperBizAgent\n\ngo 1.21\n');
+      fs.writeFileSync(path.join(dir, 'package.json'), JSON.stringify({ name: '@acme/superbizagent-web' }));
+      const tokens = deriveProjectNameTokens(dir);
+      expect(tokens.has('superbizagent')).toBe(true);
+      expect(tokens.has('superbizagentweb')).toBe(true);
+    } finally {
+      fs.rmSync(dir, { recursive: true, force: true });
+    }
+  });
+
+  it('drops a project-name query word from path scoring when other words remain', () => {
+    const proj = new Set(['superbizagent']);
+    // Without the project name dropped, the frontend path wins on it (+5).
+    // With it dropped, only "backend" is left — and it doesn't match this path.
+    const withDrop = scorePathRelevance('SuperBizAgentFrontend/app.js', 'SuperBizAgent backend', proj);
+    const noDrop = scorePathRelevance('SuperBizAgentFrontend/app.js', 'SuperBizAgent backend');
+    expect(withDrop).toBeLessThan(noDrop);
+    expect(withDrop).toBe(0);
+  });
+
+  it('keeps the project-name word when it is the ONLY query word (bare query still scores)', () => {
+    const proj = new Set(['superbizagent']);
+    expect(scorePathRelevance('SuperBizAgentFrontend/app.js', 'SuperBizAgent', proj)).toBe(5);
+  });
+
+  it('does not affect a query that omits the project name', () => {
+    const proj = new Set(['superbizagent']);
+    const path0 = 'internal/controller/chat/chat.go';
+    expect(scorePathRelevance(path0, 'controller chat', proj)).toBe(
+      scorePathRelevance(path0, 'controller chat')
+    );
+  });
+});
+
 describe('Context ranking — common-word precision & confidence', () => {
   let testDir: string;
   let cg: CodeGraph;
diff --git a/src/db/queries.ts b/src/db/queries.ts
index 3e4e6e14a..adf239268 100644
--- a/src/db/queries.ts
+++ b/src/db/queries.ts
@@ -176,6 +176,12 @@ function rowToFileRecord(row: FileRow): FileRecord {
 export class QueryBuilder {
   private db: SqliteDatabase;
 
+  // Project-name tokens (go.mod / package.json / repo dir), normalized. A query
+  // word matching one is dropped from path-relevance scoring — it names the
+  // whole project, not a symbol, so it carries no discriminative signal (#720).
+  // Set once by the CodeGraph instance; empty by default (no down-weighting).
+  private projectNameTokens: Set<string> = new Set();
+
   // Node cache for frequently accessed nodes (LRU-style, max 1000 entries)
   private nodeCache: Map<string, Node> = new Map();
   private readonly maxCacheSize = 1000;
@@ -219,6 +225,17 @@ export class QueryBuilder {
     this.db = db;
   }
 
+  /** Set the normalized project-name tokens used to down-weight non-discriminative
+   * query words in path scoring (#720). Called once when the project opens. */
+  setProjectNameTokens(tokens: Set<string>): void {
+    this.projectNameTokens = tokens;
+  }
+
+  /** The normalized project-name tokens (#720); empty if none were derived. */
+  getProjectNameTokens(): Set<string> {
+    return this.projectNameTokens;
+  }
+
   // ===========================================================================
   // Node Operations
   // ===========================================================================
@@ -842,7 +859,7 @@ export class QueryBuilder {
         ...r,
         score: r.score
           + kindBonus(r.node.kind)
-          + scorePathRelevance(r.node.filePath, scoringQuery)
+          + scorePathRelevance(r.node.filePath, scoringQuery, this.projectNameTokens)
           + nameMatchBonus(r.node.name, scoringQuery),
       }));
       results.sort((a, b) => b.score - a.score);
diff --git a/src/index.ts b/src/index.ts
index 1b2642dbd..f967f6eb7 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -49,6 +49,7 @@ import { Mutex, FileLock } from './utils';
 import { FileWatcher, WatchOptions, PendingFile, LockUnavailableError } from './sync';
 import { EXTRACTION_VERSION } from './extraction/extraction-version';
 import { getCodeGraphDir } from './directory';
+import { deriveProjectNameTokens } from './search/query-utils';
 import { CodeGraphPackageVersion } from './mcp/version';
 
 // Re-export types for consumers
@@ -154,6 +155,13 @@ export class CodeGraph {
     this.db = db;
     this.queries = queries;
     this.projectRoot = projectRoot;
+    // Down-weight the project name as a query term in search ranking — it names
+    // the whole repo, not a symbol, so it has no discriminative value (#720).
+    try {
+      this.queries.setProjectNameTokens(deriveProjectNameTokens(projectRoot));
+    } catch {
+      // Best-effort: ranking still works without it.
+    }
     this.fileLock = new FileLock(
       path.join(getCodeGraphDir(projectRoot), 'codegraph.lock')
     );
@@ -747,6 +755,17 @@ export class CodeGraph {
     return this.queries.searchNodes(query, options);
   }
 
+  /**
+   * Normalized project-name tokens (go.mod / package.json / repo dir) used to
+   * down-weight the non-discriminative project name in search ranking (#720).
+   * Exposed so explore can exclude it from the PascalCase type-disambiguation
+   * bias, which would otherwise pull overloaded tokens toward whichever stack
+   * embeds the project name.
+   */
+  getProjectNameTokens(): Set<string> {
+    return this.queries.getProjectNameTokens();
+  }
+
   /**
    * Find the project's "primary route file" — the file with the densest
    * concentration of framework-emitted `route` nodes (≥3 routes, ≥30%
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 8a696fbc5..94fcc5dd9 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -21,7 +21,7 @@ import {
 } from '../sync/worktree';
 import type { PendingFile } from '../sync';
 import type { Node, Edge, SearchResult, Subgraph, NodeKind } from '../types';
-import { isTestFile } from '../search/query-utils';
+import { isTestFile, normalizeNameToken } from '../search/query-utils';
 import {
   existsSync,
   readFileSync,
@@ -1661,8 +1661,14 @@ export class ToolHandler {
       // agent writes "DataRequest task validate", the `task`/`validate` it wants
       // are DataRequest's, NOT the same-named overloads in Validation.swift /
       // Concurrency.swift / the abstract base. Used below to bias overloaded
-      // names toward the file/class the query also names.
-      const typeTokens = tokens.filter((o) => /^[A-Z][A-Za-z0-9]{3,}/.test(o));
+      // names toward the file/class the query also names. EXCLUDE the project
+      // name (a PascalCase token a user naturally includes) — it names the whole
+      // repo, so biasing toward it just pulls overloads to whichever stack
+      // embeds it, re-burying the rest (#720).
+      const projectNameTokens = cg.getProjectNameTokens();
+      const typeTokens = tokens.filter(
+        (o) => /^[A-Z][A-Za-z0-9]{3,}/.test(o) && !projectNameTokens.has(normalizeNameToken(o)),
+      );
       const inNamedContext = (n: Node) =>
         typeTokens.some((ct) => {
           const lc = ct.toLowerCase();
diff --git a/src/search/query-utils.ts b/src/search/query-utils.ts
index 588901ae1..1a7b121fc 100644
--- a/src/search/query-utils.ts
+++ b/src/search/query-utils.ts
@@ -4,9 +4,55 @@
  * Shared module for search term extraction and scoring.
  */
 
+import * as fs from 'fs';
 import * as path from 'path';
 import { Node } from '../types';
 
+/** Normalize a name to a comparable token: lowercase, alphanumerics only. */
+export function normalizeNameToken(raw: string): string {
+  return raw.toLowerCase().replace(/[^a-z0-9]/g, '');
+}
+
+/**
+ * Tokens that name the PROJECT as a whole — its `go.mod` module, `package.json`
+ * name, or repo root directory — rather than any specific symbol. A user
+ * naturally puts the project name in a query as context ("MyApp backend
+ * routes"), but it carries no discriminative signal: when it's also a substring
+ * of a symbol or path on one stack (a `MyAppFrontend/` dir, a `MyAppApp` class)
+ * it lexically inflates that stack and buries the rest (#720).
+ *
+ * Returned normalized (lowercase, alphanumerics only) so a query word can be
+ * compared by its normalized form. Only names ≥5 chars are kept — short ones
+ * (`api`, `app`, `core`, `web`) collide with real query terms too often to
+ * safely down-weight.
+ */
+export function deriveProjectNameTokens(projectRoot: string): Set<string> {
+  const tokens = new Set<string>();
+  const add = (raw: string | undefined | null): void => {
+    if (!raw) return;
+    const norm = normalizeNameToken(raw);
+    if (norm.length >= 5) tokens.add(norm);
+  };
+
+  // go.mod module last segment (the most reliable signal for Go repos).
+  try {
+    const gomod = fs.readFileSync(path.join(projectRoot, 'go.mod'), 'utf-8');
+    const m = gomod.match(/^\s*module\s+(\S+)/m);
+    if (m && m[1]) add(m[1].split('/').pop());
+  } catch { /* no go.mod */ }
+
+  // package.json name (strip an `@scope/` prefix).
+  try {
+    const pkg = JSON.parse(fs.readFileSync(path.join(projectRoot, 'package.json'), 'utf-8'));
+    if (typeof pkg.name === 'string') add(pkg.name.replace(/^@[^/]+\//, ''));
+  } catch { /* no / invalid package.json */ }
+
+  // Repo root directory name — a fallback when neither manifest names the project.
+  add(path.basename(path.resolve(projectRoot)));
+
+  return tokens;
+}
+
 /**
  * Common stop words to filter from search queries.
  * Includes generic English + code-specific noise words.
@@ -172,7 +218,11 @@ export function extractSearchTerms(query: string, options?: { stems?: boolean })
  * Score path relevance to a query
  * Higher score = more relevant path
  */
-export function scorePathRelevance(filePath: string, query: string): number {
+export function scorePathRelevance(
+  filePath: string,
+  query: string,
+  projectNameTokens?: Set<string>,
+): number {
   const pathLower = filePath.toLowerCase();
   const fileName = path.basename(filePath).toLowerCase();
   const dirName = path.dirname(filePath).toLowerCase();
@@ -187,10 +237,21 @@ export function scorePathRelevance(filePath: string, query: string): number {
   // Split the ORIGINAL-case query into words; extractSearchTerms does the
   // camelCase/snake split per word (so `getUserName` still matches a
   // `get_user_name` path) — we just attribute each word's matches once.
-  const words = query.split(/\s+/).filter((w) => w.length > 0);
-  if (words.length === 0) return 0;
-
-  for (const word of words) {
+  const allWords = query.split(/\s+/).filter((w) => w.length > 0);
+  if (allWords.length === 0) return 0;
+
+  // A query word that just names the PROJECT (its go.mod / package.json / repo
+  // name) carries no discriminative path signal — drop it so the rest of the
+  // query decides the ranking, instead of every file under a `<ProjectName>…/`
+  // tree winning on the project name alone (#720). Only when OTHER words remain,
+  // so a bare project-name query still scores on its path.
+  const words =
+    projectNameTokens && projectNameTokens.size > 0
+      ? allWords.filter((w) => !projectNameTokens.has(normalizeNameToken(w)))
+      : allWords;
+  const scored = words.length > 0 ? words : allWords;
+
+  for (const word of scored) {
     // Use base terms only — stem variants inflate path scores by generating
     // many near-duplicate terms that all match the same path segments.
     const subtokens = extractSearchTerms(word, { stems: false });

From eb5960b535f25a333ca7a3e2edc5d59fb049e8fd Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 23:10:01 -0400
Subject: [PATCH 11/51] fix(php): resolve chained static-factory calls
 `Cls::for($x)->method()` (#608) (#749)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A method called through a PHP fluent static factory — `ApiClient::for($c)->createOrder()`,
the canonical Laravel per-credential/per-tenant client idiom — produced no
`calls` edge: the receiver of `->createOrder` is the `Cls::for(...)` static
call, whose result type was never recovered, so the edge was dropped and
`codegraph_callers` returned nothing.

Same shape as the C++ singleton/factory fix (#645), reusing its return_type
column + the chained-call mechanism:
- Capture PHP return types (getReturnType): `: self` / `: static` / `$this`
  stored as the `self` marker, a concrete `: Type` as its short name,
  primitives/unions dropped.
- Encode the chained scoped-call receiver as `Cls::for().method` so the
  resolver can split it (PHP-gated, in extractCall).
- New matchPhpCallChain: look up the factory's return type (`self` → the
  factory's own class; concrete → that class), then resolve AND validate the
  method on it — a wrong inference yields no edge, never a wrong one.

EXTRACTION_VERSION 4->5 (re-index to populate PHP return types + chained edges).

Validated on koel (1383 PHP files): node count identical (no explosion),
0 edges lost, +80 chained-call edges recovered; synthetic tests cover the
self-factory, concrete-return, namespace, decoy, and absent-method cases.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/extraction.test.ts         | 24 +++++++++++++++
 __tests__/resolution.test.ts         | 37 +++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/languages/php.ts      | 35 +++++++++++++++++++++-
 src/extraction/tree-sitter.ts        | 27 +++++++++++++++++
 src/resolution/name-matcher.ts       | 45 ++++++++++++++++++++++++----
 7 files changed, 164 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f73044cd..c5c47ed32 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- PHP: a method called through a chained static factory — `Cls::for($x)->method(...)`, the canonical Laravel per-credential / per-tenant client idiom — now records a caller edge. Previously the receiver type (what `for()` returns) was never recovered, so `codegraph_callers` returned nothing for the method and the call was invisible to `codegraph_impact`. CodeGraph now captures PHP return types — `: self` / `: static` resolve to the declaring class, `: SomeClass` to that class — and resolves the chained method on the factory's result, creating the edge only when that class actually has the method (so a wrong inference produces no edge). Existing PHP indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @cvanderlinden. (#608) (PHP)
 - Search relevance: including the project name in a query (a user naturally writes `MyApp backend routes`) no longer buries the part of the codebase the query is actually about. The project name lexically matches whatever stack embeds it — a `MyAppFrontend/` directory, a `MyAppApp` class — and it was over-weighted two ways: a single PascalCase word was scored once per sub-token (`my` / `app` / `myapp`), so one concept boosted that path several times over; and the name carried full path / disambiguation weight even though it names the whole repo, not any symbol. Now path relevance counts each query word once, and a word matching the project name (derived from `go.mod`, `package.json`, or the repo directory) is dropped from path scoring and from `codegraph_explore`'s type-disambiguation bias — unless it's the only term, so a bare project-name search still works. In a mixed-stack repo, a backend question now surfaces the backend even with the project name in the query. Thanks @MiNuo1. (#720)
 - Go: a function called only from inside an anonymous closure — a cobra `RunE: func(…) {…}` handler, a goroutine literal, or a callback closure stored in a package-level `var` — now shows its real caller. Previously the call leaked to the file node, so `codegraph_callers` and `codegraph_impact` reported such a function as having no meaningful caller; the call is now attributed to the enclosing declaration, so editing the function surfaces the closures that use it. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @Cyclone1070. (#693) (Go)
 - Indexing no longer aborts when a `.gitignore` contains non-UTF-8 bytes or an unparseable pattern. A `.gitignore` transparently encrypted in place by corporate DLP / endpoint-security software (a common enterprise scenario) — or one with a stray pattern the matcher can't compile (`\[`, producing "Unterminated character class") — used to crash the entire `sync` / `index` with a screen of garbled bytes and never name the offending file, leaving `Files: 0 / Nodes: 0`. CodeGraph now skips a `.gitignore` that isn't valid UTF-8 text whole, drops only the individual unparseable patterns from a text one, and logs a warning naming the file — indexing continues either way. Thanks @zhanghang-9527. (#682)
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index e9231963b..aae1d9e62 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -2406,6 +2406,30 @@ end
     });
   });
 
+  describe('PHP return type capture (#608)', () => {
+    it('captures self/static factory returns as the `self` marker; primitives as undefined', () => {
+      const code = `<?php
+class ApiClient {
+    public static function for(string $c): self { return new self; }
+    public static function make(): static { return new static; }
+    public function send(array $p): array { return []; }
+}`;
+      const result = extractFromSource('ApiClient.php', code);
+      expect(result.nodes.find((n) => n.name === 'for' && n.kind === 'method')?.returnType).toBe('self');
+      expect(result.nodes.find((n) => n.name === 'make' && n.kind === 'method')?.returnType).toBe('self');
+      // `array` is not a class to chain on → no return type recorded.
+      expect(result.nodes.find((n) => n.name === 'send' && n.kind === 'method')?.returnType).toBeUndefined();
+    });
+
+    it('captures a concrete return type as its short class name', () => {
+      const code = `<?php
+namespace App;
+class WidgetFactory { public static function make(): Widget { return new Widget(); } }`;
+      const result = extractFromSource('WidgetFactory.php', code);
+      expect(result.nodes.find((n) => n.name === 'make' && n.kind === 'method')?.returnType).toBe('Widget');
+    });
+  });
+
   describe('C/C++ return type capture (#645)', () => {
     it('captures the normalized return type of a C++ method/function', () => {
       const code = `
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 1b97e05c9..e727b953c 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2158,4 +2158,41 @@ void wrong() { WidgetFactory::create().onlyOther(); }
       expect(callerNamesOf('Other::onlyOther')).toEqual([]);
     });
   });
+
+  describe('PHP chained static-factory call resolution (#608)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves Cls::for($x)->method() via the factory\'s `: self` return (#608)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'ApiClient.php'),
+        `<?php\nclass ApiClient {\n    public static function for(string $c): self { return new self; }\n    public function createOrder(array $p): array { return []; }\n}\n`
+      );
+      fs.writeFileSync(
+        path.join(tempDir, 'DispatchOrder.php'),
+        `<?php\nclass DispatchOrder {\n    public function handle(): void {\n        ApiClient::for('cred')->createOrder([]);\n    }\n}\n`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // The chained call's edge attaches to the factory result's method.
+      expect(callerNamesOf('ApiClient::createOrder')).toContain('handle');
+    });
+
+    it('creates NO edge when the factory result lacks the method (#608)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'lib.php'),
+        `<?php\nclass ApiClient { public static function for(string $c): self { return new self; } }\nclass Other { public function onlyOther(): void {} }\nclass Caller { public function go(): void { ApiClient::for('x')->onlyOther(); } }\n`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // ApiClient has no onlyOther — must not mis-attach to the same-named Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 9428a44ed..41143fe08 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 4;
+export const EXTRACTION_VERSION = 5;
diff --git a/src/extraction/languages/php.ts b/src/extraction/languages/php.ts
index 673fbf90f..e6c1ce1b0 100644
--- a/src/extraction/languages/php.ts
+++ b/src/extraction/languages/php.ts
@@ -1,5 +1,5 @@
 import type { Node as SyntaxNode } from 'web-tree-sitter';
-import { getNodeText } from '../tree-sitter-helpers';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
 // include / require (+ _once) expression node types. These carry the
@@ -33,6 +33,38 @@ function phpStaticIncludePath(node: SyntaxNode, source: string): string | null {
   return content ? getNodeText(content, source) : null;
 }
 
+/** PHP built-in return types that can't be a method receiver (so no class to chain on). */
+const PHP_NON_CLASS_RETURN = new Set([
+  'array', 'string', 'int', 'integer', 'float', 'double', 'bool', 'boolean',
+  'void', 'mixed', 'never', 'null', 'false', 'true', 'object', 'callable',
+  'iterable', 'resource',
+]);
+
+/**
+ * A method/function's declared return type, normalized to the class a chained
+ * `->method()` could be called on (issue #608). `self` / `static` / `$this` are
+ * kept as the marker `self` and resolved to the declaring class at resolution
+ * time; a concrete type returns its short name; primitives / unions / nullable
+ * non-class types return undefined.
+ */
+function extractPhpReturnType(node: SyntaxNode, source: string): string | undefined {
+  let rt = getChildByField(node, 'return_type');
+  if (!rt) return undefined;
+  // Unwrap `?Type`. Union / intersection types are ambiguous — skip them.
+  if (rt.type === 'optional_type') rt = rt.namedChild(0) ?? rt;
+  if (!rt || rt.type === 'primitive_type') return undefined;
+
+  const nameNode = rt.type === 'named_type' ? (rt.namedChild(0) ?? rt) : rt;
+  const text = getNodeText(nameNode, source).trim().replace(/^\\/, '');
+  if (!text) return undefined;
+  const last = text.split('\\').pop() ?? text;
+  const lc = last.toLowerCase();
+  if (lc === 'self' || lc === 'static' || lc === 'this' || lc === '$this') return 'self';
+  if (PHP_NON_CLASS_RETURN.has(lc)) return undefined;
+  if (!/^[A-Za-z_]\w*$/.test(last)) return undefined; // union/intersection/complex
+  return last;
+}
+
 export const phpExtractor: LanguageExtractor = {
   functionTypes: ['function_definition'],
   classTypes: ['class_declaration', 'trait_declaration'],
@@ -50,6 +82,7 @@ export const phpExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'parameters',
   returnField: 'return_type',
+  getReturnType: extractPhpReturnType,
   classifyClassNode: (node) => {
     return node.type === 'trait_declaration' ? 'trait' : 'class';
   },
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 798229489..61bd674b7 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2349,6 +2349,33 @@ export class TreeSitterExtractor {
       // single-dot receiver regex fails. Pull out the immediate field after `this.`
       // so the receiver is the field name (`userbo`), which the resolver can then
       // look up in the enclosing class's field declarations.
+      // PHP static-factory fluent chain: `Cls::for($x)->method()` — the receiver
+      // is itself a static call, so resolution must infer the method's class
+      // from what `Cls::for` RETURNS (its `: self` / `: static` / `: Type`),
+      // #608 (mirrors the C++ chain fix in #645). Encode `<Cls::factory>().<method>`;
+      // the `().` marker lets the PHP resolver split it. The receiver text
+      // (`Cls::for('x')`) carries the args, so without this it degrades to an
+      // unresolvable string and the call edge is dropped.
+      if (methodName && this.language === 'php' && objectField.type === 'scoped_call_expression') {
+        const innerScope = getChildByField(objectField, 'scope');
+        const innerName = getChildByField(objectField, 'name');
+        if (innerScope && innerName) {
+          calleeName = `${getNodeText(innerScope, this.source)}::${getNodeText(innerName, this.source)}().${methodName}`;
+        } else {
+          calleeName = methodName;
+        }
+        if (calleeName) {
+          this.unresolvedReferences.push({
+            fromNodeId: callerId,
+            referenceName: calleeName,
+            referenceKind: 'calls',
+            line: node.startPosition.row + 1,
+            column: node.startPosition.column,
+          });
+        }
+        return;
+      }
+
       let receiverName: string;
       if (objectField.type === 'field_access') {
         const inner = getChildByField(objectField, 'object');
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index f01628c12..f283e6130 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -414,10 +414,11 @@ function cppLastSegment(name: string): string {
 
 /**
  * Return type captured at extraction for `Class::method` (or a free function),
- * read off the indexed node's `returnType` (#645). Null when not indexed or no
- * return type was recorded (e.g. a `void`/primitive return).
+ * read off the indexed node's `returnType` — used by the C++ (#645) and PHP
+ * (#608) chained-call resolvers. Language-filtered. Null when not indexed or no
+ * return type was recorded (a `void`/primitive return).
  */
-function lookupCppReturnType(
+function lookupCalleeReturnType(
   callee: string,
   ref: UnresolvedRef,
   context: ResolutionContext,
@@ -492,10 +493,10 @@ function resolveCppCallResultType(
     if (recv.includes('.') || recv.includes('(') || recv.includes('::')) return null; // single level only
     const recvType = inferCppReceiverType(recv, ref, context, depth + 1);
     if (!recvType) return null;
-    return lookupCppReturnType(`${recvType}::${method}`, ref, context);
+    return lookupCalleeReturnType(`${recvType}::${method}`, ref, context);
   }
 
-  const ret = lookupCppReturnType(expr, ref, context);
+  const ret = lookupCalleeReturnType(expr, ref, context);
   if (ret) return ret;
 
   // Direct construction — the callee itself names a class/struct.
@@ -549,6 +550,32 @@ export function matchCppCallChain(
   return resolveMethodOnType(cls, m[2], ref, context, 0.85, 'instance-method');
 }
 
+/**
+ * Resolve a PHP fluent static-factory chain whose receiver is a static call —
+ * `Cls::for($x)->method()`, encoded by the extractor as `Cls::for().method`
+ * (#608, the per-credential Laravel client idiom). The receiver's type is what
+ * `Cls::for` returns: a `: self` / `: static` resolves to `Cls` itself, a
+ * concrete `: Type` to that type. The outer method is then resolved and
+ * VALIDATED on it (resolveMethodOnType requires the method to exist), so a
+ * wrong inference yields no edge rather than a wrong one.
+ */
+export function matchPhpCallChain(
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+): ResolvedRef | null {
+  const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/); // shape: `<inner>().<method>`
+  if (!m || !m[1] || !m[2]) return null;
+  const inner = m[1]; // e.g. `Cls::for`
+  const method = m[2]; // the chained (outer) method name
+  if (!inner.includes('::')) return null; // only static-factory (`Cls::method`) chains
+  const factoryClass = inner.slice(0, inner.lastIndexOf('::')); // text before the last `::`
+  const ret = lookupCalleeReturnType(inner, ref, context);
+  if (!ret) return null; // no recorded return type → this matcher declines the ref
+  // `self` (the extractor's marker for self/static/$this) → the factory's class.
+  const resolvedClass = ret === 'self' ? factoryClass : ret;
+  return resolveMethodOnType(resolvedClass, method, ref, context, 0.85, 'instance-method');
+}
+
 /**
  * Java/Kotlin: infer a receiver's declared type by walking field declarations
  * in the class enclosing the call site. The field's `signature` is already in
@@ -971,6 +998,14 @@ export function matchReference(
     if (result) return result;
   }
 
+  // 1c. PHP fluent static-factory chain — `Cls::for($x)->method()` encoded as
+  // `Cls::for().method` (#608). Same idea as 1b: the receiver's type is the
+  // factory's `: self` / `: Type` return.
+  if (ref.language === 'php') {
+    result = matchPhpCallChain(ref, context);
+    if (result) return result;
+  }
+
   // 2. Method call pattern
   result = matchMethodCall(ref, context);
   if (result) return result;

From 7f6bdf7ad157a22b857f5d44ab12a71935010def Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Mon, 8 Jun 2026 23:43:17 -0400
Subject: [PATCH 12/51] fix(java): resolve chained static-factory calls
 Foo.getInstance().bar() (#750) (#751)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A Java method called through a static factory or fluent chain — `Foo.getInstance().bar()`,
`Config.create(opts).build()` — lost the receiver's type, so the chained method either
didn't resolve at all or (when a same-named method existed on an unrelated class) attached
to whichever class was indexed first. Ports the #645 (C++) / #608 (PHP) 3-part mechanism:

- Part 1: capture Java return types in the extractor (skip void/primitives/arrays,
  unwrap generics, strip package qualifier).
- Part 2: encode a chained-call receiver as `inner().method` with normalized empty
  parens, so factory calls that take arguments still split.
- Part 3: matchJavaCallChain resolves the chained method on the factory's return type,
  validated via resolveMethodOnType so a wrong inference yields NO edge (never a wrong one).

Validated: synthetic decoy + absent-method safety tests; real-repo A/B on google/guava
(3,227 files) — node count identical (no explosion), 0 edges lost, +1,507 unique chained
edges recovered, precision spot-checked verbatim (Splitter.on().split(),
CacheBuilder.newBuilder().recordStats(), GraphBuilder.directed().build(), nested
MultimapBuilder.linkedHashKeys().arrayListValues()). EXTRACTION_VERSION 5 -> 6.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 68 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/languages/java.ts     | 35 ++++++++++++++
 src/extraction/tree-sitter.ts        | 27 +++++++++++
 src/resolution/name-matcher.ts       | 41 +++++++++++++++++
 6 files changed, 173 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5c47ed32..1894e966d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Java method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to lose the receiver's type, so when two classes had a same-named method the call silently attached to whichever was indexed first — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Java return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Covers factories and fluent builders that take arguments (`hashKeys().arrayListValues()`), including builders that return a nested type. Existing Java indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Java)
 - PHP: a method called through a chained static factory — `Cls::for($x)->method(...)`, the canonical Laravel per-credential / per-tenant client idiom — now records a caller edge. Previously the receiver type (what `for()` returns) was never recovered, so `codegraph_callers` returned nothing for the method and the call was invisible to `codegraph_impact`. CodeGraph now captures PHP return types — `: self` / `: static` resolve to the declaring class, `: SomeClass` to that class — and resolves the chained method on the factory's result, creating the edge only when that class actually has the method (so a wrong inference produces no edge). Existing PHP indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @cvanderlinden. (#608) (PHP)
 - Search relevance: including the project name in a query (a user naturally writes `MyApp backend routes`) no longer buries the part of the codebase the query is actually about. The project name lexically matches whatever stack embeds it — a `MyAppFrontend/` directory, a `MyAppApp` class — and it was over-weighted two ways: a single PascalCase word was scored once per sub-token (`my` / `app` / `myapp`), so one concept boosted that path several times over; and the name carried full path / disambiguation weight even though it names the whole repo, not any symbol. Now path relevance counts each query word once, and a word matching the project name (derived from `go.mod`, `package.json`, or the repo directory) is dropped from path scoring and from `codegraph_explore`'s type-disambiguation bias — unless it's the only term, so a bare project-name search still works. In a mixed-stack repo, a backend question now surfaces the backend even with the project name in the query. Thanks @MiNuo1. (#720)
 - Go: a function called only from inside an anonymous closure — a cobra `RunE: func(…) {…}` handler, a goroutine literal, or a callback closure stored in a package-level `var` — now shows its real caller. Previously the call leaked to the file node, so `codegraph_callers` and `codegraph_impact` reported such a function as having no meaningful caller; the call is now attributed to the enclosing declaration, so editing the function surfaces the closures that use it. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @Cyclone1070. (#693) (Go)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index e727b953c..25976be8b 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2195,4 +2195,72 @@ void wrong() { WidgetFactory::create().onlyOther(); }
       expect(callerNamesOf('Other::onlyOther')).toEqual([]);
     });
   });
+
+  describe('Java chained static-factory call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves Foo.getInstance().bar() via the factory return type, never a same-named decoy', async () => {
+      // Aaa sorts first and has a same-named bar() — it must never win the chain.
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.java'),
+        `class Aaa { void bar() {} }
+class Foo {
+    static Foo getInstance() { return new Foo(); }
+    void bar() {}
+}
+class Caller {
+    void run() { Foo.getInstance().bar(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::bar')).toEqual(['run']);
+      expect(callerNamesOf('Aaa::bar')).toEqual([]);
+    });
+
+    it('resolves a factory chain that passes arguments — Foo.create(cfg).build()', async () => {
+      // The factory call carries an argument; the extractor must normalize the
+      // receiver to empty parens (`Foo.create().build`) so the chain still splits.
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.java'),
+        `class Config {}
+class Foo {
+    static Foo create(Config c) { return new Foo(); }
+    void build() {}
+}
+class Caller {
+    void run() { Foo.create(new Config()).build(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::build')).toEqual(['run']);
+    });
+
+    it('creates NO edge when the factory return type lacks the method (silent miss, not a wrong edge)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.java'),
+        `class Foo {
+    static Foo getInstance() { return new Foo(); }
+}
+class Other { void onlyOther() {} }
+class Caller {
+    void run() { Foo.getInstance().onlyOther(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Foo has no onlyOther() — must not mis-attach to the same-named Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 41143fe08..5e37514d4 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 5;
+export const EXTRACTION_VERSION = 6;
diff --git a/src/extraction/languages/java.ts b/src/extraction/languages/java.ts
index c59764466..3d3ecaa4d 100644
--- a/src/extraction/languages/java.ts
+++ b/src/extraction/languages/java.ts
@@ -2,6 +2,40 @@ import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
+/**
+ * Tree-sitter-java node types for a method's `type` (return) field that can
+ * never be a method receiver — there's no class to chain a `.method()` on, so we
+ * store no `returnType` for them.
+ */
+const JAVA_NON_CLASS_RETURN_NODES = new Set([
+  'void_type',
+  'integral_type', // int, long, short, byte, char
+  'floating_point_type', // float, double
+  'boolean_type',
+]);
+
+/**
+ * A Java method's declared return type, normalized to the bare class name a
+ * chained `Foo.getInstance().bar()` could be called on (the #645/#608 mechanism).
+ * Reads the `type` field: primitives/void/arrays yield undefined (no class to
+ * chain on); type arguments, nested included, are dropped (`Map<K, List<V>>` →
+ * `Map`); a dotted package/outer-class qualifier (`java.util.List`) is stripped
+ * to the simple name. Constructors have no `type` field → undefined.
+ */
+function extractJavaReturnType(node: SyntaxNode, source: string): string | undefined {
+  const typeNode = getChildByField(node, 'type');
+  if (!typeNode || JAVA_NON_CLASS_RETURN_NODES.has(typeNode.type)) return undefined;
+  // An array return (`Foo[]`) isn't a receiver you call instance methods on.
+  if (typeNode.type === 'array_type') return undefined;
+  // Strip type arguments innermost-first so nested generics (`Map<K, List<V>>`) unwrap fully.
+  let raw = getNodeText(typeNode, source).trim();
+  while (/<[^<>]*>/.test(raw)) raw = raw.replace(/<[^<>]*>/g, '');
+  // Strip a dotted package / outer-class qualifier (`java.util.List` → `List`).
+  const last = raw.split('.').pop()?.trim();
+  if (!last || !/^[A-Za-z_]\w*$/.test(last)) return undefined;
+  return last;
+}
+
 export const javaExtractor: LanguageExtractor = {
   functionTypes: [],
   classTypes: ['class_declaration'],
@@ -23,6 +57,7 @@ export const javaExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'parameters',
   returnField: 'type',
+  getReturnType: extractJavaReturnType,
   getSignature: (node, source) => {
     const params = getChildByField(node, 'parameters');
     const returnType = getChildByField(node, 'type');
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 61bd674b7..5adaada7e 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2376,6 +2376,33 @@ export class TreeSitterExtractor {
         return;
       }
 
+      // Java static-factory / fluent chain: `Foo.getInstance().bar()` — the
+      // receiver is itself a method call, so resolution must infer bar's class
+      // from what `Foo.getInstance` RETURNS (its declared return type), the
+      // #645/#608 mechanism. Encode `<inner-receiver>.<inner-method>().<method>`;
+      // the `().` marker lets the Java chain resolver split it, and normalizing to
+      // empty parens drops any factory args (`Foo.create(cfg).bar()`) that would
+      // otherwise leave a `(cfg)` in the receiver text and break the split.
+      if (
+        methodName &&
+        this.language === 'java' &&
+        objectField.type === 'method_invocation'
+      ) {
+        const innerObj = getChildByField(objectField, 'object');
+        const innerName = getChildByField(objectField, 'name');
+        if (innerObj && innerName) {
+          calleeName = `${getNodeText(innerObj, this.source)}.${getNodeText(innerName, this.source)}().${methodName}`;
+          this.unresolvedReferences.push({
+            fromNodeId: callerId,
+            referenceName: calleeName,
+            referenceKind: 'calls',
+            line: node.startPosition.row + 1,
+            column: node.startPosition.column,
+          });
+          return;
+        }
+      }
+
       let receiverName: string;
       if (objectField.type === 'field_access') {
         const inner = getChildByField(objectField, 'object');
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index f283e6130..80b765dea 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -576,6 +576,39 @@ export function matchPhpCallChain(
   return resolveMethodOnType(resolvedClass, method, ref, context, 0.85, 'instance-method');
 }
 
+/**
+ * Resolve a Java chained call whose receiver is a static factory / fluent call —
+ * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
+ * (#645/#608 mechanism). The receiver's type is what `Foo.getInstance` returns
+ * (its declared return type); the outer method is then resolved and VALIDATED on
+ * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
+ * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
+ * unrelated class is never matched).
+ */
+export function matchJavaCallChain(
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+): ResolvedRef | null {
+  const m = ref.referenceName.match(/^(.+)\(\)\.(\w+)$/);
+  if (!m || !m[1] || !m[2]) return null;
+  const inner = m[1]; // `Foo.getInstance`
+  const method = m[2]; // `bar`
+  // Require an explicit receiver (`Receiver.factory`) — a bare `factory().bar`
+  // chain (a method on `this`) isn't handled here.
+  const lastDot = inner.lastIndexOf('.');
+  if (lastDot <= 0) return null;
+  const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
+  const factoryMethod = inner.slice(lastDot + 1);
+  if (!factoryClass || !factoryMethod) return null;
+  const ret = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
+  if (!ret) return null;
+  // When several classes share the returned simple name, the caller file's
+  // import of that type is the only signal that names WHICH one (#314).
+  const imports = context.getImportMappings(ref.filePath, ref.language);
+  const importedFqn = imports.find((i) => i.localName === ret)?.source;
+  return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqn);
+}
+
 /**
  * Java/Kotlin: infer a receiver's declared type by walking field declarations
  * in the class enclosing the call site. The field's `signature` is already in
@@ -1006,6 +1039,14 @@ export function matchReference(
     if (result) return result;
   }
 
+  // 1d. Java chained static-factory / fluent call — `Foo.getInstance().bar()`
+  // encoded as `Foo.getInstance().bar` (#645/#608 mechanism). Resolve bar's class
+  // from getInstance's declared return type, then validate the method on it.
+  if (ref.language === 'java') {
+    result = matchJavaCallChain(ref, context);
+    if (result) return result;
+  }
+
   // 2. Method call pattern
   result = matchMethodCall(ref, context);
   if (result) return result;

From 3e0465085031cd824723b70c57d53ad251aca247 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 00:12:37 -0400
Subject: [PATCH 13/51] fix(kotlin): resolve chained companion-factory calls
 Foo.getInstance().bar() (#750) (#752)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A Kotlin method called through a companion-object factory, fluent chain, or
constructor — `Foo.getInstance().bar()`, `Config.create(opts).build()`,
`STMTransaction(f).commit()` — dropped the receiver to a BARE method name, which
then name-matched a same-named method on an unrelated class (a wrong edge) or
failed to resolve. Ports the #645/#608 mechanism to Kotlin:

- Part 1: capture Kotlin return types in the extractor. tree-sitter-kotlin
  exposes no field names, so the return type is read positionally (the type node
  after function_value_parameters); inferred/Unit/Nothing returns yield none.
- Part 2: encode a CLASS/companion-factory call-receiver chain as `inner().method`.
  Gated to a capitalized receiver (`Foo.getInstance()` / `Foo(args)`) so instance
  chains (`list.filter{}.map{}`) keep their bare-name behavior — re-encoding those
  would only drop the edge, regressing recall in fluent codebases.
- Part 3: generalize matchJavaCallChain -> matchDottedCallChain (shared by the JVM
  dot-notation languages); resolve the method on the factory's return type, or on
  the constructed class for a Kotlin `Foo(args).method()` receiver. Validated via
  resolveMethodOnType, so a wrong inference yields NO edge.

Validated: synthetic decoy + args + absent-method safety tests; full suite green;
real-repo A/B on arrow-kt/arrow (734 .kt) — node count identical (no explosion),
+49 validated-correct chained edges, and the removed edges are wrong bare-name
guesses the fix correctly stops emitting (419/438 from test/doc files; the 18
from product code are stdlib `.apply{}`, self-loops, and bare-name mismatches) —
a net precision improvement, ~0 correct product edges lost. Java path unchanged
(constructor branch is Kotlin-gated). EXTRACTION_VERSION 6 -> 7.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 73 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/languages/kotlin.ts   | 41 ++++++++++++++++
 src/extraction/tree-sitter.ts        | 45 ++++++++++++-----
 src/resolution/name-matcher.ts       | 54 ++++++++++++++------
 6 files changed, 187 insertions(+), 29 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1894e966d..6b1bc4834 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Kotlin method calls made through a companion-object factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to drop the receiver entirely, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Kotlin return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Kotlin indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Kotlin)
 - Java method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to lose the receiver's type, so when two classes had a same-named method the call silently attached to whichever was indexed first — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Java return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Covers factories and fluent builders that take arguments (`hashKeys().arrayListValues()`), including builders that return a nested type. Existing Java indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Java)
 - PHP: a method called through a chained static factory — `Cls::for($x)->method(...)`, the canonical Laravel per-credential / per-tenant client idiom — now records a caller edge. Previously the receiver type (what `for()` returns) was never recovered, so `codegraph_callers` returned nothing for the method and the call was invisible to `codegraph_impact`. CodeGraph now captures PHP return types — `: self` / `: static` resolve to the declaring class, `: SomeClass` to that class — and resolves the chained method on the factory's result, creating the edge only when that class actually has the method (so a wrong inference produces no edge). Existing PHP indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @cvanderlinden. (#608) (PHP)
 - Search relevance: including the project name in a query (a user naturally writes `MyApp backend routes`) no longer buries the part of the codebase the query is actually about. The project name lexically matches whatever stack embeds it — a `MyAppFrontend/` directory, a `MyAppApp` class — and it was over-weighted two ways: a single PascalCase word was scored once per sub-token (`my` / `app` / `myapp`), so one concept boosted that path several times over; and the name carried full path / disambiguation weight even though it names the whole repo, not any symbol. Now path relevance counts each query word once, and a word matching the project name (derived from `go.mod`, `package.json`, or the repo directory) is dropped from path scoring and from `codegraph_explore`'s type-disambiguation bias — unless it's the only term, so a bare project-name search still works. In a mixed-stack repo, a backend question now surfaces the backend even with the project name in the query. Thanks @MiNuo1. (#720)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 25976be8b..4c3a885b0 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2256,6 +2256,79 @@ class Other { void onlyOther() {} }
 class Caller {
     void run() { Foo.getInstance().onlyOther(); }
 }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Foo has no onlyOther() — must not mis-attach to the same-named Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+  });
+
+  describe('Kotlin chained companion-factory call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves Foo.getInstance().bar() via the companion return type, never a same-named decoy', async () => {
+      // Aaa sorts first and has a same-named bar() — without the chain fix Kotlin
+      // dropped the receiver to a bare `bar` and attached to Aaa (a wrong edge).
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.kt'),
+        `class Aaa { fun bar() {} }
+class Foo {
+    companion object {
+        fun getInstance(): Foo = Foo()
+    }
+    fun bar() {}
+}
+class Caller {
+    fun run() { Foo.getInstance().bar() }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::bar')).toEqual(['run']);
+      expect(callerNamesOf('Aaa::bar')).toEqual([]);
+    });
+
+    it('resolves a companion factory chain that passes arguments — Foo.create(cfg).build()', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.kt'),
+        `class Config
+class Foo {
+    companion object {
+        fun create(c: Config): Foo = Foo()
+    }
+    fun build() {}
+}
+class Caller {
+    fun run() { Foo.create(Config()).build() }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::build')).toEqual(['run']);
+    });
+
+    it('creates NO edge when the companion return type lacks the method (silent miss, not a wrong edge)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.kt'),
+        `class Foo {
+    companion object {
+        fun getInstance(): Foo = Foo()
+    }
+}
+class Other { fun onlyOther() {} }
+class Caller {
+    fun run() { Foo.getInstance().onlyOther() }
+}
 `
       );
       cg = await CodeGraph.init(tempDir, { index: true });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 5e37514d4..f4b0a9508 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 6;
+export const EXTRACTION_VERSION = 7;
diff --git a/src/extraction/languages/kotlin.ts b/src/extraction/languages/kotlin.ts
index c9a066539..55ad883de 100644
--- a/src/extraction/languages/kotlin.ts
+++ b/src/extraction/languages/kotlin.ts
@@ -2,6 +2,46 @@ import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
+/** Kotlin return types that can't be a chained-call receiver (no class to chain on). */
+const KOTLIN_NON_CLASS_RETURN = new Set(['Unit', 'Nothing']);
+
+/**
+ * A Kotlin function's declared return type, normalized to the bare class name a
+ * chained `Foo.getInstance().bar()` could be called on (the #645/#608 mechanism).
+ * tree-sitter-kotlin exposes no field names, so the return type is found
+ * positionally: the first `user_type` / `nullable_type` that FOLLOWS
+ * `function_value_parameters` (an extension receiver's type sits before the
+ * params, so it's never mistaken for the return). An inferred return (expression
+ * body with no `: Type`), a lambda return type, or `Unit` / `Nothing` → undefined.
+ */
+function extractKotlinReturnType(node: SyntaxNode, source: string): string | undefined {
+  let seenParams = false;
+  for (let i = 0; i < node.namedChildCount; i++) {
+    const child = node.namedChild(i);
+    if (!child) continue;
+    if (child.type === 'function_value_parameters') {
+      seenParams = true;
+      continue;
+    }
+    if (!seenParams) continue;
+    // The return type is the type node right after the params. If we reach the
+    // body or a `where`-clause first, there's no declared return type.
+    if (child.type === 'function_body' || child.type === 'type_constraints') return undefined;
+    if (child.type === 'user_type' || child.type === 'nullable_type') {
+      const ut =
+        child.type === 'nullable_type'
+          ? (child.namedChildren.find((c: SyntaxNode) => c.type === 'user_type') ?? child)
+          : child;
+      // Qualified type (`Outer.Inner`): one type_identifier per segment — take the LAST (simple name).
+      const typeId = ut.namedChildren.filter((c: SyntaxNode) => c.type === 'type_identifier').pop();
+      const name = getNodeText(typeId ?? ut, source).trim();
+      if (!name || !/^[A-Za-z_]\w*$/.test(name) || KOTLIN_NON_CLASS_RETURN.has(name)) return undefined;
+      return name;
+    }
+  }
+  return undefined;
+}
+
 /** Check if a node matches the `fun interface` misparse pattern */
 function isFunInterfaceNode(node: SyntaxNode): boolean {
   let hasFun = false;
@@ -130,6 +170,7 @@ export const kotlinExtractor: LanguageExtractor = {
   },
   paramsField: 'function_value_parameters',
   returnField: 'type',
+  getReturnType: extractKotlinReturnType,
   resolveBody: (node, _bodyField) => {
     // Kotlin's tree-sitter grammar doesn't use field names, so getChildByField fails.
     // Find body by type: function_body for functions/methods, class_body for classes,
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 5adaada7e..a5b3b75fb 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2525,22 +2525,41 @@ export class TreeSitterExtractor {
                 calleeName = methodName;
               }
             } else if (
-              (this.language === 'cpp' || this.language === 'c') &&
+              (this.language === 'cpp' || this.language === 'c' || this.language === 'kotlin') &&
               receiver &&
               receiver.type === 'call_expression'
             ) {
-              // C/C++ receiver that is itself a call — `Foo::instance().bar()`,
-              // `openSession()->run()`, `mgr.view().render()`. Keep the inner
-              // call so resolution can infer bar()'s class from what the inner
-              // call RETURNS (#645). Encode as `<innerCallee>().<method>`; the
-              // `().` marker never appears in an ordinary ref, so the C++
-              // resolver can detect and split it. Other languages keep the
-              // bare-name behavior (dropping the receiver) below.
-              const innerFn = getChildByField(receiver, 'function');
-              const innerCallee = innerFn
-                ? getNodeText(innerFn, this.source).replace(/->/g, '.').replace(/\s+/g, '')
-                : '';
-              calleeName = innerCallee ? `${innerCallee}().${methodName}` : methodName;
+              // Receiver that is itself a call — `Foo::instance().bar()`,
+              // `openSession()->run()`, `mgr.view().render()` (C/C++), or
+              // `Foo.getInstance().bar()` (Kotlin). Keep the inner call so
+              // resolution can infer bar()'s class from what the inner call
+              // RETURNS (#645/#608). Encode as `<innerCallee>().<method>`; the
+              // `().` marker never appears in an ordinary ref, so the resolver
+              // can detect and split it. Other languages keep the bare-name
+              // behavior (dropping the receiver) below.
+              let innerCallee: string;
+              let reencode: boolean;
+              if (this.language === 'kotlin') {
+                // tree-sitter-kotlin has no field names — the inner callee is the
+                // call_expression's first named child (a navigation_expression
+                // `Foo.getInstance`, or a bare identifier for a free call).
+                const innerNav = receiver.namedChild(0);
+                innerCallee = innerNav ? getNodeText(innerNav, this.source).replace(/\s+/g, '') : '';
+                // Only re-encode a CLASS / companion-factory chain, whose receiver
+                // chain starts with a capitalized type (`Foo.getInstance().bar()`).
+                // An instance chain (`list.filter{}.map{}`) has a lowercase receiver
+                // whose type we can't recover here — re-encoding it would only drop
+                // the edge (no chain resolution, no bare-name fallback), regressing
+                // recall in fluent codebases. Leave those to the bare-name path.
+                reencode = /^[A-Z]/.test(innerCallee);
+              } else {
+                const innerFn = getChildByField(receiver, 'function');
+                innerCallee = innerFn
+                  ? getNodeText(innerFn, this.source).replace(/->/g, '.').replace(/\s+/g, '')
+                  : '';
+                reencode = !!innerCallee;
+              }
+              calleeName = reencode ? `${innerCallee}().${methodName}` : methodName;
             } else {
               calleeName = methodName;
             }
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index 80b765dea..da61e7d87 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -577,15 +577,16 @@ export function matchPhpCallChain(
 }
 
 /**
- * Resolve a Java chained call whose receiver is a static factory / fluent call —
+ * Resolve a dotted chained call whose receiver is a static factory / fluent call —
  * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
  * (#645/#608 mechanism). The receiver's type is what `Foo.getInstance` returns
  * (its declared return type); the outer method is then resolved and VALIDATED on
  * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
  * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
- * unrelated class is never matched).
+ * unrelated class is never matched). Shared by the JVM dot-notation languages
+ * (Java, Kotlin) — same receiver shape, same `Class::method` qualified names.
  */
-export function matchJavaCallChain(
+export function matchDottedCallChain(
   ref: UnresolvedRef,
   context: ResolutionContext,
 ): ResolvedRef | null {
@@ -593,20 +594,42 @@ export function matchJavaCallChain(
   if (!m || !m[1] || !m[2]) return null;
   const inner = m[1]; // `Foo.getInstance`
   const method = m[2]; // `bar`
-  // Require an explicit receiver (`Receiver.factory`) — a bare `factory().bar`
-  // chain (a method on `this`) isn't handled here.
   const lastDot = inner.lastIndexOf('.');
-  if (lastDot <= 0) return null;
+
+  // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
+  // capitalized inner is a class construction, so the receiver's type is the
+  // class itself — resolve the method on it. Kotlin only: there an unprefixed
+  // capitalized call constructs the class, whereas in Java a bare `Foo()` is a
+  // method call (constructors need `new`), so we must not assume construction.
+  // A lowercase bare inner is a top-level `factory().method()` whose type we
+  // can't recover — bail.
+  if (lastDot <= 0) {
+    if (ref.language !== 'kotlin' || !/^[A-Z]/.test(inner)) return null;
+    return resolveMethodOnType(inner, method, ref, context, 0.85, 'instance-method', importedFqnOf(inner, ref, context));
+  }
+
+  // Factory/fluent receiver `Receiver.factory(args).method()`: the receiver's
+  // type is what `Receiver.factory` returns (its declared return type).
   const factoryClass = inner.slice(0, lastDot).split('.').pop(); // simple class name
   const factoryMethod = inner.slice(lastDot + 1);
   if (!factoryClass || !factoryMethod) return null;
   const ret = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
   if (!ret) return null;
-  // When several classes share the returned simple name, the caller file's
-  // import of that type is the only signal that names WHICH one (#314).
+  return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context));
+}
+
+/**
+ * When several classes share a simple type name, the caller file's import of
+ * that type is the only signal that names WHICH one (#314). Returns the imported
+ * FQN for `typeName` in the ref's file, or undefined.
+ */
+function importedFqnOf(
+  typeName: string,
+  ref: UnresolvedRef,
+  context: ResolutionContext,
+): string | undefined {
   const imports = context.getImportMappings(ref.filePath, ref.language);
-  const importedFqn = imports.find((i) => i.localName === ret)?.source;
-  return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqn);
+  return imports.find((i) => i.localName === typeName)?.source;
 }
 
 /**
@@ -1039,11 +1062,12 @@ export function matchReference(
     if (result) return result;
   }
 
-  // 1d. Java chained static-factory / fluent call — `Foo.getInstance().bar()`
-  // encoded as `Foo.getInstance().bar` (#645/#608 mechanism). Resolve bar's class
-  // from getInstance's declared return type, then validate the method on it.
-  if (ref.language === 'java') {
-    result = matchJavaCallChain(ref, context);
+  // 1d. JVM (Java / Kotlin) chained static-factory / fluent call —
+  // `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar` (#645/#608
+  // mechanism). Resolve bar's class from getInstance's declared return type, then
+  // validate the method on it.
+  if (ref.language === 'java' || ref.language === 'kotlin') {
+    result = matchDottedCallChain(ref, context);
     if (result) return result;
   }
 

From aa07dc59d408b0ff5121866dab1f2bc3ff2aa56e Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 00:30:03 -0400
Subject: [PATCH 14/51] fix(csharp): resolve chained static-factory calls
 Foo.Create().Bar() (#750) (#753)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A C# method called through a static factory or fluent chain —
`Foo.Create().Bar()`, `JObject.Parse(s).Property(...)`,
`Instant.FromUtc(...).InZone(zone)` — lost the receiver's type, so the chained
method didn't resolve and the call was invisible to callers/impact/trace. Ports
the #645/#608 mechanism to C# (additive, like Java #751):

- Part 1: capture C# return types in the extractor, reading the `returns` field
  (`static Foo Create()` -> `Foo`); predefined/array/generic/nullable/namespaced
  types are normalized or skipped.
- Part 2: encode a chained `member_access_expression` receiver
  (`Foo.Create(args).Bar()`) as `inner().Bar` with normalized empty parens, so
  factory calls that take arguments still split. Non-chained member calls keep
  their existing `recv.Method` text.
- Part 3: resolve via the shared matchDottedCallChain (now Java/Kotlin/C#),
  validated by resolveMethodOnType so a wrong inference yields NO edge.

Known limitation (safe): C# extension-method chains don't resolve, since the
method lives on the extension class, not the receiver's type — no edge, never a
wrong one.

Validated: synthetic decoy + args + absent-method safety tests; full suite green;
real-repo A/B on Newtonsoft.Json (945 .cs: +3, 0 lost) and nodatime (488 .cs:
+73, 0 lost) — node count identical (no explosion), 0 edges lost, precision
spot-checked verbatim (Instant.FromUtc().InZone(), Offset.FromHoursAndMinutes().Plus(),
OffsetDateTimePattern.CreateWithInvariantCulture().WithTwoDigitYearMax()).
EXTRACTION_VERSION 7 -> 8.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 66 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/languages/csharp.ts   | 21 +++++++++
 src/extraction/tree-sitter.ts        | 16 +++++++
 src/resolution/name-matcher.ts       |  8 ++--
 6 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6b1bc4834..98405fc12 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
 - Kotlin method calls made through a companion-object factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to drop the receiver entirely, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Kotlin return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Kotlin indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Kotlin)
 - Java method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to lose the receiver's type, so when two classes had a same-named method the call silently attached to whichever was indexed first — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Java return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Covers factories and fluent builders that take arguments (`hashKeys().arrayListValues()`), including builders that return a nested type. Existing Java indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Java)
 - PHP: a method called through a chained static factory — `Cls::for($x)->method(...)`, the canonical Laravel per-credential / per-tenant client idiom — now records a caller edge. Previously the receiver type (what `for()` returns) was never recovered, so `codegraph_callers` returned nothing for the method and the call was invisible to `codegraph_impact`. CodeGraph now captures PHP return types — `: self` / `: static` resolve to the declaring class, `: SomeClass` to that class — and resolves the chained method on the factory's result, creating the edge only when that class actually has the method (so a wrong inference produces no edge). Existing PHP indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @cvanderlinden. (#608) (PHP)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 4c3a885b0..c37a1d09f 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2336,4 +2336,70 @@ class Caller {
       expect(callerNamesOf('Other::onlyOther')).toEqual([]);
     });
   });
+
+  describe('C# chained static-factory call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves Foo.Create().Bar() via the factory return type, never a same-named decoy', async () => {
+      // Aaa sorts first and has a same-named Bar() — it must never win the chain.
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.cs'),
+        `class Aaa { void Bar() {} }
+class Foo {
+    static Foo Create() { return new Foo(); }
+    void Bar() {}
+}
+class Caller {
+    void Run() { Foo.Create().Bar(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::Bar')).toEqual(['Run']);
+      expect(callerNamesOf('Aaa::Bar')).toEqual([]);
+    });
+
+    it('resolves a factory chain that passes arguments — Foo.Make(cfg).Build()', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.cs'),
+        `class Config {}
+class Foo {
+    static Foo Make(Config c) { return new Foo(); }
+    void Build() {}
+}
+class Caller {
+    void Run() { Foo.Make(new Config()).Build(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::Build')).toEqual(['Run']);
+    });
+
+    it('creates NO edge when the factory return type lacks the method (silent miss, not a wrong edge)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.cs'),
+        `class Foo {
+    static Foo Create() { return new Foo(); }
+}
+class Other { void OnlyOther() {} }
+class Caller {
+    void Run() { Foo.Create().OnlyOther(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Foo has no OnlyOther() — must not mis-attach to the same-named Other::OnlyOther.
+      expect(callerNamesOf('Other::OnlyOther')).toEqual([]);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index f4b0a9508..4d1a815ae 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 7;
+export const EXTRACTION_VERSION = 8;
diff --git a/src/extraction/languages/csharp.ts b/src/extraction/languages/csharp.ts
index 22cde79c1..ba5f452ca 100644
--- a/src/extraction/languages/csharp.ts
+++ b/src/extraction/languages/csharp.ts
@@ -32,6 +32,26 @@ export function blankCsharpPreprocessorDirectives(source: string): string {
   return source.replace(re, (m, indent) => indent + ' '.repeat(m.length - indent.length));
 }
 
+/**
+ * A C# method's declared return type, normalized to the bare class name a chained
+ * `Foo.Create().Bar()` could be called on (the #645/#608 mechanism). The return
+ * type lives in the `returns` field (`static Foo Create()` → `Foo`); built-in
+ * `predefined_type` (void/int/string/…) and arrays yield undefined; generics are
+ * unwrapped to the base type, nested ones included (`Task<List<Foo>>` → `Task`);
+ * nullable `Foo?` is stripped; `Ns.Foo` → `Foo`. No `returns` field → undefined.
+ */
+function extractCsharpReturnType(node: SyntaxNode, source: string): string | undefined {
+  const typeNode = node.childForFieldName('returns');
+  if (!typeNode || typeNode.type === 'predefined_type' || typeNode.type === 'array_type') return undefined;
+  let t = getNodeText(typeNode, source).trim().replace(/\?+$/, ''); // nullable `Foo?`
+  for (let prev = ''; prev !== t; ) { // peel innermost `<…>` until none remain
+    prev = t;
+    t = t.replace(/<[^<>]*>/g, ''); // `[^<>]` so nested args never leave a stray `>`
+  }
+  const last = t.split('.').pop()?.trim(); // namespace `Ns.Foo` → `Foo`
+  return last && /^[A-Za-z_]\w*$/.test(last) ? last : undefined;
+}
+
 export const csharpExtractor: LanguageExtractor = {
   preParse: blankCsharpPreprocessorDirectives,
   functionTypes: [],
@@ -67,6 +87,7 @@ export const csharpExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'parameters',
   returnField: 'type',
+  getReturnType: extractCsharpReturnType,
   getVisibility: (node) => {
     for (let i = 0; i < node.childCount; i++) {
       const child = node.child(i);
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index a5b3b75fb..b64f8d753 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2567,6 +2567,22 @@ export class TreeSitterExtractor {
         } else if (func.type === 'scoped_identifier' || func.type === 'scoped_call_expression') {
           // Scoped call: Module::function()
           calleeName = getNodeText(func, this.source);
+        } else if (this.language === 'csharp' && func.type === 'member_access_expression') {
+          // C# member call `recv.Method(...)`. When the receiver is itself a call
+          // — a chained factory `Foo.Create(args).Bar()` — encode `inner().Bar`
+          // with normalized empty parens so resolution can infer Bar's class from
+          // what `Foo.Create` RETURNS (#645/#608). A non-call receiver keeps the
+          // full member-access text (the existing `recv.Method` behavior).
+          const recv = getChildByField(func, 'expression');
+          const nameNode = getChildByField(func, 'name');
+          const methodName = nameNode ? getNodeText(nameNode, this.source) : '';
+          if (recv && recv.type === 'invocation_expression' && methodName) {
+            const innerFunc = getChildByField(recv, 'function');
+            const innerCallee = innerFunc ? getNodeText(innerFunc, this.source).replace(/\s+/g, '') : '';
+            calleeName = innerCallee ? `${innerCallee}().${methodName}` : methodName;
+          } else {
+            calleeName = getNodeText(func, this.source);
+          }
         } else {
           calleeName = getNodeText(func, this.source);
         }
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index da61e7d87..aa235c505 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -583,8 +583,8 @@ export function matchPhpCallChain(
  * (its declared return type); the outer method is then resolved and VALIDATED on
  * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
  * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
- * unrelated class is never matched). Shared by the JVM dot-notation languages
- * (Java, Kotlin) — same receiver shape, same `Class::method` qualified names.
+ * unrelated class is never matched). Shared by the dot-notation languages
+ * (Java, Kotlin, C#) — same receiver shape, same `Class::method` qualified names.
  */
 export function matchDottedCallChain(
   ref: UnresolvedRef,
@@ -1062,11 +1062,11 @@ export function matchReference(
     if (result) return result;
   }
 
-  // 1d. JVM (Java / Kotlin) chained static-factory / fluent call —
+  // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C#) —
   // `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar` (#645/#608
   // mechanism). Resolve bar's class from getInstance's declared return type, then
   // validate the method on it.
-  if (ref.language === 'java' || ref.language === 'kotlin') {
+  if (ref.language === 'java' || ref.language === 'kotlin' || ref.language === 'csharp') {
     result = matchDottedCallChain(ref, context);
     if (result) return result;
   }

From 48d4654e8dde62c695b2c2fc83ed66efbb943bed Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 01:38:37 -0400
Subject: [PATCH 15/51] feat(resolution): conformance-aware chained-method
 resolution (#750) (#754)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(resolution): conformance-aware chained-method resolution (#750)

A chained static-factory/fluent call whose method lives on a SUPERTYPE the
receiver conforms to — a protocol-extension method (Swift), an interface default
method, or an inherited superclass method — now resolves. resolveMethodOnType
falls back to walking the return type's implements/extends edges (via the new
context.getSupertypes) when the method isn't a direct member. Because those edges
don't exist during the single-pass resolution, a second pass
(resolveChainedCallsViaConformance) re-resolves the deferred chained refs after
edges are built. Still validated, so a wrong inference yields no edge.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* docs(changelog): conformance-aware chained-method resolution (#750)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 66 +++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/index.ts                         | 11 ++++
 src/resolution/index.ts              | 89 +++++++++++++++++++++++++++-
 src/resolution/name-matcher.ts       | 21 ++++++-
 src/resolution/types.ts              | 10 ++++
 7 files changed, 196 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 98405fc12..69ded4f7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
 - Kotlin method calls made through a companion-object factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to drop the receiver entirely, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Kotlin return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Kotlin indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Kotlin)
 - Java method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to lose the receiver's type, so when two classes had a same-named method the call silently attached to whichever was indexed first — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Java return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Covers factories and fluent builders that take arguments (`hashKeys().arrayListValues()`), including builders that return a nested type. Existing Java indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Java)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index c37a1d09f..9ddd16159 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2402,4 +2402,70 @@ class Caller {
       expect(callerNamesOf('Other::OnlyOther')).toEqual([]);
     });
   });
+
+  describe('Chained call resolves a method on a supertype (conformance, #750)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves a chained method defined only on a SUPERCLASS the return type extends', async () => {
+      // draw() lives on Base; Widget (the factory's return type) has no draw() of
+      // its own. Decoy.draw must never win. Needs the conformance second pass.
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.java'),
+        `class Base { void draw() {} }
+class Widget extends Base {}
+class Decoy { void draw() {} }
+class Factory { static Widget create() { return new Widget(); } }
+class Caller {
+    void run() { Factory.create().draw(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Base::draw')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::draw')).toEqual([]);
+    });
+
+    it('resolves a chained method defined on an INTERFACE the return type implements (default method)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.java'),
+        `interface Drawable { default void draw() {} }
+class Widget implements Drawable {}
+class Decoy { void draw() {} }
+class Factory { static Widget create() { return new Widget(); } }
+class Caller {
+    void run() { Factory.create().draw(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Drawable::draw')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::draw')).toEqual([]);
+    });
+
+    it('still creates NO edge when no supertype has the method (safety preserved)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.java'),
+        `class Base {}
+class Widget extends Base {}
+class Other { void onlyOther() {} }
+class Factory { static Widget create() { return new Widget(); } }
+class Caller {
+    void run() { Factory.create().onlyOther(); }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Neither Widget nor Base has onlyOther() — must not attach to Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 4d1a815ae..f2ea639c7 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 8;
+export const EXTRACTION_VERSION = 9;
diff --git a/src/index.ts b/src/index.ts
index f967f6eb7..2e2eef88c 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -376,6 +376,12 @@ export class CodeGraph {
               total,
             });
           });
+
+          // Second pass: chained calls whose method lives on a supertype the
+          // receiver conforms to (protocol-extension / inherited / default-
+          // interface). Needs the implements/extends edges the main pass just
+          // built, so it runs after resolution (#750).
+          this.resolver.resolveChainedCallsViaConformance();
         }
 
         // Refresh planner stats + checkpoint the WAL after bulk writes.
@@ -492,6 +498,11 @@ export class CodeGraph {
               });
             });
           }
+
+          // Second pass: chained calls whose method lives on a supertype the
+          // receiver conforms to (protocol-extension / inherited). Needs the
+          // implements/extends edges built above (#750).
+          this.resolver.resolveChainedCallsViaConformance();
         }
 
         // Refresh planner stats + checkpoint the WAL after bulk writes.
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index ac22ecb91..ffada8d5a 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -16,7 +16,7 @@ import {
   FrameworkResolver,
   ImportMapping,
 } from './types';
-import { matchReference, sameLanguageFamily, crossesKnownFamily } from './name-matcher';
+import { matchReference, matchDottedCallChain, sameLanguageFamily, crossesKnownFamily } from './name-matcher';
 import { resolveViaImport, resolveJvmImport, extractImportMappings, extractReExports, loadCppIncludeDirs, isPhpIncludePathRef } from './import-resolver';
 import { detectFrameworks } from './frameworks';
 import { synthesizeCallbackEdges } from './callback-synthesizer';
@@ -27,6 +27,17 @@ import { logDebug } from '../errors';
 import type { ReExport } from './types';
 import { LRUCache } from './lru-cache';
 
+/** Node kinds that can declare supertypes (extends/implements). */
+const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
+  'class', 'struct', 'interface', 'trait', 'protocol', 'enum',
+]);
+
+/** Languages whose chained calls use the dotted `inner().method` encoding. */
+const DOT_CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp']);
+
+/** The extractor's chained-receiver encoding: `<inner>().<method>`. */
+const CHAIN_SHAPE = /^(.+)\(\)\.(\w+)$/;
+
 /**
  * Cache size limits. Each per-resolver cache is bounded so memory
  * stays flat on large codebases (20k+ files). Sizes were chosen to
@@ -185,6 +196,12 @@ export class ReferenceResolver {
   private queries: QueryBuilder;
   private context: ResolutionContext;
   private frameworks: FrameworkResolver[] = [];
+  // Chained static-factory/fluent call refs the first pass couldn't resolve,
+  // collected in-memory (the batched resolver deletes unresolved refs from the
+  // DB, so they can't be re-read). Drained by resolveChainedCallsViaConformance
+  // once implements/extends edges exist, to resolve methods on a supertype the
+  // receiver conforms to (#750).
+  private deferredChainRefs: UnresolvedRef[] = [];
   // Per-`.razor`/`.cshtml`-file `@using` namespace set (own directives + folder
   // `_Imports.razor`, cascading to the project root). Used to disambiguate a
   // markup type ref to the right C# namespace.
@@ -400,6 +417,25 @@ export class ReferenceResolver {
         return result;
       },
 
+      getSupertypes: (typeName: string, language) => {
+        // Union the `implements`/`extends` targets of every same-named type node.
+        // Matching by simple name (not id) reconciles a type declared in one node
+        // (`KF::Builder`) with conformance declared in a separate extension node
+        // (`KF.Builder: KFOptionSetter`) — both have name `Builder`.
+        const typeNodes = this.context
+          .getNodesByName(typeName)
+          .filter((n) => SUPERTYPE_BEARING_KINDS.has(n.kind) && n.language === language);
+        if (typeNodes.length === 0) return [];
+        const supertypes = new Set<string>();
+        for (const tn of typeNodes) {
+          for (const edge of this.queries.getOutgoingEdges(tn.id, ['implements', 'extends'])) {
+            const target = this.queries.getNodeById(edge.target);
+            if (target?.name && target.name !== typeName) supertypes.add(target.name);
+          }
+        }
+        return [...supertypes];
+      },
+
       getImportMappings: (filePath: string, language) => {
         const cacheKey = filePath;
         const cached = this.importMappingCache.get(cacheKey);
@@ -684,7 +720,19 @@ export class ReferenceResolver {
       candidates.push(nameResult);
     }
 
-    if (candidates.length === 0) return null;
+    if (candidates.length === 0) {
+      // Defer a chained static-factory/fluent call the first pass couldn't
+      // resolve — its method may live on a supertype the receiver conforms to,
+      // resolvable once implements/extends edges exist (the conformance pass).
+      if (
+        ref.referenceKind === 'calls' &&
+        DOT_CHAIN_LANGUAGES.has(ref.language) &&
+        CHAIN_SHAPE.test(ref.referenceName)
+      ) {
+        this.deferredChainRefs.push(ref);
+      }
+      return null;
+    }
 
     // Return highest confidence candidate
     return candidates.reduce((best, curr) =>
@@ -767,6 +815,43 @@ export class ReferenceResolver {
     return result;
   }
 
+  /**
+   * Second resolution pass for chained static-factory / fluent calls whose
+   * chained method is defined on a SUPERTYPE the receiver's type conforms to —
+   * a protocol-extension / inherited / default-interface method (#750). The
+   * first pass can't resolve these because `implements`/`extends` edges aren't
+   * built yet; this runs AFTER edges are persisted, so `context.getSupertypes`
+   * (and the conformance fallback in resolveMethodOnType) can walk them.
+   *
+   * Operates only on the leftover unresolved refs that have the `inner().method`
+   * chain shape, for the dotted-chain languages — a small set — and is idempotent
+   * (re-resolving an already-resolved ref is a no-op since it's been deleted).
+   * Returns the number of newly-created edges.
+   */
+  resolveChainedCallsViaConformance(): number {
+    const deferred = this.deferredChainRefs;
+    this.deferredChainRefs = [];
+    if (deferred.length === 0) return 0;
+
+    // Read fresh edges (the main pass built the implements/extends edges after
+    // these refs were deferred). matchDottedCallChain now resolves a method on a
+    // supertype via context.getSupertypes -> resolveMethodOnType's conformance walk.
+    this.clearCaches();
+    const resolved: ResolvedRef[] = [];
+    for (const ref of deferred) {
+      const match = this.gateLanguage(matchDottedCallChain(ref, this.context), ref);
+      if (match) resolved.push(match);
+    }
+    if (resolved.length === 0) return 0;
+
+    const edges = this.createEdges(resolved);
+    if (edges.length > 0) {
+      this.queries.insertEdges(edges);
+      this.clearCaches();
+    }
+    return edges.length;
+  }
+
   /**
    * Resolve and persist in batches to keep memory bounded.
    * Processes unresolved references in chunks, persisting edges and cleaning
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index aa235c505..f986e14b1 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -267,6 +267,8 @@ function resolveMethodOnType(
    * signal Java imports carry but the call site doesn't (#314).
    */
   preferredFqn?: string,
+  /** Recursion guard for the supertype/conformance walk. */
+  depth = 0,
 ): ResolvedRef | null {
   // Look up methods by name and match by qualifiedName ending in
   // `<typeName>::<methodName>`. This works whether the method is defined
@@ -284,7 +286,24 @@ function resolveMethodOnType(
       matches.push(m);
     }
   }
-  if (matches.length === 0) return null;
+  if (matches.length === 0) {
+    // Conformance fallback: the method may be defined on a supertype `typeName`
+    // extends, or on a protocol / trait it conforms to (e.g. a Swift protocol-
+    // extension method, a C# default-interface or extension method, a Kotlin
+    // extension on a supertype). Walk supertypes transitively (depth-capped) via
+    // the resolved implements/extends edges — empty in the first resolution pass,
+    // populated in the conformance pass. Still VALIDATED (the method must exist on
+    // a supertype), so a wrong inference produces no edge.
+    if (depth < 4 && context.getSupertypes) {
+      for (const supertype of context.getSupertypes(typeName, ref.language)) {
+        const via = resolveMethodOnType(
+          supertype, methodName, ref, context, confidence, resolvedBy, preferredFqn, depth + 1,
+        );
+        if (via) return via;
+      }
+    }
+    return null;
+  }
 
   if (matches.length > 1 && preferredFqn) {
     const ext = ref.language === 'kotlin' ? '.kt' : '.java';
diff --git a/src/resolution/types.ts b/src/resolution/types.ts
index 2cbf7a697..8c2fc168f 100644
--- a/src/resolution/types.ts
+++ b/src/resolution/types.ts
@@ -81,6 +81,16 @@ export interface ResolutionContext {
   getAllFiles(): string[];
   /** Get nodes by lowercase name (O(1) lookup for fuzzy matching) */
   getNodesByLowerName(lowerName: string): Node[];
+  /**
+   * Direct supertypes of the type named `typeName` (same language): the classes
+   * it extends and the interfaces / protocols / traits it implements/conforms to,
+   * by simple name. Backed by the resolved `implements`/`extends` edges, so it is
+   * EMPTY during the first resolution pass (edges aren't built yet) and populated
+   * afterward — the conformance pass uses it to resolve a chained method defined
+   * on a supertype the receiver type conforms to (e.g. a protocol-extension
+   * method). Optional so external/test contexts compile without it.
+   */
+  getSupertypes?(typeName: string, language: Language): string[];
   /** Get cached import mappings for a file */
   getImportMappings(filePath: string, language: Language): ImportMapping[];
   /**

From 7c7f0dd56fc9e545bc02436b599cfaece26ebd23 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 01:54:12 -0400
Subject: [PATCH 16/51] fix(swift): resolve chained static-factory/fluent calls
 + nested-extension naming (#750) (#755)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Completes Swift in the #750 chained-call series (after Java #751, Kotlin #752,
C# #753, conformance #754). Two parts:

1. Swift chained-call resolution (the #645/#608 mechanism): capture Swift return
   types (positional, member types -> last segment), encode capitalized-receiver
   chains `Foo.make().draw()` / `Foo(args).draw()`, resolve+validate via the
   shared matchDottedCallChain (+ constructor branch). Fixes the decoy wrong-edge
   bug where a chained method dropped to a bare name and attached to a same-named
   method on an unrelated class.

2. Nested-type extension naming fix: `extension KF.Builder: KFOptionSetter` parsed
   as a class_declaration named `KF.Builder` (dot) — inconsistent with the type's
   own declaration `KF::Builder` (name `Builder`) — so the extension's conformances
   and members were invisible to a chained call on the type. A Swift resolveName
   now names a nested-type extension by its last segment (`Builder`), so its
   `implements`/`extends` edges and methods are found by the supertype walk
   (conformance #754) and the simple-name method match.

Validated: synthetic decoy + args + constructor + absent-method tests; full suite
green; nested-extension repro (`KF.url().onSuccess()` resolves via conformance to
the protocol method). Real-repo A/B vs main (conformance) — Alamofire and
Kingfisher both **0 added / 0 removed, node count unchanged**: NEUTRAL and SAFE.
The prior -168 Kingfisher regression (from the naming inconsistency) is eliminated;
Swift's unique-named fluent methods already resolved by bare name, so the chain
path lands the same edges — the value here is decoy-collision correctness, the
nested-extension naming fix, and consistency with the other four languages.
EXTRACTION_VERSION 9 -> 10.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 66 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/languages/swift.ts    | 55 +++++++++++++++++++++++
 src/extraction/tree-sitter.ts        | 32 ++++++++------
 src/resolution/index.ts              |  2 +-
 src/resolution/name-matcher.ts       | 30 +++++++++----
 7 files changed, 163 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 69ded4f7f..10a48ff48 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### Fixes
 
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
+- Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
 - Kotlin method calls made through a companion-object factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to drop the receiver entirely, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Kotlin return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Kotlin indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Kotlin)
 - Java method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.getInstance().bar()` or `Config.create(opts).build()` used to lose the receiver's type, so when two classes had a same-named method the call silently attached to whichever was indexed first — or didn't resolve at all — corrupting callers, impact, and trace. CodeGraph now captures Java return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Covers factories and fluent builders that take arguments (`hashKeys().arrayListValues()`), including builders that return a nested type. Existing Java indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Java)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 9ddd16159..dc4f7a4b9 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2403,6 +2403,72 @@ class Caller {
     });
   });
 
+  describe('Swift chained static-factory call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves Foo.make().draw() via the factory return type, never a same-named decoy', async () => {
+      // Aaa sorts first and has a same-named draw() — without the fix Swift dropped
+      // the receiver to a bare `draw` and attached to Aaa (a wrong edge).
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.swift'),
+        `class Aaa { func draw() {} }
+class Foo {
+    static func make() -> Foo { return Foo() }
+    func draw() {}
+}
+func runCaller() { Foo.make().draw() }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::draw')).toEqual(['runCaller']);
+      expect(callerNamesOf('Aaa::draw')).toEqual([]);
+    });
+
+    it('resolves a constructor chain Foo().draw() and an args factory chain Foo.build(c).render()', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.swift'),
+        `class Config {}
+class Foo {
+    static func build(_ c: Config) -> Foo { return Foo() }
+    func draw() {}
+    func render() {}
+}
+func runCaller() {
+    Foo().draw()
+    Foo.build(Config()).render()
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::draw')).toEqual(['runCaller']);
+      expect(callerNamesOf('Foo::render')).toEqual(['runCaller']);
+    });
+
+    it('creates NO edge when the factory return type lacks the method (silent miss, not a wrong edge)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.swift'),
+        `class Foo {
+    static func make() -> Foo { return Foo() }
+}
+class Other { func onlyOther() {} }
+func runCaller() { Foo.make().onlyOther() }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Foo has no onlyOther() — must not mis-attach to the same-named Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+  });
+
   describe('Chained call resolves a method on a supertype (conformance, #750)', () => {
     function callerNamesOf(qualifiedName: string): string[] {
       const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index f2ea639c7..27e2fdc76 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 9;
+export const EXTRACTION_VERSION = 10;
diff --git a/src/extraction/languages/swift.ts b/src/extraction/languages/swift.ts
index 373fa8a9e..e9401d372 100644
--- a/src/extraction/languages/swift.ts
+++ b/src/extraction/languages/swift.ts
@@ -2,6 +2,44 @@ import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
+/**
+ * A Swift function's declared return type, normalized to the bare class name a
+ * chained `Foo.make().draw()` could be called on (the #645/#608 mechanism).
+ * tree-sitter-swift labels BOTH the function name (`simple_identifier`) and the
+ * return type (a `user_type`) with the field `name`, so `childForFieldName`
+ * returns the name; the return type is found positionally — the first type node
+ * after the `simple_identifier` name, before the body. Optionals (`Foo?`) are
+ * unwrapped; arrays/tuples/function types and `Void` yield undefined.
+ */
+function extractSwiftReturnType(node: SyntaxNode, source: string): string | undefined {
+  let seenName = false;
+  for (let i = 0; i < node.namedChildCount; i++) {
+    const child = node.namedChild(i);
+    if (!child) continue;
+    if (child.type === 'simple_identifier' && !seenName) {
+      seenName = true;
+      continue;
+    }
+    if (!seenName) continue;
+    if (child.type === 'function_body') return undefined; // body reached: no return type
+    let typeNode: SyntaxNode | null = null;
+    if (child.type === 'user_type') typeNode = child;
+    else if (child.type === 'optional_type') {
+      typeNode = child.namedChildren.find((c: SyntaxNode) => c.type === 'user_type') ?? null;
+    }
+    if (typeNode) {
+      // Use the whole type node's text, strip generics, then take the LAST
+      // dotted segment — a member type `KF.Builder` resolves to `Builder` (its
+      // first type_identifier is the OUTER `KF`, which would be wrong).
+      const name = getNodeText(typeNode, source).trim().replace(/<[^>]*>/g, '');
+      const last = name.split('.').pop()?.trim();
+      if (!last || !/^[A-Za-z_]\w*$/.test(last) || last === 'Void') return undefined;
+      return last;
+    }
+  }
+  return undefined;
+}
+
 export const swiftExtractor: LanguageExtractor = {
   functionTypes: ['function_declaration'],
   classTypes: ['class_declaration'],
@@ -18,6 +56,23 @@ export const swiftExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'parameter',
   returnField: 'return_type',
+  getReturnType: extractSwiftReturnType,
+  resolveName: (node, source) => {
+    // A nested-type extension `extension KF.Builder { … }` parses as a
+    // class_declaration whose `name` is a multi-segment `user_type` (`KF.Builder`
+    // = type_identifiers `KF`, `Builder`). Name the node by the LAST segment
+    // (`Builder`) so it shares the simple name of the extended type's own
+    // declaration (`struct Builder` → `KF::Builder`) instead of becoming a
+    // distinct `KF.Builder` node. Without this, the extension's conformances and
+    // members are invisible to a chained call on the type — supertype lookup and
+    // method matching both key off the simple name (#750). Simple names (regular
+    // class/struct/enum, or `extension Plain`) fall through to default extraction.
+    if (node.type !== 'class_declaration') return undefined;
+    const nameNode = getChildByField(node, 'name');
+    if (!nameNode || nameNode.type !== 'user_type') return undefined;
+    const ids = nameNode.namedChildren.filter((c: SyntaxNode) => c.type === 'type_identifier');
+    return ids.length > 1 ? getNodeText(ids[ids.length - 1]!, source) : undefined;
+  },
   getSignature: (node, source) => {
     // Swift function signature: func name(params) -> ReturnType
     const params = getChildByField(node, 'parameter');
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index b64f8d753..7fef67562 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2525,32 +2525,36 @@ export class TreeSitterExtractor {
                 calleeName = methodName;
               }
             } else if (
-              (this.language === 'cpp' || this.language === 'c' || this.language === 'kotlin') &&
+              (this.language === 'cpp' ||
+                this.language === 'c' ||
+                this.language === 'kotlin' ||
+                this.language === 'swift') &&
               receiver &&
               receiver.type === 'call_expression'
             ) {
               // Receiver that is itself a call — `Foo::instance().bar()`,
               // `openSession()->run()`, `mgr.view().render()` (C/C++), or
-              // `Foo.getInstance().bar()` (Kotlin). Keep the inner call so
-              // resolution can infer bar()'s class from what the inner call
-              // RETURNS (#645/#608). Encode as `<innerCallee>().<method>`; the
-              // `().` marker never appears in an ordinary ref, so the resolver
+              // `Foo.getInstance().bar()` (Kotlin) / `Foo.make().draw()` (Swift).
+              // Keep the inner call so resolution can infer bar()'s class from what
+              // the inner call RETURNS (#645/#608). Encode as `<innerCallee>().<method>`;
+              // the `().` marker never appears in an ordinary ref, so the resolver
               // can detect and split it. Other languages keep the bare-name
               // behavior (dropping the receiver) below.
               let innerCallee: string;
               let reencode: boolean;
-              if (this.language === 'kotlin') {
-                // tree-sitter-kotlin has no field names — the inner callee is the
+              if (this.language === 'kotlin' || this.language === 'swift') {
+                // tree-sitter-kotlin/swift expose the inner callee as the
                 // call_expression's first named child (a navigation_expression
-                // `Foo.getInstance`, or a bare identifier for a free call).
+                // `Foo.getInstance`, or a bare identifier for a free/constructor call).
                 const innerNav = receiver.namedChild(0);
                 innerCallee = innerNav ? getNodeText(innerNav, this.source).replace(/\s+/g, '') : '';
-                // Only re-encode a CLASS / companion-factory chain, whose receiver
-                // chain starts with a capitalized type (`Foo.getInstance().bar()`).
-                // An instance chain (`list.filter{}.map{}`) has a lowercase receiver
-                // whose type we can't recover here — re-encoding it would only drop
-                // the edge (no chain resolution, no bare-name fallback), regressing
-                // recall in fluent codebases. Leave those to the bare-name path.
+                // Only re-encode a CLASS / companion-factory / constructor chain,
+                // whose receiver chain starts with a capitalized type
+                // (`Foo.getInstance().bar()`, `Foo().bar()`). An instance chain
+                // (`list.filter{}.map{}`) has a lowercase receiver whose type we
+                // can't recover here — re-encoding it would only drop the edge (no
+                // chain resolution, no bare-name fallback), regressing recall in
+                // fluent codebases. Leave those to the bare-name path.
                 reencode = /^[A-Z]/.test(innerCallee);
               } else {
                 const innerFn = getChildByField(receiver, 'function');
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index ffada8d5a..a2d237a34 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -33,7 +33,7 @@ const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
 ]);
 
 /** Languages whose chained calls use the dotted `inner().method` encoding. */
-const DOT_CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp']);
+const DOT_CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift']);
 
 /** The extractor's chained-receiver encoding: `<inner>().<method>`. */
 const CHAIN_SHAPE = /^(.+)\(\)\.(\w+)$/;
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index f986e14b1..5b90f126d 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -595,6 +595,13 @@ export function matchPhpCallChain(
   return resolveMethodOnType(resolvedClass, method, ref, context, 0.85, 'instance-method');
 }
 
+/**
+ * Languages where an unprefixed capitalized call `Foo(args)` constructs the
+ * class (so a `Foo(args).method()` receiver's type is `Foo`). Java/C# need `new`,
+ * so a bare `Foo()` there is a method call, not construction — excluded.
+ */
+const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift']);
+
 /**
  * Resolve a dotted chained call whose receiver is a static factory / fluent call —
  * `Foo.getInstance().bar()`, encoded by the extractor as `Foo.getInstance().bar`
@@ -603,7 +610,7 @@ export function matchPhpCallChain(
  * it (resolveMethodOnType requires `Type::method` to exist), so a wrong inference
  * yields no edge rather than a wrong one (e.g. a same-named `bar()` on an
  * unrelated class is never matched). Shared by the dot-notation languages
- * (Java, Kotlin, C#) — same receiver shape, same `Class::method` qualified names.
+ * (Java, Kotlin, C#, Swift) — same receiver shape, same `Class::method` qualified names.
  */
 export function matchDottedCallChain(
   ref: UnresolvedRef,
@@ -617,13 +624,13 @@ export function matchDottedCallChain(
 
   // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
   // capitalized inner is a class construction, so the receiver's type is the
-  // class itself — resolve the method on it. Kotlin only: there an unprefixed
-  // capitalized call constructs the class, whereas in Java a bare `Foo()` is a
-  // method call (constructors need `new`), so we must not assume construction.
-  // A lowercase bare inner is a top-level `factory().method()` whose type we
-  // can't recover — bail.
+  // class itself — resolve the method on it. Only in languages where an
+  // unprefixed capitalized call constructs the class (Kotlin, Swift); in Java/C#
+  // a bare `Foo()` is a method call (constructors need `new`), so we must not
+  // assume construction. A lowercase bare inner is a top-level `factory().method()`
+  // whose type we can't recover — bail.
   if (lastDot <= 0) {
-    if (ref.language !== 'kotlin' || !/^[A-Z]/.test(inner)) return null;
+    if (!CONSTRUCTS_VIA_BARE_CALL.has(ref.language) || !/^[A-Z]/.test(inner)) return null;
     return resolveMethodOnType(inner, method, ref, context, 0.85, 'instance-method', importedFqnOf(inner, ref, context));
   }
 
@@ -1081,11 +1088,16 @@ export function matchReference(
     if (result) return result;
   }
 
-  // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C#) —
+  // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C# / Swift) —
   // `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar` (#645/#608
   // mechanism). Resolve bar's class from getInstance's declared return type, then
   // validate the method on it.
-  if (ref.language === 'java' || ref.language === 'kotlin' || ref.language === 'csharp') {
+  if (
+    ref.language === 'java' ||
+    ref.language === 'kotlin' ||
+    ref.language === 'csharp' ||
+    ref.language === 'swift'
+  ) {
     result = matchDottedCallChain(ref, context);
     if (result) return result;
   }

From 5805f01957c39320b0532a942a426f5a697f1682 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 02:41:59 -0400
Subject: [PATCH 17/51] fix(rust): resolve chained associated-function calls
 Foo::new().bar() (#750) (#757)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A Rust call through a chained associated function — `Foo::new().bar()`,
`Foo::with(cfg).build()` — dropped the receiver to a bare method name, which
then attached to a same-named method on an unrelated type (a wrong edge) or
didn't resolve. Ports the #645/#608 mechanism for Rust's `::` receivers:

- Part 1: capture Rust return types; `-> Self` yields the `self` marker (resolved
  to the impl's own type, like PHP), references/generics are unwrapped/reduced.
- Part 2: encode an associated-function chain (`Foo::new().bar`), gated to a
  scoped_identifier receiver so instance chains (`x.foo().bar()`) keep bare-name.
- Part 3: resolve via matchScopedCallChain (PHP's `::` resolver, generalized),
  validated by resolveMethodOnType. Wire Rust into the conformance second pass
  (matchScopedCallChain variant) so a chained method provided by a trait the type
  implements (`impl Trait for Type` → existing implements edges) resolves too.

Validated: synthetic decoy + args + Self + trait-default-conformance +
absent-method safety tests; full suite otherwise green (the lone failure is the
known-flaky #662 daemon test, which passes in isolation). Real-repo A/B vs main:
clap (329 .rs files) is a net precision win —
**+937 added (96% correct builder methods), 622 wrong->right retargets**
(`Command::new().arg()` was mis-resolving to `ArgGroup::arg`, now `Command::arg`),
+162 net unique edges; the pure-drops are largely wrong bare-name edges the fix
correctly stops emitting. tokio-rs/bytes: 0 added / 0 removed (no regression).
Known limit: the single-hop mechanism re-encodes only the first hop of a chain
(deeper hops keep the bare-name behavior), so clap's unusually deep builder
chains are only partly covered.
EXTRACTION_VERSION 10 -> 11.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 83 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/languages/rust.ts     | 31 +++++++++++
 src/extraction/tree-sitter.ts        | 25 ++++++---
 src/resolution/index.ts              | 20 +++++--
 src/resolution/name-matcher.ts       | 29 +++++-----
 7 files changed, 163 insertions(+), 28 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 10a48ff48..c3badbbb8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Rust method calls made through a chained associated function now resolve to the correct type. A call like `Foo::new().bar()` or `Foo::with(cfg).build()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Rust return types (`-> Self` resolves to the implementing type), infers the chained receiver's type from what the associated function returns, and resolves the method on it — including methods provided by a trait the type implements (via the new `impl Trait for Type` relationships) — creating the edge only when the type or one of its traits genuinely has the method. Existing Rust indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Rust)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index dc4f7a4b9..c91f76b72 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2534,4 +2534,87 @@ class Caller {
       expect(callerNamesOf('Other::onlyOther')).toEqual([]);
     });
   });
+
+  describe('Rust chained associated-function call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves Foo::new().bar() (and a Self return) via the associated fn, never a same-named decoy', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.rs'),
+        `struct Aaa { _x: i32 }
+impl Aaa { fn bar(&self) {} }
+struct Foo { _x: i32 }
+impl Foo {
+    fn new() -> Foo { Foo { _x: 0 } }
+    fn make() -> Self { Foo { _x: 0 } }
+    fn bar(&self) {}
+}
+fn caller() {
+    Foo::new().bar();
+    Foo::make().bar();
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::bar')).toEqual(['caller']);
+      expect(callerNamesOf('Aaa::bar')).toEqual([]);
+    });
+
+    it('resolves a chain that passes arguments — Foo::with(c).build()', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.rs'),
+        `struct Config;
+struct Foo { _x: i32 }
+impl Foo {
+    fn with(c: Config) -> Foo { Foo { _x: 0 } }
+    fn build(&self) {}
+}
+fn caller() { Foo::with(Config).build(); }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::build')).toEqual(['caller']);
+    });
+
+    it('resolves a chained method from a trait the type implements (default method, via conformance)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.rs'),
+        `struct Foo { _x: i32 }
+impl Foo { fn new() -> Foo { Foo { _x: 0 } } }
+struct Decoy { _x: i32 }
+impl Decoy { fn draw(&self) {} }
+trait Drawable { fn draw(&self) {} }
+impl Drawable for Foo {}
+fn caller() { Foo::new().draw(); }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Drawable::draw')).toEqual(['caller']);
+      expect(callerNamesOf('Decoy::draw')).toEqual([]);
+    });
+
+    it('creates NO edge when neither the type nor a supertype has the method (silent miss)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.rs'),
+        `struct Foo { _x: i32 }
+impl Foo { fn new() -> Foo { Foo { _x: 0 } } }
+struct Other { _x: i32 }
+impl Other { fn only_other(&self) {} }
+fn caller() { Foo::new().only_other(); }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Foo has no only_other() — must not mis-attach to the same-named Other::only_other.
+      expect(callerNamesOf('Other::only_other')).toEqual([]);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 27e2fdc76..de8f45176 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 10;
+export const EXTRACTION_VERSION = 11;
diff --git a/src/extraction/languages/rust.ts b/src/extraction/languages/rust.ts
index a68163cc3..bdc4477ba 100644
--- a/src/extraction/languages/rust.ts
+++ b/src/extraction/languages/rust.ts
@@ -2,6 +2,36 @@ import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
+/**
+ * A Rust function's declared return type, normalized to the bare type a chained
+ * `Foo::new().bar()` could be called on (the #645/#608 mechanism). Reads the
+ * `return_type` field: `-> Self` yields the marker `self` (resolved to the impl's
+ * own type at resolution time, like PHP's `self`/`static`); a concrete `-> Foo` /
+ * `-> FooBuilder` its name; a reference (`&Foo`) is unwrapped; generics are reduced
+ * to the base type (`Vec<Foo>` → `Vec`); primitives / unit / tuple yield undefined.
+ * Stdlib types that aren't in the graph simply fail the later existence check.
+ */
+function extractRustReturnType(node: SyntaxNode, source: string): string | undefined {
+  let rt = getChildByField(node, 'return_type');
+  if (!rt) return undefined;
+  if (rt.type === 'reference_type') {
+    rt =
+      rt.namedChildren.find(
+        (c: SyntaxNode) =>
+          c.type === 'type_identifier' ||
+          c.type === 'scoped_type_identifier' ||
+          c.type === 'generic_type',
+      ) ?? rt;
+  }
+  if (!rt || rt.type === 'primitive_type' || rt.type === 'unit_type' || rt.type === 'tuple_type') {
+    return undefined;
+  }
+  const text = getNodeText(rt, source).trim().replace(/<[^>]*>/g, '');
+  const last = text.split('::').pop()?.trim();
+  if (!last || !/^[A-Za-z_]\w*$/.test(last)) return undefined;
+  return last === 'Self' ? 'self' : last;
+}
+
 export const rustExtractor: LanguageExtractor = {
   // `function_signature_item` is a trait method DECLARATION (`fn render(&self);`,
   // no body). Extracting it makes a trait's method set first-class, which
@@ -23,6 +53,7 @@ export const rustExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'parameters',
   returnField: 'return_type',
+  getReturnType: extractRustReturnType,
   getSignature: (node, source) => {
     const params = getChildByField(node, 'parameters');
     const returnType = getChildByField(node, 'return_type');
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 7fef67562..6ac38ddfc 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2528,18 +2528,19 @@ export class TreeSitterExtractor {
               (this.language === 'cpp' ||
                 this.language === 'c' ||
                 this.language === 'kotlin' ||
-                this.language === 'swift') &&
+                this.language === 'swift' ||
+                this.language === 'rust') &&
               receiver &&
               receiver.type === 'call_expression'
             ) {
               // Receiver that is itself a call — `Foo::instance().bar()`,
-              // `openSession()->run()`, `mgr.view().render()` (C/C++), or
-              // `Foo.getInstance().bar()` (Kotlin) / `Foo.make().draw()` (Swift).
-              // Keep the inner call so resolution can infer bar()'s class from what
-              // the inner call RETURNS (#645/#608). Encode as `<innerCallee>().<method>`;
-              // the `().` marker never appears in an ordinary ref, so the resolver
-              // can detect and split it. Other languages keep the bare-name
-              // behavior (dropping the receiver) below.
+              // `openSession()->run()`, `mgr.view().render()` (C/C++),
+              // `Foo.getInstance().bar()` (Kotlin) / `Foo.make().draw()` (Swift), or
+              // `Foo::new().bar()` (Rust). Keep the inner call so resolution can
+              // infer bar()'s class from what the inner call RETURNS (#645/#608).
+              // Encode as `<innerCallee>().<method>`; the `().` marker never appears
+              // in an ordinary ref, so the resolver can detect and split it. Other
+              // languages keep the bare-name behavior (dropping the receiver) below.
               let innerCallee: string;
               let reencode: boolean;
               if (this.language === 'kotlin' || this.language === 'swift') {
@@ -2561,7 +2562,13 @@ export class TreeSitterExtractor {
                 innerCallee = innerFn
                   ? getNodeText(innerFn, this.source).replace(/->/g, '.').replace(/\s+/g, '')
                   : '';
-                reencode = !!innerCallee;
+                // Rust: only re-encode an associated-function chain
+                // (`Foo::new().bar()`), whose inner callee is a path/`scoped_identifier`.
+                // An instance chain (`x.foo().bar()`, inner callee a field_expression)
+                // keeps bare-name — the `::` resolver can't recover a variable's type,
+                // so re-encoding would only drop the edge. C/C++ re-encode any inner.
+                reencode =
+                  this.language === 'rust' ? innerFn?.type === 'scoped_identifier' : !!innerCallee;
               }
               calleeName = reencode ? `${innerCallee}().${methodName}` : methodName;
             } else {
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index a2d237a34..0cb14098d 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -16,7 +16,7 @@ import {
   FrameworkResolver,
   ImportMapping,
 } from './types';
-import { matchReference, matchDottedCallChain, sameLanguageFamily, crossesKnownFamily } from './name-matcher';
+import { matchReference, matchDottedCallChain, matchScopedCallChain, sameLanguageFamily, crossesKnownFamily } from './name-matcher';
 import { resolveViaImport, resolveJvmImport, extractImportMappings, extractReExports, loadCppIncludeDirs, isPhpIncludePathRef } from './import-resolver';
 import { detectFrameworks } from './frameworks';
 import { synthesizeCallbackEdges } from './callback-synthesizer';
@@ -32,8 +32,13 @@ const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
   'class', 'struct', 'interface', 'trait', 'protocol', 'enum',
 ]);
 
-/** Languages whose chained calls use the dotted `inner().method` encoding. */
-const DOT_CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift']);
+/**
+ * Languages whose chained static-factory/fluent calls defer to the conformance
+ * second pass. Dotted-receiver languages resolve via matchDottedCallChain; the
+ * `::`-receiver ones (Rust) via matchScopedCallChain.
+ */
+const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust']);
+const SCOPED_CHAIN_LANGUAGES = new Set(['rust']);
 
 /** The extractor's chained-receiver encoding: `<inner>().<method>`. */
 const CHAIN_SHAPE = /^(.+)\(\)\.(\w+)$/;
@@ -726,7 +731,7 @@ export class ReferenceResolver {
       // resolvable once implements/extends edges exist (the conformance pass).
       if (
         ref.referenceKind === 'calls' &&
-        DOT_CHAIN_LANGUAGES.has(ref.language) &&
+        CHAIN_LANGUAGES.has(ref.language) &&
         CHAIN_SHAPE.test(ref.referenceName)
       ) {
         this.deferredChainRefs.push(ref);
@@ -839,7 +844,12 @@ export class ReferenceResolver {
     this.clearCaches();
     const resolved: ResolvedRef[] = [];
     for (const ref of deferred) {
-      const match = this.gateLanguage(matchDottedCallChain(ref, this.context), ref);
+      // `::`-receiver languages (Rust) split on `::` (matchScopedCallChain);
+      // dotted-receiver languages on `.` (matchDottedCallChain).
+      const chainMatch = SCOPED_CHAIN_LANGUAGES.has(ref.language)
+        ? matchScopedCallChain(ref, this.context)
+        : matchDottedCallChain(ref, this.context);
+      const match = this.gateLanguage(chainMatch, ref);
       if (match) resolved.push(match);
     }
     if (resolved.length === 0) return 0;
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index 5b90f126d..de9a3c362 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -570,15 +570,17 @@ export function matchCppCallChain(
 }
 
 /**
- * Resolve a PHP fluent static-factory chain whose receiver is a static call —
- * `Cls::for($x)->method()`, encoded by the extractor as `Cls::for().method`
- * (#608, the per-credential Laravel client idiom). The receiver's type is what
- * `Cls::for` returns: a `: self` / `: static` resolves to `Cls` itself, a
- * concrete `: Type` to that type. The outer method is then resolved and
- * VALIDATED on it (resolveMethodOnType requires the method to exist), so a
- * wrong inference yields no edge rather than a wrong one.
+ * Resolve a `::`-scoped factory chain whose receiver is a scoped/static call —
+ * PHP `Cls::for($x)->method()` (#608, the per-credential Laravel client idiom) or
+ * Rust `Foo::new().bar()` (an associated-function call) — both encoded by the
+ * extractor as `Cls::factory().method`. The receiver's type is what `Cls::factory`
+ * returns: a `self` marker (PHP `: self`/`: static`, Rust `-> Self`) resolves to
+ * the factory's own type, a concrete return type to that type. The outer method is
+ * then resolved and VALIDATED on it (resolveMethodOnType requires the method to
+ * exist on the type or a supertype it conforms to), so a wrong inference yields no
+ * edge rather than a wrong one. Shared by the `::`-receiver languages (PHP, Rust).
  */
-export function matchPhpCallChain(
+export function matchScopedCallChain(
   ref: UnresolvedRef,
   context: ResolutionContext,
 ): ResolvedRef | null {
@@ -1080,11 +1082,12 @@ export function matchReference(
     if (result) return result;
   }
 
-  // 1c. PHP fluent static-factory chain — `Cls::for($x)->method()` encoded as
-  // `Cls::for().method` (#608). Same idea as 1b: the receiver's type is the
-  // factory's `: self` / `: Type` return.
-  if (ref.language === 'php') {
-    result = matchPhpCallChain(ref, context);
+  // 1c. `::`-scoped factory chain — PHP `Cls::for($x)->method()` (#608) or Rust
+  // `Foo::new().bar()`, both encoded as `Cls::factory().method`. The receiver's
+  // type is the factory's `self` (PHP `: self`/`: static`, Rust `-> Self`) or
+  // concrete return type.
+  if (ref.language === 'php' || ref.language === 'rust') {
+    result = matchScopedCallChain(ref, context);
     if (result) return result;
   }
 

From ccced9e358bd534541584c1982e08ea79cc96685 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 11:31:24 -0400
Subject: [PATCH 18/51] fix(go): resolve chained factory-function calls
 New().Method() (#750) (#760)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(go): resolve chained factory-function calls New().Method() (#750)

A Go call through a chained factory function — `New().Method()`,
`With(cfg).Build()` — dropped the receiver to a bare method name, which then
attached to a same-named method on an unrelated type (a wrong edge) or didn't
resolve. Ports the #645/#608 mechanism for Go's bare-factory receivers:

- Part 1: capture Go return types; a pointer `*Foo` -> `Foo`, a multi-return
  `(*Foo, error)` -> its first result, qualified `pkg.Foo` -> `Foo`.
- Part 2: encode a bare-factory chain (`New().Method`), gated to an `identifier`
  receiver so instance chains (`obj.Method().Other()`) keep bare-name.
- Part 3: matchDottedCallChain bare-inner Go branch looks up the FUNCTION's
  return type, then resolves+validates the method on it. Wired into the
  conformance pass so a method promoted from an embedded struct (`type Widget
  struct{ Base }` -> the existing `extends` edge) resolves. FALLBACK: when the
  inner isn't a resolvable function (a package-level VARIABLE holding a function
  value, e.g. gin's `engine()`), fall back to bare-name so the edge isn't dropped.

Validated: synthetic decoy + args + multi-return + embedded-conformance + absent
safety tests (4/4); full suite green. Real-repo A/B on gin (99 .go files): before
the fallback the net change was -40 edges = 25 wrong self-loops removed (good) +
15 correct `Engine::ServeHTTP` edges dropped (gin's ginS variable-factory
`engine()`); the fallback recovers those 15. The gin A/B re-confirmation with the
fallback is PENDING (local index flakiness, not a code issue).
EXTRACTION_VERSION 11 -> 12.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(go): stop the chained-call fallback from looping the batched resolver

The Go variable-inner fallback (for chains like `engine().ServeHTTP()` whose
inner is a package-level var, not a factory function) resolved the method via
a synthetic bare-name ref and propagated THAT ref as `.original`. Its
`referenceName` was the bare `ServeHTTP`, not the stored `engine().ServeHTTP`,
so `resolveAndPersistBatched`'s keyed `deleteSpecificResolvedReferences` no-oped,
the offset-0 batch never drained, and the loop re-resolved + re-inserted the
same rows forever — a runaway that grew a 99-file repo (gin) to 5,050,206 edges
/ 1.4 GB before filling the disk.

- name-matcher.ts: tie the bare-name match back to the original `ref` so the
  batch-cleanup delete matches the stored row and the loop drains.
- index.ts: add a non-progress guard to resolveAndPersistBatched — if the
  unresolved_refs table doesn't shrink after a batch, stop instead of growing
  the graph without bound (defense-in-depth for any future keyed-delete mismatch).
- resolution.test.ts: regression test for the variable-inner chain — asserts the
  fallback edge resolves AND the edge count stays bounded (no explosion).

gin A/B (post-fix): db 5.8 MB / 3,699 calls edges; net-zero unique-edge diff vs
main (the fallback recovers the dropped edges, adds no wrong ones). Full suite green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |   1 +
 __tests__/resolution.test.ts         | 111 +++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |   2 +-
 src/extraction/languages/go.ts       |  38 +++++++++
 src/extraction/tree-sitter.ts        |  28 ++++---
 src/resolution/index.ts              |  15 +++-
 src/resolution/name-matcher.ts       |  53 ++++++++++---
 7 files changed, 222 insertions(+), 26 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3badbbb8..c2f3a3de7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Go method calls made through a chained factory function now resolve to the correct type. A call like `New().Method()` used to drop the receiver, so the chained method attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Go return types (a pointer `*Foo` resolves to `Foo`, and a multi-return `(*Foo, error)` to its first result), infers the chained receiver's type from what the factory function returns, and resolves the method on it — including methods promoted from an embedded struct — creating the edge only when the type or an embedded type genuinely has the method. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Go)
 - Rust method calls made through a chained associated function now resolve to the correct type. A call like `Foo::new().bar()` or `Foo::with(cfg).build()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Rust return types (`-> Self` resolves to the implementing type), infers the chained receiver's type from what the associated function returns, and resolves the method on it — including methods provided by a trait the type implements (via the new `impl Trait for Type` relationships) — creating the edge only when the type or one of its traits genuinely has the method. Existing Rust indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Rust)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index c91f76b72..5f3a80719 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2617,4 +2617,115 @@ fn caller() { Foo::new().only_other(); }
       expect(callerNamesOf('Other::only_other')).toEqual([]);
     });
   });
+
+  describe('Go chained factory-function call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves New().Bar() via the factory return type (pointer), never a same-named decoy', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.go'),
+        `package main
+type Aaa struct{}
+func (a *Aaa) Bar() {}
+type Foo struct{}
+func New() *Foo { return &Foo{} }
+func (f *Foo) Bar() {}
+func caller() { New().Bar() }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::Bar')).toEqual(['caller']);
+      expect(callerNamesOf('Aaa::Bar')).toEqual([]);
+    });
+
+    it('resolves an args chain and a multi-return factory — With(c).Build(), (*Foo, error)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.go'),
+        `package main
+type Config struct{}
+type Foo struct{}
+func With(c Config) (*Foo, error) { return &Foo{}, nil }
+func (f *Foo) Build() {}
+func caller() { With(Config{}).Build() }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Foo::Build')).toEqual(['caller']);
+    });
+
+    it('resolves a method provided by an embedded struct (via conformance)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.go'),
+        `package main
+type Base struct{}
+func (b *Base) Embedded() {}
+type Decoy struct{}
+func (d *Decoy) Embedded() {}
+type Widget struct{ Base }
+func NewWidget() *Widget { return &Widget{} }
+func caller() { NewWidget().Embedded() }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Base::Embedded')).toEqual(['caller']);
+      expect(callerNamesOf('Decoy::Embedded')).toEqual([]);
+    });
+
+    it('creates NO edge when neither the type nor an embedded type has the method (silent miss)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.go'),
+        `package main
+type Foo struct{}
+func New() *Foo { return &Foo{} }
+type Other struct{}
+func (o *Other) OnlyOther() {}
+func caller() { New().OnlyOther() }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Foo has no OnlyOther() — must not mis-attach to the same-named Other::OnlyOther.
+      expect(callerNamesOf('Other::OnlyOther')).toEqual([]);
+    });
+
+    it('falls back to bare-name resolution for a VARIABLE-inner chain without exploding the graph', async () => {
+      // `engine` is a package-level VARIABLE holding a func value, not a factory
+      // FUNCTION — so its return type can't be recovered and the chain falls back
+      // to bare-name resolution of the method (restoring the pre-re-encoding edge).
+      // Regression for the runaway this fallback originally caused: it resolved
+      // with a mutated `original.referenceName` (the bare `ServeHTTP`, not the
+      // stored `engine().ServeHTTP`), so the batched resolver's keyed delete
+      // no-oped, the offset-0 batch never drained, and edges inserted forever
+      // (5M edges / 1.4 GB on a 99-file repo). The fallback now ties the match to
+      // the original ref, and a non-progress guard backstops the loop.
+      fs.writeFileSync(
+        path.join(tempDir, 'main.go'),
+        `package main
+type Server struct{}
+func (s *Server) ServeHTTP() {}
+var engine = func() *Server { return &Server{} }
+func caller() { engine().ServeHTTP() }
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Recall: the variable-inner chain still finds the method by bare name.
+      expect(callerNamesOf('Server::ServeHTTP')).toEqual(['caller']);
+      // No runaway: a single call site yields a single edge, not millions.
+      const target = cg
+        .getNodesByKind('method')
+        .find((n) => n.qualifiedName === 'Server::ServeHTTP')!;
+      const rawCalls = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls');
+      expect(rawCalls.length).toBeLessThan(5);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index de8f45176..85b796f7a 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 11;
+export const EXTRACTION_VERSION = 12;
diff --git a/src/extraction/languages/go.ts b/src/extraction/languages/go.ts
index d6df2680b..5e41fbfaa 100644
--- a/src/extraction/languages/go.ts
+++ b/src/extraction/languages/go.ts
@@ -1,6 +1,43 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
+/**
+ * A Go function's declared return type, normalized to the bare type a chained
+ * `New().Method()` could be called on (the #645/#608 mechanism). Reads the
+ * `result` field: a pointer `*Foo` is unwrapped to `Foo`, a multi-return
+ * `(*Foo, error)` takes the first result (the idiomatic value-or-error shape),
+ * a qualified `pkg.Foo` reduces to its last segment, and generics to the base.
+ * Built-ins / unnamed results simply fail the later existence check.
+ */
+function extractGoReturnType(node: SyntaxNode, source: string): string | undefined {
+  let result = getChildByField(node, 'result');
+  if (!result) return undefined;
+  // Multi-return `(T, error)` → the first result's type.
+  if (result.type === 'parameter_list') {
+    const first = result.namedChildren.find((c: SyntaxNode) => c.type === 'parameter_declaration');
+    if (!first) return undefined;
+    result = getChildByField(first, 'type') ?? first;
+  }
+  // Unwrap a pointer `*Foo` → `Foo`.
+  if (result?.type === 'pointer_type') {
+    result =
+      result.namedChildren.find(
+        (c: SyntaxNode) =>
+          c.type === 'type_identifier' || c.type === 'qualified_type' || c.type === 'generic_type',
+      ) ?? result;
+  }
+  if (!result) return undefined;
+  const text = getNodeText(result, source)
+    .trim()
+    .replace(/^\*/, '')
+    .replace(/<[^>]*>/g, '')
+    .replace(/\[[^\]]*\]/g, ''); // strip generic args `Foo[T]`
+  const last = text.split('.').pop()?.trim(); // qualified `pkg.Foo` → `Foo`
+  if (!last || !/^[A-Za-z_]\w*$/.test(last)) return undefined;
+  return last;
+}
+
 export const goExtractor: LanguageExtractor = {
   functionTypes: ['function_declaration'],
   classTypes: [], // Go doesn't have classes
@@ -17,6 +54,7 @@ export const goExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'parameters',
   returnField: 'result',
+  getReturnType: extractGoReturnType,
   getSignature: (node, source) => {
     const params = getChildByField(node, 'parameters');
     const result = getChildByField(node, 'result');
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 6ac38ddfc..ace1c2daf 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2529,18 +2529,19 @@ export class TreeSitterExtractor {
                 this.language === 'c' ||
                 this.language === 'kotlin' ||
                 this.language === 'swift' ||
-                this.language === 'rust') &&
+                this.language === 'rust' ||
+                this.language === 'go') &&
               receiver &&
               receiver.type === 'call_expression'
             ) {
               // Receiver that is itself a call — `Foo::instance().bar()`,
               // `openSession()->run()`, `mgr.view().render()` (C/C++),
-              // `Foo.getInstance().bar()` (Kotlin) / `Foo.make().draw()` (Swift), or
-              // `Foo::new().bar()` (Rust). Keep the inner call so resolution can
-              // infer bar()'s class from what the inner call RETURNS (#645/#608).
-              // Encode as `<innerCallee>().<method>`; the `().` marker never appears
-              // in an ordinary ref, so the resolver can detect and split it. Other
-              // languages keep the bare-name behavior (dropping the receiver) below.
+              // `Foo.getInstance().bar()` (Kotlin) / `Foo.make().draw()` (Swift),
+              // `Foo::new().bar()` (Rust), or `New().Method()` (Go). Keep the inner
+              // call so resolution can infer bar()'s class from what the inner call
+              // RETURNS (#645/#608). Encode as `<innerCallee>().<method>`; the `().`
+              // marker never appears in an ordinary ref, so the resolver can detect
+              // and split it. Other languages keep the bare-name behavior below.
               let innerCallee: string;
               let reencode: boolean;
               if (this.language === 'kotlin' || this.language === 'swift') {
@@ -2564,11 +2565,14 @@ export class TreeSitterExtractor {
                   : '';
                 // Rust: only re-encode an associated-function chain
                 // (`Foo::new().bar()`), whose inner callee is a path/`scoped_identifier`.
-                // An instance chain (`x.foo().bar()`, inner callee a field_expression)
-                // keeps bare-name — the `::` resolver can't recover a variable's type,
-                // so re-encoding would only drop the edge. C/C++ re-encode any inner.
-                reencode =
-                  this.language === 'rust' ? innerFn?.type === 'scoped_identifier' : !!innerCallee;
+                // Go: only a bare package-level factory chain (`New().Method()`),
+                // whose inner callee is an `identifier`. An instance chain
+                // (`x.foo().bar()` Rust, `obj.Method().Other()` Go) keeps bare-name —
+                // the resolver can't recover a variable's type, so re-encoding would
+                // only drop the edge. C/C++ re-encode any inner.
+                if (this.language === 'rust') reencode = innerFn?.type === 'scoped_identifier';
+                else if (this.language === 'go') reencode = innerFn?.type === 'identifier';
+                else reencode = !!innerCallee;
               }
               calleeName = reencode ? `${innerCallee}().${methodName}` : methodName;
             } else {
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index 0cb14098d..9626687ae 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -37,7 +37,7 @@ const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
  * second pass. Dotted-receiver languages resolve via matchDottedCallChain; the
  * `::`-receiver ones (Rust) via matchScopedCallChain.
  */
-const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust']);
+const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go']);
 const SCOPED_CHAIN_LANGUAGES = new Set(['rust']);
 
 /** The extractor's chained-receiver encoding: `<inner>().<method>`. */
@@ -884,6 +884,7 @@ export class ReferenceResolver {
 
     // Process in batches. We always read from offset 0 because resolved refs
     // are deleted after each batch, shifting the remaining rows forward.
+    let prevRemaining = Number.POSITIVE_INFINITY;
     while (true) {
       const batch = this.queries.getUnresolvedReferencesBatch(0, batchSize);
       if (batch.length === 0) break;
@@ -937,6 +938,18 @@ export class ReferenceResolver {
       if (result.resolved.length === 0 && result.unresolved.length === batch.length) {
         break;
       }
+
+      // Non-progress guard (defense-in-depth). Because we re-read from offset 0
+      // each pass, the unresolved_refs table MUST shrink every iteration — both
+      // resolved and unresolved refs are deleted above. If it didn't shrink, a
+      // resolver returned a match whose `original.referenceName` differs from the
+      // stored row, so the keyed delete no-ops, and we'd re-read + re-resolve +
+      // re-insert the same rows forever (the runaway that grew a 99-file repo to
+      // 5M edges / 1.4 GB before the Go-fallback fix). Stop rather than grow the
+      // graph without bound.
+      const remaining = this.queries.getUnresolvedReferencesCount();
+      if (remaining >= prevRemaining) break;
+      prevRemaining = remaining;
     }
 
     // Dynamic-edge synthesis: now that all base `calls` edges are persisted,
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index de9a3c362..a94a129d4 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -624,14 +624,42 @@ export function matchDottedCallChain(
   const method = m[2]; // `bar`
   const lastDot = inner.lastIndexOf('.');
 
-  // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
-  // capitalized inner is a class construction, so the receiver's type is the
-  // class itself — resolve the method on it. Only in languages where an
-  // unprefixed capitalized call constructs the class (Kotlin, Swift); in Java/C#
-  // a bare `Foo()` is a method call (constructors need `new`), so we must not
-  // assume construction. A lowercase bare inner is a top-level `factory().method()`
-  // whose type we can't recover — bail.
   if (lastDot <= 0) {
+    // Go: bare package-level factory FUNCTION `New().method()` — the receiver's
+    // type is what `New` returns; resolve the method on that.
+    if (ref.language === 'go') {
+      const ret = lookupCalleeReturnType(inner, ref, context);
+      if (ret) {
+        return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context));
+      }
+      // `inner` isn't a function with a captured return type — typically a
+      // package-level VARIABLE holding a function value (e.g. gin's `engine()`),
+      // whose type we can't recover. Fall back to bare-name resolution of the
+      // method so we don't DROP an edge the un-re-encoded bare path would have
+      // found. (When `inner` IS a real factory function but the method doesn't
+      // exist on its return type, `ret` is truthy and we returned no edge above —
+      // the absent-method safety guarantee is preserved.)
+      //
+      // CRITICAL: resolve the TARGET via a synthetic bare-name ref, but return the
+      // match tied to the ORIGINAL `ref` (referenceName `inner().method`). The
+      // batched resolver (resolveAndPersistBatched) reads unresolved rows from
+      // offset 0 every pass and relies on deleteSpecificResolvedReferences —
+      // keyed on referenceName — to clear each resolved row so the batch empties.
+      // If we propagated the synthetic ref's bare `method` as `.original`, the
+      // delete would never match the stored `inner().method` row, the batch would
+      // never drain, and the loop would re-resolve + re-insert forever (a runaway
+      // that grew gin's graph to 5M edges / 1.4 GB before this fix).
+      const bareRef = { ...ref, referenceName: method };
+      const bareMatch = matchByExactName(bareRef, context) ?? matchFuzzy(bareRef, context);
+      return bareMatch ? { ...bareMatch, original: ref } : null;
+    }
+    // Constructor receiver `Foo(args).method()` (encoded `Foo().method`): a bare,
+    // capitalized inner is a class construction, so the receiver's type is the
+    // class itself — resolve the method on it. Only in languages where an
+    // unprefixed capitalized call constructs the class (Kotlin, Swift); in Java/C#
+    // a bare `Foo()` is a method call (constructors need `new`), so we must not
+    // assume construction. A lowercase bare inner is a top-level `factory().method()`
+    // whose type we can't recover — bail.
     if (!CONSTRUCTS_VIA_BARE_CALL.has(ref.language) || !/^[A-Z]/.test(inner)) return null;
     return resolveMethodOnType(inner, method, ref, context, 0.85, 'instance-method', importedFqnOf(inner, ref, context));
   }
@@ -1091,15 +1119,16 @@ export function matchReference(
     if (result) return result;
   }
 
-  // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C# / Swift) —
-  // `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar` (#645/#608
-  // mechanism). Resolve bar's class from getInstance's declared return type, then
-  // validate the method on it.
+  // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C# / Swift /
+  // Go) — `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar`, or Go's
+  // bare-factory `New().Method()` as `New().Method` (#645/#608 mechanism). Resolve
+  // the method's class from the inner call's declared return type, then validate it.
   if (
     ref.language === 'java' ||
     ref.language === 'kotlin' ||
     ref.language === 'csharp' ||
-    ref.language === 'swift'
+    ref.language === 'swift' ||
+    ref.language === 'go'
   ) {
     result = matchDottedCallChain(ref, context);
     if (result) return result;

From 2f96f58cbbb77410fa606412bb7d9531223b8437 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 12:09:33 -0400
Subject: [PATCH 19/51] fix(scala): resolve chained static-factory/apply calls
 Foo.create().bar() (#750) (#761)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports the #645 (C++) / #608 (PHP) chained-receiver mechanism to Scala. A call
whose receiver is itself a call — `Foo.create().bar()` (companion factory),
`Builder(cfg).bar()` (case-class apply), or a fluent chain — used to drop the
receiver to a bare `bar`, which name-matched a same-named method on an unrelated
type. The most common wrong edge was a stdlib `Option`/`Iterator` `.map`/`.flatMap`/
`.foreach` mis-attributed onto the project's own same-named class.

- scala.ts: `getReturnType` reads the `return_type` field — generic `List[Foo]`
  → container `List`, qualified `pkg.Foo` → `Foo`, `this.type` left undefined.
- tree-sitter.ts: re-encode `Foo.create().bar` when the inner call's receiver chain
  starts with a capital (companion factory / case-class apply); instance chains
  (`list.map().filter()`) stay bare.
- name-matcher.ts: `scala` joins the dotted-chain gate + CONSTRUCTS_VIA_BARE_CALL
  (case-class `apply` constructs the class); resolveMethodOnType validates, so a
  non-conventional `apply` returning another type yields no edge, not a wrong one.
- index.ts: `scala` joins CHAIN_LANGUAGES so trait-inherited methods resolve via
  the conformance second pass.

Validation: 4 synthetic tests (factory+decoy, case-class apply, trait conformance,
absent-method safety). Real-repo A/B on gatling (750 Scala files): +14 / -59 unique
edges — all corrections. The +14 are retargets (e.g. `HttpProtocolBuilder(cfg).baseUrl`
now resolves to HttpProtocolBuilder::baseUrl, not the same-named private BaseUrlSupport
helper); the -59 are wrong edges removed (stdlib Option/Iterator monad calls
mis-tied to the project's Validation::*, self-loops, decoy collisions) — zero genuine
factory chains dropped (verified: gatling has no real Validation.success().map() chains).
db stable at 40 MB. EXTRACTION_VERSION 12→13. Full suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 98 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/languages/scala.ts    | 23 +++++++
 src/extraction/tree-sitter.ts        |  8 ++-
 src/resolution/index.ts              |  2 +-
 src/resolution/name-matcher.ts       | 17 +++--
 7 files changed, 142 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2f3a3de7..7ba20dab3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### Fixes
 
 - Go method calls made through a chained factory function now resolve to the correct type. A call like `New().Method()` used to drop the receiver, so the chained method attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Go return types (a pointer `*Foo` resolves to `Foo`, and a multi-return `(*Foo, error)` to its first result), infers the chained receiver's type from what the factory function returns, and resolves the method on it — including methods promoted from an embedded struct — creating the edge only when the type or an embedded type genuinely has the method. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Go)
+- Scala method calls made through a companion-object factory, a fluent chain, or a case-class `apply` now resolve to the correct type. A call like `Foo.create().bar()` or `Builder(cfg).bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.flatMap` / `.foreach` onto your own same-named class. CodeGraph now captures Scala return types (a generic `List[Foo]` resolves to its container `List`, a qualified `pkg.Foo` to `Foo`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a trait the type extends — creating the edge only when that type or one of its traits genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Scala indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Scala)
 - Rust method calls made through a chained associated function now resolve to the correct type. A call like `Foo::new().bar()` or `Foo::with(cfg).build()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Rust return types (`-> Self` resolves to the implementing type), infers the chained receiver's type from what the associated function returns, and resolves the method on it — including methods provided by a trait the type implements (via the new `impl Trait for Type` relationships) — creating the edge only when the type or one of its traits genuinely has the method. Existing Rust indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Rust)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 5f3a80719..f4f80a3c4 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2728,4 +2728,102 @@ func caller() { engine().ServeHTTP() }
       expect(rawCalls.length).toBeLessThan(5);
     });
   });
+
+  describe('Scala chained static-factory call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const method = cg.getNodesByKind('method').find((m) => m.qualifiedName === qualifiedName);
+      if (!method) return [];
+      const callers = new Set<string>(); // distinct names of nodes with a `calls` edge into it
+      for (const edge of cg.getIncomingEdges(method.id)) {
+        const name = edge.kind === 'calls' ? cg.getNode(edge.source)?.name : undefined;
+        if (name) callers.add(name);
+      }
+      return [...callers].sort();
+    }
+
+    it('resolves a companion-factory chain Foo.create().doIt() to the return type, never a same-named decoy', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.scala'),
+        `object Foo {
+  def create(): Bar = new Bar()
+}
+class Bar {
+  def doIt(): Unit = {}
+}
+class Decoy {
+  def doIt(): Unit = {}
+}
+object Main {
+  def run(): Unit = { Foo.create().doIt() }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Bar::doIt')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::doIt')).toEqual([]);
+    });
+
+    it('resolves a case-class apply construction Point(x).dist() on the constructed class', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.scala'),
+        `class Point(x: Int) {
+  def dist(): Int = x
+}
+class Other {
+  def dist(): Int = 0
+}
+object Main {
+  def run(): Unit = { Point(3).dist() }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Point::dist')).toEqual(['run']);
+      expect(callerNamesOf('Other::dist')).toEqual([]);
+    });
+
+    it('resolves a chained method provided by a trait the return type extends (via conformance)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.scala'),
+        `trait Base {
+  def shared(): Unit = {}
+}
+class Widget extends Base
+class Decoy {
+  def shared(): Unit = {}
+}
+object Factory {
+  def make(): Widget = new Widget()
+}
+object Main {
+  def run(): Unit = { Factory.make().shared() }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Base::shared')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::shared')).toEqual([]);
+    });
+
+    it('creates NO edge when neither the factory return type nor a supertype has the method (silent miss)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'Main.scala'),
+        `object Foo {
+  def create(): Bar = new Bar()
+}
+class Bar {
+}
+class Other {
+  def onlyOther(): Unit = {}
+}
+object Main {
+  def run(): Unit = { Foo.create().onlyOther() }
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Bar has no onlyOther() — must not mis-attach to the same-named Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 85b796f7a..ebded6525 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 12;
+export const EXTRACTION_VERSION = 13;
diff --git a/src/extraction/languages/scala.ts b/src/extraction/languages/scala.ts
index d7d10ee06..0e17e62df 100644
--- a/src/extraction/languages/scala.ts
+++ b/src/extraction/languages/scala.ts
@@ -44,6 +44,28 @@ function emitScalaTypeRefs(typeNode: SyntaxNode, fromId: string, ctx: { addUnres
   }
 }
 
+/**
+ * Capture a Scala method's declared return type as a bare type name, for the
+ * chained static-factory / fluent call mechanism (#750). `def create(): Bar`
+ * yields `Bar`; a generic yields its base at any nesting depth (`List[Bar]` →
+ * `List`, `Future[Option[Bar]]` → `Future` — the method is on the container,
+ * not the element); a qualified `pkg.Bar` yields `Bar`. A `this.type` singleton
+ * self-type, or any text that doesn't reduce to a plain identifier, returns
+ * undefined so the chain falls through rather than inferring a wrong receiver.
+ */
+function extractScalaReturnType(node: SyntaxNode, source: string): string | undefined {
+  const rt = node.childForFieldName('return_type');
+  if (!rt) return undefined;
+  let base = getNodeText(rt, source).trim();
+  if (base.startsWith('this.')) return undefined; // `this.type` singleton — unhandled
+  // Strip generic args innermost-first so nested ones reduce too; a single
+  // non-nested pass turned `Future[Option[Bar]]` into `Future]` and bailed.
+  while (/\[[^[\]]*\]/.test(base)) base = base.replace(/\[[^[\]]*\]/g, '');
+  const last = base.replace(/\s+/g, '').split('.').pop(); // qualified pkg.Bar → Bar
+  if (!last || !/^[A-Za-z_]\w*$/.test(last)) return undefined;
+  return last;
+}
+
 function extractVisibility(node: SyntaxNode): 'public' | 'private' | 'protected' {
   for (let i = 0; i < node.namedChildCount; i++) {
     const child = node.namedChild(i);
@@ -77,6 +99,7 @@ export const scalaExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'parameters',
   returnField: 'return_type',
+  getReturnType: extractScalaReturnType,
   interfaceKind: 'trait',
 
   classifyClassNode: (node: SyntaxNode) => {
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index ace1c2daf..86c4c6d60 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2530,7 +2530,8 @@ export class TreeSitterExtractor {
                 this.language === 'kotlin' ||
                 this.language === 'swift' ||
                 this.language === 'rust' ||
-                this.language === 'go') &&
+                this.language === 'go' ||
+                this.language === 'scala') &&
               receiver &&
               receiver.type === 'call_expression'
             ) {
@@ -2572,6 +2573,11 @@ export class TreeSitterExtractor {
                 // only drop the edge. C/C++ re-encode any inner.
                 if (this.language === 'rust') reencode = innerFn?.type === 'scoped_identifier';
                 else if (this.language === 'go') reencode = innerFn?.type === 'identifier';
+                // Scala: only a companion-factory / case-class-apply chain whose
+                // receiver chain starts with a capitalized type (`Foo.create().bar()`,
+                // `Foo(args).bar()`). An instance chain (`list.map().filter()`) has a
+                // lowercase receiver whose type we can't recover — leave it bare.
+                else if (this.language === 'scala') reencode = /^[A-Z]/.test(innerCallee);
                 else reencode = !!innerCallee;
               }
               calleeName = reencode ? `${innerCallee}().${methodName}` : methodName;
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index 9626687ae..c9ab40522 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -37,7 +37,7 @@ const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
  * second pass. Dotted-receiver languages resolve via matchDottedCallChain; the
  * `::`-receiver ones (Rust) via matchScopedCallChain.
  */
-const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go']);
+const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go', 'scala']);
 const SCOPED_CHAIN_LANGUAGES = new Set(['rust']);
 
 /** The extractor's chained-receiver encoding: `<inner>().<method>`. */
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index a94a129d4..257bab95c 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -600,9 +600,12 @@ export function matchScopedCallChain(
 /**
  * Languages where an unprefixed capitalized call `Foo(args)` constructs the
  * class (so a `Foo(args).method()` receiver's type is `Foo`). Java/C# need `new`,
- * so a bare `Foo()` there is a method call, not construction — excluded.
+ * so a bare `Foo()` there is a method call, not construction — excluded. Scala's
+ * `Foo(args)` is a case-class / companion `apply`, which conventionally returns
+ * `Foo` — and resolveMethodOnType validates, so a non-conventional `apply` that
+ * returns another type simply yields no edge rather than a wrong one.
  */
-const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift']);
+const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift', 'scala']);
 
 /**
  * Resolve a dotted chained call whose receiver is a static factory / fluent call —
@@ -1120,15 +1123,17 @@ export function matchReference(
   }
 
   // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C# / Swift /
-  // Go) — `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar`, or Go's
-  // bare-factory `New().Method()` as `New().Method` (#645/#608 mechanism). Resolve
-  // the method's class from the inner call's declared return type, then validate it.
+  // Go / Scala) — `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar`,
+  // Go's bare-factory `New().Method()` as `New().Method`, or Scala's companion
+  // factory `Foo.create().bar()` (#645/#608 mechanism). Resolve the method's class
+  // from the inner call's declared return type, then validate it.
   if (
     ref.language === 'java' ||
     ref.language === 'kotlin' ||
     ref.language === 'csharp' ||
     ref.language === 'swift' ||
-    ref.language === 'go'
+    ref.language === 'go' ||
+    ref.language === 'scala'
   ) {
     result = matchDottedCallChain(ref, context);
     if (result) return result;

From 16c73e2b0e027411e22039baeb32fbe60ab42b4c Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Tue, 9 Jun 2026 12:53:04 -0400
Subject: [PATCH 20/51] fix(dart): resolve chained static-factory / constructor
 calls Foo.create().bar() (#750) (#762)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports the #645/#608 chained-receiver mechanism to Dart, plus makes Dart factory
and named constructors first-class so their chains can resolve at all. A call
whose receiver is itself a call — `Foo.create().bar()` (static factory or
factory/named constructor) — used to drop the receiver to a bare `bar`, which
name-matched a same-named method on an unrelated type (commonly a stdlib
`Iterable`/`Stream` `.map`/`.where` mis-tied to the project's own class).

- dart.ts: extractBareCall now re-encodes `Foo.create().bar` when the chain
  starts with a capitalized type; getReturnType captures the return type (generic
  `List<Foo>` → `List`); factory (`factory Foo.create()`) and named (`Foo._()`)
  constructors are indexed as `Foo::create` / `Foo::_` with return type = the
  class (via resolveName + getReturnType + constructor_signature in methodTypes).
- The UNNAMED ctor `Foo()` is deliberately NOT extracted (isMisparsedFunction),
  so plain construction stays an `instantiates` edge to the class rather than a
  call to a phantom `Foo::Foo` method.
- dartCtorInfo validates a "constructor" against the enclosing class name, so a
  method tree-sitter MISPARSES as a constructor — `@override (A, B) m()`, where
  the annotation swallows the record return type and `m()` looks like a one-id
  constructor_signature — is still extracted as the method it is (regression
  found on localsend; covered by a new test).
- name-matcher.ts / index.ts: `dart` joins the dotted-chain gate,
  CONSTRUCTS_VIA_BARE_CALL (bare `Foo()` construction), and CHAIN_LANGUAGES (conformance
  for superclass/mixin methods). resolveMethodOnType validates, so a wrong
  inference yields no edge.

Validation: 7 synthetic tests (static factory, factory/named ctor, construction,
conformance, absent-method safety, the misparse regression, instantiation-not-
hijacked). Real-repo A/B on localsend (368 Dart files): hand-written +17/-10 — all
corrections (the -10 = 7 wrong stdlib/extension misattributions removed + 3 ctor
source-renames), plus additive factory/named-ctor call resolution. Instantiation
preserved; no node explosion. EXTRACTION_VERSION 13->14. Full suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |   1 +
 __tests__/resolution.test.ts         | 168 +++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |   2 +-
 src/extraction/languages/dart.ts     | 165 +++++++++++++++++++++++++-
 src/resolution/index.ts              |   2 +-
 src/resolution/name-matcher.ts       |  14 ++-
 6 files changed, 343 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ba20dab3..382cec9c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Go method calls made through a chained factory function now resolve to the correct type. A call like `New().Method()` used to drop the receiver, so the chained method attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Go return types (a pointer `*Foo` resolves to `Foo`, and a multi-return `(*Foo, error)` to its first result), infers the chained receiver's type from what the factory function returns, and resolves the method on it — including methods promoted from an embedded struct — creating the edge only when the type or an embedded type genuinely has the method. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Go)
 - Scala method calls made through a companion-object factory, a fluent chain, or a case-class `apply` now resolve to the correct type. A call like `Foo.create().bar()` or `Builder(cfg).bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.flatMap` / `.foreach` onto your own same-named class. CodeGraph now captures Scala return types (a generic `List[Foo]` resolves to its container `List`, a qualified `pkg.Foo` to `Foo`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a trait the type extends — creating the edge only when that type or one of its traits genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Scala indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Scala)
 - Rust method calls made through a chained associated function now resolve to the correct type. A call like `Foo::new().bar()` or `Foo::with(cfg).build()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Rust return types (`-> Self` resolves to the implementing type), infers the chained receiver's type from what the associated function returns, and resolves the method on it — including methods provided by a trait the type implements (via the new `impl Trait for Type` relationships) — creating the edge only when the type or one of its traits genuinely has the method. Existing Rust indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Rust)
+- Dart method calls made through a static factory, a factory or named constructor, or a fluent chain now resolve to the correct type. A call like `Foo.create().bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.where` onto your own same-named class. CodeGraph now indexes Dart **factory and named constructors** (`factory Foo.create()`, `Foo.named()`) as first-class members so calls to them resolve, captures Dart return types (a generic `List<Foo>` resolves to its container `List`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a superclass or mixin — creating the edge only when that type genuinely has the method. Plain construction (`Foo(...)`) is still recorded as instantiation. Existing Dart indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Dart)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index f4f80a3c4..ea2b3c5ca 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2826,4 +2826,172 @@ object Main {
       expect(callerNamesOf('Other::onlyOther')).toEqual([]);
     });
   });
+
+  describe('Dart chained static-factory / factory-constructor call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves a static-factory chain Foo.makeBar().doIt() to the return type, never a same-named decoy', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.dart'),
+        `class Foo {
+  static Bar makeBar() => Bar();
+}
+class Bar {
+  void doIt() {}
+}
+class Decoy {
+  void doIt() {}
+}
+void run() {
+  Foo.makeBar().doIt();
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Bar::doIt')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::doIt')).toEqual([]);
+    });
+
+    it('resolves a named factory-constructor chain Foo.create().ship() on the constructed class', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.dart'),
+        `class Foo {
+  Foo._();
+  factory Foo.create() => Foo._();
+  void ship() {}
+}
+class Decoy {
+  void ship() {}
+}
+void run() {
+  Foo.create().ship();
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // The factory constructor `Foo.create` is now a node whose return type is Foo,
+      // so `ship` resolves on Foo, not the same-named Decoy.
+      expect(callerNamesOf('Foo::ship')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::ship')).toEqual([]);
+    });
+
+    it('resolves a constructor-receiver chain Bar().doIt() on the constructed class', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.dart'),
+        `class Bar {
+  void doIt() {}
+}
+class Decoy {
+  void doIt() {}
+}
+void run() {
+  Bar().doIt();
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Bar::doIt')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::doIt')).toEqual([]);
+    });
+
+    it('resolves a chained method inherited from a superclass the return type extends (via conformance)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.dart'),
+        `class Base {
+  void render() {}
+}
+class Widget extends Base {
+  static Widget make() => Widget();
+}
+class Decoy {
+  void render() {}
+}
+void run() {
+  Widget.make().render();
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Base::render')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::render')).toEqual([]);
+    });
+
+    it('creates NO edge when neither the factory return type nor a supertype has the method (silent miss)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.dart'),
+        `class Foo {
+  static Bar makeBar() => Bar();
+}
+class Bar {
+}
+class Other {
+  void onlyOther() {}
+}
+void run() {
+  Foo.makeBar().onlyOther();
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Bar has no onlyOther() — must not mis-attach to the same-named Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+
+    it('still extracts a method tree-sitter misparses as a constructor (@override + record return)', async () => {
+      // tree-sitter-dart misparses `@override (A, B) reduce()` — the annotation
+      // swallows the record return type, so `reduce()` looks like a single-
+      // identifier constructor_signature. It must NOT be skipped as an unnamed
+      // ctor (its name doesn't match the class); its body call must attribute to
+      // `reduce`, not the class.
+      fs.writeFileSync(
+        path.join(tempDir, 'main.dart'),
+        `class Base {}
+class Action extends Base {
+  Action({required int x});
+  @override
+  (int, String) reduce() {
+    return (compute(), "y");
+  }
+  int compute() => 1;
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // reduce must be a node and its body call must resolve to Action::compute.
+      expect(callerNamesOf('Action::compute')).toEqual(['reduce']);
+    });
+
+    it('keeps plain construction Foo() as instantiation, not a Foo::Foo method call', async () => {
+      // The unnamed constructor is intentionally NOT extracted as a `Foo::Foo`
+      // method, so `Foo(...)` resolves to the class (an `instantiates` edge),
+      // never hijacked into a call to a phantom constructor method.
+      fs.writeFileSync(
+        path.join(tempDir, 'main.dart'),
+        `class Widget {
+  final int x;
+  Widget(this.x);
+}
+void run() {
+  Widget(3);
+}
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // No Foo::Foo phantom method node.
+      expect(cg.getNodesByKind('method').some((n) => n.qualifiedName === 'Widget::Widget')).toBe(false);
+      // The construction resolves to the class as an `instantiates` edge.
+      const widget = cg.getNodesByKind('class').find((n) => n.name === 'Widget')!;
+      const incoming = cg.getIncomingEdges(widget.id);
+      expect(incoming.some((e) => e.kind === 'instantiates')).toBe(true);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index ebded6525..70ede13de 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 13;
+export const EXTRACTION_VERSION = 14;
diff --git a/src/extraction/languages/dart.ts b/src/extraction/languages/dart.ts
index 5b545d048..bd8797c7c 100644
--- a/src/extraction/languages/dart.ts
+++ b/src/extraction/languages/dart.ts
@@ -2,10 +2,128 @@ import type { Node as SyntaxNode } from 'web-tree-sitter';
 import { getNodeText } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
 
+/**
+ * The `function_signature` carrying a method's return type — unwrapped from a
+ * `method_signature` wrapper (Dart nests the signature one level for methods).
+ */
+function dartInnerSignature(node: SyntaxNode): SyntaxNode {
+  if (node.type === 'method_signature') {
+    const inner = node.namedChildren.find((c: SyntaxNode) =>
+      c.type === 'function_signature' || c.type === 'getter_signature' || c.type === 'setter_signature'
+    );
+    if (inner) return inner;
+  }
+  return node;
+}
+
+/**
+ * The factory/named-constructor signature inside a node, if any. A constructor
+ * parses as `method_signature > {factory_,}constructor_signature` (e.g.
+ * `factory Foo.create()` or `Foo._()`), whose children are the class identifier
+ * and — for a named ctor — the constructor-name identifier.
+ */
+function dartConstructorSignature(node: SyntaxNode): SyntaxNode | undefined {
+  if (node.type === 'factory_constructor_signature' || node.type === 'constructor_signature') {
+    return node;
+  }
+  if (node.type === 'method_signature') {
+    return node.namedChildren.find((c: SyntaxNode) =>
+      c.type === 'factory_constructor_signature' || c.type === 'constructor_signature'
+    );
+  }
+  return undefined;
+}
+
+/** The name of the class/mixin/extension/enum lexically enclosing `node`. */
+function dartEnclosingTypeName(node: SyntaxNode): string | undefined {
+  let p = node.parent;
+  while (p) {
+    if (
+      p.type === 'class_definition' || p.type === 'mixin_declaration' ||
+      p.type === 'extension_declaration' || p.type === 'enum_declaration'
+    ) {
+      return p.childForFieldName('name')?.text;
+    }
+    p = p.parent;
+  }
+  return undefined;
+}
+
+/**
+ * Validated constructor info for `node`, or undefined if it isn't genuinely a
+ * constructor. A constructor signature is structurally `<Class>` or
+ * `<Class>.<name>`, but tree-sitter-dart MISPARSES `@override (T) m()` — the
+ * annotation swallows the record return type `(T)`, leaving `m()` looking like a
+ * single-identifier constructor_signature. We disambiguate by the class name:
+ * a real ctor's class identifier matches the enclosing type; a misparsed method
+ * (`reduce` inside class `Action`) doesn't, and is treated as the method it is.
+ */
+function dartCtorInfo(node: SyntaxNode): { className: string; ctorName: string } | undefined {
+  const ctor = dartConstructorSignature(node);
+  if (!ctor) return undefined;
+  const ids = ctor.namedChildren.filter((c: SyntaxNode) => c.type === 'identifier');
+  const className = dartEnclosingTypeName(node);
+  if (!className || !ids[0]) return undefined;
+  if (ids[0].text !== className) return undefined; // misparsed method, not a ctor
+  // `<Class>.<name>` is a named ctor; bare `<Class>` is the unnamed ctor.
+  return { className, ctorName: ids[1]?.text ?? className };
+}
+
+/**
+ * Capture a Dart method/function's declared return type as a bare type name, for
+ * the chained static-factory / fluent call mechanism (#750). `Bar makeBar()`
+ * yields `Bar`; a generic `List<Foo>` yields its container `List` (the method is
+ * on the container, not the element); a prefixed `prefix.Bar` yields `Bar`. A
+ * factory / named constructor returns its enclosing class implicitly, so its
+ * "return type" is the class.
+ */
+function extractDartReturnType(node: SyntaxNode, source: string): string | undefined {
+  const ctor = dartCtorInfo(node);
+  if (ctor) return ctor.className;
+  const sig = dartInnerSignature(node);
+  // The return type precedes the method name; it's the first type_identifier
+  // (generic args sit in a sibling `type_arguments`, so this is the container).
+  const retType = sig.namedChildren.find((c: SyntaxNode) => c.type === 'type_identifier');
+  if (!retType) return undefined;
+  const text = getNodeText(retType, source).replace(/<[^>]*>/g, '').trim();
+  const last = text.split('.').pop(); // prefixed `p.Bar` → `Bar`
+  if (!last || !/^[A-Za-z_]\w*$/.test(last)) return undefined;
+  return last;
+}
+
+/**
+ * The callee name of the Dart call whose `argument_part` selector is `argPart`
+ * — mirrors the main extractBareCall accessor logic so a chained receiver
+ * (`Foo.create()` in `Foo.create().bar()`) can be reconstructed. Returns
+ * `Foo.create`, a bare `create`, or `Foo` (constructor) — or undefined.
+ */
+function dartCalleeOfArgPart(argPart: SyntaxNode): string | undefined {
+  const prev = argPart.previousNamedSibling;
+  if (!prev) return undefined;
+  if (prev.type === 'identifier') return prev.text; // bare `Foo()` / `create()`
+  if (prev.type === 'selector') {
+    const accessor = prev.namedChildren.find((c: SyntaxNode) =>
+      c.type === 'unconditional_assignable_selector' || c.type === 'conditional_assignable_selector'
+    );
+    const methodId = accessor?.namedChildren.find((c: SyntaxNode) => c.type === 'identifier');
+    if (methodId) {
+      const accessorPrev = prev.previousNamedSibling;
+      if (accessorPrev?.type === 'identifier') return accessorPrev.text + '.' + methodId.text;
+      return methodId.text;
+    }
+  }
+  return undefined;
+}
+
 export const dartExtractor: LanguageExtractor = {
   functionTypes: ['function_signature'],
   classTypes: ['class_definition'],
-  methodTypes: ['method_signature'],
+  // `method_signature` covers regular methods AND factory constructors (which
+  // parse as method_signature > factory_constructor_signature). A plain named
+  // constructor `Foo._()` parses as a bare `constructor_signature`, so include
+  // it too — resolveName names it by the ctor name and getReturnType gives it
+  // the class as its return type, so `Foo._().bar()` chains resolve (#750).
+  methodTypes: ['method_signature', 'constructor_signature'],
   interfaceTypes: [],
   structTypes: [],
   enumTypes: ['enum_declaration'],
@@ -33,6 +151,19 @@ export const dartExtractor: LanguageExtractor = {
   bodyField: 'body', // class_definition uses 'body' field
   paramsField: 'formal_parameter_list',
   returnField: 'type',
+  getReturnType: extractDartReturnType,
+  isMisparsedFunction: (_name, node) => {
+    // Skip the UNNAMED constructor `Foo()` (its ctor name equals the class). It's
+    // ordinary construction — an `instantiates` edge to the class `Foo` — so
+    // extracting it as a `Foo::Foo` method node would hijack instantiation
+    // resolution (a `Foo(...)` call would resolve to the ctor method, not the
+    // class). NAMED ctors `Foo.create()` / `Foo._()` ARE kept so their chains
+    // resolve (#750). dartCtorInfo validates against the class name, so a method
+    // tree-sitter misparsed as a ctor (`@override (T) m()`) is NOT skipped here.
+    // (isMisparsedFunction skips node creation but still visits the body.)
+    const ctor = dartCtorInfo(node);
+    return ctor != null && ctor.ctorName === ctor.className;
+  },
   getSignature: (node, source) => {
     // For function_signature: extract params + return type
     // For method_signature: delegate to inner function_signature
@@ -88,6 +219,23 @@ export const dartExtractor: LanguageExtractor = {
     }
     return false;
   },
+  resolveName: (node) => {
+    // Name a factory / named constructor by its constructor name — the 2nd
+    // identifier (`create` in `factory Foo.create()`, `_` in `Foo._()`) — not
+    // the class, so a call `Foo.create()` resolves to `Foo::create` (#750). The
+    // default Dart naming returns the FIRST identifier (the class), which
+    // collides every named ctor onto `Foo::Foo` and leaves `Foo.create()`
+    // unresolvable. An unnamed ctor `Foo()` has a single identifier — fall
+    // through (undefined) to the default class name. Letting the core's
+    // extractMethod own the factory (rather than a custom visitNode) keeps the
+    // body attribution intact: calls inside `factory Foo.create() { … }` are
+    // attributed to `Foo::create`, and getReturnType gives it return type Foo.
+    const ctor = dartCtorInfo(node);
+    // A named ctor `Foo.create` → `create`; the unnamed ctor `Foo()` → undefined
+    // (default naming gives the class name `Foo`, which is correct).
+    if (ctor && ctor.ctorName !== ctor.className) return ctor.ctorName;
+    return undefined;
+  },
   extractImport: (node, source) => {
     const importText = source.substring(node.startIndex, node.endIndex).trim();
     let moduleName = '';
@@ -160,6 +308,21 @@ export const dartExtractor: LanguageExtractor = {
             if (accessorPrev?.type === 'identifier') {
               return accessorPrev.text + '.' + methodId.text;
             }
+            // Chained static-factory / fluent call: the receiver is itself a call
+            // (`Foo.create().bar()`), so accessorPrev is that call's argument_part
+            // selector. Encode `<innerCallee>().<method>` so resolution can infer
+            // bar's class from what `Foo.create` RETURNS (#645/#608 mechanism) —
+            // but only when the chain starts with a capitalized type (a companion
+            // factory / static method / constructor); an instance chain
+            // (`obj.foo().bar()`) keeps the bare name (its receiver's type can't
+            // be recovered here).
+            if (accessorPrev?.type === 'selector' &&
+                accessorPrev.namedChildren.some((c: SyntaxNode) => c.type === 'argument_part')) {
+              const innerCallee = dartCalleeOfArgPart(accessorPrev);
+              if (innerCallee && /^[A-Z]/.test(innerCallee)) {
+                return `${innerCallee}().${methodId.text}`;
+              }
+            }
             return methodId.text;
           }
         }
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index c9ab40522..a8cdbe707 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -37,7 +37,7 @@ const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
  * second pass. Dotted-receiver languages resolve via matchDottedCallChain; the
  * `::`-receiver ones (Rust) via matchScopedCallChain.
  */
-const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go', 'scala']);
+const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go', 'scala', 'dart']);
 const SCOPED_CHAIN_LANGUAGES = new Set(['rust']);
 
 /** The extractor's chained-receiver encoding: `<inner>().<method>`. */
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index 257bab95c..fff8219f5 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -605,7 +605,7 @@ export function matchScopedCallChain(
  * `Foo` — and resolveMethodOnType validates, so a non-conventional `apply` that
  * returns another type simply yields no edge rather than a wrong one.
  */
-const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift', 'scala']);
+const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift', 'scala', 'dart']);
 
 /**
  * Resolve a dotted chained call whose receiver is a static factory / fluent call —
@@ -1123,17 +1123,19 @@ export function matchReference(
   }
 
   // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C# / Swift /
-  // Go / Scala) — `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar`,
-  // Go's bare-factory `New().Method()` as `New().Method`, or Scala's companion
-  // factory `Foo.create().bar()` (#645/#608 mechanism). Resolve the method's class
-  // from the inner call's declared return type, then validate it.
+  // Go / Scala / Dart) — `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar`,
+  // Go's bare-factory `New().Method()` as `New().Method`, Scala's companion factory
+  // `Foo.create().bar()`, or Dart's static factory / factory-constructor
+  // `Foo.create().bar()` (#645/#608 mechanism). Resolve the method's class from the
+  // inner call's declared return type, then validate it.
   if (
     ref.language === 'java' ||
     ref.language === 'kotlin' ||
     ref.language === 'csharp' ||
     ref.language === 'swift' ||
     ref.language === 'go' ||
-    ref.language === 'scala'
+    ref.language === 'scala' ||
+    ref.language === 'dart'
   ) {
     result = matchDottedCallChain(ref, context);
     if (result) return result;

From d21d2dfa504b1e61979ea518e7f7a07a60571c0a Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 00:35:49 -0400
Subject: [PATCH 21/51] fix(objc): resolve chained message-send calls [[Foo
 create] doIt] (#750) (#786)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports the #645/#608 chained-receiver mechanism to Objective-C. A message send
whose receiver is itself a message send — `[[Foo create] doIt]` — used to drop
the receiver, so `doIt` name-matched a same-named method on an unrelated class
(commonly a test helper's `init` or an Apple-SDK method).

- objc.ts: getReturnType reads the method's `method_type`, SKIPPING nullability /
  ARC qualifiers (`nonnull instancetype` must yield instancetype, not `nonnull`).
- tree-sitter.ts: the message_expression branch now re-encodes a chained send
  `[[Foo create] doIt]` as `Foo.create().doIt` when the inner receiver is a
  capitalized class and the outer selector is unary.
- name-matcher.ts: `objc` joins the dotted-chain gate + CHAIN_LANGUAGES. A
  class-message factory returns an instance of the RECEIVER class by convention
  (`instancetype`), so when the factory's own return type isn't recoverable
  (`alloc`/`new`/`shared…` return instancetype, or aren't user nodes), the
  chained receiver (the inner send's result) is typed as that class itself — this
  resolves the ubiquitous `[[X alloc] init]` and singleton chains.
  resolveMethodOnType validates against the class and its supertypes, so a wrong
  inference yields no edge.

Validation: 4 synthetic tests (factory+decoy, superclass conformance, absent-method
safety, the nonnull-instancetype singleton). Real-repo A/B on SDWebImage (208 files):
+35 / -75 — all corrections (the -75 are wrong `init` mis-matches to a test helper /
wrong class, retargeted to the right class's init in the +35, plus 2 Apple-SDK chains
on unindexed classes). db stable, no node explosion. EXTRACTION_VERSION 14->15.
Full suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |   1 +
 __tests__/resolution.test.ts         | 137 +++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |   2 +-
 src/extraction/languages/objc.ts     |  41 ++++++++
 src/extraction/tree-sitter.ts        |  27 ++++++
 src/resolution/index.ts              |   2 +-
 src/resolution/name-matcher.ts       |  32 +++++--
 7 files changed, 233 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 382cec9c2..fe1778ca2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Scala method calls made through a companion-object factory, a fluent chain, or a case-class `apply` now resolve to the correct type. A call like `Foo.create().bar()` or `Builder(cfg).bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.flatMap` / `.foreach` onto your own same-named class. CodeGraph now captures Scala return types (a generic `List[Foo]` resolves to its container `List`, a qualified `pkg.Foo` to `Foo`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a trait the type extends — creating the edge only when that type or one of its traits genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Scala indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Scala)
 - Rust method calls made through a chained associated function now resolve to the correct type. A call like `Foo::new().bar()` or `Foo::with(cfg).build()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Rust return types (`-> Self` resolves to the implementing type), infers the chained receiver's type from what the associated function returns, and resolves the method on it — including methods provided by a trait the type implements (via the new `impl Trait for Type` relationships) — creating the edge only when the type or one of its traits genuinely has the method. Existing Rust indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Rust)
 - Dart method calls made through a static factory, a factory or named constructor, or a fluent chain now resolve to the correct type. A call like `Foo.create().bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.where` onto your own same-named class. CodeGraph now indexes Dart **factory and named constructors** (`factory Foo.create()`, `Foo.named()`) as first-class members so calls to them resolve, captures Dart return types (a generic `List<Foo>` resolves to its container `List`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a superclass or mixin — creating the edge only when that type genuinely has the method. Plain construction (`Foo(...)`) is still recorded as instantiation. Existing Dart indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Dart)
+- Objective-C methods called through a chained message send now resolve to the correct class. A call like `[[Foo create] doIt]` used to drop the receiver, so `doIt` silently attached to a same-named method on an unrelated class — most often a test helper or stdlib class. CodeGraph now captures Objective-C method return types and infers the chained receiver's type from what the inner message returns. For the ubiquitous `[[X alloc] init]` and singleton (`[[X sharedInstance] …]`) patterns — where the factory returns `instancetype` — the receiver is the class `X` itself, so the chained method resolves on `X` (including methods inherited from a superclass), creating the edge only when the class genuinely has the method. Existing Objective-C indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Objective-C)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index ea2b3c5ca..868e9b07a 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -2994,4 +2994,141 @@ void run() {
       expect(incoming.some((e) => e.kind === 'instantiates')).toBe(true);
     });
   });
+
+  describe('Objective-C chained message-send call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+
+    it('resolves a chained message send [[Foo create] doIt] via the return type, never a same-named decoy', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.m'),
+        `@interface Bar : NSObject
+- (void)doIt;
+@end
+@implementation Bar
+- (void)doIt {}
+@end
+@interface Decoy : NSObject
+- (void)doIt;
+@end
+@implementation Decoy
+- (void)doIt {}
+@end
+@interface Foo : NSObject
++ (Bar *)create;
+@end
+@implementation Foo
++ (Bar *)create { return nil; }
+- (void)run { [[Foo create] doIt]; }
+@end
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Bar::doIt')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::doIt')).toEqual([]);
+    });
+
+    it('resolves a chained message whose method is inherited from a superclass (via conformance)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.m'),
+        `@interface Base : NSObject
+- (void)render;
+@end
+@implementation Base
+- (void)render {}
+@end
+@interface Widget : Base
+@end
+@implementation Widget
+@end
+@interface Decoy : NSObject
+- (void)render;
+@end
+@implementation Decoy
+- (void)render {}
+@end
+@interface Factory : NSObject
++ (Widget *)make;
+@end
+@implementation Factory
++ (Widget *)make { return nil; }
+- (void)run { [[Factory make] render]; }
+@end
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Base::render')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::render')).toEqual([]);
+    });
+
+    it('creates NO edge when the factory return type lacks the method (silent miss)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.m'),
+        `@interface Bar : NSObject
+@end
+@implementation Bar
+@end
+@interface Other : NSObject
+- (void)onlyOther;
+@end
+@implementation Other
+- (void)onlyOther {}
+@end
+@interface Foo : NSObject
++ (Bar *)create;
+@end
+@implementation Foo
++ (Bar *)create { return nil; }
+- (void)run { [[Foo create] onlyOther]; }
+@end
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // Bar has no onlyOther — must not mis-attach to the same-named Other::onlyOther.
+      expect(callerNamesOf('Other::onlyOther')).toEqual([]);
+    });
+
+    it('resolves a singleton chain [[Cache shared] clearAll] whose factory returns nonnull instancetype', async () => {
+      // The factory returns `nonnull instancetype` — the nullability qualifier must
+      // be skipped (not captured AS the type), and an instancetype class-message
+      // factory returns the receiver class, so clearAll resolves on Cache, never a
+      // same-named decoy. (Regression for both: the captured-`nonnull` bug and the
+      // ubiquitous `[[X alloc] init]` / singleton pattern.)
+      fs.writeFileSync(
+        path.join(tempDir, 'main.m'),
+        `@interface Cache : NSObject
++ (nonnull instancetype)shared;
+- (void)clearAll;
+@end
+@implementation Cache
++ (nonnull instancetype)shared { return nil; }
+- (void)clearAll {}
+@end
+@interface Decoy : NSObject
+- (void)clearAll;
+@end
+@implementation Decoy
+- (void)clearAll {}
+@end
+@interface Caller : NSObject
+- (void)run;
+@end
+@implementation Caller
+- (void)run { [[Cache shared] clearAll]; }
+@end
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(callerNamesOf('Cache::clearAll')).toEqual(['run']);
+      expect(callerNamesOf('Decoy::clearAll')).toEqual([]);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 70ede13de..2aba578ae 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 14;
+export const EXTRACTION_VERSION = 15;
diff --git a/src/extraction/languages/objc.ts b/src/extraction/languages/objc.ts
index 6671284aa..cf5ecc4d7 100644
--- a/src/extraction/languages/objc.ts
+++ b/src/extraction/languages/objc.ts
@@ -31,6 +31,46 @@ function extractObjcMethodName(node: SyntaxNode, source: string): string | undef
   return identifiers.map((id) => `${getNodeText(id, source)}:`).join('');
 }
 
+/** Nullability / ARC qualifiers that sit where a return type's first type
+ *  identifier does (`(nonnull instancetype)`, `(nullable Bar *)`) — never the type. */
+const OBJC_TYPE_QUALIFIERS = new Set([
+  'nonnull', 'nullable', 'null_unspecified', 'null_resettable',
+  '_Nonnull', '_Nullable', '_Null_unspecified', '__nonnull', '__nullable',
+  'const', 'volatile', 'strong', 'weak', 'copy', 'assign', 'retain', 'oneway',
+  '__strong', '__weak', '__unsafe_unretained', '__autoreleasing', '__kindof',
+]);
+
+/** Collect the type identifiers under a `method_type`, in document order. */
+function collectTypeIdentifiers(node: SyntaxNode, source: string, out: string[]): void {
+  if (node.type === 'type_identifier') out.push(getNodeText(node, source).trim());
+  for (let i = 0; i < node.namedChildCount; i++) {
+    const child = node.namedChild(i);
+    if (child) collectTypeIdentifiers(child, source, out);
+  }
+}
+
+/**
+ * Capture an ObjC method's declared return type as a bare class name, for the
+ * chained static-factory call mechanism (#750). `+ (Bar *)create` yields `Bar`;
+ * a nullability/ARC qualifier (`(nonnull instancetype)`, `(nullable Bar *)`) is
+ * skipped to reach the real type. `void` / `id` / `instancetype` / primitives
+ * yield undefined — for a class-message factory that means the receiver's type
+ * is the class itself (handled in resolution), so `[[X alloc] init]` and
+ * singleton chains still resolve.
+ */
+function extractObjcReturnType(node: SyntaxNode, source: string): string | undefined {
+  if (node.type !== 'method_definition' && node.type !== 'method_declaration') return undefined;
+  const methodType = node.namedChildren.find((c) => c.type === 'method_type');
+  if (!methodType) return undefined;
+  const ids: string[] = [];
+  collectTypeIdentifiers(methodType, source, ids);
+  const name = ids.find((n) => !OBJC_TYPE_QUALIFIERS.has(n));
+  if (!name || !/^[A-Za-z_]\w*$/.test(name) || name === 'void' || name === 'id' || name === 'instancetype') {
+    return undefined;
+  }
+  return name;
+}
+
 function extractObjcPropertyName(node: SyntaxNode, source: string): string | null {
   if (node.type !== 'property_declaration') return null;
 
@@ -73,6 +113,7 @@ export const objcExtractor: LanguageExtractor = {
   nameField: 'declarator',
   bodyField: 'body',
   paramsField: 'parameters',
+  getReturnType: extractObjcReturnType,
   resolveName: extractObjcMethodName,
   extractPropertyName: extractObjcPropertyName,
   resolveBody: (node, bodyField) => {
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 86c4c6d60..546c66dc9 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -2482,6 +2482,33 @@ export class TreeSitterExtractor {
           } else {
             calleeName = methodName;
           }
+        } else if (receiverField && receiverField.type === 'message_expression' && /^\w+$/.test(methodName)) {
+          // Chained message send `[[Foo create] doIt]` — the receiver is itself a
+          // class message. Recover the inner `Class.selector` and encode
+          // `Class.selector().doIt` so resolution infers doIt's class from what
+          // `Class.selector` RETURNS (#645/#608). Only a CLASS-factory chain
+          // (capitalized inner receiver); a unary outer selector is required
+          // because the chain resolver's method part is `\w+` (no `:`). An
+          // instance chain (`[[obj foo] bar]`, lowercase inner) stays bare.
+          const innerRecv = getChildByField(receiverField, 'receiver');
+          const innerRecvName = innerRecv ? getNodeText(innerRecv, this.source) : '';
+          if (innerRecv?.type === 'identifier' && /^[A-Z]/.test(innerRecvName)) {
+            const innerKw: string[] = [];
+            for (let i = 0; i < receiverField.namedChildCount; i++) {
+              if (receiverField.fieldNameForNamedChild(i) === 'method') {
+                const kw = receiverField.namedChild(i);
+                if (kw) innerKw.push(getNodeText(kw, this.source));
+              }
+            }
+            let innerColon = false;
+            for (let i = 0; i < receiverField.childCount; i++) {
+              if (receiverField.child(i)?.type === ':') { innerColon = true; break; }
+            }
+            const innerSelector = innerColon ? innerKw.map((k) => `${k}:`).join('') : innerKw[0];
+            calleeName = innerSelector ? `${innerRecvName}.${innerSelector}().${methodName}` : methodName;
+          } else {
+            calleeName = methodName;
+          }
         } else {
           calleeName = methodName;
         }
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index a8cdbe707..9435dac37 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -37,7 +37,7 @@ const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
  * second pass. Dotted-receiver languages resolve via matchDottedCallChain; the
  * `::`-receiver ones (Rust) via matchScopedCallChain.
  */
-const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go', 'scala', 'dart']);
+const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go', 'scala', 'dart', 'objc']);
 const SCOPED_CHAIN_LANGUAGES = new Set(['rust']);
 
 /** The extractor's chained-receiver encoding: `<inner>().<method>`. */
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index fff8219f5..19f0a7a70 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -673,7 +673,23 @@ export function matchDottedCallChain(
   const factoryMethod = inner.slice(lastDot + 1);
   if (!factoryClass || !factoryMethod) return null;
   const ret = lookupCalleeReturnType(`${factoryClass}::${factoryMethod}`, ref, context);
-  if (!ret) return null;
+  if (!ret) {
+    // Objective-C: a class-message factory — `[X alloc]`, `[X new]`,
+    // `[X sharedFoo]` — returns an instance of the RECEIVER class `X` by
+    // convention (`instancetype`). So when the factory's own return type isn't
+    // recoverable (its selector returns `instancetype`, or `alloc`/`new` aren't
+    // user-defined nodes at all), the receiver's type is the class `X` itself.
+    // This resolves the ubiquitous `[[X alloc] init]` and singleton chains.
+    // resolveMethodOnType validates against X (and its supertypes), so a class
+    // whose method actually lives elsewhere yields NO edge, not a wrong one — and
+    // crucially this does NOT fire when a concrete return type WAS captured but
+    // simply lacks the method (that case skips this branch and is validated by the
+    // final return below: absent-method safety, so a decoy is still never matched).
+    if (ref.language === 'objc' && /^[A-Z]/.test(factoryClass)) {
+      return resolveMethodOnType(factoryClass, method, ref, context, 0.8, 'instance-method', importedFqnOf(factoryClass, ref, context));
+    }
+    return null;
+  }
   return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context));
 }
 
@@ -1123,11 +1139,12 @@ export function matchReference(
   }
 
   // 1d. Dotted chained static-factory / fluent call (Java / Kotlin / C# / Swift /
-  // Go / Scala / Dart) — `Foo.getInstance().bar()` encoded as `Foo.getInstance().bar`,
-  // Go's bare-factory `New().Method()` as `New().Method`, Scala's companion factory
-  // `Foo.create().bar()`, or Dart's static factory / factory-constructor
-  // `Foo.create().bar()` (#645/#608 mechanism). Resolve the method's class from the
-  // inner call's declared return type, then validate it.
+  // Go / Scala / Dart / Objective-C) — `Foo.getInstance().bar()` encoded as
+  // `Foo.getInstance().bar`, Go's bare-factory `New().Method()` as `New().Method`,
+  // Scala's companion factory, Dart's static factory / factory-constructor, or
+  // ObjC's chained message send `[[Foo create] doIt]` encoded as `Foo.create().doIt`
+  // (#645/#608 mechanism). Resolve the method's class from the inner call's
+  // declared return type, then validate it.
   if (
     ref.language === 'java' ||
     ref.language === 'kotlin' ||
@@ -1135,7 +1152,8 @@ export function matchReference(
     ref.language === 'swift' ||
     ref.language === 'go' ||
     ref.language === 'scala' ||
-    ref.language === 'dart'
+    ref.language === 'dart' ||
+    ref.language === 'objc'
   ) {
     result = matchDottedCallChain(ref, context);
     if (result) return result;

From a4d19a5ed8b416c54a21c4f7daace660256a54f1 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 00:51:31 -0400
Subject: [PATCH 22/51] docs(design): record the chained static-factory call
 resolution mechanism (#750) (#787)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A checked-in design doc for the #645/#608/#750 chained-call mechanism — the
permanent, discoverable record the work previously lacked (it lived only in git
history, the tracking issue, and an untracked scratch handoff). Covers the 3-part
mechanism, the three shared resolvers + receiver styles, the per-language coverage
matrix (12 shipped with A/B results), the conformance pass, and the full 21-language
README classification (incl. why TypeScript + Luau were skipped and Pascal is blocked).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/design/chained-call-resolution.md | 145 +++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 docs/design/chained-call-resolution.md

diff --git a/docs/design/chained-call-resolution.md b/docs/design/chained-call-resolution.md
new file mode 100644
index 000000000..4cf38ebef
--- /dev/null
+++ b/docs/design/chained-call-resolution.md
@@ -0,0 +1,145 @@
+# Design + status: chained static-factory / fluent call resolution
+
+**Status:** SHIPPED for **12 languages** (C++, C, PHP, Java, Kotlin, C#, Swift, Rust,
+Go, Scala, Dart, Objective-C) + a conformance pass. **TypeScript and Luau were evaluated
+and intentionally skipped** (both gradually typed → the mechanism is +0 / regresses on
+real code). **Pascal/Delphi** is blocked on a larger prerequisite (its method-call
+extraction is broadly incomplete). See "Full README classification" below. Tracking
+issue: **#750** (which began as "the statically-typed README languages" but that
+enumeration was incomplete — it missed ObjC / Pascal / Luau).
+
+**Motivation:** a call whose **receiver is itself a call** — a factory / singleton /
+builder that returns an object — should produce a `calls` edge to the chained method:
+
+```java
+Foo.getInstance().bar();   // bar() should resolve to Foo::bar, never a same-named decoy
+```
+
+Before this work, every statically-typed language **dropped the receiver** and
+name-matched the bare method (`bar`), so in 7 of 9 languages it silently attached to a
+**same-named method on an unrelated type** — a correctness bug, not just missing coverage.
+
+---
+
+## The 3-part mechanism (per language)
+
+1. **Capture the factory's declared return type** — a per-language `getReturnType`
+   hook writes `nodes.return_type` (schema v5). `*Foo`→`Foo`, `List<Bar>`→`List`,
+   `pkg.Foo`→`Foo`, `-> Self` / `: self` / `this.type` → the declaring type.
+2. **Preserve the chained receiver at extraction** — `tree-sitter.ts` (or a bespoke
+   extractor) encodes `Foo.getInstance().bar()` as the marker string
+   `Foo.getInstance().bar` (the `().` marker never appears in an ordinary ref). A
+   per-language gate keeps **instance** chains (`list.map().filter()`) bare so their
+   existing resolution is untouched — only capitalized-receiver / factory chains re-encode.
+3. **Resolve AND VALIDATE** — at resolution the receiver's type is inferred from what
+   the inner call returns, then the outer method is resolved **on that type** and
+   validated: the method must exist on the type (or a supertype it conforms to), so a
+   wrong inference yields **no edge**, never a wrong one.
+
+Three shared resolvers in `src/resolution/name-matcher.ts`, all calling
+`resolveMethodOnType` (which has the conformance supertype-walk):
+
+| Resolver | Receiver style | Languages |
+|---|---|---|
+| `matchCppCallChain` | `field_expression` (`Foo::instance().bar`) | C++, C |
+| `matchScopedCallChain` | `::` (`Cls::for($x)->m`, `Foo::new().bar`) | PHP, Rust |
+| `matchDottedCallChain` | `.` (`Foo.create().bar`) | Java, Kotlin, C#, Swift, Go, Scala, Dart, Objective-C (`[[Foo create] doIt]` is encoded as `Foo.create().doIt`) |
+
+**Conformance pass (#754).** When the chained method lives on a **supertype** the
+return type conforms to (an inherited / default-interface / trait / mixin / embedded
+method), the first pass can't see it — `implements`/`extends` edges aren't built yet.
+So failed chain refs are deferred (`CHAIN_LANGUAGES` in `resolution/index.ts`) and
+re-resolved in a second pass `resolveChainedCallsViaConformance()` after edges exist,
+walking `context.getSupertypes(...)`.
+
+**Adding a language:** `getReturnType` in `languages/*.ts`; encode the chained receiver
++ a node-type gate; add the language to the right `matchReference` gate (and
+`CONSTRUCTS_VIA_BARE_CALL` if a bare capitalized call constructs the class); add to
+`CHAIN_LANGUAGES`; synthetic tests + a real-repo A/B; bump `EXTRACTION_VERSION`.
+
+---
+
+## Coverage (validated — each via synthetic decoy/absent-method tests + a real-repo A/B)
+
+| Language | PR | Receiver | Real-repo A/B (unique `calls` edges) | Notes |
+|---|---|---|---|---|
+| **C++ / C** | #645 (#742) | `field_expression` | — | The original: singletons / factories / chained getters. |
+| **PHP** | #608 (#749) | `::` → `->` | — | `Cls::for($x)->method()` — the Laravel per-tenant client idiom. `: self`/`: static`. |
+| **Java** | #751 | `.` | Guava **+1,507 / −0** | Missing-edge → purely additive. |
+| **Kotlin** | #752 | `.` | arrow **+49 / −438** | Wrong-edge → precision win (438 removed = test/doc noise + wrong). Needed the capitalized-receiver gate + constructor-receiver handling. |
+| **C#** | #753 | `.` | Newtonsoft +3 / NodaTime **+73 / −0** | Additive. Return type is the `returns` field; extension-method chains correctly don't resolve. |
+| **conformance** | #754 | (resolver upgrade) | arrow **+22 / −0** | Supertype walk — enables Swift protocol-ext, Rust trait, Go embedded, Dart mixin, Java/Kotlin/C# inherited chains. |
+| **Swift** | #755 | `.` | Alamofire / Kingfisher **0 / 0** | Neutral-safe (unique fluent names already bare-resolved). Needed a nested-extension naming fix (`KF.Builder`→`KF::Builder`). |
+| **Rust** | #757 | `::` | clap **+937 / −775** | Precision win (622 wrong→right retargets, +162 net). `-> Self`; trait-default methods via conformance. Single-hop. |
+| **Go** | #760 | `.` | gin **net-zero** | `New().Method()`; embedded structs via conformance. Variable-inner fallback. **Found + fixed a batched-resolver runaway** (a mutated `original.referenceName` looped the offset-0 batch → 5M edges / 1.4 GB; fixed by tying the fallback to the original ref + a non-progress guard). |
+| **Scala** | #761 | `.` | gatling **+14 / −59** | Precision win (−59 = stdlib `Option`/`Iterator` `.map`/`.flatMap` the baseline mis-tied to gatling's `Validation::*`). Companion factories + case-class `apply`. |
+| **Dart** | #762 | `.` | localsend hand-written **+17 / −10** | Precision win **+ constructors made first-class** (factory/named ctors `Foo.create()`/`Foo._()` are now indexed; unnamed `Foo()` stays `instantiates`). `dartCtorInfo` validates a ctor against the enclosing class name — handles a tree-sitter misparse where `@override (A,B) m()` makes `m()` look like a ctor. |
+| **Objective-C** | #786 | message send | SDWebImage **+35 / −75** | Precision win. Chained message send `[[Foo create] doIt]` over `message_expression`. getReturnType skips nullability qualifiers (`nonnull instancetype`). A class-message factory returns the receiver class by convention, so `[[X alloc] init]` / singleton chains resolve on `X` (validated). The −75 are wrong `init` mis-matches retargeted to the right class. |
+| **TypeScript** | — | `.` | typeorm +0/−6 · nest **+0/−164** | **Evaluated, NOT shipped** — gradual typing; see below. |
+| **Luau** | — | `:` / `.` | Fusion +0/−0 · matter +0/−0 | **Evaluated, NOT shipped** — gradually typed; additive-safe (missing-edge gap, no regression) but real Luau rarely annotates factory returns, so +0 on both benchmarks. Works for `Foo.create(): Bar` then `:doIt()` (synthetic). |
+
+`EXTRACTION_VERSION` is now **15** (C++→…→Dart→Objective-C). Re-index with `codegraph index -f`
+to pick up the newer extractor on an existing graph.
+
+## Why TypeScript was skipped
+
+The mechanism resolves a chain from the factory's **declared** return type. TypeScript
+leans on **type inference** — e.g. NestJS's `Test.createTestingModule(m) { return new
+TestingModuleBuilder(...) }` has no `: TestingModuleBuilder` annotation — so the
+factory's type can't be recovered, the re-encoded chain can't resolve, and it **drops
+the bare-name edge** the existing resolver found. Real-repo A/B was **+0 added on both
+typeorm and nest** with a net recall regression (−164 on nest, mostly the ubiquitous
+`Test.createTestingModule({…}).compile()` pattern). The removed edges were mostly
+*wrong* (baseline mis-resolved `.compile()` to `ModuleCompiler::compile`), so it's
+precision-positive but recall-negative — against the recall-first invariant, and adding
+nothing where it doesn't hurt (TS method names are unique enough that bare-name already
+lands them). It was fully implemented (5 synthetic tests passed, runaway-safe bare-name
+fallback) and consciously not shipped. The only path to a TS win would be reading
+**inferred** return types (resolving `return new X()` in the factory body) — a much
+larger change. Full write-up on issue #750.
+
+---
+
+## Full README classification (all 21 languages)
+
+The mechanism's real requirement is a **declared return type** to recover the receiver's
+type — not "statically typed" (PHP qualifies via its `: self` / `: Type` return
+declarations). Against the README's full supported-language list:
+
+| Bucket | Languages |
+|---|---|
+| **Covered** (12) | C++, C, PHP, Java, Kotlin, C#, Swift, Rust, Go, Scala, Dart, Objective-C |
+| **Evaluated, skipped** (2) | **TypeScript** — gradual typing → inference-typed factories can't be recovered; net recall regression. **Luau** — gradually typed; additive-safe but +0 on Fusion AND matter (real Luau rarely annotates factory returns). Both: the mechanism needs reliably-declared return types, which gradually-typed code too often omits. |
+| **Blocked by a prerequisite** (1) | **Pascal/Delphi** — statically typed (so the mechanism *would* pay off), but its method-call extraction from procedure bodies is broadly incomplete: paren-less calls (`TFoo.GetInstance.DoIt`) parse as a bare `exprDot` (not in `callTypes`), and even paren'd calls (`f.Regular()`) produce no edge (no receiver-type tracking for Pascal locals). Building Pascal's call graph is a substantial standalone extractor effort; the chained-call port is a small part of it. Separate follow-up. |
+| **Out of scope — no declared return types** (6) | JavaScript, Ruby, Lua, Svelte, Vue, Liquid (Liquid has no methods/chains at all) |
+| **Partial / separate** (1) | Python — only optional `-> T` hints; tracked as #578, not part of this mechanism |
+
+So #750's original framing ("the 9 statically-typed README languages") was incomplete —
+it missed three more typed languages. Resolved: **Objective-C** shipped (#786, same
+wrong-edge gap, mechanism ports directly); **Luau** evaluated and skipped (gradual
+typing → +0 on real repos, additive-safe); **Pascal** is gated on unrelated extractor
+work (its call graph is broadly incomplete).
+
+The through-line: this mechanism fits languages with **reliably-declared return types**
+(the 12 shipped). Gradually-typed languages (TypeScript, Luau) omit them too often for
+it to pay off, and dynamically-typed languages have none.
+
+---
+
+## Edge cases / model
+- **Single-hop**: a chain re-encodes one hop; deeper hops (`a.b().c().d()`) keep the
+  bare name (the inner `()` defeats the `Class::method` split). Re-measure on deep
+  fluent-builder repos.
+- **Validation, not guessing**: every resolver ends in `resolveMethodOnType`, so an
+  unknown / wrong inferred type produces **no edge** — the decoy / absent-method
+  guarantee that makes this safe to ship.
+- **Per-language receiver gate** keeps instance chains bare so existing resolution is
+  never regressed; the A/B "removed" counts are wrong-edge corrections, not losses.
+
+## Related work
+- **Dynamic-dispatch / callback synthesis** (a *different* mechanism): observer /
+  EventEmitter / React-render / JSX-child / django-ORM edge synthesis lives in
+  `callback-edge-synthesis.md` + `dynamic-dispatch-coverage-playbook.md`.
+- The verbose session working-notes for #750 are in
+  `.claude/handoffs/chained-call-multilang-probe.md` (scratch; this doc is the
+  permanent record).

From af56f3539d16be4dcdbc3d97696815d5d6135dd9 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 08:37:04 -0400
Subject: [PATCH 23/51] fix(pascal): resolve chained factory calls
 TFoo.GetInstance().DoIt() (#750) (#791)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports the #645/#608 chained-receiver mechanism to Pascal/Delphi — which I'd
previously mis-scoped as blocked. The paren'd chained form extracts fine; it just
hit the chained-call gap like the others (with a decoy, `TFoo.GetInstance().DoIt()`
mis-resolved to a same-named method on an unrelated class).

- pascal.ts: getReturnType reads the method's `typeref` (a `function GetInstance:
  TBar` returns TBar; an interface return `IFoo` is captured too).
- tree-sitter.ts: extractPascalCall now re-encodes a chained call `TFoo.GetInstance().DoIt`
  (the exprDot's receiver is an exprCall) instead of collapsing it to bare `DoIt`.
  Gated on the Delphi type-naming convention (`TFoo`/`IFoo`) so a capitalized
  VARIABLE chain (Pascal capitalizes locals too — `Curve.X().Y()`, `Self.X().Y()`)
  stays bare and keeps its existing bare-name resolution.
- name-matcher.ts: `pascal` joins the dotted-chain gate + CHAIN_LANGUAGES +
  CONSTRUCTS_VIA_BARE_CALL (a `TFoo(x)` typecast yields a TFoo). When the factory's
  return type wasn't captured (a `constructor Create` has no `: TBar` but returns
  its class), resolve the method on the factory class itself. resolveMethodOnType
  validates, so a wrong inference yields no edge.

Validation: 4 synthetic tests (factory+decoy, constructor chain, typecast chain,
absent-method safety). Real-repo A/B on PascalCoin (772 files): +19 / -18 — 15 of
the -18 are correct class→interface retargets (`GetInstance(): IAsn1OctetString`
resolves `.GetOctets` on the declared interface, not baseline's concrete-class
guess); 3 are negligible drops (0.02%). EXTRACTION_VERSION 15->16. Full suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |   1 +
 __tests__/resolution.test.ts         | 135 +++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |   2 +-
 src/extraction/languages/pascal.ts   |  10 ++
 src/extraction/tree-sitter.ts        |  41 ++++++--
 src/resolution/index.ts              |   2 +-
 src/resolution/name-matcher.ts       |  21 ++++-
 7 files changed, 201 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe1778ca2..dd60c4af4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Rust method calls made through a chained associated function now resolve to the correct type. A call like `Foo::new().bar()` or `Foo::with(cfg).build()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Rust return types (`-> Self` resolves to the implementing type), infers the chained receiver's type from what the associated function returns, and resolves the method on it — including methods provided by a trait the type implements (via the new `impl Trait for Type` relationships) — creating the edge only when the type or one of its traits genuinely has the method. Existing Rust indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Rust)
 - Dart method calls made through a static factory, a factory or named constructor, or a fluent chain now resolve to the correct type. A call like `Foo.create().bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.where` onto your own same-named class. CodeGraph now indexes Dart **factory and named constructors** (`factory Foo.create()`, `Foo.named()`) as first-class members so calls to them resolve, captures Dart return types (a generic `List<Foo>` resolves to its container `List`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a superclass or mixin — creating the edge only when that type genuinely has the method. Plain construction (`Foo(...)`) is still recorded as instantiation. Existing Dart indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Dart)
 - Objective-C methods called through a chained message send now resolve to the correct class. A call like `[[Foo create] doIt]` used to drop the receiver, so `doIt` silently attached to a same-named method on an unrelated class — most often a test helper or stdlib class. CodeGraph now captures Objective-C method return types and infers the chained receiver's type from what the inner message returns. For the ubiquitous `[[X alloc] init]` and singleton (`[[X sharedInstance] …]`) patterns — where the factory returns `instancetype` — the receiver is the class `X` itself, so the chained method resolves on `X` (including methods inherited from a superclass), creating the edge only when the class genuinely has the method. Existing Objective-C indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Objective-C)
+- Pascal/Delphi methods called through a chained factory call now resolve to the correct class. A call like `TFoo.GetInstance().DoIt()` used to drop the receiver, so `DoIt` silently attached to a same-named method on an unrelated class. CodeGraph now captures Pascal return types and infers the chained receiver's type from what the factory function returns — resolving to the declared type (including an interface return like `IFoo`), and for a constructor (`TFoo.Create().…`) or a typecast (`TFoo(x).…`) to the class `TFoo` itself, since both yield a `TFoo`. The edge is created only when that type genuinely has the method (so a wrong inference produces no edge). Existing Pascal/Delphi indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Pascal/Delphi)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 868e9b07a..f33197eda 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -3131,4 +3131,139 @@ void run() {
       expect(callerNamesOf('Decoy::clearAll')).toEqual([]);
     });
   });
+
+  describe('Pascal/Delphi chained static-factory call resolution (#645/#608 mechanism)', () => {
+    function callerNamesOf(qualifiedName: string): string[] {
+      const target = cg.getNodesByKind('method').find((n) => n.qualifiedName === qualifiedName);
+      if (!target) return [];
+      const names = cg
+        .getIncomingEdges(target.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.source)?.name)
+        .filter((n): n is string => !!n);
+      return [...new Set(names)].sort();
+    }
+    function isCalled(qn: string): boolean {
+      const t = cg.getNodesByKind('method').find((n) => n.qualifiedName === qn);
+      return !!t && cg.getIncomingEdges(t.id).some((e) => e.kind === 'calls');
+    }
+
+    it('resolves a chained factory call TFoo.GetInstance().DoIt() via the return type, never a same-named decoy', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TBar = class
+    procedure DoIt;
+  end;
+  TDecoy = class
+    procedure DoIt;
+  end;
+  TFoo = class
+    class function GetInstance: TBar;
+  end;
+implementation
+procedure TBar.DoIt; begin end;
+procedure TDecoy.DoIt; begin end;
+class function TFoo.GetInstance: TBar; begin Result := nil; end;
+procedure Run;
+begin
+  TFoo.GetInstance().DoIt();
+end;
+end.
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(isCalled('TBar::DoIt')).toBe(true);
+      expect(isCalled('TDecoy::DoIt')).toBe(false);
+    });
+
+    it('resolves a constructor chain TFoo.Create().Configure() on the constructed class', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TFoo = class
+    constructor Create;
+    procedure Configure;
+  end;
+  TDecoy = class
+    procedure Configure;
+  end;
+implementation
+constructor TFoo.Create; begin end;
+procedure TFoo.Configure; begin end;
+procedure TDecoy.Configure; begin end;
+procedure Run;
+begin
+  TFoo.Create().Configure();
+end;
+end.
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // A constructor has no return-type annotation but yields its own class, so
+      // Configure resolves on TFoo, not on the decoy's same-named method.
+      expect(isCalled('TFoo::Configure')).toBe(true);
+      expect(isCalled('TDecoy::Configure')).toBe(false);
+    });
+
+    it('resolves a typecast chain TFoo(x).DoIt() on the cast type', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TFoo = class
+    procedure DoIt;
+  end;
+  TDecoy = class
+    procedure DoIt;
+  end;
+implementation
+procedure TFoo.DoIt; begin end;
+procedure TDecoy.DoIt; begin end;
+procedure Run(obj: TObject);
+begin
+  TFoo(obj).DoIt();
+end;
+end.
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(isCalled('TFoo::DoIt')).toBe(true);
+      expect(isCalled('TDecoy::DoIt')).toBe(false);
+    });
+
+    it('creates NO edge when the factory return type lacks the method (silent miss)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TBar = class
+  end;
+  TOther = class
+    procedure OnlyOther;
+  end;
+  TFoo = class
+    class function GetInstance: TBar;
+  end;
+implementation
+procedure TOther.OnlyOther; begin end;
+class function TFoo.GetInstance: TBar; begin Result := nil; end;
+procedure Run;
+begin
+  TFoo.GetInstance().OnlyOther();
+end;
+end.
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // TBar has no OnlyOther — must not mis-attach to the same-named TOther::OnlyOther.
+      expect(isCalled('TOther::OnlyOther')).toBe(false);
+    });
+  });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 2aba578ae..1b847d000 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 15;
+export const EXTRACTION_VERSION = 16;
diff --git a/src/extraction/languages/pascal.ts b/src/extraction/languages/pascal.ts
index aed6a59fe..004dadc83 100644
--- a/src/extraction/languages/pascal.ts
+++ b/src/extraction/languages/pascal.ts
@@ -17,6 +17,16 @@ export const pascalExtractor: LanguageExtractor = {
   bodyField: 'body',
   paramsField: 'args',
   returnField: 'type',
+  // Pascal/Delphi `function GetInstance: TBar` — the return type is a `typeref`
+  // child. Capture its bare class name for the chained static-factory call
+  // mechanism (#750). A procedure (no return) has no typeref → undefined.
+  getReturnType: (node, source) => {
+    const typeref = node.namedChildren.find((c: SyntaxNode) => c.type === 'typeref');
+    if (!typeref) return undefined;
+    const id = typeref.namedChildren.find((c: SyntaxNode) => c.type === 'identifier') ?? typeref;
+    const name = getNodeText(id, source).trim();
+    return /^[A-Za-z_]\w*$/.test(name) ? name : undefined;
+  },
   getSignature: (node, source) => {
     const args = getChildByField(node, 'args');
     const returnType = node.namedChildren.find(
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 546c66dc9..253bc3af9 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -4312,12 +4312,41 @@ export class TreeSitterExtractor {
 
     let calleeName = '';
     if (firstChild.type === 'exprDot') {
-      // Qualified call: Obj.Method(...)
-      const identifiers = firstChild.namedChildren.filter(
-        (c: SyntaxNode) => c.type === 'identifier'
-      );
-      if (identifiers.length > 0) {
-        calleeName = identifiers.map((id: SyntaxNode) => getNodeText(id, this.source)).join('.');
+      // Chained static-factory call: `TFoo.GetInstance().DoIt()` — the exprDot's
+      // receiver is itself an `exprCall`, so the bare identifier list would
+      // collapse to just `DoIt` and mis-resolve to a same-named method on an
+      // unrelated class. Encode `TFoo.GetInstance().DoIt` so resolution infers
+      // DoIt's class from what `TFoo.GetInstance` RETURNS (#645/#608). Applies only
+      // to a `TFoo`/`IFoo`-prefixed factory chain whose outer method is one identifier.
+      const innerCall = firstChild.namedChildren.find((c: SyntaxNode) => c.type === 'exprCall');
+      const outerId = firstChild.namedChildren.filter((c: SyntaxNode) => c.type === 'identifier').pop();
+      const method = outerId ? getNodeText(outerId, this.source) : '';
+      if (innerCall && method && /^\w+$/.test(method)) {
+        const innerFirst = innerCall.namedChild(0);
+        let innerCallee = '';
+        if (innerFirst?.type === 'exprDot') {
+          innerCallee = innerFirst.namedChildren
+            .filter((c: SyntaxNode) => c.type === 'identifier')
+            .map((id: SyntaxNode) => getNodeText(id, this.source))
+            .join('.');
+        } else if (innerFirst?.type === 'identifier') {
+          innerCallee = getNodeText(innerFirst, this.source);
+        }
+        // Gate on the Delphi type-naming convention — `TFoo` classes / `IFoo`
+        // interfaces — so a class-factory chain re-encodes but a capitalized
+        // VARIABLE/parameter chain (Pascal capitalizes locals too: `Curve.X().Y()`,
+        // `Self.X().Y()`) stays bare and keeps its existing bare-name resolution.
+        calleeName = innerCallee && /^[TI][A-Z]/.test(innerCallee)
+          ? `${innerCallee}().${method}`
+          : method;
+      } else {
+        // Qualified call: Obj.Method(...)
+        const identifiers = firstChild.namedChildren.filter(
+          (c: SyntaxNode) => c.type === 'identifier'
+        );
+        if (identifiers.length > 0) {
+          calleeName = identifiers.map((id: SyntaxNode) => getNodeText(id, this.source)).join('.');
+        }
       }
     } else if (firstChild.type === 'identifier') {
       calleeName = getNodeText(firstChild, this.source);
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index 9435dac37..96484001e 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -37,7 +37,7 @@ const SUPERTYPE_BEARING_KINDS = new Set<Node['kind']>([
  * second pass. Dotted-receiver languages resolve via matchDottedCallChain; the
  * `::`-receiver ones (Rust) via matchScopedCallChain.
  */
-const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go', 'scala', 'dart', 'objc']);
+const CHAIN_LANGUAGES = new Set(['java', 'kotlin', 'csharp', 'swift', 'rust', 'go', 'scala', 'dart', 'objc', 'pascal']);
 const SCOPED_CHAIN_LANGUAGES = new Set(['rust']);
 
 /** The extractor's chained-receiver encoding: `<inner>().<method>`. */
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index 19f0a7a70..b1280a78f 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -603,9 +603,11 @@ export function matchScopedCallChain(
  * so a bare `Foo()` there is a method call, not construction — excluded. Scala's
  * `Foo(args)` is a case-class / companion `apply`, which conventionally returns
  * `Foo` — and resolveMethodOnType validates, so a non-conventional `apply` that
- * returns another type simply yields no edge rather than a wrong one.
+ * returns another type simply yields no edge rather than a wrong one. Pascal/Delphi:
+ * a `TFoo(x)` is a TYPECAST whose result is a `TFoo`, so `TFoo(x).method()` resolves
+ * the method on `TFoo` — same shape, same validation.
  */
-const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift', 'scala', 'dart']);
+const CONSTRUCTS_VIA_BARE_CALL = new Set(['kotlin', 'swift', 'scala', 'dart', 'pascal']);
 
 /**
  * Resolve a dotted chained call whose receiver is a static factory / fluent call —
@@ -688,6 +690,18 @@ export function matchDottedCallChain(
     if (ref.language === 'objc' && /^[A-Z]/.test(factoryClass)) {
       return resolveMethodOnType(factoryClass, method, ref, context, 0.8, 'instance-method', importedFqnOf(factoryClass, ref, context));
     }
+    // Pascal/Delphi: the extractor only re-encodes a `TFoo`/`IFoo`-prefixed chain
+    // (the type-naming convention), so `factoryClass` should name a class/interface here.
+    // A factory whose return type wasn't captured is a CONSTRUCTOR
+    // (`TFileMem.Create().SetCachePerformance` — `constructor Create` has no `:
+    // TBar` annotation but returns its own class) or an unannotated function. In
+    // both cases the receiver's type is the class itself, so resolve the method on
+    // `factoryClass`. resolveMethodOnType validates against it (and its
+    // supertypes), so a wrong inference yields no edge — and this never fires when
+    // a return type WAS captured but lacks the method (absent-method safety above).
+    if (ref.language === 'pascal' && /^[TI]/.test(factoryClass)) {
+      return resolveMethodOnType(factoryClass, method, ref, context, 0.8, 'instance-method', importedFqnOf(factoryClass, ref, context));
+    }
     return null;
   }
   return resolveMethodOnType(ret, method, ref, context, 0.85, 'instance-method', importedFqnOf(ret, ref, context));
@@ -1153,7 +1167,8 @@ export function matchReference(
     ref.language === 'go' ||
     ref.language === 'scala' ||
     ref.language === 'dart' ||
-    ref.language === 'objc'
+    ref.language === 'objc' ||
+    ref.language === 'pascal'
   ) {
     result = matchDottedCallChain(ref, context);
     if (result) return result;

From 4c35b72136ba58febdaefbee2c395939e11288f3 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 08:39:34 -0400
Subject: [PATCH 24/51] =?UTF-8?q?docs(design):=20Pascal/Delphi=20chained?=
 =?UTF-8?q?=20calls=20shipped=20(#791)=20=E2=80=94=2013=20languages=20(#75?=
 =?UTF-8?q?0)=20(#792)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updates the chained-call design doc: Pascal moves from "blocked" to covered
(#791) — the earlier "blocked" read was wrong, caused by probing only the
paren-less form. 13 languages now shipped; EXTRACTION_VERSION 16.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/design/chained-call-resolution.md | 31 +++++++++++++-------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/docs/design/chained-call-resolution.md b/docs/design/chained-call-resolution.md
index 4cf38ebef..8485a02f7 100644
--- a/docs/design/chained-call-resolution.md
+++ b/docs/design/chained-call-resolution.md
@@ -1,12 +1,11 @@
 # Design + status: chained static-factory / fluent call resolution
 
-**Status:** SHIPPED for **11 languages** (C++, C, PHP, Java, Kotlin, C#, Swift, Rust,
-Go, Scala, Dart, Objective-C) + a conformance pass. **TypeScript and Luau were evaluated
-and intentionally skipped** (both gradually typed → the mechanism is +0 / regresses on
-real code). **Pascal/Delphi** is blocked on a larger prerequisite (its method-call
-extraction is broadly incomplete). See "Full README classification" below. Tracking
-issue: **#750** (which began as "the statically-typed README languages" but that
-enumeration was incomplete — it missed ObjC / Pascal / Luau).
+**Status:** SHIPPED for **13 languages** (C++, C, PHP, Java, Kotlin, C#, Swift, Rust,
+Go, Scala, Dart, Objective-C, Pascal/Delphi) + a conformance pass. **TypeScript and Luau
+were evaluated and intentionally skipped** (both gradually typed → the mechanism is +0 /
+regresses on real code). See "Full README classification" below. Tracking issue:
+**#750** (which began as "the statically-typed README languages" but that enumeration was
+incomplete — it missed ObjC / Pascal / Luau).
 
 **Motivation:** a call whose **receiver is itself a call** — a factory / singleton /
 builder that returns an object — should produce a `calls` edge to the chained method:
@@ -75,10 +74,11 @@ walking `context.getSupertypes(...)`.
 | **Scala** | #761 | `.` | gatling **+14 / −59** | Precision win (−59 = stdlib `Option`/`Iterator` `.map`/`.flatMap` the baseline mis-tied to gatling's `Validation::*`). Companion factories + case-class `apply`. |
 | **Dart** | #762 | `.` | localsend hand-written **+17 / −10** | Precision win **+ constructors made first-class** (factory/named ctors `Foo.create()`/`Foo._()` are now indexed; unnamed `Foo()` stays `instantiates`). `dartCtorInfo` validates a ctor against the enclosing class name — handles a tree-sitter misparse where `@override (A,B) m()` makes `m()` look like a ctor. |
 | **Objective-C** | #786 | message send | SDWebImage **+35 / −75** | Precision win. Chained message send `[[Foo create] doIt]` over `message_expression`. getReturnType skips nullability qualifiers (`nonnull instancetype`). A class-message factory returns the receiver class by convention, so `[[X alloc] init]` / singleton chains resolve on `X` (validated). The −75 are wrong `init` mis-matches retargeted to the right class. |
+| **Pascal/Delphi** | #791 | `.` (`exprDot`) | PascalCoin **+19 / −18** | Precision win. `TFoo.GetInstance().DoIt()` over Pascal's `exprCall`/`exprDot`. getReturnType from the `typeref` (incl. interface returns `IFoo`). Re-encoding gated on the Delphi `TFoo`/`IFoo` type convention so capitalized *variable* chains stay bare. A constructor (no `: TBar`) or typecast `TFoo(x)` resolves on the class. 15 of the −18 are correct class→interface retargets (`GetInstance(): IAsn1OctetString`). |
 | **TypeScript** | — | `.` | typeorm +0/−6 · nest **+0/−164** | **Evaluated, NOT shipped** — gradual typing; see below. |
 | **Luau** | — | `:` / `.` | Fusion +0/−0 · matter +0/−0 | **Evaluated, NOT shipped** — gradually typed; additive-safe (missing-edge gap, no regression) but real Luau rarely annotates factory returns, so +0 on both benchmarks. Works for `Foo.create(): Bar` then `:doIt()` (synthetic). |
 
-`EXTRACTION_VERSION` is now **15** (C++→…→Dart→Objective-C). Re-index with `codegraph index -f`
+`EXTRACTION_VERSION` is now **16** (C++→…→Objective-C→Pascal). Re-index with `codegraph index -f`
 to pick up the newer extractor on an existing graph.
 
 ## Why TypeScript was skipped
@@ -108,20 +108,21 @@ declarations). Against the README's full supported-language list:
 
 | Bucket | Languages |
 |---|---|
-| **Covered** (12) | C++, C, PHP, Java, Kotlin, C#, Swift, Rust, Go, Scala, Dart, Objective-C |
+| **Covered** (13) | C++, C, PHP, Java, Kotlin, C#, Swift, Rust, Go, Scala, Dart, Objective-C, Pascal/Delphi |
 | **Evaluated, skipped** (2) | **TypeScript** — gradual typing → inference-typed factories can't be recovered; net recall regression. **Luau** — gradually typed; additive-safe but +0 on Fusion AND matter (real Luau rarely annotates factory returns). Both: the mechanism needs reliably-declared return types, which gradually-typed code too often omits. |
-| **Blocked by a prerequisite** (1) | **Pascal/Delphi** — statically typed (so the mechanism *would* pay off), but its method-call extraction from procedure bodies is broadly incomplete: paren-less calls (`TFoo.GetInstance.DoIt`) parse as a bare `exprDot` (not in `callTypes`), and even paren'd calls (`f.Regular()`) produce no edge (no receiver-type tracking for Pascal locals). Building Pascal's call graph is a substantial standalone extractor effort; the chained-call port is a small part of it. Separate follow-up. |
+| **Known limitation (not blocking)** | **Pascal/Delphi** is shipped (#791), but only the **paren'd** chain `TFoo.GetInstance().DoIt()` is covered — the **paren-less** form `TFoo.GetInstance.DoIt` parses as a bare `exprDot` (not in `callTypes`) and isn't extracted as a call at all. Emitting paren-less method calls is a separate extractor follow-up (and a broader Pascal-coverage win independent of chains). |
 | **Out of scope — no declared return types** (6) | JavaScript, Ruby, Lua, Svelte, Vue, Liquid (Liquid has no methods/chains at all) |
 | **Partial / separate** (1) | Python — only optional `-> T` hints; tracked as #578, not part of this mechanism |
 
 So #750's original framing ("the 9 statically-typed README languages") was incomplete —
-it missed three more typed languages. Resolved: **Objective-C** shipped (#786, same
-wrong-edge gap, mechanism ports directly); **Luau** evaluated and skipped (gradual
-typing → +0 on real repos, additive-safe); **Pascal** is gated on unrelated extractor
-work (its call graph is broadly incomplete).
+it missed three more typed languages, all now resolved: **Objective-C** shipped (#786,
+same wrong-edge gap, mechanism ports directly); **Pascal/Delphi** shipped (#791, a clean
+port for the paren'd chain — an initial "blocked" read was wrong, caused by probing only
+the paren-less form); **Luau** evaluated and skipped (gradual typing → +0 on real repos,
+additive-safe).
 
 The through-line: this mechanism fits languages with **reliably-declared return types**
-(the 12 shipped). Gradually-typed languages (TypeScript, Luau) omit them too often for
+(the 13 shipped). Gradually-typed languages (TypeScript, Luau) omit them too often for
 it to pay off, and dynamically-typed languages have none.
 
 ---

From 35dce04e1fe28c0ff187bb3b10efb203815a4a90 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 08:54:17 -0400
Subject: [PATCH 25/51] feat(pascal): extract paren-less method calls
 (Obj.Free; / TFoo.GetInstance.DoIt;) (#793)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pascal/Delphi lets a no-arg method or procedure drop its parens, so the call
parses as a bare `exprDot` (not an `exprCall`) and was never recorded as a call —
callers/impact/trace missed all of them (e.g. `Obj.Free`, `List.Clear`, the
paren-less factory chain `TFoo.GetInstance.DoIt`).

extractPascalParenlessCall handles these, wired into visitPascalBlock scoped to
STATEMENT position only: a bare `Obj.Field;` statement is a no-op, so a
statement-level dot expression is a call — but a dot in assignment LHS/RHS or a
condition is left alone, since there it's genuinely ambiguous with a
field/property access. The chained paren-less form reuses the #750 chain encoding
(gated on the Delphi `TFoo`/`IFoo` type convention) and resolves the same way.

PascalCoin A/B: +1131 / -1 — purely additive, and all 1131 new edges resolve to
METHOD nodes (zero field/property false positives, confirming the statement-level
gate). 3 new synthetic tests (paren-less call, paren-less chained factory, and the
property-write/read non-extraction guard). EXTRACTION_VERSION 16->17. Full suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 87 ++++++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/tree-sitter.ts        | 80 +++++++++++++++++++++++--
 4 files changed, 164 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dd60c4af4..19f406db4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Dart method calls made through a static factory, a factory or named constructor, or a fluent chain now resolve to the correct type. A call like `Foo.create().bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.where` onto your own same-named class. CodeGraph now indexes Dart **factory and named constructors** (`factory Foo.create()`, `Foo.named()`) as first-class members so calls to them resolve, captures Dart return types (a generic `List<Foo>` resolves to its container `List`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a superclass or mixin — creating the edge only when that type genuinely has the method. Plain construction (`Foo(...)`) is still recorded as instantiation. Existing Dart indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Dart)
 - Objective-C methods called through a chained message send now resolve to the correct class. A call like `[[Foo create] doIt]` used to drop the receiver, so `doIt` silently attached to a same-named method on an unrelated class — most often a test helper or stdlib class. CodeGraph now captures Objective-C method return types and infers the chained receiver's type from what the inner message returns. For the ubiquitous `[[X alloc] init]` and singleton (`[[X sharedInstance] …]`) patterns — where the factory returns `instancetype` — the receiver is the class `X` itself, so the chained method resolves on `X` (including methods inherited from a superclass), creating the edge only when the class genuinely has the method. Existing Objective-C indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Objective-C)
 - Pascal/Delphi methods called through a chained factory call now resolve to the correct class. A call like `TFoo.GetInstance().DoIt()` used to drop the receiver, so `DoIt` silently attached to a same-named method on an unrelated class. CodeGraph now captures Pascal return types and infers the chained receiver's type from what the factory function returns — resolving to the declared type (including an interface return like `IFoo`), and for a constructor (`TFoo.Create().…`) or a typecast (`TFoo(x).…`) to the class `TFoo` itself, since both yield a `TFoo`. The edge is created only when that type genuinely has the method (so a wrong inference produces no edge). Existing Pascal/Delphi indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Pascal/Delphi)
+- Pascal/Delphi **paren-less method calls are now tracked**. Pascal lets a no-argument method or procedure drop its parentheses (`Obj.Free;`, `List.Clear;`, `TFoo.GetInstance.DoIt;`), which previously weren't recorded as calls at all — so callers, impact, and trace missed them. CodeGraph now extracts these, scoped to statement position so a field or property access (which looks identical) is never mistaken for a call. On a real Delphi codebase this added ~1,100 previously-missing call edges with no false positives. Existing Pascal/Delphi indexes should be re-indexed (`codegraph index -f`) to benefit. (Pascal/Delphi)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index f33197eda..35607e3dd 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -3265,5 +3265,92 @@ end.
       // TBar has no OnlyOther — must not mis-attach to the same-named TOther::OnlyOther.
       expect(isCalled('TOther::OnlyOther')).toBe(false);
     });
+
+    it('extracts paren-less method calls (Pascal lets a no-arg method drop its parens)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TFoo = class
+    procedure DoThing;
+    procedure Reset;
+  end;
+implementation
+procedure TFoo.DoThing; begin end;
+procedure TFoo.Reset; begin end;
+procedure Run(f: TFoo);
+begin
+  f.DoThing;
+  f.Reset;
+end;
+end.
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(isCalled('TFoo::DoThing')).toBe(true);
+      expect(isCalled('TFoo::Reset')).toBe(true);
+    });
+
+    it('resolves a PAREN-LESS chained factory call TFoo.GetInstance.DoIt via the return type', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TBar = class
+    procedure DoIt;
+  end;
+  TDecoy = class
+    procedure DoIt;
+  end;
+  TFoo = class
+    class function GetInstance: TBar;
+  end;
+implementation
+procedure TBar.DoIt; begin end;
+procedure TDecoy.DoIt; begin end;
+class function TFoo.GetInstance: TBar; begin Result := nil; end;
+procedure Run;
+begin
+  TFoo.GetInstance.DoIt;
+end;
+end.
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      expect(isCalled('TBar::DoIt')).toBe(true);
+      expect(isCalled('TDecoy::DoIt')).toBe(false);
+    });
+
+    it('does NOT turn a property write/read into a call edge (only statement-level dots are calls)', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TFoo = class
+    function GetValue: Integer;
+    procedure SetValue(v: Integer);
+    property Value: Integer read GetValue write SetValue;
+  end;
+implementation
+function TFoo.GetValue: Integer; begin Result := 0; end;
+procedure TFoo.SetValue(v: Integer); begin end;
+procedure Run(f: TFoo);
+var x: Integer;
+begin
+  f.Value := 5;
+  x := f.Value;
+end;
+end.
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // A property read/write is a bare dot in assignment position, not a statement,
+      // so it must not be mis-extracted as a call to the property's getter/setter.
+      expect(isCalled('TFoo::GetValue')).toBe(false);
+      expect(isCalled('TFoo::SetValue')).toBe(false);
+    });
   });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 1b847d000..435b263e9 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 16;
+export const EXTRACTION_VERSION = 17;
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 253bc3af9..c641ab96b 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -4371,6 +4371,69 @@ export class TreeSitterExtractor {
     }
   }
 
+  /**
+   * Extract a PAREN-LESS Pascal method/procedure call (`Obj.Method;`,
+   * `TFoo.GetInstance.DoIt;`). Pascal lets a no-arg method drop its parens, so it
+   * parses as a bare `exprDot` (not an `exprCall`). A bare `exprDot` is
+   * syntactically identical to a field/property access, so this is only ever
+   * called for a STATEMENT-level exprDot (caller-gated): a bare `Obj.Field;`
+   * statement is a no-op, so a statement-level dot expression is a call. (An
+   * exprDot in assignment LHS/RHS or a condition is left alone — there it really
+   * can be a field/property read.)
+   */
+  private extractPascalParenlessCall(node: SyntaxNode): void {
+    if (this.nodeStack.length === 0) return;
+    const callerId = this.nodeStack[this.nodeStack.length - 1];
+    if (!callerId) return;
+
+    const receiver = node.namedChild(0);
+    const outerId = node.namedChildren.filter((c: SyntaxNode) => c.type === 'identifier').pop();
+    const method = outerId ? getNodeText(outerId, this.source) : '';
+    if (!method) return;
+
+    let calleeName = '';
+    // Chained: the receiver is itself a call — a paren-less `TFoo.GetInstance` (an
+    // inner exprDot) or a paren'd `TFoo.GetInstance()` (an exprCall). Encode the
+    // chain `TFoo.GetInstance().DoIt` so resolution infers DoIt's class from what
+    // the factory RETURNS (#645/#608), gated on the Delphi `TFoo`/`IFoo` type
+    // convention; a capitalized VARIABLE chain stays a bare method name.
+    if ((receiver?.type === 'exprDot' || receiver?.type === 'exprCall') && /^\w+$/.test(method)) {
+      const innerCalleeNode = receiver.type === 'exprCall' ? receiver.namedChild(0) : receiver;
+      const innerCallee = !innerCalleeNode
+        ? ''
+        : innerCalleeNode.type === 'identifier'
+          ? getNodeText(innerCalleeNode, this.source)
+          : innerCalleeNode.namedChildren
+              .filter((c: SyntaxNode) => c.type === 'identifier')
+              .map((id: SyntaxNode) => getNodeText(id, this.source))
+              .join('.');
+      if (innerCallee && /^[TI][A-Z]/.test(innerCallee)) {
+        calleeName = `${innerCallee}().${method}`;
+        // The T/I-prefixed inner is itself a real call — record it too.
+        if (receiver.type === 'exprCall') this.extractPascalCall(receiver);
+        else this.extractPascalParenlessCall(receiver);
+      } else {
+        calleeName = method; // non-class receiver: a bare method ref (no field-access ref)
+      }
+    } else {
+      // Simple: `Obj.Method` → the dotted name (resolves via the receiver / bare name).
+      calleeName = node.namedChildren
+        .filter((c: SyntaxNode) => c.type === 'identifier')
+        .map((id: SyntaxNode) => getNodeText(id, this.source))
+        .join('.');
+    }
+
+    if (calleeName) {
+      this.unresolvedReferences.push({
+        fromNodeId: callerId,
+        referenceName: calleeName,
+        referenceKind: 'calls',
+        line: node.startPosition.row + 1,
+        column: node.startPosition.column,
+      });
+    }
+  }
+
   /**
    * Recursively visit a Pascal block/statement tree for call expressions
    */
@@ -4381,11 +4444,18 @@ export class TreeSitterExtractor {
       if (child.type === 'exprCall') {
         this.extractPascalCall(child);
       } else if (child.type === 'exprDot') {
-        // Check if exprDot contains an exprCall
-        for (let j = 0; j < child.namedChildCount; j++) {
-          const grandchild = child.namedChild(j);
-          if (grandchild?.type === 'exprCall') {
-            this.extractPascalCall(grandchild);
+        // A STATEMENT-level bare exprDot is a paren-less call (`Obj.Free;`,
+        // `TFoo.GetInstance.DoIt;`). Anywhere else (assignment side, condition,
+        // expression) a bare exprDot is ambiguous with a field/property access,
+        // so there we only descend for paren'd inner calls.
+        if (node.type === 'statement') {
+          this.extractPascalParenlessCall(child);
+        } else {
+          for (let j = 0; j < child.namedChildCount; j++) {
+            const grandchild = child.namedChild(j);
+            if (grandchild?.type === 'exprCall') {
+              this.extractPascalCall(grandchild);
+            }
           }
         }
       } else {

From 5342f7a93e5b3e70c77715cad4d5fc34ecacf0e0 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 08:55:27 -0400
Subject: [PATCH 26/51] docs(design): Pascal paren-less method calls now
 extracted (#793) (#794)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updates the chained-call design doc: the Pascal paren-less-call follow-up is
done (#793) — `Obj.Free;` / `TFoo.GetInstance.DoIt;` are now extracted (scoped to
statement position so field/property accesses aren't mistaken for calls).
PascalCoin A/B: +1131 / -1 call edges. EXTRACTION_VERSION is now 17.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/design/chained-call-resolution.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/design/chained-call-resolution.md b/docs/design/chained-call-resolution.md
index 8485a02f7..a4d9338d1 100644
--- a/docs/design/chained-call-resolution.md
+++ b/docs/design/chained-call-resolution.md
@@ -78,7 +78,7 @@ walking `context.getSupertypes(...)`.
 | **TypeScript** | — | `.` | typeorm +0/−6 · nest **+0/−164** | **Evaluated, NOT shipped** — gradual typing; see below. |
 | **Luau** | — | `:` / `.` | Fusion +0/−0 · matter +0/−0 | **Evaluated, NOT shipped** — gradually typed; additive-safe (missing-edge gap, no regression) but real Luau rarely annotates factory returns, so +0 on both benchmarks. Works for `Foo.create(): Bar` then `:doIt()` (synthetic). |
 
-`EXTRACTION_VERSION` is now **16** (C++→…→Objective-C→Pascal). Re-index with `codegraph index -f`
+`EXTRACTION_VERSION` is now **17** (C++→…→Pascal chains→Pascal paren-less calls). Re-index with `codegraph index -f`
 to pick up the newer extractor on an existing graph.
 
 ## Why TypeScript was skipped
@@ -110,7 +110,7 @@ declarations). Against the README's full supported-language list:
 |---|---|
 | **Covered** (13) | C++, C, PHP, Java, Kotlin, C#, Swift, Rust, Go, Scala, Dart, Objective-C, Pascal/Delphi |
 | **Evaluated, skipped** (2) | **TypeScript** — gradual typing → inference-typed factories can't be recovered; net recall regression. **Luau** — gradually typed; additive-safe but +0 on Fusion AND matter (real Luau rarely annotates factory returns). Both: the mechanism needs reliably-declared return types, which gradually-typed code too often omits. |
-| **Known limitation (not blocking)** | **Pascal/Delphi** is shipped (#791), but only the **paren'd** chain `TFoo.GetInstance().DoIt()` is covered — the **paren-less** form `TFoo.GetInstance.DoIt` parses as a bare `exprDot` (not in `callTypes`) and isn't extracted as a call at all. Emitting paren-less method calls is a separate extractor follow-up (and a broader Pascal-coverage win independent of chains). |
+| **Pascal paren-less calls** | **Resolved (#793).** Pascal lets a no-arg method drop its parens (`Obj.Free;`, `TFoo.GetInstance.DoIt;`), which parse as a bare `exprDot` and weren't extracted as calls at all. Now extracted, scoped to STATEMENT position (a bare dot in assignment/condition position is left alone — there it's ambiguous with a field/property access). The paren-less chain reuses the same `TFoo`/`IFoo`-gated encoding. PascalCoin A/B **+1131 / −1**, all new edges resolve to methods (zero field/property false positives). |
 | **Out of scope — no declared return types** (6) | JavaScript, Ruby, Lua, Svelte, Vue, Liquid (Liquid has no methods/chains at all) |
 | **Partial / separate** (1) | Python — only optional `-> T` hints; tracked as #578, not part of this mechanism |
 

From dac00e7d449a7b27c15292bc66f5c0c108bc9f9e Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 09:05:08 -0400
Subject: [PATCH 27/51] fix(pascal): attribute a free routine's calls to it,
 not the file (#795)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A Pascal/Delphi procedure or function defined ONLY in the implementation section
(no interface declaration, not a class method) had no node of its own, so
extractPascalDefProc's caller lookup fell through to the nodeStack top — the file
node. Every call in such a routine's body was lumped under the unit: callers
returned the file, and impact couldn't attribute the call to the routine. (Methods
were fine — they get a node from their class declaration.)

Fix: when extractPascalDefProc finds no existing node for a FREE routine (a name
with no `.`), create a function node for it and attribute the body's calls to it.
Interface-declared free routines already have a node (found via the methodIndex),
so there's no duplicate; methods keep their existing class-declaration node.

PascalCoin A/B: +511 / -145 — the +511 are calls now correctly attributed to their
actual routine (`allocate_new_datablock -> TDisposables::GetMem`), replacing the 145
file-level aggregate edges (the -145); +248 new function nodes for the
implementation-only routines. A new synthetic test asserts that a free routine's
call is attributed to it alongside a method caller. EXTRACTION_VERSION 17->18.
Full suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/resolution.test.ts         | 25 +++++++++++++++++++++++++
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/tree-sitter.ts        | 26 +++++++++++++++++++++++---
 4 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 19f406db4..eb3a57c94 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Objective-C methods called through a chained message send now resolve to the correct class. A call like `[[Foo create] doIt]` used to drop the receiver, so `doIt` silently attached to a same-named method on an unrelated class — most often a test helper or stdlib class. CodeGraph now captures Objective-C method return types and infers the chained receiver's type from what the inner message returns. For the ubiquitous `[[X alloc] init]` and singleton (`[[X sharedInstance] …]`) patterns — where the factory returns `instancetype` — the receiver is the class `X` itself, so the chained method resolves on `X` (including methods inherited from a superclass), creating the edge only when the class genuinely has the method. Existing Objective-C indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Objective-C)
 - Pascal/Delphi methods called through a chained factory call now resolve to the correct class. A call like `TFoo.GetInstance().DoIt()` used to drop the receiver, so `DoIt` silently attached to a same-named method on an unrelated class. CodeGraph now captures Pascal return types and infers the chained receiver's type from what the factory function returns — resolving to the declared type (including an interface return like `IFoo`), and for a constructor (`TFoo.Create().…`) or a typecast (`TFoo(x).…`) to the class `TFoo` itself, since both yield a `TFoo`. The edge is created only when that type genuinely has the method (so a wrong inference produces no edge). Existing Pascal/Delphi indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Pascal/Delphi)
 - Pascal/Delphi **paren-less method calls are now tracked**. Pascal lets a no-argument method or procedure drop its parentheses (`Obj.Free;`, `List.Clear;`, `TFoo.GetInstance.DoIt;`), which previously weren't recorded as calls at all — so callers, impact, and trace missed them. CodeGraph now extracts these, scoped to statement position so a field or property access (which looks identical) is never mistaken for a call. On a real Delphi codebase this added ~1,100 previously-missing call edges with no false positives. Existing Pascal/Delphi indexes should be re-indexed (`codegraph index -f`) to benefit. (Pascal/Delphi)
+- Pascal/Delphi calls inside a **standalone procedure or function** (one with no `interface` declaration, defined only in the `implementation` section) are now attributed to that routine instead of the whole file. Previously such a routine had no symbol of its own, so everything it called was lumped under the unit — `codegraph_callers` returned the file, and impact couldn't tell which routine was responsible. These routines are now indexed and their calls attributed correctly. Existing Pascal/Delphi indexes should be re-indexed (`codegraph index -f`) to benefit. (Pascal/Delphi)
 - Chained method calls now resolve when the chained method is **inherited from a superclass or declared on an interface/protocol** the receiver's type conforms to — for example a call on a sealed-subclass instance (`Either.Right(x).combine(...)`) that invokes a method defined on its parent type. Previously these chains found no caller edge even though the factory's type was known, so the call was invisible to callers, impact, and trace. CodeGraph now walks the type's supertypes (its `extends` / `implements` relationships) to find the method, creating the edge only when a supertype genuinely declares it (so a wrong inference still produces no edge). This makes Java, Kotlin, and C# factory and fluent chains more complete. Existing indexes should be re-indexed (`codegraph index -f`) to benefit. (#750)
 - Swift method calls made through a static factory, fluent chain, or constructor now resolve to the correct class. A call like `Foo.make().draw()` or `Foo().draw()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated class — or didn't resolve at all. CodeGraph now captures Swift return types and infers the chained receiver's type from what the inner call returns (or the constructed type), creating the edge only when that class genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Swift indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Swift)
 - C# method calls made through a static factory or fluent chain now resolve to the correct class. A call like `Foo.Create().Bar()` or `JObject.Parse(s).Property(...)` used to lose the receiver's type, so the chained method didn't resolve and the call was invisible to callers/impact/trace. CodeGraph now captures C# return types and infers the chained receiver's type from what the inner call returns, creating the edge only when that class genuinely has the method (so a wrong inference produces no edge). Existing C# indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (C#)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 35607e3dd..12131f3cc 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -3352,5 +3352,30 @@ end.
       expect(isCalled('TFoo::GetValue')).toBe(false);
       expect(isCalled('TFoo::SetValue')).toBe(false);
     });
+
+    it('attributes an implementation-only free procedure\'s calls to the procedure, not the file', async () => {
+      fs.writeFileSync(
+        path.join(tempDir, 'main.pas'),
+        `unit Main;
+interface
+type
+  TTgt = class
+    procedure Hit;
+  end;
+  TFoo = class
+    procedure DoStuff;
+  end;
+implementation
+procedure TTgt.Hit; begin end;
+procedure TFoo.DoStuff; var t: TTgt; begin t.Hit; end;
+procedure Helper; var t: TTgt; begin t.Hit; end;
+`
+      );
+      cg = await CodeGraph.init(tempDir, { index: true });
+      // `Helper` is implementation-only (no interface decl, not a method), but its
+      // body's call must attribute to `Helper`, not the file/module — alongside the
+      // method `DoStuff`.
+      expect(callerNamesOf('TTgt::Hit')).toEqual(['DoStuff', 'Helper']);
+    });
   });
 });
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 435b263e9..7b2df06d4 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 17;
+export const EXTRACTION_VERSION = 18;
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index c641ab96b..8eb04c6e8 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -4281,10 +4281,30 @@ export class TreeSitterExtractor {
       }
     }
 
-    const parentId =
+    let parentId =
       this.methodIndex.get(fullNameKey) ||
-      this.methodIndex.get(shortNameKey) ||
-      this.nodeStack[this.nodeStack.length - 1];
+      this.methodIndex.get(shortNameKey);
+
+    // No existing node? This is an implementation-only **free** procedure/function
+    // (`procedure Helper; begin … end;` with no interface declaration and not a
+    // class method). Create a function node so its body's calls attribute to it,
+    // not to the enclosing file/module. A method (`TClass.Method`, a dotted name)
+    // always has a node from its class declaration, so this only fires for free
+    // routines — and the methodIndex lookup above already covers interface-declared
+    // free routines, so there's no duplicate.
+    if (!parentId && !fullName.includes('.')) {
+      const fnNode = this.createNode('function', fullName, declProc, {
+        signature: this.extractor?.getSignature?.(declProc, this.source),
+        visibility: this.extractor?.getVisibility?.(declProc),
+      });
+      if (fnNode) {
+        parentId = fnNode.id;
+        this.methodIndex.set(fullNameKey, fnNode.id);
+        if (!this.methodIndex.has(shortNameKey)) this.methodIndex.set(shortNameKey, fnNode.id);
+      }
+    }
+
+    if (!parentId) parentId = this.nodeStack[this.nodeStack.length - 1];
     if (!parentId) return;
 
     // Visit the block for calls

From 0b3f3f969c988409ae2fcadbf51ec86800c57b01 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 09:06:37 -0400
Subject: [PATCH 28/51] docs(design): Pascal free-routine call attribution
 fixed (#795) (#796)

Records the second Pascal call-coverage follow-up (#795): a free routine
defined only in the implementation section now gets a function node so its
body's calls attribute to it, not the file. EXTRACTION_VERSION 18.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/design/chained-call-resolution.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/design/chained-call-resolution.md b/docs/design/chained-call-resolution.md
index a4d9338d1..9fa34a6e6 100644
--- a/docs/design/chained-call-resolution.md
+++ b/docs/design/chained-call-resolution.md
@@ -78,7 +78,7 @@ walking `context.getSupertypes(...)`.
 | **TypeScript** | — | `.` | typeorm +0/−6 · nest **+0/−164** | **Evaluated, NOT shipped** — gradual typing; see below. |
 | **Luau** | — | `:` / `.` | Fusion +0/−0 · matter +0/−0 | **Evaluated, NOT shipped** — gradually typed; additive-safe (missing-edge gap, no regression) but real Luau rarely annotates factory returns, so +0 on both benchmarks. Works for `Foo.create(): Bar` then `:doIt()` (synthetic). |
 
-`EXTRACTION_VERSION` is now **17** (C++→…→Pascal chains→Pascal paren-less calls). Re-index with `codegraph index -f`
+`EXTRACTION_VERSION` is now **18** (C++→…→Pascal chains→paren-less calls→free-routine attribution). Re-index with `codegraph index -f`
 to pick up the newer extractor on an existing graph.
 
 ## Why TypeScript was skipped
@@ -110,7 +110,7 @@ declarations). Against the README's full supported-language list:
 |---|---|
 | **Covered** (13) | C++, C, PHP, Java, Kotlin, C#, Swift, Rust, Go, Scala, Dart, Objective-C, Pascal/Delphi |
 | **Evaluated, skipped** (2) | **TypeScript** — gradual typing → inference-typed factories can't be recovered; net recall regression. **Luau** — gradually typed; additive-safe but +0 on Fusion AND matter (real Luau rarely annotates factory returns). Both: the mechanism needs reliably-declared return types, which gradually-typed code too often omits. |
-| **Pascal paren-less calls** | **Resolved (#793).** Pascal lets a no-arg method drop its parens (`Obj.Free;`, `TFoo.GetInstance.DoIt;`), which parse as a bare `exprDot` and weren't extracted as calls at all. Now extracted, scoped to STATEMENT position (a bare dot in assignment/condition position is left alone — there it's ambiguous with a field/property access). The paren-less chain reuses the same `TFoo`/`IFoo`-gated encoding. PascalCoin A/B **+1131 / −1**, all new edges resolve to methods (zero field/property false positives). |
+| **Pascal call-coverage follow-ups** | Two gaps from the chained-call work, both resolved. **Paren-less calls (#793):** Pascal lets a no-arg method drop its parens (`Obj.Free;`, `TFoo.GetInstance.DoIt;`), which parse as a bare `exprDot` and weren't extracted as calls at all. Now extracted, scoped to STATEMENT position (a bare dot in assignment/condition position is left alone — ambiguous with a field/property access). PascalCoin A/B **+1131 / −1**, all new edges resolve to methods. **Free-routine attribution (#795):** a procedure/function defined only in the `implementation` section (no interface decl, not a method) had no node, so its body's calls were lumped under the file; now it gets a function node and its calls attribute to it. PascalCoin A/B **+511 / −145** (file-level aggregates → per-routine edges). |
 | **Out of scope — no declared return types** (6) | JavaScript, Ruby, Lua, Svelte, Vue, Liquid (Liquid has no methods/chains at all) |
 | **Partial / separate** (1) | Python — only optional `-> T` hints; tracked as #578, not part of this mechanism |
 

From b7b7c8b4e8794e0108ddbd67402a29a6bea9b095 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 09:25:01 -0400
Subject: [PATCH 29/51] =?UTF-8?q?docs(readme):=20update=20Pascal/Delphi=20?=
 =?UTF-8?q?coverage=2075.7%=20=E2=86=92=2077.4%=20(#797)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The paren-less call extraction (#793) and free-routine attribution (#795)
added real call coverage on PascalCoin. Controlled A/B on a fresh clone,
same source-file filter, only the build differing:

  baseline (pre-Pascal-work, d21d2df): 75.79%  (≈ the documented 75.7%)
  current  (main, v18):                77.37%  (+1.58)

The baseline reproducing the documented 75.7% confirms the metric is the
same one the README table uses; the +1.58 is the measured coverage gain
from this session's Pascal extraction work.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ab147b548..c2356b23d 100644
--- a/README.md
+++ b/README.md
@@ -665,7 +665,7 @@ Impact and blast-radius queries are only as good as the dependency graph behind
 | Lua | nvim-telescope/telescope.nvim | 84.2% |
 | Luau | dphfox/Fusion | 92.2% |
 | Liquid | Shopify/dawn | 73.8% |
-| Pascal / Delphi | PascalCoin | 75.7% |
+| Pascal / Delphi | PascalCoin | 77.4% |
 
 Framework routing is validated the same way, on a canonical app per framework: Express 100%, FastAPI 98%, Flask 100%, NestJS 96.8%, Gin 96.5%, Axum 100%, Rocket 93.8%, Vapor 100%, Laravel 92%, Rails 89.6%, React Router 100% — and the convention/reflection-heavy ones at their honest static-analysis ceiling: ASP.NET 83.9%, Spring 83.3%, Drupal 78.9%, Django 74.1%.
 

From c39b4b938ec9cca48c5b953987ac26e1c0b6d5e6 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 09:57:48 -0400
Subject: [PATCH 30/51] =?UTF-8?q?docs(readme):=20fill=20framework-coverage?=
 =?UTF-8?q?=20gaps=20=E2=80=94=20add=20Play,=20Vue/Nuxt,=20Scala=20(#798)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The framework story was missing several supported frameworks:

- Play (Scala/Java) — absent from both the Framework-aware Routes table and
  the routing-coverage line. Measured 76.3% (106/139 routes resolved to a
  handler) across the 31 verb-route apps in playframework/play-samples; every
  miss is Play's framework-provided `Assets` controller (vendored library
  code, not app source). Slots into the convention-ceiling bucket.
- Vue Router / Nuxt — recognized (file-based pages/, server/api/, middleware)
  but missing from the routes table.
- Scala + Vue — missing from the "20+ Languages" highlight.

File-based routers (SvelteKit, Vue/Nuxt) have no separate handler edge — the
page IS the handler — so their coverage is the fair-coverage language figure
(Svelte/SvelteKit 100%, Vue/Nuxt 93.5%), now cited explicitly.

Existing framework numbers left untouched (they were measured ad-hoc; a fresh
re-measure would shift them and isn't part of this gap-fill).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index c2356b23d..11c379b48 100644
--- a/README.md
+++ b/README.md
@@ -225,8 +225,8 @@ CodeGraph cuts **tokens, tool calls, and wall-clock time on every repo** — acr
 | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 |
 | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes |
 | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config |
-| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Dart, Lua, Luau, Svelte, Liquid, Pascal/Delphi |
-| **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 14 frameworks |
+| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, Svelte, Vue, Liquid, Pascal/Delphi |
+| **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 16 frameworks |
 | **Mixed iOS / React Native / Expo** | Closes cross-language flows that static parsing misses: Swift ↔ ObjC bridging, React Native legacy bridge + TurboModules + Fabric view components, native → JS event emitters, Expo Modules |
 | **100% Local** | No data leaves your machine. No API keys. No external services. SQLite database only |
 
@@ -274,11 +274,13 @@ CodeGraph detects web-framework routing files and emits `route` nodes linked by
 | **Drupal** | `*.routing.yml` routes (`_controller`, `_form`, entity handlers); `hook_*` implementations in `.module`/`.theme`/`.install`/`.inc` |
 | **Rails** | `get '/x', to: 'users#index'`, hash-rocket `=>` syntax |
 | **Spring** | `@GetMapping`, `@PostMapping`, `@RequestMapping` on methods |
+| **Play** | `GET`/`POST`/… verb routes in `conf/routes` → `Controller.method` actions (Scala + Java) |
 | **Gin / chi / gorilla / mux** | `r.GET(...)`, `router.HandleFunc(...)` |
 | **Axum / actix / Rocket** | `.route("/x", get(handler))` |
 | **ASP.NET** | `[HttpGet("/x")]` attributes on action methods |
 | **Vapor** | `app.get("x", use: handler)` |
 | **React Router** / **SvelteKit** | Route component nodes |
+| **Vue Router** / **Nuxt** | `pages/` file-based routes, `server/api/` endpoints, route middleware |
 
 ---
 
@@ -667,7 +669,7 @@ Impact and blast-radius queries are only as good as the dependency graph behind
 | Liquid | Shopify/dawn | 73.8% |
 | Pascal / Delphi | PascalCoin | 77.4% |
 
-Framework routing is validated the same way, on a canonical app per framework: Express 100%, FastAPI 98%, Flask 100%, NestJS 96.8%, Gin 96.5%, Axum 100%, Rocket 93.8%, Vapor 100%, Laravel 92%, Rails 89.6%, React Router 100% — and the convention/reflection-heavy ones at their honest static-analysis ceiling: ASP.NET 83.9%, Spring 83.3%, Drupal 78.9%, Django 74.1%.
+Framework routing is validated the same way, on a canonical app per framework: Express 100%, FastAPI 98%, Flask 100%, NestJS 96.8%, Gin 96.5%, Axum 100%, Rocket 93.8%, Vapor 100%, Laravel 92%, Rails 89.6%, React Router 100% — and the convention/reflection-heavy ones at their honest static-analysis ceiling: ASP.NET 83.9%, Spring 83.3%, Drupal 78.9%, Play 76.3%, Django 74.1%. SvelteKit and Vue/Nuxt use file-based routing, so their page/endpoint coverage is the Svelte/SvelteKit (100%) and Vue/Nuxt (93.5%) figures in the table above.
 
 ## Troubleshooting
 

From 9a0f1447702709b5027ce346110f4dc86a7f1eb1 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 11:09:05 -0500
Subject: [PATCH 31/51] fix(directory): self-heal a stale .codegraph/.gitignore
 so daemon.pid is ignored (#788) (#802)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Versions <= 0.9.9 wrote a .codegraph/.gitignore that enumerated each
pattern to ignore (*.db, cache/, .dirty, ...) but never listed
daemon.pid or the socket, so the daemon's runtime pidfile got committed.
The wildcard rewrite in #654/#492/#484 fixed new inits, but the file was
only written when absent, so existing installs kept their stale file
forever — the fix never reached the people hitting it.

Make the gitignore self-heal: ensureGitignore() writes the file if
absent and upgrades a stale CodeGraph-generated default in place,
leaving a user-authored file untouched. A "stale default" is one that
carries our `# CodeGraph data files` header but predates the wildcard
ignore (no bare `*` line) — a header match heals every historical
variant (v0.7.x..0.9.9, all verified to share it) and is idempotent.
validateDirectory() runs on every open()/openSync(), so existing repos
heal on the next codegraph command after upgrading. The duplicated
template (previously inlined in two formats) is consolidated into one
GITIGNORE_CONTENT constant.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                 |  1 +
 __tests__/foundation.test.ts | 40 +++++++++++++++++
 src/directory.ts             | 87 +++++++++++++++++++++++++++---------
 3 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb3a57c94..6f790c7c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -93,6 +93,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Indexing a very large repository no longer aborts during its first sync with a "too many SQL variables" error. (#540)
 - Files under directories with non-ASCII names (for example CJK characters) are no longer silently skipped during indexing. (#541)
 - The `.codegraph/` index folder no longer clutters `git status`: its generated ignore file now excludes everything in the folder except itself, so the database, `daemon.pid`, sockets, and logs stop showing up as untracked changes. (#492, #484)
+- Projects initialized by an older version now get that fix automatically: a `.codegraph/.gitignore` written before this change — which listed only the database, cache, and logs and so let the daemon's `daemon.pid` get committed — is upgraded in place the next time you run any CodeGraph command. A `.gitignore` you've customized yourself is left untouched. (#788)
 - SAP HANA `.xsjs` / `.xsjslib` files are now indexed as JavaScript. (#556)
 - TypeScript `.mts` and `.cts` module files are now indexed instead of being skipped. (#366)
 - JavaScript modules that wrap their code in an anonymous function — AMD/RequireJS, NetSuite SuiteScript, IIFE bundles — now have their inner functions and calls indexed, instead of the file coming up nearly empty. (#528)
diff --git a/__tests__/foundation.test.ts b/__tests__/foundation.test.ts
index 405865b2f..05fa79804 100644
--- a/__tests__/foundation.test.ts
+++ b/__tests__/foundation.test.ts
@@ -159,6 +159,46 @@ describe('CodeGraph Foundation', () => {
       expect(validation.valid).toBe(false);
       expect(validation.errors.length).toBeGreaterThan(0);
     });
+
+    it('upgrades a stale pre-wildcard .gitignore in place (issue #788)', () => {
+      const cg = CodeGraph.initSync(tempDir);
+      cg.close();
+
+      const gitignorePath = path.join(getCodeGraphDir(tempDir), '.gitignore');
+      // A .gitignore written by an older version (<= 0.9.9): an explicit
+      // allowlist that never ignored daemon.pid, so the daemon's runtime
+      // pidfile got committed.
+      const staleV099 =
+        '# CodeGraph data files\n' +
+        '# These are local to each machine and should not be committed\n\n' +
+        '# Database\n*.db\n*.db-wal\n*.db-shm\n\n' +
+        '# Cache\ncache/\n\n# Logs\n*.log\n\n# Hook markers\n.dirty\n';
+      fs.writeFileSync(gitignorePath, staleV099, 'utf-8');
+
+      // Opening the project runs validateDirectory, which self-heals.
+      const cg2 = CodeGraph.openSync(tempDir);
+      cg2.close();
+
+      const upgraded = fs.readFileSync(gitignorePath, 'utf-8');
+      expect(upgraded).toContain('\n*\n'); // wildcard ignores everything…
+      expect(upgraded).toContain('!.gitignore'); // …except this file
+      expect(upgraded).not.toContain('.dirty'); // old explicit list is gone
+    });
+
+    it('leaves a user-customized .codegraph/.gitignore untouched', () => {
+      const cg = CodeGraph.initSync(tempDir);
+      cg.close();
+
+      const gitignorePath = path.join(getCodeGraphDir(tempDir), '.gitignore');
+      // No CodeGraph header → user-authored → must not be rewritten.
+      const custom = '# my own rules\n*.db\n!keep-this.json\n';
+      fs.writeFileSync(gitignorePath, custom, 'utf-8');
+
+      const cg2 = CodeGraph.openSync(tempDir);
+      cg2.close();
+
+      expect(fs.readFileSync(gitignorePath, 'utf-8')).toBe(custom);
+    });
   });
 
   describe('Uninitialize', () => {
diff --git a/src/directory.ts b/src/directory.ts
index 8f5abb092..1c7729a42 100644
--- a/src/directory.ts
+++ b/src/directory.ts
@@ -129,6 +129,61 @@ export function findNearestCodeGraphRoot(startPath: string): string | null {
   return null;
 }
 
+/**
+ * Contents of `.codegraph/.gitignore`. A single wildcard ignore keeps every
+ * transient file in the index dir — the database, `daemon.pid`, the socket,
+ * logs, cache, and anything future versions add — out of git, without having
+ * to enumerate each name (issues #788, #492, #484). Older versions wrote an
+ * explicit allowlist that never listed `daemon.pid` or the socket, so those
+ * runtime files were silently committed.
+ */
+const GITIGNORE_CONTENT = `# CodeGraph data files — local to each machine, not for committing.
+# Ignore everything in .codegraph/ except this file itself, so transient
+# files (the database, daemon.pid, sockets, logs) never show up in git.
+*
+!.gitignore
+`;
+
+/** Header line that prefixes every .gitignore CodeGraph has auto-generated. */
+const GITIGNORE_MARKER = '# CodeGraph data files';
+
+/**
+ * Is `content` a stale CodeGraph-generated `.gitignore` that should be
+ * regenerated in place? True when it carries our header but predates the
+ * wildcard ignore (it has no bare `*` line) — i.e. one of the old explicit
+ * allowlists (`*.db`, `cache/`, `.dirty`, …) that never ignored `daemon.pid`
+ * or the socket (issue #788). A file WITHOUT our header is user-authored and
+ * is left untouched; one that already has the wildcard is current. Matching
+ * on the header (not a byte-exact list of past defaults) heals every old
+ * variant — v0.7.x through 0.9.9 — and is idempotent once upgraded.
+ */
+function isStaleDefaultGitignore(content: string): boolean {
+  if (!content.trimStart().startsWith(GITIGNORE_MARKER)) return false;
+  return !content.split('\n').some((line) => line.trim() === '*');
+}
+
+/**
+ * Write `.codegraph/.gitignore` if it's absent, or upgrade a stale
+ * CodeGraph-generated default in place; a user-customized file is left alone.
+ * Best-effort — returns `false` only if a needed write failed.
+ */
+function ensureGitignore(gitignorePath: string): boolean {
+  let existing: string | null;
+  try {
+    existing = fs.readFileSync(gitignorePath, 'utf-8');
+  } catch {
+    existing = null; // absent (ENOENT) or unreadable — (re)create below
+  }
+  // Current default or a user-authored file: nothing to do.
+  if (existing !== null && !isStaleDefaultGitignore(existing)) return true;
+  try {
+    fs.writeFileSync(gitignorePath, GITIGNORE_CONTENT, 'utf-8');
+    return true;
+  } catch {
+    return false;
+  }
+}
+
 /**
  * Create the .codegraph directory structure
  * Note: Only throws if codegraph.db already exists, not just if .codegraph/ exists.
@@ -146,18 +201,9 @@ export function createDirectory(projectRoot: string): void {
   // Create main directory (if it doesn't exist)
   fs.mkdirSync(codegraphDir, { recursive: true });
 
-  // Create .gitignore inside .codegraph (if it doesn't exist)
-  const gitignorePath = path.join(codegraphDir, '.gitignore');
-  if (!fs.existsSync(gitignorePath)) {
-    const gitignoreContent = `# CodeGraph data files — local to each machine, not for committing.
-# Ignore everything in .codegraph/ except this file itself, so transient
-# files (the database, daemon.pid, sockets, logs) never show up in git.
-*
-!.gitignore
-`;
-
-    fs.writeFileSync(gitignorePath, gitignoreContent, 'utf-8');
-  }
+  // Write .gitignore inside .codegraph (create if absent, upgrade a stale
+  // pre-wildcard default left by an older version — issue #788).
+  ensureGitignore(path.join(codegraphDir, '.gitignore'));
 }
 
 /**
@@ -296,16 +342,15 @@ export function validateDirectory(projectRoot: string): {
     return { valid: false, errors };
   }
 
-  // Auto-repair missing .gitignore (non-critical file)
+  // Auto-repair / upgrade .gitignore (non-critical file). A missing one is
+  // recreated; a stale pre-wildcard default that never ignored daemon.pid is
+  // regenerated in place (issue #788); a user-authored file is left alone.
   const gitignorePath = path.join(codegraphDir, '.gitignore');
-  if (!fs.existsSync(gitignorePath)) {
-    try {
-      const gitignoreContent = `# CodeGraph data files — local to each machine, not for committing.\n# Ignore everything in .codegraph/ except this file itself, so transient\n# files (the database, daemon.pid, sockets, logs) never show up in git.\n*\n!.gitignore\n`;
-      fs.writeFileSync(gitignorePath, gitignoreContent, 'utf-8');
-    } catch {
-      // Non-fatal: warn but don't block
-      errors.push('.gitignore missing in .codegraph directory and could not be created');
-    }
+  const existedBefore = fs.existsSync(gitignorePath);
+  if (!ensureGitignore(gitignorePath) && !existedBefore) {
+    // Only a missing-and-uncreatable file is surfaced; a failed in-place
+    // upgrade of an existing file is non-fatal — the index still works.
+    errors.push('.gitignore missing in .codegraph directory and could not be created');
   }
 
   return {

From d0e649969a73d369ae4f0f41ff802da94cccdd95 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 11:25:13 -0500
Subject: [PATCH 32/51] fix(graph): treat class instantiation as a
 caller/callee edge (#774) (#804)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`callers <Class>` returned "No callers found" (or only the importing
file) even when a class's constructor was called from many sites, and
the instantiation sites were invisible — the opposite of what "what
breaks if I change this class?" should answer.

The `instantiates` edges already existed in the graph, correctly
attributed to the constructing function; they were simply excluded from
the caller/callee traversal, which queried only calls/references/imports.
Constructing a class is calling its constructor, so add `instantiates`
to the edge-kind set in both getCallers and getCallees (kept symmetric so
they stay inverses and `trace` can cross the instantiation boundary,
function -> class -> its methods). impact already traversed all edge
kinds, so it was unaffected.

Query-layer only — existing indexes benefit on upgrade with no re-index.
Verified on a Python fixture: `callers Supervisor` now returns the
construction sites (main/work/test_it), and a new graph test asserts
main() <-> DerivedClass via the instantiation. Full suite green.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md            |  1 +
 __tests__/graph.test.ts | 19 +++++++++++++++++++
 src/graph/traversal.ts  | 13 +++++++++++--
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f790c7c3..4aca757b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -58,6 +58,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - `codegraph affected` now reports the tests and files that actually depend on your changes. It used to follow only `import` statements — but those never cross file boundaries in CodeGraph's graph — so it returned **no affected tests for any change, in every language**. It now traces the real cross-file usage graph (calls, references, instantiations, and class `extends` / `implements`), so `git diff --name-only | codegraph affected` surfaces the test files that exercise the changed code. Circular-dependency detection, which had the same blind spot, now works too.
 - Blast radius, callers, and `codegraph affected` now recognize far more of the dependencies that were already in your code. A symbol now counts as a dependency whether it's called, used only in a type annotation inside a function body (`const items: Foo[] = []`), imported and placed in a registry array or passed as an argument, used as a JSX component, simply re-exported from a barrel (`export { X } from './x'`), or pulled in as a namespace (`import * as ns from '@/x'`) — including through tsconfig path aliases like `@/`. Previously only called, instantiated, or signature-typed symbols created a cross-file link, so a file that used a dependency in any other way could look like it depended on nothing — and the file that defined a widely-used symbol could look like nothing depended on it. The graph still indexes exactly the same symbols; it just connects the ones that were already there. (TypeScript/JavaScript)
 - The same completeness fix now applies to **Python**: a name brought in with `from module import X` is recorded as a dependency on that module even when `X` is only stored in a list/dict, passed as an argument, used as a decorator, or re-exported through an `__init__.py`. Previously Python linked only imports that were called or instantiated, so a module consumed purely by value — or only re-exported — looked like nothing depended on it.
+- `codegraph_callers` (and the `callers` command) now lists the places a class is **instantiated**, not just where it's imported. Constructing a class — `Foo(...)` in Python, `new Foo()` elsewhere — is calling its constructor, so asking who calls a class now returns the construction sites, and `codegraph_callees` / trace cross the instantiation the same way. Previously a class's instantiation sites were invisible to `callers`, so "what breaks if I change this class?" could come back empty even when the constructor was called from many places. Works on your existing index — no re-index needed. (#774)
 - Rust impact and `codegraph affected` now connect far more of the module graph. Struct literals (`Widget { n: 1 }`) are recorded as instantiations; a `use` / `pub use` brings its item into the dependency graph — so a `pub use` re-export hub (a `mod.rs` re-exporting its submodules) depends on the modules it re-exports — resolved by Rust module path (`crate::`/`self::`/`super::`), so a re-export of a common name like `read` links to the right module instead of a same-named symbol elsewhere; and trait dispatch reaches implementations — a struct whose methods cover a trait's is treated as implementing it, and a call through `&dyn Trait` resolves to the concrete method. Previously a Rust type linked only when called or used in a type position, so structs built by literal, modules surfaced only through `pub use`, and trait-only implementations looked like they had no dependents. (#584 for Rust traits)
 - Rust cross-module function calls now resolve to the right file. A call to a sibling submodule's function — `users::router()`, the common router-assembly / handler-registration pattern where `mod users;` makes `users` a child of the current module — is now resolved relative to the current module, not only the crate root. Deeper module-path calls (`database::profiles::find()` — the `db.run(|c| …)` data-access shape) now resolve too; these were being discarded before resolution even ran, because the path's leaf function name was never checked. Previously such a call linked to nothing, so a module reached only as `module::path::function()` looked like it had no dependents; a web app wired this way (Axum, Rocket, and similar) now surfaces its handler and data-access modules' real callers. (Rust)
 - Rocket route handlers now connect to where they're mounted. A handler registered in a `routes![a::b::handler, …]` or `catchers![…]` macro used to be invisible — the macro body is a raw token tree, so the handler looked like it had no caller (Rocket mounts it at runtime) and its file showed no dependents. The handler paths are now read out of the macro and linked to the `mount`/`register` call, so editing a Rocket handler surfaces its route registration and a routes module is no longer reported as unused. (Rust, Rocket)
diff --git a/__tests__/graph.test.ts b/__tests__/graph.test.ts
index 5ddbd028f..bc25942ac 100644
--- a/__tests__/graph.test.ts
+++ b/__tests__/graph.test.ts
@@ -293,6 +293,25 @@ export { main };
 
       expect(Array.isArray(callees)).toBe(true);
     });
+
+    it('treats class instantiation as a caller/callee of the class (#774)', () => {
+      // main() does `new DerivedClass(10, 'test')`. Constructing a class is
+      // calling its constructor, so main is a caller of DerivedClass and
+      // DerivedClass is a callee of main. Before #774 the `instantiates` edge
+      // was excluded from the caller/callee traversal, so `callers <Class>`
+      // returned the importing file (or nothing) and missed every
+      // construction site.
+      const derived = cg.getNodesByKind('class').find((n) => n.name === 'DerivedClass');
+      const main = cg.getNodesByKind('function').find((n) => n.name === 'main');
+      expect(derived).toBeDefined();
+      expect(main).toBeDefined();
+
+      const callerNames = cg.getCallers(derived!.id).map((c) => c.node.name);
+      expect(callerNames).toContain('main');
+
+      const calleeNames = cg.getCallees(main!.id).map((c) => c.node.name);
+      expect(calleeNames).toContain('DerivedClass');
+    });
   });
 
   describe('getImpactRadius()', () => {
diff --git a/src/graph/traversal.ts b/src/graph/traversal.ts
index 82fc208d3..c50b877fe 100644
--- a/src/graph/traversal.ts
+++ b/src/graph/traversal.ts
@@ -248,7 +248,12 @@ export class GraphTraverser {
     }
     visited.add(nodeId);
 
-    const incomingEdges = this.queries.getIncomingEdges(nodeId, ['calls', 'references', 'imports']);
+    // `instantiates` counts as a caller: constructing a class (`Foo(...)` /
+    // `new Foo()`) is calling its constructor, so the instantiation site is a
+    // caller of the class. Without it, `callers <Class>` surfaced only the
+    // importing file (via `imports`) and missed every construction site —
+    // the opposite of "what breaks if I change this class?" (#774).
+    const incomingEdges = this.queries.getIncomingEdges(nodeId, ['calls', 'references', 'imports', 'instantiates']);
     if (incomingEdges.length === 0) return;
 
     // Batch-fetch all caller nodes in one round-trip instead of one
@@ -293,7 +298,11 @@ export class GraphTraverser {
     }
     visited.add(nodeId);
 
-    const outgoingEdges = this.queries.getOutgoingEdges(nodeId, ['calls', 'references', 'imports']);
+    // Symmetric with getCallers: a function that constructs a class
+    // (`Foo(...)` / `new Foo()`) has that class as a callee, so callers and
+    // callees stay inverses of each other and `trace` can cross the
+    // instantiation boundary (function → class → its methods) (#774).
+    const outgoingEdges = this.queries.getOutgoingEdges(nodeId, ['calls', 'references', 'imports', 'instantiates']);
     if (outgoingEdges.length === 0) return;
 
     // Batch-fetch callee nodes (was N+1 — see getCallersRecursive note).

From 0b1a2eed97e54cb362d501cc93d9133f706c3e41 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 12:04:51 -0500
Subject: [PATCH 33/51] fix(mcp): treat a stdin 'error' as shutdown so the
 server can't orphan/spin (#799) (#805)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A stdio MCP server's lifeline is stdin: when the host/client goes away,
stdin should end and the server should exit. The server paths listened
for stdin 'end'/'close' but NOT 'error'.

That gap bites with a socket-backed stdin — the shape VS Code / Claude
Code use (a socketpair, not a pipe). On client death the socket can
emit an 'error' (ECONNRESET/hangup) instead of a clean 'close'.
Unhandled, it escalated to the process-wide uncaughtException handler,
which logs and keeps running — so the server orphaned instead of
exiting. On Linux a POLLHUP socket fd left registered in epoll then
wakes the event loop continuously, pinning a core at 100% CPU; once the
main thread spins, the setInterval PPID watchdog can't even fire, so the
orphan runs forever (the report's 28+ minutes).

Add treatStdinFailureAsShutdown(): listen for 'error' as well as
'end'/'close', and DESTROY the stdin stream on any terminal event so the
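fd leaves epoll and can't churn, then run the path's shutdown. Wired into
the live paths — startDirect, the local-handshake proxy, and
StdioTransport. The legacy pipe proxy gets the same 'error' + destroy
handling inline instead of through the helper. The helper fires its
callback at most once (re-entry guard).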

Note: this is hardening for a class of failure that matches every piece
of the report's evidence (socket stdin, userspace main-thread spin, high
involuntary context switches, watchdog never firing), but the exact 100%
CPU spin could not be reproduced in Docker (Linux) across /dev/null EOF,
socket peer-death (RST/FIN), the reporter's 0.9.7 bundle, and the npx
chain — all exited cleanly — so the trigger appears environment-specific.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                     |  1 +
 __tests__/stdin-teardown.test.ts | 46 ++++++++++++++++++++++++++++++++
 src/mcp/index.ts                 |  7 +++--
 src/mcp/proxy.ts                 | 18 ++++++++++---
 src/mcp/stdin-teardown.ts        | 46 ++++++++++++++++++++++++++++++++
 src/mcp/transport.ts             | 16 +++++++++--
 6 files changed, 126 insertions(+), 8 deletions(-)
 create mode 100644 __tests__/stdin-teardown.test.ts
 create mode 100644 src/mcp/stdin-teardown.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4aca757b7..c2a8dbc96 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,6 +49,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - C++ method calls made through a singleton, factory, or chained getter now resolve to the correct class. A call like `Foo::instance().bar()`, `WidgetFactory::create().draw()`, `openSession()->run()`, or the same stored in an `auto` local first, used to lose the receiver's type — so when two classes had a same-named method the call silently attached to whichever was indexed first (or didn't resolve at all), corrupting callers, impact, and trace. CodeGraph now infers the receiver's type from what the inner call returns (capturing C++ return types for the first time) and creates the edge only when that class genuinely has the method, so a wrong guess produces no edge instead of a misleading one. Covers singletons and self-returning accessors, factories that return a different type, free-function factories, `make_unique` / `make_shared` / `new` / direct construction, and single-level member chains. Existing C/C++ indexes should be re-indexed (`codegraph index -f`) to benefit. Thanks @stabey. (#645) (C/C++)
 - The shared background server no longer logs a scary-looking `[error] … undefined` line on every session start. Attaching to the shared daemon is normal, healthy behavior, but the informational message was being surfaced by MCP hosts (Claude Code and others) as an error; it's now silent by default — set `CODEGRAPH_MCP_LOG_ATTACH=1` to surface it when debugging daemon attach. Thanks @mturac. (#618)
 - On Windows, CodeGraph's background processes no longer pile up without bound and saturate CPU over a long session. When the editor or agent that launched CodeGraph exited, its helper process couldn't tell its parent had gone — Windows reports process lineage differently than macOS and Linux — so the helper kept running, the shared background server never saw the client disconnect, and its idle timer never fired to shut it down. CodeGraph now detects parent-process exit directly on Windows, so helpers and the idle background server wind down promptly, the same as they already did on macOS and Linux. (#692, #576, #680)
+- The MCP server now shuts down cleanly when its editor/agent connection drops abruptly, instead of risking an orphaned process that pins a CPU core. Editors talk to a stdio MCP server over a socket; if that socket failed with an error rather than closing cleanly — which can happen when the editor window is reloaded or the launching process is killed — the server didn't treat it as a disconnect and could be left running. CodeGraph now treats any failure of its input stream as a shutdown signal and tears the stream down, so an orphaned server exits promptly. (#799)
 - The shared background server has two further safeguards against ever lingering: it now drops a client the moment it detects that client's process is gone (even if the disconnect arrived uncleanly — a force-quit or a dropped connection that never closed the socket), and it won't stay running indefinitely with clients attached but no activity. Together these guarantee it always winds down, on every platform. (#692)
 - A session no longer loses CodeGraph when the shared background server is restarted out from under it — for example when your MCP host (opencode and others) stops and restarts the server as you open another session. Previously the affected session's connection died silently and any request in flight at that moment hung; now CodeGraph keeps that session working by serving it locally, so the tools stay available without restarting the session. (#662)
 - React Native native→JS events now connect through the common `sendEvent(context, "X", body)` wrapper. Many libraries (react-native-device-info and others) wrap the event emitter behind a helper whose `.emit(eventName, …)` takes a *variable*, so the matcher — which looked for `.emit("literal", …)` — missed it; the literal event name actually lives in the wrapper call. Now a native method that fires `sendEvent(…, "batteryLevelChanged", …)` links to the JS `addListener('batteryLevelChanged', …)` handler, so editing the native emitter surfaces the JS subscriber. (React Native)
diff --git a/__tests__/stdin-teardown.test.ts b/__tests__/stdin-teardown.test.ts
new file mode 100644
index 000000000..c538ac5b2
--- /dev/null
+++ b/__tests__/stdin-teardown.test.ts
@@ -0,0 +1,46 @@
+/**
+ * #799 — a socket-backed stdin that fails must shut the server down, not
+ * orphan/busy-spin. treatStdinFailureAsShutdown is the shared guard.
+ */
+import { describe, it, expect } from 'vitest';
+import { PassThrough } from 'stream';
+import { treatStdinFailureAsShutdown } from '../src/mcp/stdin-teardown';
+
+describe('treatStdinFailureAsShutdown (#799)', () => {
+  it("treats a stdin 'error' (ECONNRESET/hangup) as a shutdown signal", () => {
+    const s = new PassThrough();
+    let calls = 0;
+    treatStdinFailureAsShutdown(() => { calls++; }, s);
+
+    // No extra 'error' listener would throw here — the guard registers one.
+    s.emit('error', new Error('read ECONNRESET'));
+    expect(calls).toBe(1);
+  });
+
+  it("also fires on 'end' and on 'close'", () => {
+    for (const ev of ['end', 'close'] as const) {
+      const s = new PassThrough();
+      let calls = 0;
+      treatStdinFailureAsShutdown(() => { calls++; }, s);
+      s.emit(ev);
+      expect(calls, `event ${ev}`).toBe(1);
+    }
+  });
+
+  it('destroys the stream so a hung fd leaves epoll', () => {
+    const s = new PassThrough();
+    treatStdinFailureAsShutdown(() => { /* noop */ }, s);
+    s.emit('error', new Error('boom'));
+    expect(s.destroyed).toBe(true);
+  });
+
+  it('fires onTerminal at most once, even across error → close', () => {
+    const s = new PassThrough();
+    let calls = 0;
+    treatStdinFailureAsShutdown(() => { calls++; }, s);
+    s.emit('error', new Error('boom')); // fire() also destroys → emits 'close'
+    s.emit('close');                    // must not double-fire
+    s.emit('end');
+    expect(calls).toBe(1);
+  });
+});
diff --git a/src/mcp/index.ts b/src/mcp/index.ts
index fa939dfbb..9007ba6e9 100644
--- a/src/mcp/index.ts
+++ b/src/mcp/index.ts
@@ -50,6 +50,7 @@ import {
 import { connectWithHello, runLocalHandshakeProxy } from './proxy';
 import { getDaemonSocketPath } from './daemon-paths';
 import { supervisionLostReason } from './ppid-watchdog';
+import { treatStdinFailureAsShutdown } from './stdin-teardown';
 import { HOST_PPID_ENV } from '../extraction/wasm-runtime-flags';
 
 /**
@@ -330,8 +331,10 @@ export class MCPServer {
     // Detect parent-process death — same logic as pre-refactor. When stdin
     // closes we go through StdioTransport's `process.exit(0)` already, but
     // SIGKILL of the parent doesn't reliably close stdin on Linux (#277).
-    process.stdin.on('end', () => this.stop());
-    process.stdin.on('close', () => this.stop());
+    // Also treat a stdin `'error'` (a socket-backed stdin can fail with
+    // ECONNRESET/hangup instead of a clean close) as shutdown, and destroy the
+    // stream so a hung fd can't busy-spin the event loop (#799).
+    treatStdinFailureAsShutdown(() => this.stop());
 
     this.mode = 'direct';
     this.installSignalHandlers();
diff --git a/src/mcp/proxy.ts b/src/mcp/proxy.ts
index d18649678..2efe25a48 100644
--- a/src/mcp/proxy.ts
+++ b/src/mcp/proxy.ts
@@ -23,6 +23,7 @@ import * as net from 'net';
 import { HOST_PPID_ENV } from '../extraction/wasm-runtime-flags';
 import { DaemonClientHello, DaemonHello, MAX_HELLO_LINE_BYTES } from './daemon';
 import { supervisionLostReason } from './ppid-watchdog';
+import { treatStdinFailureAsShutdown } from './stdin-teardown';
 import { CodeGraphPackageVersion } from './version';
 import { SERVER_INFO, PROTOCOL_VERSION } from './session';
 import { SERVER_INSTRUCTIONS } from './server-instructions';
@@ -298,8 +299,11 @@ export async function runLocalHandshakeProxy(deps: LocalHandshakeDeps): Promise<
       }
     }
   });
-  process.stdin.on('end', shutdown);
-  process.stdin.on('close', shutdown);
+  // Shut down when stdin ends/closes — and also on a stdin `'error'`, which a
+  // socket-backed stdin (the VS Code stdio shape) can emit on client death
+  // instead of a clean close; destroying the stream stops a hung fd from
+  // busy-spinning the event loop (#799).
+  treatStdinFailureAsShutdown(shutdown);
   startPpidWatchdogNoSocket(shutdown);
 
   // ---- daemon connection (background) ----
@@ -459,10 +463,16 @@ function pipeUntilClose(socket: net.Socket): Promise<void> {
       try { socket.end(); } catch { /* ignore */ }
       done();
     });
-    process.stdin.on('close', () => {
+    // 'close' and 'error' both tear down: a socket-backed stdin can fail with
+    // an 'error' (ECONNRESET/hangup) rather than a clean close; destroying it
+    // stops a hung fd from busy-spinning the event loop (#799).
+    const teardown = () => {
+      try { process.stdin.destroy(); } catch { /* ignore */ }
       try { socket.destroy(); } catch { /* ignore */ }
       done();
-    });
+    };
+    process.stdin.on('close', teardown);
+    process.stdin.on('error', teardown);
 
     socket.on('data', (chunk) => {
       try { process.stdout.write(chunk); } catch { /* ignore */ }
diff --git a/src/mcp/stdin-teardown.ts b/src/mcp/stdin-teardown.ts
new file mode 100644
index 000000000..1d60f7490
--- /dev/null
+++ b/src/mcp/stdin-teardown.ts
@@ -0,0 +1,46 @@
+/**
+ * Shut the server down when stdin fails, not only when it closes — issue #799.
+ *
+ * For an MCP stdio server, stdin is the link to the host/client: once that
+ * peer is gone, stdin should finish and the server should exit. The server
+ * paths used to watch `'end'` and `'close'` only, never `'error'`.
+ *
+ * The omission matters for a socket-backed stdin — the shape VS Code /
+ * Claude Code use (a socketpair rather than a pipe). When the client dies
+ * such a socket can report an `'error'` (ECONNRESET / hangup) instead of a
+ * clean `'close'`. Without an `'error'` listener Node hands the error to the
+ * process-wide `uncaughtException` handler, which logs and carries on, so
+ * the server is orphaned rather than stopped. On Linux it gets worse: a
+ * `POLLHUP` socket fd still registered in epoll keeps waking the event loop
+ * and pins a core at 100% CPU (the spin reported in #799); the spinning main
+ * thread also starves the `setInterval` PPID watchdog, so the orphan never dies.
+ *
+ * This helper subscribes to `'end'`, `'close'` and `'error'`, DESTROYS the
+ * stream on whichever arrives first so the fd leaves epoll, then runs the
+ * caller's shutdown. `onTerminal` is invoked at most once: callers' shutdowns
+ * are already re-entry-guarded, and the one-shot latch also stops the
+ * `'close'` that follows `destroy()` from calling it again.
+ *
+ * `stream` is injectable for tests; it defaults to `process.stdin`.
+ */
+export function treatStdinFailureAsShutdown(
+  onTerminal: () => void,
+  stream: NodeJS.ReadableStream = process.stdin
+): void {
+  let alreadyHandled = false;
+  const handleTerminalEvent = (): void => {
+    if (alreadyHandled) return;
+    alreadyHandled = true;
+    // Best-effort: pull the fd out of epoll before running the shutdown.
+    const destroyable = stream as Partial<{ destroy(): void }>;
+    try {
+      destroyable.destroy?.();
+    } catch {
+      /* stream was already torn down */
+    }
+    onTerminal();
+  };
+  for (const eventName of ['end', 'close', 'error'] as const) {
+    stream.on(eventName, handleTerminalEvent);
+  }
+}
diff --git a/src/mcp/transport.ts b/src/mcp/transport.ts
index aecc0368f..de1038be5 100644
--- a/src/mcp/transport.ts
+++ b/src/mcp/transport.ts
@@ -286,12 +286,24 @@ export class StdioTransport extends LineBasedJsonRpcTransport {
       await this.handleLine(line);
     });
 
-    this.rl.on('close', () => {
+    // readline 'close' fires on a clean stdin EOF. But a socket-backed stdin
+    // (the VS Code stdio shape) can fail with an 'error' (ECONNRESET/hangup)
+    // that readline doesn't surface as 'close' — unhandled, it escalated to
+    // the global uncaughtException handler (which keeps running), orphaning
+    // the server and, on Linux, busy-spinning a POLLHUP fd at 100% CPU. Treat
+    // 'error' as terminal too, and destroy stdin so the fd leaves epoll (#799).
+    let closed = false;
+    const onStreamEnd = (): void => {
+      if (closed) return;
+      closed = true;
+      try { process.stdin.destroy(); } catch { /* already gone */ }
       this.opts.onClose();
       if (this.opts.exitOnClose) {
         process.exit(0);
       }
-    });
+    };
+    this.rl.on('close', onStreamEnd);
+    process.stdin.on('error', onStreamEnd);
   }
 
   stop(): void {

From 0df9246752692ff3cbfe5ed09fa2fd05fb0825fe Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 12:38:22 -0500
Subject: [PATCH 34/51] fix(extraction): capture & clean docstrings across all
 README languages (#780) (#806)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(extraction): capture docstrings for export/const/decorator-wrapped symbols (#780)

getPrecedingDocstring walked previousNamedSibling from the EMITTED
declaration node, so it only found a leading comment when the comment was
a direct sibling of that node. For a declaration nested under a wrapper —
`export class X` / `export const f = () => {}` (export_statement /
lexical_declaration), a plain const arrow (variable_declarator), or a
decorated Python def/class (decorated_definition) — the comment is a
sibling of the WRAPPER, so the inner node had no preceding comment and
the docstring was stored as NULL.

Climb out through the wrapper node(s) before scanning for the comment.
Each wrapper holds exactly one declaration, so this can't mis-attribute a
comment to a sibling (verified: an uncommented method does NOT inherit its
class's comment). Also strip leading `#` from Python/Ruby/shell line
comments, which the cleanup chain missed (Python docstrings used to keep
their `#`).

Query/extraction-layer change to a parse helper; re-index to pick up
docstrings on already-indexed files. Verified on the reporter's JS/TS and
Python repros (8/8 now captured) plus over-walk controls; +3 tests.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(extraction): clean comment markers across all supported languages (#780)

Validating docstring capture across every README language surfaced that
the marker cleanup only knew C-style `//` and `/* */`, plus the `#` added
earlier this branch. Doc comments in other styles were captured but left
their markers in the stored text:

  - Rust/Swift/Kotlin doc lines `///` and `//!`  -> leading `/` / `!` leaked
  - Lua/Luau `--` and `--[[ ]]`                  -> not stripped
  - Pascal `{ }` and `(* *)`                     -> not stripped

Extract the cleanup into cleanCommentMarkers() and handle every style.
Paired block delimiters are stripped only when the comment OPENS with one,
so a line comment that happens to end with `}` / `*)` / `]]` is never
truncated; per-line markers stay anchored at line start.

Validated end-to-end (extract -> index -> codegraph_node output) across
all 19 tree-sitter code languages plus Svelte/Vue `<script>` blocks: every
one now stores and returns a clean docstring. +1 cross-language test.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                          |  1 +
 __tests__/extraction.test.ts          | 83 +++++++++++++++++++++++++++
 src/extraction/tree-sitter-helpers.ts | 70 ++++++++++++++++++----
 3 files changed, 142 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2a8dbc96..3743a10db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Doc comments are now captured for exported, `const`-assigned, and decorated declarations, and the documentation a symbol carries is now clean across every supported language. Previously a comment above `export class X`, `export const fn = () => …`, a plain `const fn = () => …`, or a decorated Python `def`/`class` (`@app.route(...)`, `@dataclass`) was dropped entirely — only comments directly above a plain declaration were kept. CodeGraph now finds the comment through the `export` / `const` / decorator wrapper. Comment-marker cleanup was also rounded out for every language CodeGraph supports: Rust/Swift/Kotlin doc lines (`///`, `//!`), Python/Ruby/shell `#`, Lua/Luau (`--` and `--[[ ]]`), and Pascal (`{ }` and `(* *)`) no longer leave stray markers in the stored text — validated end-to-end across all 19 code languages plus Svelte/Vue `<script>` blocks. Thanks @caleb-kaiser. (#780)
 - Go method calls made through a chained factory function now resolve to the correct type. A call like `New().Method()` used to drop the receiver, so the chained method attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Go return types (a pointer `*Foo` resolves to `Foo`, and a multi-return `(*Foo, error)` to its first result), infers the chained receiver's type from what the factory function returns, and resolves the method on it — including methods promoted from an embedded struct — creating the edge only when the type or an embedded type genuinely has the method. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Go)
 - Scala method calls made through a companion-object factory, a fluent chain, or a case-class `apply` now resolve to the correct type. A call like `Foo.create().bar()` or `Builder(cfg).bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.flatMap` / `.foreach` onto your own same-named class. CodeGraph now captures Scala return types (a generic `List[Foo]` resolves to its container `List`, a qualified `pkg.Foo` to `Foo`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a trait the type extends — creating the edge only when that type or one of its traits genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Scala indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Scala)
 - Rust method calls made through a chained associated function now resolve to the correct type. A call like `Foo::new().bar()` or `Foo::with(cfg).build()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Rust return types (`-> Self` resolves to the implementing type), infers the chained receiver's type from what the associated function returns, and resolves the method on it — including methods provided by a trait the type implements (via the new `impl Trait for Type` relationships) — creating the edge only when the type or one of its traits genuinely has the method. Existing Rust indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Rust)
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index aae1d9e62..907b46658 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -184,6 +184,89 @@ export class PaymentService {
     expect(chargeMethod).toBeDefined();
   });
 
+  it('captures docstrings for export- and const-wrapped declarations (#780)', () => {
+    const code = `
+// plain class control
+class Ledger {}
+
+// exported class
+export class Invoice {}
+
+// export default
+export default function settle() { return true; }
+
+// exported arrow const
+export const refund = (amount: number) => amount;
+
+// non-export arrow const
+const audit = (amount: number) => amount;
+`;
+    const byName = new Map(extractFromSource('doc.ts', code).nodes.map((n) => [n.name, n]));
+    expect(byName.get('Ledger')?.docstring).toBe('plain class control'); // control still works
+    expect(byName.get('Invoice')?.docstring).toBe('exported class');
+    expect(byName.get('settle')?.docstring).toBe('export default');
+    expect(byName.get('refund')?.docstring).toBe('exported arrow const');
+    expect(byName.get('audit')?.docstring).toBe('non-export arrow const');
+  });
+
+  it('does not mis-attribute a class comment to an uncommented member (#780)', () => {
+    const code = `
+// Comment for Box
+export class Box {
+  noComment() {}
+  // own comment
+  withComment() {}
+}
+`;
+    const byName = new Map(extractFromSource('box.ts', code).nodes.map((n) => [n.name, n]));
+    expect(byName.get('Box')?.docstring).toBe('Comment for Box');
+    expect(byName.get('noComment')?.docstring ?? null).toBeNull(); // no over-walk
+    expect(byName.get('withComment')?.docstring).toBe('own comment');
+  });
+
+  it('captures docstrings for decorated Python declarations, stripping `#` (#780)', () => {
+    const code = [
+      '# decorated function',
+      '@app.route("/x")',
+      'def py_handler():',
+      '    return 1',
+      '',
+      '',
+      '# plain function control',
+      'def py_plain():',
+      '    return 1',
+      '',
+      '',
+      '# decorated class',
+      '@dataclass',
+      'class PyModel:',
+      '    pass',
+      '',
+    ].join('\n');
+    const byName = new Map(extractFromSource('mod.py', code).nodes.map((n) => [n.name, n]));
+    expect(byName.get('py_handler')?.docstring).toBe('decorated function');
+    expect(byName.get('py_plain')?.docstring).toBe('plain function control'); // `#` stripped
+    expect(byName.get('PyModel')?.docstring).toBe('decorated class');
+  });
+
+  it('cleans comment markers across language styles (#780)', () => {
+    const doc = (file: string, code: string, name: string) =>
+      new Map(extractFromSource(file, code).nodes.map((n) => [n.name, n])).get(name)?.docstring;
+
+    // Rust doc lines (`///`, `//!`) — the trailing slash used to leak through.
+    expect(doc('m.rs', '/// rust doc line\nfn rs_fn() {}', 'rs_fn')).toBe('rust doc line');
+    // Lua line + long-bracket comments.
+    expect(doc('m.lua', '-- lua line\nfunction lua_fn() end', 'lua_fn')).toBe('lua line');
+    expect(doc('b.lua', '--[[ lua block ]]\nfunction lua_b() end', 'lua_b')).toBe('lua block');
+    // Pascal brace and paren-star comments.
+    const pasUnit = (c: string) =>
+      `unit U;\ninterface\n${c}\nprocedure P;\nimplementation\nprocedure P;\nbegin\nend;\nend.\n`;
+    expect(doc('a.pas', pasUnit('{ pascal brace }'), 'P')).toBe('pascal brace');
+    expect(doc('c.pas', pasUnit('(* pascal paren *)'), 'P')).toBe('pascal paren');
+    // C block comment still clean (no regression).
+    expect(doc('m.c', '/* c block */\nvoid c_fn(void) {}', 'c_fn')).toBe('c block');
+  });
+
   it('should extract interfaces', () => {
     const code = `
 export interface User {
diff --git a/src/extraction/tree-sitter-helpers.ts b/src/extraction/tree-sitter-helpers.ts
index 0f2dd1bfd..4ffa59570 100644
--- a/src/extraction/tree-sitter-helpers.ts
+++ b/src/extraction/tree-sitter-helpers.ts
@@ -43,11 +43,66 @@ export function getChildByField(node: SyntaxNode, fieldName: string): SyntaxNode
   return node.childForFieldName(fieldName);
 }
 
+/**
+ * Node types that *wrap* a declaration so a leading comment is a sibling of the
+ * wrapper, not of the emitted (inner) declaration node. CodeGraph emits the
+ * inner node, so before looking for its preceding comment we climb out through
+ * these: `export class X {}` (export_statement), `@dec\ndef f()`
+ * (decorated_definition), `const f = () => {}` (lexical_declaration →
+ * variable_declarator). Usually one declaration per wrapper. NOTE(review):
+ * `const a = …, b = …` has two declarators, so both get the comment. (#780)
+ */
+const DOCSTRING_WRAPPER_TYPES = new Set([
+  'export_statement', // JS/TS: export class/function/const ...
+  'decorated_definition', // Python: @decorator over def/class
+  'lexical_declaration', // JS/TS: const/let x = () => {}
+  'variable_declaration', // JS/TS: var x = ...
+  'variable_declarator', // JS/TS: the `x = () => {}` inside the declaration
+  'ambient_declaration', // TS: declare ...
+]);
+
+/**
+ * Strip comment-syntax markers from a raw comment so the stored docstring is
+ * just the prose. Covers C-family line/block comments and their doc variants,
+ * Rust/Swift/Kotlin `///` and `//!` doc lines, hash lines (Python/Ruby/shell),
+ * Lua/Luau line and long-bracket comments, and Pascal brace and paren-star
+ * comments. (#780)
+ *
+ * Paired block delimiters are stripped only when the comment OPENS with one,
+ * so a line comment that merely happens to END with a closing delimiter is
+ * never truncated. At most ONE line marker is removed per line (a single
+ * alternation, not chained passes), so prose that itself starts with `#` or
+ * `--` survives: `// #42 fix` yields `#42 fix`, not `42 fix`.
+ */
+function cleanCommentMarkers(comment: string): string {
+  let c = comment.trim();
+  if (c.startsWith('/*')) c = c.replace(/^\/\*+!?/, '').replace(/\*+\/$/, '');
+  else if (c.startsWith('--[')) c = c.replace(/^--\[=*\[/, '').replace(/\]=*\]$/, '');
+  else if (c.startsWith('(*')) c = c.replace(/^\(\*/, '').replace(/\*\)$/, '');
+  else if (c.startsWith('{')) c = c.replace(/^\{/, '').replace(/\}$/, '');
+  return c
+    // One pass, one marker per line: `//`, `///`, `//!`, `--`, or `#`.
+    .replace(/^(?:\/\/[/!]?|--|#)\s?/gm, '')
+    // Block-comment continuation stars (` * foo`).
+    .replace(/^\s*\*\s?/gm, '')
+    .trim();
+}
+
 /**
  * Get the docstring/comment preceding a node
  */
 export function getPrecedingDocstring(node: SyntaxNode, source: string): string | undefined {
-  let sibling = node.previousNamedSibling;
+  // Climb out of any wrapper(s) so a comment preceding the WHOLE construct
+  // (export-, decorator-, or const-arrow-wrapped) is reachable as a sibling.
+  // The emitted node's own `previousNamedSibling` is empty (export/const) or a
+  // decorator (Python) in those cases, so without this the docstring was
+  // dropped. (#780)
+  let anchor = node;
+  while (anchor.parent && DOCSTRING_WRAPPER_TYPES.has(anchor.parent.type)) {
+    anchor = anchor.parent;
+  }
+
+  let sibling = anchor.previousNamedSibling;
   const comments: string[] = [];
 
   while (sibling) {
@@ -66,15 +121,6 @@ export function getPrecedingDocstring(node: SyntaxNode, source: string): string
 
   if (comments.length === 0) return undefined;
 
-  // Clean up comment markers
-  return comments
-    .map((c) =>
-      c
-        .replace(/^\/\*\*?|\*\/$/g, '')
-        .replace(/^\/\/\s?/gm, '')
-        .replace(/^\s*\*\s?/gm, '')
-        .trim()
-    )
-    .join('\n')
-    .trim();
+  // Strip each comment's syntax markers (language-aware), then join.
+  return comments.map(cleanCommentMarkers).join('\n').trim();
 }

From 8a114ba53c0c0179ae9854aa1c4007158da57aaf Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 14:20:27 -0500
Subject: [PATCH 35/51] =?UTF-8?q?feat(extraction):=20capture=20function-as?=
 =?UTF-8?q?-value=20=E2=80=94=20callback=20registration=20sites=20in=20cal?=
 =?UTF-8?q?lers/impact=20(#756)=20(#807)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A function name used as a VALUE — passed as an argument
(signal(SIGINT, handler), qsort(..., compare)), assigned to a function
pointer or field (ops->recv_cb = my_cb, OnClick := Handler), or placed in
a struct initializer / handler table ({ .recv_cb = my_cb },
{ "get", getCommand }) — produced no edge in ANY of the 19 tree-sitter
languages, so registered callbacks looked dead and their registration
sites were invisible to callers/impact.

This adds table-driven function-as-value capture across all 19 languages
(plus the wrapper forms: &fn, &Cls::method, Java Class::m, Kotlin ::f,
Swift #selector, ObjC @selector, Ruby method(:sym), Scala eta, Pascal
@Handler), gated at extraction (same-file definitions + imported
bindings; C-family file-scope initializers are constant-expression
contexts and skip the gate, which is how redis-style cross-file command
tables resolve), and resolved by a dedicated strategy: function/method
targets only, same-file first, unique-or-drop cross-file, no fuzzy
fallback ever. Edges persist as kind 'references' with metadata.fnRef,
so getCallers/getImpactRadius surface them with zero graph-layer
changes; MCP callers/callees label them "via callback registration".

Precision rules bought by real-repo false positives (full A/B record in
docs/design/function-ref-capture.md): C++ is &-explicit outside
file-scope tables (fmt's begin/out/size collisions; out-of-line member
defs are function-kind); TS/JS/Python bare ids resolve to functions only
(TS class fields extract as method-kind — pre-existing quirk); Swift
refuses same-file method overload-families; param-forward shapes
(this.x = x, value: value) and destructuring are skipped; minified
bundles (*.min.js) produce no candidates.

Validated on 17 public OSS repos (redis, excalidraw, gin, bytes, okhttp,
okio, Alamofire, flask, sinatra, Newtonsoft.Json, scopt, provider,
busted, Fusion, AFNetworking, PascalCoin, fmt): node counts identical,
zero calls edges lost or gained, references strictly additive
(+3,200 registration edges total), precision spot-checked by reading
sampled source lines (redis 30/30, flask 8/8). Deliberately NOT covered:
indirect-dispatch resolution (o->cb(x) → impl) — that needs data-flow
through struct fields, and a wrong edge is worse than none.

EXTRACTION_VERSION 18 → 19 (re-index to benefit).

Closes #756

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                          |   1 +
 __tests__/function-ref.test.ts        | 498 ++++++++++++++++++++
 docs/design/function-ref-capture.md   | 188 ++++++++
 src/extraction/extraction-version.ts  |   2 +-
 src/extraction/function-ref.ts        | 644 ++++++++++++++++++++++++++
 src/extraction/generated-detection.ts |   3 +
 src/extraction/tree-sitter.ts         | 184 +++++++-
 src/mcp/server-instructions.ts        |   2 +-
 src/mcp/tools.ts                      |  35 +-
 src/resolution/index.ts               |  33 +-
 src/resolution/name-matcher.ts        | 116 ++++-
 src/resolution/types.ts               |   6 +-
 src/types.ts                          |  10 +-
 13 files changed, 1706 insertions(+), 16 deletions(-)
 create mode 100644 __tests__/function-ref.test.ts
 create mode 100644 docs/design/function-ref-capture.md
 create mode 100644 src/extraction/function-ref.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3743a10db..9b3739f4e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
+- CodeGraph now sees where a function is **registered as a callback**, not just where it's called. A function name passed as an argument (`signal(SIGINT, handler)`, `qsort(…, compare)`, `addEventListener(…, onBlur)`), assigned to a function pointer or field (`ops->recv_cb = my_cb`, `OnClick := Handler`), or placed in a struct initializer or handler table (`{ .recv_cb = my_cb }`, `{ "get", getCommand }`) now produces a reference edge from the registration site to the function — so `codegraph_callers` and `codegraph_impact` surface callback wiring that previously looked like dead code. Works across all supported languages, including the language-specific forms: C/C++ `&fn`, Java `Class::method`, Kotlin `::fn`, Swift `#selector`, Objective-C `@selector`, Ruby `method(:fn)`, Scala eta-expansion, and Delphi/Pascal `@Handler` and `OnClick := Handler` event wiring. Callers output labels these "via callback registration". Resolution is deliberately conservative: an ambiguous name produces no edge rather than a wrong one. Re-index a project to benefit. Thanks @zmcrazy. (#756)
 - The `codegraph_node` MCP tool can now **read a whole source file like the built-in Read tool — only faster, served from the index**. Pass a file path with no symbol and it returns that file's current source with line numbers (the same `<n>⇥<line>` shape Read produces, so an assistant can edit straight from it), narrowable with `offset`/`limit` exactly like Read, plus a one-line note of which files depend on it (the file's blast radius). Use it anywhere you'd reach for Read on an indexed source file. Pass `symbolsOnly: true` for just the file's structure. Configuration/data files (`.yml` / `.properties`) are summarized by key only, never dumped, so secrets in them are never surfaced. The agent-facing guidance was also retuned so assistants reach for codegraph while *implementing* a change (not only when answering questions), since one codegraph call returns the same bytes plus the blast radius, faster than re-reading the file.
 - New `codegraph upgrade` command updates CodeGraph to the latest release in place — it detects how you installed (the standalone `install.sh` / `install.ps1` bundle, npm, or npx) and does the right thing for each, on macOS, Linux, and Windows. Use `codegraph upgrade --check` to see whether an update is available without installing, or `codegraph upgrade <version>` to move to a specific version. After upgrading it reminds you to re-index your projects so they pick up the newer engine's improvements. (#679)
 - `codegraph status` now flags when a project's index was built by an older engine than the one you're running and recommends re-indexing (also surfaced in `codegraph status --json`), so you know when a `codegraph index -f` or `codegraph sync` will add coverage a newer release introduced.
diff --git a/__tests__/function-ref.test.ts b/__tests__/function-ref.test.ts
new file mode 100644
index 000000000..95d5b1385
--- /dev/null
+++ b/__tests__/function-ref.test.ts
@@ -0,0 +1,498 @@
+/**
+ * Function-as-value capture tests (#756) — registration-linking for callbacks.
+ *
+ * A function name used as a VALUE (passed as an argument, assigned to a
+ * field/function pointer, placed in a struct/object initializer or function
+ * table) must produce a `references` edge from the registration site to the
+ * function, so `callers`/`impact` surface where a callback is wired up.
+ *
+ * Safety properties verified here, per the dynamic-dispatch discipline
+ * ("a wrong edge is worse than none"):
+ *  - decoy: an ambiguous cross-file name (no import, ≥2 definitions) → NO edge
+ *  - same-file priority: a same-file definition beats a same-named decoy
+ *  - kind filter: a class/variable passed as a value never gets a
+ *    function-ref edge
+ *  - self: a function passing itself → no self-loop
+ *  - drain: all resolvable function_ref rows leave unresolved_refs (no
+ *    batched-resolver runaway), and re-index is idempotent
+ */
+
+import { describe, it, expect, beforeAll, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { CodeGraph } from '../src';
+import type { Edge } from '../src/types';
+import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';
+
+beforeAll(async () => {
+  await initGrammars();
+  await loadAllGrammars();
+});
+
+/** Every function-as-value `references` edge that points at a node named `name`. */
+function fnRefEdgesInto(cg: CodeGraph, name: string): Edge[] {
+  // Capture stamps its edges as kind 'references' with metadata.fnRef === true.
+  const isFnRef = (edge: Edge): boolean =>
+    edge.kind === 'references' && edge.metadata?.fnRef === true;
+  const collected: Edge[] = [];
+  for (const node of cg.getNodesByName(name)) {
+    for (const edge of cg.getIncomingEdges(node.id)) {
+      if (isFnRef(edge)) collected.push(edge);
+    }
+  }
+  return collected;
+}
+
+/** Sorted names of the nodes the given edges start from; unknown sources are skipped. */
+function sourceNames(cg: CodeGraph, edges: Edge[]): string[] {
+  const found: string[] = [];
+  edges.forEach((edge) => {
+    const origin = cg.getNode(edge.source);
+    if (origin) found.push(origin.name);
+  });
+  return found.sort();
+}
+
+describe('Function-as-value capture (#756)', () => {
+  let tmpDir: string | undefined;
+  afterEach(() => {
+    if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
+    tmpDir = undefined;
+  });
+
+  it('C: registration sites produce references edges (the #756 scenario)', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-c-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'driver.c'),
+      [
+        'struct ops { void (*recv_cb)(int); void (*send_cb)(int); };',
+        'typedef void (*cb_t)(int);',
+        '',
+        'static void my_recv_cb(int x) { (void)x; }',
+        'static void my_send_cb(int x) { (void)x; }',
+        '',
+        'void register_handler(void (*cb)(int)) { cb(1); }',
+        '',
+        'void direct_caller(void) { my_recv_cb(5); }',
+        '',
+        'void arg_registrar(void) { register_handler(my_recv_cb); }',
+        'void addr_registrar(void) { register_handler(&my_recv_cb); }',
+        'void assign_registrar(struct ops *o) { o->recv_cb = my_recv_cb; }',
+        '',
+        'static struct ops global_ops = { .recv_cb = my_recv_cb, .send_cb = my_send_cb };',
+        'static cb_t cb_table[] = { my_recv_cb, my_send_cb };',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      const intoRecv = fnRefEdgesInto(cg, 'my_recv_cb');
+      expect(sourceNames(cg, intoRecv)).toEqual([
+        'addr_registrar',
+        'arg_registrar',
+        'assign_registrar',
+        'driver.c', // file-scope: designated init + positional table (deduped per source)
+      ]);
+
+      // The direct call is still a `calls` edge — unchanged by this feature.
+      const recv = cg.getNodesByName('my_recv_cb')[0]!;
+      const callEdges = cg
+        .getIncomingEdges(recv.id)
+        .filter((e) => e.kind === 'calls');
+      expect(sourceNames(cg, callEdges)).toEqual(['direct_caller']);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('TypeScript: arg / object / array / member / assignment forms', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-ts-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.ts'),
+      [
+        'export function targetCb(x: number): void { console.log(x); }',
+        'function registerHandler(cb: (x: number) => void): void { cb(1); }',
+        '',
+        'export function argRegistrar(): void { registerHandler(targetCb); }',
+        'export function timerRegistrar(): void { setTimeout(targetCb, 100); }',
+        'export function objRegistrar(): unknown { return { recv: targetCb }; }',
+        'export function arrRegistrar(): unknown { return [targetCb]; }',
+        '',
+        'class Emitter { cb: ((x: number) => void) | null = null; }',
+        'export function assignRegistrar(e: Emitter): void { e.cb = targetCb; }',
+        '',
+        'interface Btn { on(ev: string, cb: () => void): void; }',
+        'export class Comp {',
+        '  handleClick(): void {}',
+        '  wire(btn: Btn): void { btn.on("click", this.handleClick); }',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'targetCb'))).toEqual([
+        'argRegistrar',
+        'arrRegistrar',
+        'assignRegistrar',
+        'objRegistrar',
+        'timerRegistrar',
+      ]);
+      // `this.handleClick` is deliberately NOT captured in TS/JS: class fields
+      // extract as method-kind nodes, so `this.X` value positions (mostly data
+      // reads in real code) produced wrong edges — see TS_JS_SPEC note.
+      expect(fnRefEdgesInto(cg, 'handleClick')).toHaveLength(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('resolves an imported callback across files via its import', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-import-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'handlers.ts'),
+      'export function onMessage(x: number): void { console.log(x); }\n'
+    );
+    fs.writeFileSync(
+      path.join(tmpDir, 'wiring.ts'),
+      [
+        "import { onMessage } from './handlers';",
+        'export function wire(bus: { on(cb: (x: number) => void): void }): void {',
+        '  bus.on(onMessage);',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const edges = fnRefEdgesInto(cg, 'onMessage');
+      expect(sourceNames(cg, edges)).toContain('wire');
+      // The edge must target the handlers.ts definition.
+      const target = cg.getNode(edges[0]!.target);
+      expect(target?.filePath.endsWith('handlers.ts')).toBe(true);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('DECOY: ambiguous cross-file name without an import resolves to NO edge', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-decoy-'));
+    // Two same-named functions in different files…
+    fs.writeFileSync(path.join(tmpDir, 'a.ts'), 'export function process(x: number): void {}\n');
+    fs.writeFileSync(path.join(tmpDir, 'b.ts'), 'export function process(x: number): void {}\n');
+    // …and a registrar that passes a PARAMETER named `process`, importing
+    // neither definition. A name passes the extraction gate only if it is
+    // imported or defined in the same file — here it is neither, so the gate
+    // drops it; had it leaked, the unique-only cross-file rule still yields no edge.
+    fs.writeFileSync(
+      path.join(tmpDir, 'c.ts'),
+      'export function wire(bus: { on(cb: unknown): void }, process: unknown): void { bus.on(process); }\n'
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const edges = fnRefEdgesInto(cg, 'process');
+      expect(sourceNames(cg, edges)).not.toContain('wire');
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('SAME-FILE PRIORITY: a same-file definition beats a same-named decoy elsewhere', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-samefile-'));
+    fs.writeFileSync(path.join(tmpDir, 'decoy.c'), 'void my_cb(int x) { (void)x; }\n');
+    fs.writeFileSync(
+      path.join(tmpDir, 'real.c'),
+      [
+        'static void my_cb(int x) { (void)x; }',
+        'void register_handler(void (*cb)(int)) { cb(1); }',
+        'void wire(void) { register_handler(my_cb); }',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const wires = fnRefEdgesInto(cg, 'my_cb').filter((e) => {
+        const src = cg.getNode(e.source);
+        return src?.name === 'wire';
+      });
+      expect(wires).toHaveLength(1);
+      const target = cg.getNode(wires[0]!.target);
+      expect(target?.filePath.endsWith('real.c')).toBe(true);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('KIND FILTER: a class passed as a value gets no function-ref edge', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-kind-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.ts'),
+      [
+        'export class Strategy { run(): void {} }',
+        'export function consume(x: unknown): void { void x; }',
+        'export function wire(): void { consume(Strategy); }',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const strategy = cg.getNodesByName('Strategy').find((n) => n.kind === 'class')!;
+      const fnRef = cg
+        .getIncomingEdges(strategy.id)
+        .filter((e) => e.metadata?.fnRef === true);
+      expect(fnRef).toHaveLength(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('SELF: a function registering itself produces no self-loop', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-self-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.ts'),
+      [
+        'declare function schedule(cb: () => void): void;',
+        'export function retry(): void { schedule(retry); }',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const retry = cg.getNodesByName('retry')[0]!;
+      const selfLoops = cg
+        .getIncomingEdges(retry.id)
+        .filter((e) => e.source === retry.id && e.metadata?.fnRef === true);
+      expect(selfLoops).toHaveLength(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('C++: &Cls::method member pointers resolve scoped; bare ids are free-function-only', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-cpp-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'widget.cpp'),
+      [
+        'struct Widget {',
+        '  void on_click(int x);',
+        '};',
+        'void Widget::on_click(int x) { (void)x; }',
+        'struct Decoy {',
+        '  void on_click(int x);',
+        '};',
+        'void Decoy::on_click(int x) { (void)x; }',
+        'void free_cb(int x) { (void)x; }',
+        'void bare_fn(int x) { (void)x; }',
+        'void reg(void* p) { (void)p; }',
+        'void wire() {',
+        '  auto p = &Widget::on_click;', // qualified — must hit Widget, not Decoy
+        '  reg(p);',
+        '  reg(&free_cb);', // explicit address-of — captured
+        '  reg(bare_fn);', // bare id in args — NOT captured for C++ (addressOfOnly)
+        '}',
+        // A method named like a local: passing the LOCAL must not resolve to
+        // the method (cpp args accept only explicit & forms).
+        'struct Buf { char* out(); };',
+        'void copy_to(void* out_) { (void)out_; }',
+        'void caller(char* out) { copy_to(out); }',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      // Qualified member pointer resolves to Widget::on_click specifically.
+      const onClicks = cg.getNodesByName('on_click');
+      const widgetOnClick = onClicks.find((n) => n.qualifiedName.includes('Widget'))!;
+      const decoyOnClick = onClicks.find((n) => n.qualifiedName.includes('Decoy'))!;
+      const intoWidget = cg
+        .getIncomingEdges(widgetOnClick.id)
+        .filter((e) => e.metadata?.fnRef === true);
+      expect(intoWidget).toHaveLength(1);
+      expect(cg.getNode(intoWidget[0]!.source)?.name).toBe('wire');
+      expect(
+        cg.getIncomingEdges(decoyOnClick.id).filter((e) => e.metadata?.fnRef === true)
+      ).toHaveLength(0);
+
+      // Explicit &fn resolves; bare identifier in C++ args does NOT (the
+      // generic-name collision class: fmt's `begin`/`out`/`size` params).
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'free_cb'))).toContain('wire');
+      expect(fnRefEdgesInto(cg, 'bare_fn')).toHaveLength(0);
+
+      // The local `out` param must NOT produce an edge to Buf::out.
+      const outMethod = cg.getNodesByName('out').find((n) => n.kind === 'method');
+      if (outMethod) {
+        expect(
+          cg.getIncomingEdges(outMethod.id).filter((e) => e.metadata?.fnRef === true)
+        ).toHaveLength(0);
+      }
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('Pascal: := event wiring, @addr and bare args', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-pas-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.pas'),
+      [
+        'unit Main;',
+        'interface',
+        'type',
+        '  TCallback = procedure(X: Integer);',
+        '  THolder = class',
+        '  public',
+        '    OnFire: TCallback;',
+        '    procedure Wire;',
+        '  end;',
+        'procedure TargetCb(X: Integer);',
+        'procedure RegisterHandler(Cb: TCallback);',
+        'procedure ArgRegistrar;',
+        'procedure AddrRegistrar;',
+        'implementation',
+        'procedure TargetCb(X: Integer);',
+        'begin',
+        '  WriteLn(X);',
+        'end;',
+        'procedure RegisterHandler(Cb: TCallback);',
+        'begin',
+        '  Cb(1);',
+        'end;',
+        'procedure ArgRegistrar;',
+        'begin',
+        '  RegisterHandler(TargetCb);',
+        'end;',
+        'procedure AddrRegistrar;',
+        'begin',
+        '  RegisterHandler(@TargetCb);',
+        'end;',
+        'procedure THolder.Wire;',
+        'begin',
+        '  OnFire := TargetCb;',
+        'end;',
+        'end.',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'TargetCb'))).toEqual([
+        'AddrRegistrar',
+        'ArgRegistrar',
+        'Wire',
+      ]);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('C UNGATED TABLES: a command table names handlers defined in OTHER files (redis pattern)', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-ctable-'));
+    // Handler defined in its own file…
+    fs.writeFileSync(path.join(tmpDir, 't_string.c'), 'void getCommand(int c) { (void)c; }\n');
+    // …and registered in a table in ANOTHER file, with no import mechanism (C).
+    fs.writeFileSync(
+      path.join(tmpDir, 'server.c'),
+      [
+        'struct cmd { const char *name; void (*proc)(int); };',
+        'static struct cmd commandTable[] = {',
+        '  { "get", getCommand },',
+        '};',
+      ].join('\n')
+    );
+    // Ambiguity safety: two files define dupCmd; a third table references it →
+    // NO edge (unique-or-drop).
+    fs.writeFileSync(path.join(tmpDir, 'dup_a.c'), 'void dupCmd(int c) { (void)c; }\n');
+    fs.writeFileSync(path.join(tmpDir, 'dup_b.c'), 'void dupCmd(int c) { (void)c; }\n');
+    fs.writeFileSync(
+      path.join(tmpDir, 'other.c'),
+      [
+        'struct cmd2 { void (*proc)(int); };',
+        'static struct cmd2 otherTable[] = { { dupCmd } };',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      // Cross-file unique handler resolves from the table's file.
+      const intoGet = fnRefEdgesInto(cg, 'getCommand');
+      expect(sourceNames(cg, intoGet)).toEqual(['server.c']);
+      const target = cg.getNode(intoGet[0]!.target);
+      expect(target?.filePath.endsWith('t_string.c')).toBe(true);
+
+      // Ambiguous handler resolves to NOTHING — silent beats wrong.
+      expect(fnRefEdgesInto(cg, 'dupCmd')).toHaveLength(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('DRAIN: resolvable function_ref rows leave unresolved_refs; re-index is stable', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-drain-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.c'),
+      [
+        'static void cb_a(int x) { (void)x; }',
+        'void reg(void (*cb)(int)) { cb(1); }',
+        'void wire(void) { reg(cb_a); }',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const stats1 = cg.getStats();
+
+      // No function_ref rows may linger for resolvable names — the batched
+      // resolver must have drained them (delete keyed on the ORIGINAL stored
+      // ref; the #760 runaway came from violating that).
+      const db = (cg as unknown as { db: { prepare(sql: string): { all(): unknown[] } } }).db;
+      let leftover: unknown[] = [];
+      try {
+        leftover = db
+          .prepare("SELECT * FROM unresolved_refs WHERE reference_kind = 'function_ref'")
+          .all();
+      } catch {
+        // Best-effort: if internals aren't reachable the query throws, `leftover`
+        // stays [] and this check is vacuous; the assertions below still apply.
+      }
+      expect(leftover).toHaveLength(0);
+
+      // Re-index: identical node/edge counts (idempotent, no accumulation).
+      await cg.indexAll();
+      const stats2 = cg.getStats();
+      expect(stats2.totalNodes).toBe(stats1.totalNodes);
+      expect(stats2.totalEdges).toBe(stats1.totalEdges);
+
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'cb_a'))).toEqual(['wire']);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+});
diff --git a/docs/design/function-ref-capture.md b/docs/design/function-ref-capture.md
new file mode 100644
index 000000000..762bdfef1
--- /dev/null
+++ b/docs/design/function-ref-capture.md
@@ -0,0 +1,188 @@
+# Function-as-value capture (#756) — registration-linking for callbacks
+
+**Problem.** A function used as a *value* — passed as an argument, assigned to a
+function pointer or field, placed in a struct initializer or handler table —
+produced **no edge** in any of the 19 tree-sitter languages (probed 2026-06-11;
+0/19). `callers(my_recv_cb)` on a C callback showed nothing but direct calls, so
+every registered callback looked dead, and the registration sites — the agent's
+actual next question ("where is this wired up?") — were invisible.
+
+**Non-goal, deliberate.** Resolving the *dispatch* (`o->cb(x)` → the concrete
+registered function) needs data-flow through struct fields; even an LSP needs
+fallbacks there (see the #756 thread). Partial coverage is worse than none and
+a wrong edge is worse than silence — dispatch resolution stays uncovered. What
+ships is the *registration* side, which is deterministic: the function's name
+is literally in the source at the registration site.
+
+## Mechanism
+
+```
+capture (tree-sitter.ts walkers, table-driven per language: src/extraction/function-ref.ts)
+   → gate (flushFnRefCandidates: same-file fn/method name ∪ imported binding names;
+            C-family file-scope initializers skip the gate — see below)
+   → unresolved ref, referenceKind 'function_ref' (internal-only kind)
+   → resolution (resolveOne branch: resolveViaImport first, then matchFunctionRef —
+                 exact name, function/method kinds only, same-family, same-file first,
+                 cross-file only when UNIQUE, never fuzzy)
+   → edge kind 'references', metadata { fnRef: true, resolvedBy, confidence }
+```
+
+`getCallers`/`getCallees`/`getImpactRadius` already traverse `references`, so
+registration sites surface with no graph-layer changes. The MCP callers/callees
+lists label them "via callback registration".
+
+Capture fires from three walkers (a node is only ever visited by one):
+`visitNode` (file/class scope), `visitForCallsAndStructure` (function bodies),
+`visitPascalBlock` (Pascal bodies). Subtrees the walkers consume without
+descending (top-level variable initializers, class field/property initializers,
+custom `visitNode` hooks like Scala's val/var handler) get a candidates-only
+`scanFnRefSubtree` that halts at nested function boundaries.
+
+## Per-language value positions (probe-verified)
+
+| Language | arg | assign RHS | keyed init | list/table | wrapper forms |
+|---|---|---|---|---|---|
+| C / ObjC | `argument_list` | `assignment_expression.right` | `initializer_pair.value` | `initializer_list`, `init_declarator.value` | `&fn` (`pointer_expression`), `@selector(...)` (ObjC) |
+| C++ | **`&` forms only** in args/rhs/varinit | (same — explicit `&` only) | bare ids at FILE scope only | bare ids at FILE scope only | `&fn`, `&Cls::method` (resolved scoped to the class) |
+| TS / JS (tsx/jsx) | `arguments` | `assignment_expression.right` | `pair.value` | `array`, `variable_declarator.value` | — (see TS notes) |
+| Python | `argument_list`, `keyword_argument.value` | `assignment.right` | `pair.value` | `list` | `self.method` (`attribute`) |
+| Go | `argument_list` | `assignment_statement` / `short_var_declaration` (`expression_list`) | `keyed_element` | `literal_value`, `var_spec.value` | — |
+| Rust | `arguments` | `assignment_expression.right` | `field_initializer.value` | `array_expression`, `static_item` / `let_declaration.value` | — |
+| Java | `argument_list` | `assignment_expression.right` | — | `variable_declarator.value` | `method_reference` (`Cls::m`, `this::m`) — the only form |
+| Kotlin | `value_arguments` | `assignment` (last child) | — | — | `callable_reference` (`::f`), `navigation_expression` `this::m` |
+| C# | `argument_list` (`argument`) | `assignment_expression.right` (incl. `+=`) | — | `initializer_expression`, `variable_declarator` | `this.M` (`member_access_expression`; vendored grammar keeps `this` anonymous — handled) |
+| Ruby | `argument_list` | — | `pair.value` | — | only `method(:sym)` / `&method(:sym)` — bare ids are calls/locals in Ruby |
+| Swift | `value_arguments` (`value_argument.value`) | `assignment.result` | (labeled ctor args = args) | `array_literal`, `property_declaration.value` | `#selector(...)` |
+| Scala | `arguments` | `assignment_expression.right` | — | `val_definition.value` (via hook scan) | eta `fn _` (`postfix_expression`) |
+| Dart | `arguments` (`argument`) | `assignment_expression.right` | `pair.value` | `list_literal`, `static_final_declaration` | — |
+| Lua / Luau | `arguments` | `assignment_statement` (`expression_list.value`) | `field.value` (keyed + positional) | (same) | — |
+| Pascal | `exprArgs` (via `visitPascalBlock`) | `assignment.rhs` (`OnFire := Handler`) | — | — | `@Handler` (`exprUnary.operand`) |
+| PHP | **skipped** | — | — | — | first-class callable `fn(...)` already extracts as a `calls` edge; string callables are a precision risk, deferred |
+
+## Precision rules (each one bought by a real-repo false positive)
+
+1. **The gate** (extraction-time): a candidate survives only if its name matches
+   a same-file function/method or an **imported binding** (`referenceKind ===
+   'imports'` only — also scraping names from type-annotation `references` let
+   through locals that shared a type member's name; seen on excalidraw).
+2. **C-family ungated file scope**: C has no symbol imports and registers
+   callbacks cross-file at repo scale (redis `server.c`'s command table names
+   handlers from `t_*.c`). File-scope initializer positions (`value`/`list`
+   modes) skip the gate — safe because a C file-scope initializer is a
+   **constant-expression context**: a bare identifier there can only be a
+   function address (enum/macro names get dropped by the kind filter). Local
+   initializers and assignments stay gated: `prev = next`, `*str = field`,
+   `arena_ind_prev = arena_ind` (redis/jemalloc) each matched a unique
+   same-named function somewhere and produced wrong edges when `rhs`/`varinit`
+   were ungated.
+3. **TS/JS/Python: bare ids resolve to `function` kind only.** A bare
+   identifier can never be a method value in these languages (methods need a
+   receiver — `this.m` / `self.m`), and TS class FIELDS are extracted as
+   method-kind nodes (pre-existing extractor quirk), so allowing method
+   targets soaked up locals passed as arguments
+   (`new Set(selectedPointsIndices)` → a same-named "method" field;
+   docopt.py's `name`/`match` params). For the same reason `this.X` capture
+   is disabled for TS/JS — in real code `this.X` value positions are mostly
+   data reads (`setCursor(this.canvas)`). Python's `self.m` form keeps method
+   targets through its own capture shape. C#/Swift/Dart/Java/Kotlin keep
+   method targets (method groups, implicit-self, method references are real
+   method values).
+4. **C++ is `&`-explicit** (`addressOfOnly`): bare identifiers qualify only in
+   FILE-scope initializer tables; everywhere else (args, assignments, local
+   braced-init lists `{begin, size}`) only `&fn` / `&Cls::method` count.
+   C++ codebases are dense with generic free-function names (`begin`, `end`,
+   `out`, `size`, `data`) colliding with locals, and OUT-OF-LINE member
+   definitions extract as *function*-kind nodes, defeating the kind filter —
+   bare-id matching on fmt was mostly wrong edges (72 generic-name + 105
+   member/macro mismatches → after the rule: 22 edges, ~20 genuine gtest
+   member-pointer wirings). `&x` vs `*x` share C's `pointer_expression`; only
+   the `&` operator qualifies. `&Cls::method` resolves SCOPED to that class.
+5. **Swift overload-family refusal**: several same-named METHODS in one file
+   (`Session.request(...)` × N) + a bare identifier = almost always a
+   same-named parameter, not a method value (Alamofire) — refuse rather than
+   guess. A unique method (SwiftUI `action: handleTap`) still resolves.
+6. **Param-forward skips**: `this.status = status` / `o->cb = cb` (assignment
+   whose member name equals the RHS identifier) and Swift/Kotlin labeled args
+   `value: value` — a forwarded local/parameter whose function value is
+   unknowable; a same-named function elsewhere would be the WRONG target.
+7. **Destructuring skip**: `const { center } = ellipse` extracts data, never a
+   function alias.
+8. **Generated/minified files** (`*.min.js` and the codegen patterns in
+   `generated-detection.ts`) produce no fn-ref candidates — minified
+   single-letter symbols resolve everywhere (Alamofire's vendored jquery).
+9. **Resolution**: function/method kinds only, same language family, never the
+   ref's own node (no self-loops), same-file match first, cross-file only when
+   the name is UNIQUE — ambiguity yields **no edge**. No fuzzy fallback,
+   ever (`matchReference` short-circuits `function_ref` refs to
+   `matchFunctionRef`).
+10. **Runaway invariant** (#760): `matchFunctionRef` always returns
+    `original: ref` — the stored row — so `deleteSpecificResolvedReferences`
+    drains the batch.
+
+## Validation (2026-06-11, EXTRACTION_VERSION 19)
+
+Stash-free A/B (baseline = worktree at `main`), fresh shallow clones, public
+OSS only. Per repo: node count must be identical, `calls` edges identical,
+`references` strictly additive, precision spot-checked by reading the source
+line of sampled `fnRef` edges.
+
+Final build, all 17 repos (nodes identical and calls edges untouched on every
+row; `unresolved_refs` fully drained — no batched-resolver runaway):
+
+| Lang | Repo | Nodes (base=fix) | calls Δ | refs gained | Notes |
+|---|---|---|---|---|---|
+| C | redis | 18931 | 0/0 | **+1918** | 30/30 sample genuine — ops tables, qsort comparators, module registration, lua lib tables |
+| TS/React | excalidraw | 10299 | 0/0 | **+121** | 18/20 — residual = param shadowing an imported function (file-level dep real) |
+| Go | gin | 2599 | 0/0 | +14 | |
+| Rust | bytes | 947 | 0/0 | +76 | `map(fn)`, struct init |
+| Java | okhttp | 16008 | 0/0 | +2 | method-ref forms only, by design |
+| Kotlin | okio | 7801 | 0/0 | +1 | `::fn` forms only, by design |
+| Swift | alamofire | 3477 | 0/0 | +116 | adversarial case (params mirror API names); overload-family + label==name rules applied |
+| Python | flask | 2705 | 0/0 | +111 | 8/8 sample genuine — incl. `ensure_sync(self.dispatch_request)` |
+| Ruby | sinatra | 1751 | 0/0 | +8 | `method(:sym)` only |
+| C# | newtonsoft | 20208 | 0/0 | +38 | method groups, `+=` |
+| Scala | scopt | 694 | 0/0 | +10 | eta-expansion |
+| Dart | provider | 1154 | 0/0 | +73 | implicit-this getter reads — true same-class dependencies |
+| Lua | busted | 1257 | 0/0 | +14 | |
+| Luau | fusion | 2126 | 0/0 | +18 | `:Connect(fn)` |
+| ObjC | afnetworking | 1487 | 0/0 | +52 | `@selector`, target-action |
+| Pascal | pascalcoin | 48788 | 0/0 | +577 | `OnClick :=` event wiring + paren-less-call refs (see limits) |
+| C++ | fmt | 7345 | 0/0 | +22 | ~20/22 genuine gtest member-pointer plumbing after addressOfOnly |
+
+Index cost on redis: +6% time, +5% db size.
+
+## Known limits (documented, deliberate)
+
+- **Dispatch resolution** (`o->cb(x)` → implementations): uncovered, see above.
+- **C cross-file in gated positions**: an extern callback registered via
+  *assignment* in a different file than its definition only resolves when the
+  name is repo-unique (initializer tables don't have this limit — they're
+  ungated at file scope).
+- **C++ bare-name registration** (`register_handler(my_cb)` without `&`):
+  dropped by `addressOfOnly` — the generic-name collision rate made bare ids
+  net-negative on real C++ (fmt). `&my_cb` / file-scope tables cover the
+  idioms; C files keep bare args.
+- **Local/param shadowing an imported or same-file function**
+  (`mutateElement(newElement, …)` where the file also imports `newElement`;
+  JS plugins' `indexOf(val)` with a same-file `val()` helper): irreducible
+  without local-scope tracking — the data-flow frontier deliberately left
+  uncovered. ~1-2 per 20 sampled edges on callback-heavy repos; the file-level
+  dependency is real in every observed case.
+- **Swift single same-named method collisions** (`request(self, didFailTask:
+  task…)` where one `task` method exists): the overload-family rule only
+  refuses when ≥2 same-named methods share the file. Alamofire-style
+  API-mirrored param naming keeps a residual; needs same-type scoping (v2).
+- **Pascal paren-less calls** (`Result := DoInitialize`): captured as
+  references (Pascal can't distinguish a procedure VALUE from a paren-less
+  CALL without types). The dependency direction is correct and these calls
+  were previously invisible entirely (#791) — strictly more truth, imperfect
+  label.
+- **Java/Kotlin cross-file method refs** (`OtherClass::method` without the
+  defining class imported as a simple name): gated away; same-file and
+  `this::m` forms work.
+- **Swift cross-file bare references**: Swift sees module-wide symbols without
+  imports, so cross-file bare callbacks only resolve when repo-unique.
+- **PHP string callables**, **Ruby bare symbols** outside `method(:sym)`,
+  **`obj.method` member values** where `obj` isn't `this`/`self`: deferred.
+- **TS `this.X`**: disabled until TS class-field kind classification is fixed
+  (fields currently extract as method-kind nodes).
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 7b2df06d4..4cfcfa6b7 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 18;
+export const EXTRACTION_VERSION = 19;
diff --git a/src/extraction/function-ref.ts b/src/extraction/function-ref.ts
new file mode 100644
index 000000000..dd4f12c03
--- /dev/null
+++ b/src/extraction/function-ref.ts
@@ -0,0 +1,644 @@
+/**
+ * Function-as-value capture (#756) — registration-linking for callbacks.
+ *
+ * A function name used as a VALUE — passed as a call argument
+ * (`register_handler(target_cb)`, `signal(SIGINT, handler)`), assigned to a
+ * field or function pointer (`o->cb = target_cb`, `OnFire := TargetCb`),
+ * placed in a struct/object initializer (`{ .recv_cb = my_cb }`,
+ * `{ recv: targetCb }`, `Ops{Cb: targetCb}`), or listed in a function table
+ * (`static cb_t table[] = { cb_a, cb_b }`) — is a real dependency that static
+ * call extraction misses entirely: `callers(target_cb)` showed nothing but
+ * direct calls, so every callback looked dead and its registration sites were
+ * invisible to impact analysis.
+ *
+ * This module captures those value positions during the AST walk as
+ * `function_ref` candidates. Capture is table-driven per language (the value
+ * positions and wrapper forms differ per grammar — `&fn` in C, `Main::fn` in
+ * Java, `::fn` in Kotlin, `#selector(fn)` in Swift, `@TargetCb` in Pascal,
+ * `method(:fn)` in Ruby). Candidates are GATED at end-of-file extraction
+ * (see `TreeSitterExtractor.flushFnRefCandidates`): only names matching a
+ * same-file function/method or an imported binding survive, which bounds
+ * volume and keeps precision high. Resolution then matches survivors against
+ * function/method nodes ONLY (`matchFunctionRef` in
+ * `src/resolution/name-matcher.ts`) and persists them as `references` edges,
+ * which `callers`/`impact` already traverse.
+ *
+ * Deliberately NOT covered (resolving the *dispatch* — `o->cb(x)` → the
+ * registered function — needs data-flow through struct fields; a wrong edge
+ * is worse than none): indirect-call resolution, PHP string callables,
+ * Ruby bare symbols outside `method(:sym)`, and `obj.method` member values
+ * where `obj` isn't `this`/`self`.
+ */
+
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from './tree-sitter-helpers';
+
+export interface FnRefCandidate {
+  name: string; // candidate function name; the gate matches it against same-file/imported names
+  line: number; // position of the captured value — NOTE(review): 0- vs 1-based not visible here
+  column: number; // NOTE(review): presumably the same base as `line` — confirm
+  /** Which capture position produced this candidate (gate policy keys on it). */
+  mode: CaptureMode;
+  /**
+   * True when the value was an explicit reference form (`&fn`, `&Cls::m`,
+   * `::fn`, `#selector`, `method(:sym)`) rather than a bare identifier —
+   * C++'s flush policy keys on it.
+   */
+  explicitRef: boolean;
+}
+
+/** How to pull candidate value nodes out of a dispatched container node; gate policy keys on it too. */
+type CaptureMode =
+  | 'args' // every named child is a potential value (call argument lists)
+  | 'rhs' // the assignment right-hand side (named field, else last named child)
+  | 'value' // the `value` field of a keyed pair (object/struct/table initializers)
+  | 'list' // every named child (array / initializer-list / table positional elements)
+  | 'varinit'; // a variable declarator's initializer value
+
+interface CaptureRule {
+  mode: CaptureMode; // extraction strategy for the container type this rule is registered under
+  /** Field holding the value for rhs/value/varinit (defaults per mode). */
+  field?: string;
+}
+
+export interface FnRefSpec {
+  /** Bare identifier node types that can act as a function value. */
+  idTypes: Set<string>;
+  /** Container node type → how to extract candidate values from it. */
+  dispatch: Map<string, CaptureRule>;
+  /**
+   * Transparent wrapper layers between a container and its values
+   * (`argument`, `value_argument`, `literal_element`, `expression_list`…).
+   * Value: the field to descend into, or null for "named children".
+   * `expression_list` fans out to ALL named children (Go multi-assign).
+   */
+  layers?: Map<string, string | null>;
+  /**
+   * Unary wrappers whose operand is the function value — C/C++ `&fn`
+   * (pointer_expression), Pascal `@Fn` (exprUnary), Scala eta `fn _`
+   * (postfix_expression). Value: operand field, or null for first named child.
+   */
+  unwrap?: Map<string, string | null>;
+  /**
+   * Whole-node reference forms needing bespoke name extraction —
+   * `method_reference` (Java), `callable_reference` / `navigation_expression`
+   * (Kotlin), `selector_expression` (Swift `#selector` / ObjC `@selector`),
+   * Ruby `method(:sym)` calls, and `this.method` member forms.
+   */
+  special?: Set<string>;
+  /**
+   * Capture modes whose candidates skip the same-file/import gate and rely on
+   * resolution's unique-or-drop rule instead. C-family only, and only for
+   * FILE-scope initializer values / table elements ('value'/'list'): C has
+   * no symbol imports, so the dominant repo-scale pattern (`server.c`'s
+   * command table naming handlers defined across files) would otherwise be
+   * invisible. Assignment RHS, local initializers and call arguments stay
+   * gated (data assignments and locals passed as args dwarf callbacks).
+   */
+  ungatedModes?: Set<CaptureMode>;
+  /**
+   * C++ only: in args/rhs/varinit positions, accept ONLY explicit reference
+   * forms (`&fn`, `&Cls::method`) — never bare identifiers. C++ codebases are
+   * dense with generic free-function/accessor names (`begin`, `end`, `out`,
+   * `size`, `data`) that collide with parameters and locals, and out-of-line
+   * member definitions extract as function-kind nodes — bare-id matching on
+   * fmt was mostly wrong edges. File-scope initializer tables (value/list)
+   * still accept bare identifiers, same as C.
+   */
+  addressOfOnly?: boolean;
+}
+
+/** Keywords/literals that are never function references even when grammars call them identifiers. */
+const NAME_STOPLIST = new Set([
+  'this',
+  'self',
+  'super',
+  'null',
+  'nil',
+  'true',
+  'false',
+  'undefined',
+  'new',
+  'NULL',
+  'nullptr',
+  'None',
+]);
+
+// ---------------------------------------------------------------------------
+// Per-language specs. Node types verified against each grammar (probe fixtures
+// in the #756 investigation; see docs/design/function-ref-capture.md).
+// ---------------------------------------------------------------------------
+
+/** C / C++ / Objective-C share the C-family initializer & assignment shapes; `extra` adds per-language special forms and the addressOfOnly flag. */
+function cFamilySpec(extra?: { special?: string[]; addressOfOnly?: boolean }): FnRefSpec {
+  return {
+    idTypes: new Set(['identifier']),
+    dispatch: new Map<string, CaptureRule>([
+      ['argument_list', { mode: 'args' }],
+      ['assignment_expression', { mode: 'rhs', field: 'right' }],
+      ['init_declarator', { mode: 'varinit', field: 'value' }],
+      ['initializer_list', { mode: 'list' }],
+      ['initializer_pair', { mode: 'value', field: 'value' }],
+    ]),
+    unwrap: new Map([['pointer_expression', 'argument']]), // `&fn` → fn; normalizeValue rejects the `*x` form
+    special: new Set(extra?.special ?? []),
+    // C has no symbol imports, and callbacks are registered cross-file at repo
+    // scale (redis: server.c's command table names handlers from t_*.c) — so
+    // initializer positions bypass the gate and lean on resolution's
+    // unique-or-drop rule. ONLY 'value'/'list' (struct/array initializers),
+    // and the flush additionally requires FILE scope: a C file-scope
+    // initializer is a constant-expression context, so a bare identifier
+    // there can only be a function address (or enum/macro, which the
+    // function-kind filter drops) — never a variable. 'rhs'/'varinit' were
+    // tried and produced false edges (`prev = next`, `*str = field` — data
+    // assignments matching a unique same-named function elsewhere), so
+    // assignments stay gated to same-file/import.
+    ungatedModes: new Set<CaptureMode>(['value', 'list']),
+    addressOfOnly: extra?.addressOfOnly,
+  };
+}
+
+// NOTE: deliberately NO `member_expression` (`this.handleClick`) capture for
+// TS/JS. Class fields with type annotations are extracted as method-kind
+// nodes (pre-existing extractor behavior), so `this.X` value positions —
+// which in real code are mostly DATA reads (`setCursor(this.canvas)`) —
+// resolved to those field nodes and produced wrong "registration" edges
+// (excalidraw A/B finding). Revisit if/when TS field classification is fixed.
+const TS_JS_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['arguments', { mode: 'args' }], // on('click', handler)
+    ['assignment_expression', { mode: 'rhs', field: 'right' }], // el.onclick = handler
+    ['variable_declarator', { mode: 'varinit', field: 'value' }], // const cb = handler (destructuring is skipped)
+    ['pair', { mode: 'value', field: 'value' }], // { home: renderHome }
+    ['array', { mode: 'list' }], // [first, second]
+  ]),
+};
+
+const PYTHON_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['argument_list', { mode: 'args' }],
+    ['assignment', { mode: 'rhs', field: 'right' }],
+    ['keyword_argument', { mode: 'value', field: 'value' }], // Thread(target=worker)
+    ['pair', { mode: 'value', field: 'value' }], // {"recv": target_cb}
+    ['list', { mode: 'list' }],
+  ]),
+  special: new Set(['attribute']), // only `self.<name>` qualifies — see normalizeSpecial
+};
+
+const GO_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['argument_list', { mode: 'args' }],
+    ['assignment_statement', { mode: 'rhs', field: 'right' }],
+    ['short_var_declaration', { mode: 'rhs', field: 'right' }],
+    ['var_spec', { mode: 'varinit', field: 'value' }],
+    ['keyed_element', { mode: 'value' }], // value = last literal_element child
+    ['literal_value', { mode: 'list' }], // positional composite literals
+  ]),
+  layers: new Map<string, string | null>([
+    ['literal_element', null], // null = no field to follow: normalizeValue fans out over every named child
+    ['expression_list', null], // `a, b = f, g` — each RHS expression is normalized in turn
+  ]),
+};
+
+const RUST_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['arguments', { mode: 'args' }],
+    ['assignment_expression', { mode: 'rhs', field: 'right' }],
+    ['field_initializer', { mode: 'value', field: 'value' }],
+    ['array_expression', { mode: 'list' }],
+    ['static_item', { mode: 'varinit', field: 'value' }],
+    ['let_declaration', { mode: 'varinit', field: 'value' }], // tuple/struct patterns are skipped by the varinit branch
+  ]),
+};
+
+const JAVA_SPEC: FnRefSpec = {
+  // No bare-identifier function values in Java — only method references.
+  idTypes: new Set<string>(),
+  dispatch: new Map<string, CaptureRule>([
+    ['argument_list', { mode: 'args' }],
+    ['assignment_expression', { mode: 'rhs', field: 'right' }],
+    ['variable_declarator', { mode: 'varinit', field: 'value' }],
+  ]),
+  special: new Set(['method_reference']), // `Main::targetCb` / `this::run0` — last identifier child is the name
+};
+
+const KOTLIN_SPEC: FnRefSpec = {
+  idTypes: new Set<string>(), // empty: bare identifiers are never captured, only the special `::` forms below
+  dispatch: new Map<string, CaptureRule>([
+    ['value_arguments', { mode: 'args' }],
+    ['assignment', { mode: 'rhs' }], // RHS = last named child (no field in grammar)
+  ]),
+  layers: new Map<string, string | null>([['value_argument', null]]), // fan out over children; label == value is skipped
+  special: new Set(['callable_reference', 'navigation_expression']), // `::fn` and `this::fn`
+};
+
+const CSHARP_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['argument_list', { mode: 'args' }],
+    ['assignment_expression', { mode: 'rhs', field: 'right' }], // covers `+=` event subscription
+    ['initializer_expression', { mode: 'list' }],
+    ['variable_declarator', { mode: 'varinit' }], // no value field: initializer = last named child, never the name
+  ]),
+  layers: new Map<string, string | null>([['argument', null]]),
+  special: new Set(['member_access_expression']), // only `this.<Name>` qualifies — see normalizeSpecial
+};
+
+const RUBY_SPEC: FnRefSpec = {
+  // Bare identifiers in Ruby args are method CALLS or locals, never function
+  // values — only the `method(:name)` idiom (and `&method(:name)`) qualifies.
+  idTypes: new Set<string>(),
+  dispatch: new Map<string, CaptureRule>([
+    ['argument_list', { mode: 'args' }],
+    ['pair', { mode: 'value', field: 'value' }],
+  ]),
+  layers: new Map<string, string | null>([['block_argument', null]]), // `&method(:name)` wrapper
+  special: new Set(['call']), // filtered down to `method(:sym)` in normalizeSpecial
+};
+
+const SWIFT_SPEC: FnRefSpec = {
+  idTypes: new Set(['simple_identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['value_arguments', { mode: 'args' }],
+    ['assignment', { mode: 'rhs', field: 'result' }],
+    ['array_literal', { mode: 'list' }],
+    ['property_declaration', { mode: 'varinit', field: 'value' }],
+  ]),
+  layers: new Map<string, string | null>([['value_argument', 'value']]), // descend via the `value` field; label == value is skipped
+  special: new Set(['selector_expression']), // `#selector(Holder.fire)`
+};
+
+const SCALA_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['arguments', { mode: 'args' }], // Seq(targetCb)
+    ['assignment_expression', { mode: 'rhs', field: 'right' }],
+    ['val_definition', { mode: 'varinit', field: 'value' }],
+  ]),
+  unwrap: new Map<string, string | null>([['postfix_expression', null]]), // eta-expansion `fn _`
+};
+
+const DART_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['arguments', { mode: 'args' }],
+    ['assignment_expression', { mode: 'rhs', field: 'right' }],
+    ['pair', { mode: 'value', field: 'value' }],
+    ['list_literal', { mode: 'list' }],
+    ['static_final_declaration', { mode: 'varinit' }], // no value field: initializer = last named child, never the name
+  ]),
+  layers: new Map<string, string | null>([['argument', null]]),
+};
+
+const LUA_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['arguments', { mode: 'args' }],
+    ['assignment_statement', { mode: 'rhs' }], // RHS expression_list children carry `value` fields
+    ['field', { mode: 'value', field: 'value' }], // table fields, keyed AND positional
+  ]),
+  layers: new Map<string, string | null>([['expression_list', null]]), // fan out over each RHS expression
+};
+
+const PASCAL_SPEC: FnRefSpec = {
+  idTypes: new Set(['identifier']),
+  dispatch: new Map<string, CaptureRule>([
+    ['exprArgs', { mode: 'args' }], // RegisterHandler(TargetCb)
+    ['assignment', { mode: 'rhs', field: 'rhs' }], // OnClick := Handler
+  ]),
+  unwrap: new Map<string, string | null>([['exprUnary', 'operand']]), // @Handler
+};
+
+/**
+ * Capture specs by language; a language with no entry here gets no capture at all. PHP is
+ * deliberately absent: its first-class callable `fn(...)` already extracts as a `calls` edge,
+ * and string callables (`'fn_name'`) are a precision risk left for a follow-up.
+ */
+export const FN_REF_SPECS: Record<string, FnRefSpec | undefined> = {
+  c: cFamilySpec(),
+  cpp: cFamilySpec({ addressOfOnly: true }), // bare identifiers count only in file-scope initializer tables
+  objc: cFamilySpec({ special: ['selector_expression'] }), // adds `@selector(...)`
+  typescript: TS_JS_SPEC,
+  tsx: TS_JS_SPEC,
+  javascript: TS_JS_SPEC,
+  jsx: TS_JS_SPEC,
+  python: PYTHON_SPEC,
+  go: GO_SPEC,
+  rust: RUST_SPEC,
+  java: JAVA_SPEC,
+  kotlin: KOTLIN_SPEC,
+  csharp: CSHARP_SPEC,
+  ruby: RUBY_SPEC,
+  swift: SWIFT_SPEC,
+  scala: SCALA_SPEC,
+  dart: DART_SPEC,
+  lua: LUA_SPEC,
+  luau: LUA_SPEC, // shares the Lua spec object
+  pascal: PASCAL_SPEC,
+};
+
+// ---------------------------------------------------------------------------
+// Capture
+// ---------------------------------------------------------------------------
+
+/**
+ * Extract candidate names from a dispatched container node: `rule.mode` selects which children sit in value position.
+ * Returns one candidate (name, line = start row + 1, raw start column, mode, explicitRef) per function-value-shaped expression found.
+ */
+export function captureFnRefCandidates(
+  container: SyntaxNode, // node whose type matched a `spec.dispatch` entry
+  rule: CaptureRule, // the matched entry: capture mode plus optional field name
+  spec: FnRefSpec,
+  source: string // full file text, used to read node text
+): FnRefCandidate[] {
+  const valueNodes: SyntaxNode[] = [];
+
+  switch (rule.mode) {
+    case 'args':
+    case 'list': {
+      for (let i = 0; i < container.namedChildCount; i++) {
+        const child = container.namedChild(i);
+        if (child) valueNodes.push(child);
+      }
+      break;
+    }
+    case 'rhs': {
+      const rhs = rule.field
+        ? getChildByField(container, rule.field)
+        : container.namedChild(container.namedChildCount - 1);
+      if (rhs) {
+        // Param-storage skip: `this.status = status` / `o->cb = cb` — when
+        // the assigned member's name EQUALS the RHS identifier, the RHS is a
+        // local/parameter being stored, and the function it holds (if any)
+        // is unknowable statically. A same-named function elsewhere would
+        // resolve to the WRONG target (excalidraw A/B finding), so skip.
+        const lhs =
+          getChildByField(container, 'left') ??
+          getChildByField(container, 'lhs') ??
+          getChildByField(container, 'target') ??
+          (container.namedChildCount >= 2 ? container.namedChild(0) : null);
+        const lhsText = lhs ? getNodeText(lhs, source) : '';
+        const lhsLastName = lhsText.match(/([A-Za-z_$][A-Za-z0-9_$]*)\s*$/)?.[1];
+        const rhsText = getNodeText(rhs, source).trim();
+        if (lhsLastName && lhsLastName === rhsText) break; // leaves the switch with nothing queued
+        valueNodes.push(rhs);
+      }
+      break;
+    }
+    case 'value': {
+      let value = rule.field ? getChildByField(container, rule.field) : null;
+      // Keyed containers without a value field (Go keyed_element): the value
+      // is the LAST named child (the first is the key).
+      if (!value && container.namedChildCount > 0) {
+        value = container.namedChild(container.namedChildCount - 1);
+      }
+      if (value) valueNodes.push(value);
+      break;
+    }
+    case 'varinit': {
+      // Destructuring (`const { center } = ellipse`) extracts DATA from the
+      // RHS — never a function alias. Without this skip, a parameter that
+      // shadows a same-named imported function produced a wrong edge.
+      const nameNode =
+        getChildByField(container, 'name') ?? getChildByField(container, 'pattern');
+      if (nameNode && (nameNode.type === 'object_pattern' || nameNode.type === 'array_pattern' ||
+                       nameNode.type === 'tuple_pattern' || nameNode.type === 'struct_pattern')) {
+        break;
+      }
+      if (rule.field) {
+        const value = getChildByField(container, rule.field);
+        if (value) valueNodes.push(value);
+      } else {
+        // No value field in this grammar (C# variable_declarator, Dart
+        // static_final_declaration): the initializer is the last named child —
+        // but a declarator WITHOUT an initializer has its NAME there instead.
+        // Require ≥2 named children and never pick the name/pattern child.
+        const value = container.namedChild(container.namedChildCount - 1);
+        const nameChild =
+          getChildByField(container, 'name') ?? getChildByField(container, 'pattern');
+        if (
+          value &&
+          container.namedChildCount >= 2 &&
+          (!nameChild || value.id !== nameChild.id)
+        ) {
+          valueNodes.push(value);
+        }
+      }
+      break;
+    }
+  }
+
+  const out: FnRefCandidate[] = [];
+  for (const v of valueNodes) {
+    // explicitRef is decided from the TOP-LEVEL value node alone: true unless that
+    // node is itself a bare identifier type, so unwrap/special forms (and values
+    // wrapped in a transparent layer) count as explicit. C++'s addressOfOnly policy
+    // (applied at flush, where file scope is known) drops bare ids outside file-scope initializer tables.
+    const explicitRef = !spec.idTypes.has(v.type);
+    for (const { name, node } of normalizeValue(v, spec, source, 0)) {
+      if (!name || NAME_STOPLIST.has(name)) continue;
+      out.push({
+        name,
+        line: node.startPosition.row + 1,
+        column: node.startPosition.column,
+        mode: rule.mode,
+        explicitRef,
+      });
+    }
+  }
+  return out;
+}
+
+/**
+ * Normalize one value expression to zero or more function names, checking in order: bare
+ * identifier, transparent layer, unary wrapper, special whole-node form; anything else yields [].
+ * Recursion happens only through layers/wrappers and stops once `depth` exceeds 4.
+ */
+function normalizeValue(
+  node: SyntaxNode,
+  spec: FnRefSpec,
+  source: string,
+  depth: number
+): Array<{ name: string; node: SyntaxNode }> {
+  if (depth > 4) return []; // hard cap on wrapper nesting
+  const type = node.type;
+
+  // Bare identifier
+  if (spec.idTypes.has(type)) {
+    return [{ name: getNodeText(node, source), node }];
+  }
+
+  // Transparent layers (argument, value_argument, literal_element,
+  // expression_list, block_argument). expression_list fans out (Go `a, b = f, g`).
+  const layerField = spec.layers?.get(type);
+  if (spec.layers?.has(type)) {
+    // Labeled-argument param-forward skip (Swift/Kotlin): `value: value` /
+    // `delay: delay` — when the label EQUALS the value identifier, the value
+    // is a forwarded local/parameter, not a function reference (Alamofire
+    // A/B finding; same rationale as the `this.x = x` assignment skip).
+    if (type === 'value_argument') {
+      const label = getChildByField(node, 'name');
+      const value = getChildByField(node, 'value') ?? node.namedChild(node.namedChildCount - 1);
+      if (
+        label &&
+        value &&
+        getNodeText(label, source).trim() === getNodeText(value, source).trim()
+      ) {
+        return [];
+      }
+    }
+    if (layerField) {
+      const inner = getChildByField(node, layerField);
+      return inner ? normalizeValue(inner, spec, source, depth + 1) : [];
+    }
+    const results: Array<{ name: string; node: SyntaxNode }> = []; // no field configured: collect from every named child
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (child) results.push(...normalizeValue(child, spec, source, depth + 1));
+    }
+    return results;
+  }
+
+  // Unary wrappers: &fn / @Fn / `fn _`
+  const unwrapField = spec.unwrap?.get(type);
+  if (spec.unwrap?.has(type)) {
+    // C-family `pointer_expression` covers BOTH `&x` (address-of — a function
+    // value) and `*x` (dereference — a data read, never a function value).
+    // Only `&` qualifies; without this, fmt's `*begin` reads resolved to its
+    // free `begin()` functions.
+    if (type === 'pointer_expression' && node.child(0)?.type !== '&') return [];
+    const inner = unwrapField ? getChildByField(node, unwrapField) : node.namedChild(0);
+    if (!inner) return [];
+    // C++ `&Widget::on_click` — keep the QUALIFIED name. Resolution scopes the
+    // method to that class (more precise than a bare-name match, and exempt
+    // from the cpp bare-ids-are-free-functions rule since `&Cls::m` is an
+    // explicit member-pointer).
+    if (inner.type === 'qualified_identifier') {
+      const text = getNodeText(inner, source).trim();
+      return /^[A-Za-z_][\w:]*$/.test(text) ? [{ name: text, node: inner }] : []; // plain `A::b` shapes only
+    }
+    return normalizeValue(inner, spec, source, depth + 1);
+  }
+
+  // Special whole-node reference forms
+  if (spec.special?.has(type)) {
+    return normalizeSpecial(node, type, source);
+  }
+
+  return [];
+}
+
+/** Last node, in document order, among the strict named descendants of `node` whose type is in `types`; null when there is none. */
+function lastNamedOfType(node: SyntaxNode, types: Set<string>): SyntaxNode | null {
+  let match: SyntaxNode | null = null;
+  const total = node.namedChildCount;
+  for (let idx = 0; idx < total; idx += 1) {
+    const kid = node.namedChild(idx);
+    if (!kid) continue;
+    // A hit nested inside `kid` comes after `kid` itself, so it wins over `kid`; otherwise keep the best so far.
+    match = lastNamedOfType(kid, types) ?? (types.has(kid.type) ? kid : match);
+  }
+  return match;
+}
+
+function normalizeSpecial(
+  node: SyntaxNode, // the whole special-form node
+  type: string, // node.type, as already read by normalizeValue
+  source: string // full file text, used to read node text
+): Array<{ name: string; node: SyntaxNode }> {
+  switch (type) {
+    // Java `Main::targetCb` / `this::run0` — last identifier child is the method.
+    case 'method_reference': {
+      let last: SyntaxNode | null = null;
+      for (let i = 0; i < node.namedChildCount; i++) {
+        const child = node.namedChild(i);
+        if (child && child.type === 'identifier') last = child;
+      }
+      return last ? [{ name: getNodeText(last, source), node: last }] : [];
+    }
+
+    // Kotlin `::targetCb` — the simple_identifier child.
+    case 'callable_reference': {
+      for (let i = 0; i < node.namedChildCount; i++) {
+        const child = node.namedChild(i);
+        if (child && child.type === 'simple_identifier') {
+          return [{ name: getNodeText(child, source), node: child }];
+        }
+      }
+      return [];
+    }
+
+    // Kotlin `this::fire` parses as navigation_expression with a `::fire`
+    // navigation_suffix. Ordinary `a.b` navigation MUST yield nothing.
+    case 'navigation_expression': {
+      for (let i = 0; i < node.namedChildCount; i++) {
+        const child = node.namedChild(i);
+        if (child && child.type === 'navigation_suffix' && getNodeText(child, source).startsWith('::')) {
+          const id = child.namedChild(child.namedChildCount - 1);
+          if (id) return [{ name: getNodeText(id, source), node: id }];
+        }
+      }
+      return [];
+    }
+
+    // Swift `#selector(Holder.fire)` → fire. ObjC `@selector(storeImage:)` →
+    // `storeImage:` verbatim (ObjC method nodes keep their selector colons).
+    case 'selector_expression': {
+      const inner = node.namedChild(0);
+      if (!inner) return [];
+      if (inner.type === 'identifier' || inner.type === 'simple_identifier') {
+        return [{ name: getNodeText(inner, source), node: inner }];
+      }
+      // Swift dotted form: rightmost simple_identifier. ObjC keyword selector:
+      // text as-is.
+      const last = lastNamedOfType(node, new Set(['simple_identifier']));
+      if (last) return [{ name: getNodeText(last, source), node: last }];
+      return [{ name: getNodeText(inner, source).trim(), node: inner }];
+    }
+
+    // Ruby `method(:target_cb)` — a `call` whose method is literally `method` with a single
+    // symbol argument. NOTE(review): the receiver is not inspected, so `obj.method(:sym)` matches too.
+    case 'call': {
+      const method = getChildByField(node, 'method');
+      if (!method || getNodeText(method, source) !== 'method') return [];
+      const args = getChildByField(node, 'arguments');
+      if (!args || args.namedChildCount !== 1) return [];
+      const sym = args.namedChild(0);
+      if (!sym || sym.type !== 'simple_symbol') return [];
+      const name = getNodeText(sym, source).replace(/^:/, ''); // strip the leading symbol colon
+      return name ? [{ name, node: sym }] : [];
+    }
+
+    // `self.handle_click` (Python) — object must be EXACTLY `self`.
+    case 'attribute': {
+      const obj = getChildByField(node, 'object');
+      const attr = getChildByField(node, 'attribute');
+      if (obj && attr && obj.type === 'identifier' && getNodeText(obj, source) === 'self') {
+        return [{ name: getNodeText(attr, source), node: attr }];
+      }
+      return [];
+    }
+
+    // `this.Run0` (C#) — receiver must be EXACTLY `this`. Two grammar shapes:
+    // newer tree-sitter-c-sharp exposes an `expression` field holding a
+    // `this_expression`; the vendored grammar keeps `this` as an anonymous
+    // token (only the `name` field is a named child), so fall back to the
+    // node text.
+    case 'member_access_expression': {
+      const name = getChildByField(node, 'name');
+      if (!name) return [];
+      const expr = getChildByField(node, 'expression');
+      const isThisReceiver = expr
+        ? expr.type === 'this_expression' || expr.type === 'this'
+        : getNodeText(node, source).startsWith('this.');
+      return isThisReceiver ? [{ name: getNodeText(name, source), node: name }] : [];
+    }
+
+    default: // a type listed in `spec.special` with no handler here yields nothing
+      return [];
+  }
+}
diff --git a/src/extraction/generated-detection.ts b/src/extraction/generated-detection.ts
index bde190725..1ec6a6d82 100644
--- a/src/extraction/generated-detection.ts
+++ b/src/extraction/generated-detection.ts
@@ -41,6 +41,9 @@ const GENERATED_PATTERNS: ReadonlyArray<RegExp> = [
   /\.pb\.[jt]s$/,
   /_pb\.[jt]s$/,
   /_grpc_pb\.[jt]s$/,
+  // Minified bundles vendored into a repo (docs sites, examples). Their
+  // single-letter symbols make name-based edges pure noise.
+  /\.min\.m?js$/,
   // Python — protobuf / gRPC / openapi-codegen
   /_pb2(_grpc)?\.py$/,
   /_pb2\.pyi$/,
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 8eb04c6e8..3295df4e8 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -17,6 +17,8 @@ import {
 } from '../types';
 import { getParser, detectLanguage, isLanguageSupported, isFileLevelOnlyLanguage } from './grammars';
 import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers';
+import { FN_REF_SPECS, captureFnRefCandidates, type FnRefSpec, type FnRefCandidate } from './function-ref';
+import { isGeneratedFile } from './generated-detection';
 import type { LanguageExtractor, ExtractorContext } from './tree-sitter-types';
 import { EXTRACTORS } from './languages';
 import { LiquidExtractor } from './liquid-extractor';
@@ -222,12 +224,18 @@ export class TreeSitterExtractor {
   private extractor: LanguageExtractor | null = null;
   private nodeStack: string[] = []; // Stack of parent node IDs
   private methodIndex: Map<string, string> | null = null; // lookup key → node ID for Pascal defProc lookup
+  // Function-as-value capture (#756): per-language spec + candidates collected
+  // during the walk, gated & flushed into unresolvedReferences at end-of-file
+  // (see flushFnRefCandidates).
+  private fnRefSpec: FnRefSpec | undefined;
+  private fnRefCandidates: Array<FnRefCandidate & { fromNodeId: string }> = [];
 
   constructor(filePath: string, source: string, language?: Language) {
     this.filePath = filePath;
     this.source = source;
     this.language = language || detectLanguage(filePath, source);
     this.extractor = EXTRACTORS[this.language] || null;
+    this.fnRefSpec = FN_REF_SPECS[this.language];
   }
 
   /**
@@ -314,6 +322,10 @@ export class TreeSitterExtractor {
 
       this.visitNode(this.tree.rootNode);
 
+      // Gate + flush function-as-value candidates (#756) while the file's
+      // nodes and import refs are complete and the file node is still pushed.
+      this.flushFnRefCandidates();
+
       if (packageNodeId) this.nodeStack.pop();
       this.nodeStack.pop();
     } catch (error) {
@@ -352,6 +364,136 @@ export class TreeSitterExtractor {
     };
   }
 
+  /**
+   * Function-as-value capture hook (#756). When `nodeType` has a dispatch rule
+   * in this language's spec, run captureFnRefCandidates on the node and queue
+   * every result, attributed to the node currently on top of `nodeStack`.
+   * Queued candidates are gated & flushed at end-of-file (flushFnRefCandidates).
+   */
+  private maybeCaptureFnRefs(node: SyntaxNode, nodeType: string): void {
+    const activeSpec = this.fnRefSpec;
+    const rule = activeSpec?.dispatch.get(nodeType);
+    if (!activeSpec || !rule) return;
+    // Attribute to the innermost enclosing node; an empty stack or an empty id means there is no owner.
+    const owner = this.nodeStack.length > 0 ? this.nodeStack[this.nodeStack.length - 1] : undefined;
+    if (!owner) return;
+    for (const found of captureFnRefCandidates(node, rule, activeSpec, this.source)) {
+      this.fnRefCandidates.push({ ...found, fromNodeId: owner });
+    }
+  }
+
+  /**
+   * Capture-only sweep over a subtree the main walkers don't traverse (e.g.
+   * top-level variable initializers); it produces no extraction side effects.
+   * Gives up beyond depth 12 and does not enter nested function-like nodes,
+   * whose bodies are walked — and attributed — by extractFunction's body walk.
+   */
+  private scanFnRefSubtree(node: SyntaxNode, depth: number): void {
+    if (!this.fnRefSpec || depth > 12) return;
+    const kind = node.type;
+    if (depth > 0) {
+      // Below the scan root, any function-like node ends this branch of the sweep.
+      const opensFunction =
+        this.extractor?.functionTypes.includes(kind) ||
+        kind === 'arrow_function' || kind === 'function_expression' ||
+        kind === 'lambda_literal' || kind === 'lambda_expression';
+      if (opensFunction) return;
+    }
+    this.maybeCaptureFnRefs(node, kind);
+    const childTotal = node.namedChildCount;
+    for (let idx = 0; idx < childTotal; idx += 1) {
+      const next = node.namedChild(idx);
+      if (next) this.scanFnRefSubtree(next, depth + 1);
+    }
+  }
+
+  /**
+   * Gate captured function-as-value candidates and push survivors as
+   * `function_ref` unresolved references.
+   *
+   * The gate bounds volume and protects precision: a candidate survives only
+   * if its name matches a function/method DEFINED IN THIS FILE or an
+   * import-binding name of this file (`imports` references only — plain
+   * `references` are deliberately excluded). Everything else (locals, params,
+   * fields passed as arguments) is dropped before it ever reaches the
+   * database. Resolution then matches survivors against function/method nodes
+   * only (matchFunctionRef) and emits `references` edges.
+   *
+   * Exception: modes listed in the spec's `ungatedModes` skip the gate when
+   * the candidate was captured at FILE scope (C-family initializer tables).
+   * Outside that case a callback defined in another file with no import
+   * binding is not captured. Generated/minified files emit nothing at all.
+   */
+  private flushFnRefCandidates(): void {
+    if (this.fnRefCandidates.length === 0) return;
+    const candidates = this.fnRefCandidates;
+    this.fnRefCandidates = []; // reset the queue before any early return below
+
+    // Generated/minified files (vendored jquery.min.js and friends): their
+    // function-as-value edges are noise — single-letter minified symbols
+    // resolve everywhere. Same policy as the callback synthesizer.
+    if (isGeneratedFile(this.filePath)) return;
+
+    const definedHere = new Set<string>();
+    for (const n of this.nodes) {
+      if (n.kind === 'function' || n.kind === 'method') definedHere.add(n.name);
+    }
+
+    // Import-binding names only (all binding emitters push kind 'imports').
+    // Deliberately NOT 'references': those carry type-annotation and
+    // interface-member names, which let local variables that share a type
+    // member's name slip through the gate (excalidraw A/B finding).
+    const SIMPLE_NAME = /^[A-Za-z_$][A-Za-z0-9_$]*$/;
+    const importedNames = new Set<string>();
+    for (const r of this.unresolvedReferences) {
+      if (r.referenceKind === 'imports' && SIMPLE_NAME.test(r.referenceName)) {
+        importedNames.add(r.referenceName);
+      }
+    }
+
+    const ungated = this.fnRefSpec?.ungatedModes;
+    const addressOfOnly = this.fnRefSpec?.addressOfOnly === true;
+    const seen = new Set<string>();
+    for (const c of candidates) {
+      const atFileScope = c.fromNodeId.startsWith('file:'); // presumably file node ids carry a `file:` prefix — confirm against generateNodeId
+      // C++ (addressOfOnly): a BARE identifier qualifies only inside a
+      // file-scope initializer table. Everywhere else — args, assignments,
+      // local braced-init lists like `{begin, size}` — only explicit `&`
+      // forms count (fmt A/B finding: generic names `begin`/`out`/`size`
+      // collide with locals and members).
+      if (
+        addressOfOnly &&
+        !c.explicitRef &&
+        !(atFileScope && (c.mode === 'value' || c.mode === 'list'))
+      ) {
+        continue;
+      }
+      // C-family file-scope initializers skip the gate (constant-expression
+      // context — a bare identifier there is a function address, never a
+      // variable; see FnRefSpec.ungatedModes). Local initializers and
+      // everything else require a same-file/import match.
+      const skipGate = ungated?.has(c.mode) === true && atFileScope;
+      // Qualified C++ member-pointers (`Widget::on_click`) gate on the member
+      // name; everything else on the full name.
+      const gateName = c.name.includes('::')
+        ? c.name.slice(c.name.lastIndexOf('::') + 2)
+        : c.name;
+      if (!skipGate && !definedHere.has(gateName) && !importedNames.has(gateName)) {
+        continue;
+      }
+      const key = `${c.fromNodeId}|${c.name}`; // at most one reference per (owner, name) pair
+      if (seen.has(key)) continue;
+      seen.add(key);
+      this.unresolvedReferences.push({
+        fromNodeId: c.fromNodeId,
+        referenceName: c.name,
+        referenceKind: 'function_ref',
+        line: c.line,
+        column: c.column,
+      });
+    }
+  }
+
   /**
    * Visit a node and extract information
    */
@@ -365,7 +507,14 @@ export class TreeSitterExtractor {
     if (this.extractor.visitNode) {
       const ctx = this.makeExtractorContext();
       const handled = this.extractor.visitNode(node, ctx);
-      if (handled) return;
+      if (handled) {
+        // The hook consumed this subtree, so the walkers below never descend
+        // into it — scan it for function-as-value candidates (#756). Scala's
+        // hook handles val/var definitions (`val table = Seq(targetCb)`), for
+        // example. The scan is capture-only and halts at nested functions.
+        this.scanFnRefSubtree(node, 0);
+        return;
+      }
     }
 
     // Pascal-specific AST handling
@@ -374,6 +523,11 @@ export class TreeSitterExtractor {
       if (skipChildren) return;
     }
 
+    // Function-as-value capture (#756) — independent of the dispatch ladder
+    // below (the captured container types have no other handler there), so it
+    // can never shadow or be shadowed by an extraction branch.
+    this.maybeCaptureFnRefs(node, nodeType);
+
     // Check for function declarations
     // For Python/Ruby, function_definition inside a class should be treated as method
     if (this.extractor.functionTypes.includes(nodeType)) {
@@ -437,17 +591,33 @@ export class TreeSitterExtractor {
     // Check for class properties (e.g. C# property_declaration)
     else if (this.extractor.propertyTypes?.includes(nodeType) && this.isInsideClassLikeNode()) {
       this.extractProperty(node);
+      // Property initializers aren't walked — scan for function-as-value
+      // candidates (#756): Scala `val table = Seq(targetCb)` in an object,
+      // Kotlin `val cb = ::handler` class properties.
+      this.scanFnRefSubtree(node, 0);
       skipChildren = true;
     }
     // Check for class fields (e.g. Java field_declaration, C# field_declaration)
     else if (this.extractor.fieldTypes?.includes(nodeType) && this.isInsideClassLikeNode()) {
       this.extractField(node);
+      // Field initializers aren't walked — scan for function-as-value
+      // candidates (#756): Java `List<IntConsumer> table = List.of(Main::cb)`,
+      // C# `List<Action<int>> table = new() { TargetCb }`.
+      this.scanFnRefSubtree(node, 0);
       skipChildren = true;
     }
     // Check for variable declarations (const, let, var, etc.)
     // Only extract top-level variables (not inside functions/methods)
     else if (this.extractor.variableTypes.includes(nodeType) && !this.isInsideClassLikeNode()) {
       this.extractVariable(node);
+      // extractVariable doesn't walk every initializer shape (object literals
+      // are deliberately skipped; Python/Ruby don't walk at all), so scan the
+      // declaration subtree for function-as-value candidates — `const routes =
+      // { home: renderHome }`, `handlers = {"recv": target_cb}`. The scan halts
+      // at nested function definitions (their bodies are walked — and
+      // attributed — separately) and flush-time dedup absorbs any overlap with
+      // initializers extractVariable DOES walk.
+      this.scanFnRefSubtree(node, 0);
       skipChildren = true; // extractVariable handles children
     }
     // Swift stored properties inside a type. Swift instance properties aren't
@@ -3086,6 +3256,10 @@ export class TreeSitterExtractor {
     const visitForCallsAndStructure = (node: SyntaxNode): void => {
       const nodeType = node.type;
 
+      // Function-as-value capture (#756) — function bodies are walked here,
+      // not in visitNode, so the capture hook must fire in both walkers.
+      this.maybeCaptureFnRefs(node, nodeType);
+
       // Rocket route-registration macros (`routes![…]` / `catchers![…]`): the
       // handler paths live in a raw token tree the call walker can't see.
       if (nodeType === 'macro_invocation') this.extractRustRouteMacro(node);
@@ -4461,8 +4635,16 @@ export class TreeSitterExtractor {
     for (let i = 0; i < node.namedChildCount; i++) {
       const child = node.namedChild(i);
       if (!child) continue;
+      // Function-as-value capture (#756): Pascal bodies are walked here, not
+      // in visitNode/visitForCallsAndStructure, so the capture hook fires here
+      // — assignment RHS is the Delphi event-wiring idiom (`OnFire := Handler`).
+      this.maybeCaptureFnRefs(child, child.type);
       if (child.type === 'exprCall') {
         this.extractPascalCall(child);
+        // The walker doesn't descend into a call's arguments — dispatch the
+        // argument container directly (`RegisterHandler(TargetCb)` / `(@Cb)`).
+        const args = child.namedChildren.find((c: SyntaxNode) => c.type === 'exprArgs');
+        if (args) this.maybeCaptureFnRefs(args, 'exprArgs');
       } else if (child.type === 'exprDot') {
         // A STATEMENT-level bare exprDot is a paren-less call (`Obj.Free;`,
         // `TFoo.GetInstance.DoIt;`). Anywhere else (assignment side, condition,
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
index b246899c7..cfa730eaf 100644
--- a/src/mcp/server-instructions.ts
+++ b/src/mcp/server-instructions.ts
@@ -47,7 +47,7 @@ typically one to a few calls; a grep/read exploration is dozens.
 - **Almost any question — "how does X work", architecture, a bug, "what/where is X", or surveying an area** → \`codegraph_explore\` (PRIMARY — call FIRST; ONE capped call returns the verbatim source of the relevant symbols grouped by file; most often the ONLY call you need)
 - **"How does X reach/become Y? / the flow / the path from X to Y"** → \`codegraph_explore\`, naming the symbols that span the flow (e.g. \`mutateElement renderScene\`) — it surfaces the call path among them, including dynamic-dispatch hops (callbacks, React re-render, JSX children) grep can't follow
 - **"What is the symbol named X?" (just its location)** → \`codegraph_search\`
-- **"What calls this?" / "What does this call?" / "What would changing this break?"** → \`codegraph_callers\` / \`codegraph_callees\` / \`codegraph_impact\`
+- **"What calls this?" / "What does this call?" / "What would changing this break?"** → \`codegraph_callers\` / \`codegraph_callees\` / \`codegraph_impact\`. Callers includes where a function is **registered as a callback** (passed as an argument, assigned to a function pointer/field, listed in a handler table) — labeled "via callback registration" — so a function with no direct calls is NOT dead if it's wired up somewhere
 - **Reading a source FILE (any time you'd use the \`Read\` tool)** → \`codegraph_node\` with a \`file\` path and no \`symbol\`. It returns the file's **current source with line numbers — the same \`<n>\\t<line>\` shape \`Read\` gives you, safe to \`Edit\` from** — narrowable with \`offset\`/\`limit\` exactly like \`Read\`, PLUS a one-line note of which files depend on it. Same bytes as \`Read\`, faster (served from the index), with the blast radius attached. Use it **instead of \`Read\`** for indexed source files; fall back to \`Read\` only for what codegraph doesn't index (configs, docs). Pass \`symbolsOnly: true\` for just the file's structure.
 - **About to read or edit a symbol you can name** → \`codegraph_node\` with that \`symbol\` (SECONDARY — the after-explore depth tool): the verbatim source (\`includeCode: true\`) PLUS its caller/callee trail, so before changing it you see what calls it and what your edit would break. For an OVERLOADED name it returns EVERY matching definition's body in one call, so you never Read a file to find the right overload
 - **"What's in directory X?"** → \`codegraph_files\`
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 94fcc5dd9..7351ca55d 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -1113,11 +1113,14 @@ export class ToolHandler {
     // Aggregate callers across all matching symbols
     const seen = new Set<string>();
     const allCallers: Node[] = [];
+    const labels = new Map<string, string>();
     for (const node of allMatches.nodes) {
       for (const c of cg.getCallers(node.id)) {
         if (!seen.has(c.node.id)) {
           seen.add(c.node.id);
           allCallers.push(c.node);
+          const label = this.edgeLabel(c.edge);
+          if (label) labels.set(c.node.id, label);
         }
       }
     }
@@ -1126,7 +1129,7 @@ export class ToolHandler {
       return this.textResult(`No callers found for "${symbol}"${allMatches.note}`);
     }
 
-    const formatted = this.formatNodeList(allCallers.slice(0, limit), `Callers of ${symbol}`) + allMatches.note;
+    const formatted = this.formatNodeList(allCallers.slice(0, limit), `Callers of ${symbol}`, labels) + allMatches.note;
     return this.textResult(this.truncateOutput(formatted));
   }
 
@@ -1148,11 +1151,14 @@ export class ToolHandler {
     // Aggregate callees across all matching symbols
     const seen = new Set<string>();
     const allCallees: Node[] = [];
+    const labels = new Map<string, string>();
     for (const node of allMatches.nodes) {
       for (const c of cg.getCallees(node.id)) {
         if (!seen.has(c.node.id)) {
           seen.add(c.node.id);
           allCallees.push(c.node);
+          const label = this.edgeLabel(c.edge);
+          if (label) labels.set(c.node.id, label);
         }
       }
     }
@@ -1161,7 +1167,7 @@ export class ToolHandler {
       return this.textResult(`No callees found for "${symbol}"${allMatches.note}`);
     }
 
-    const formatted = this.formatNodeList(allCallees.slice(0, limit), `Callees of ${symbol}`) + allMatches.note;
+    const formatted = this.formatNodeList(allCallees.slice(0, limit), `Callees of ${symbol}`, labels) + allMatches.note;
     return this.textResult(this.truncateOutput(formatted));
   }
 
@@ -3337,18 +3343,37 @@ export class ToolHandler {
     return lines.join('\n');
   }
 
-  private formatNodeList(nodes: Node[], title: string): string {
+  private formatNodeList(nodes: Node[], title: string, labels?: Map<string, string>): string {
     const lines: string[] = [`## ${title} (${nodes.length} found)`, ''];
 
     for (const node of nodes) {
       const location = node.startLine ? `:${node.startLine}` : '';
-      // Compact: just name, kind, location
-      lines.push(`- ${node.name} (${node.kind}) - ${node.filePath}${location}`);
+      // Compact: just name, kind, location — plus the relationship when it
+      // isn't a plain call (callback registration, instantiation, …).
+      const label = labels?.get(node.id);
+      lines.push(
+        `- ${node.name} (${node.kind}) - ${node.filePath}${location}${label ? ` — via ${label}` : ''}`
+      );
     }
 
     return lines.join('\n');
   }
 
+  /**
+   * Relationship label for a non-`calls` edge in callers/callees lists. A
+   * function-as-value edge (#756) is the high-signal one: `callers(cb)`
+   * showing "via callback registration" tells the agent this is where the
+   * callback is WIRED, not where it's invoked.
+   */
+  private edgeLabel(edge: Edge): string | null {
+    if (edge.kind === 'calls') return null;
+    if (edge.metadata?.fnRef === true) return 'callback registration';
+    if (edge.kind === 'instantiates') return 'instantiation';
+    if (edge.kind === 'imports') return 'import';
+    if (edge.kind === 'references') return 'reference';
+    return edge.kind;
+  }
+
   private formatImpact(symbol: string, impact: Subgraph): string {
     const nodeCount = impact.nodes.size;
 
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index 96484001e..4d537d106 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -16,7 +16,7 @@ import {
   FrameworkResolver,
   ImportMapping,
 } from './types';
-import { matchReference, matchDottedCallChain, matchScopedCallChain, sameLanguageFamily, crossesKnownFamily } from './name-matcher';
+import { matchReference, matchFunctionRef, matchDottedCallChain, matchScopedCallChain, sameLanguageFamily, crossesKnownFamily } from './name-matcher';
 import { resolveViaImport, resolveJvmImport, extractImportMappings, extractReExports, loadCppIncludeDirs, isPhpIncludePathRef } from './import-resolver';
 import { detectFrameworks } from './frameworks';
 import { synthesizeCallbackEdges } from './callback-synthesizer';
@@ -669,6 +669,22 @@ export class ReferenceResolver {
       return null;
     }
 
+    // Function-as-value refs (#756) get a dedicated, strictly-gated path:
+    // import-based resolution first (an imported callback resolves through its
+    // import, the most precise cross-file signal), then matchFunctionRef
+    // (same-file first, unique-only cross-file, function/method targets only).
+    // They never reach the framework or fuzzy strategies below.
+    if (ref.referenceKind === 'function_ref') {
+      const viaImport = this.gateLanguage(resolveViaImport(ref, this.context), ref);
+      if (viaImport) {
+        const target = this.queries.getNodeById(viaImport.targetNodeId);
+        if (target && (target.kind === 'function' || target.kind === 'method')) {
+          return viaImport;
+        }
+      }
+      return this.gateLanguage(matchFunctionRef(ref, this.context), ref);
+    }
+
     // JVM FQN imports skip framework/name-matcher: `import com.example.Bar`
     // resolves directly through the qualifiedName index, which is unambiguous
     // even when several `Bar` classes exist in different packages.
@@ -750,7 +766,13 @@ export class ReferenceResolver {
    */
   createEdges(resolved: ResolvedRef[]): Edge[] {
     return resolved.map((ref) => {
-      let kind = ref.original.referenceKind;
+      // `function_ref` (#756) is internal-only: it persists as a `references`
+      // edge (the registration site depends on the callback), distinguishable
+      // by metadata.fnRef === true (resolvedBy may be 'import' or
+      // 'function-ref'). callers/impact already traverse `references`, so
+      // registration sites surface with no graph-layer changes.
+      let kind: Edge['kind'] =
+        ref.original.referenceKind === 'function_ref' ? 'references' : ref.original.referenceKind;
 
       // Promote "extends" to "implements" when a class/struct targets an interface
       if (kind === 'extends') {
@@ -784,6 +806,11 @@ export class ReferenceResolver {
         metadata: {
           confidence: ref.confidence,
           resolvedBy: ref.resolvedBy,
+          // Uniform marker for function-as-value edges (#756), regardless of
+          // which strategy resolved them (import vs matchFunctionRef) — lets
+          // tooling label "callback registration" and lets validation diff
+          // exactly the edges this feature added.
+          ...(ref.original.referenceKind === 'function_ref' ? { fnRef: true } : {}),
         },
       };
     });
@@ -1161,7 +1188,7 @@ export class ReferenceResolver {
     if (!result) return result;
     const tgt = this.getLanguageFromNodeId(result.targetNodeId);
     if (!tgt || !ref.language) return result;
-    if (ref.referenceKind === 'references' && !sameLanguageFamily(tgt, ref.language)) return null;
+    if ((ref.referenceKind === 'references' || ref.referenceKind === 'function_ref') && !sameLanguageFamily(tgt, ref.language)) return null;
     if (ref.referenceKind === 'imports' && crossesKnownFamily(tgt, ref.language)) return null;
     return result;
   }
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index b1280a78f..c29ad41e0 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -158,7 +158,7 @@ export function crossesKnownFamily(a: string, b: string): boolean {
  *    both-known filter so `.vue`/`.svelte` (own tag) importing `.ts` survives.
  */
 function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
-  if (ref.referenceKind === 'references') {
+  if (ref.referenceKind === 'references' || ref.referenceKind === 'function_ref') {
     return candidates.filter((c) => sameLanguageFamily(c.language, ref.language));
   }
   if (ref.referenceKind === 'imports') {
@@ -167,6 +167,113 @@ function applyLanguageGate(candidates: Node[], ref: UnresolvedRef): Node[] {
   return candidates;
 }
 
+/**
+ * Resolve a function-as-value reference (#756) — a function name used as a
+ * callback/function-pointer value (`register(handler)`, `o->cb = handler`,
+ * `{ .cb = handler }`, `signal(SIGINT, handler)`). The ONLY name-matching
+ * strategy allowed for `function_ref` refs (the resolver tries imports
+ * first): exact name — or, for `Cls::member`, an exact qualified-name suffix —
+ * function/method targets only, same language family, same-file first, and
+ * cross-file only when UNIQUE. No fuzzy fallback — a wrong edge is worse than none.
+ */
+export function matchFunctionRef(
+  ref: UnresolvedRef,
+  context: ResolutionContext
+): ResolvedRef | null {
+  // In JS/TS/Python a bare identifier can never be a method value (methods
+  // are only reachable through a receiver — `this.m` / `self.m` /
+  // `Cls.m`), so bare fn-refs match FUNCTIONS only. This also sidesteps the
+  // pre-existing TS quirk of class fields extracting as method-kind nodes,
+  // which otherwise soaked up local names passed as arguments (excalidraw
+  // A/B finding; same pattern in vendored docopt.py). Python's `self.m`
+  // form keeps method targets via its own capture shape. C++ likewise: a
+  // bare identifier can only be a FREE function (member values need
+  // `&Cls::method`). Other languages keep method targets: C# method groups,
+  // Swift/Dart implicit-self, Java/Kotlin method references.
+  const bareFnOnly =
+    ref.language === 'typescript' || ref.language === 'tsx' ||
+    ref.language === 'javascript' || ref.language === 'jsx' ||
+    ref.language === 'cpp' || ref.language === 'python';
+
+  // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"):
+  // resolve the member ON THAT SCOPE — exempt from bareFnOnly (the `&Cls::m`
+  // shape is an explicit member reference). Unique-or-drop like everything else.
+  if (ref.referenceName.includes('::')) {
+    const memberName = ref.referenceName.slice(ref.referenceName.lastIndexOf('::') + 2);
+    const scoped = context
+      .getNodesByName(memberName)
+      .filter(
+        (n) =>
+          (n.kind === 'function' || n.kind === 'method') &&
+          sameLanguageFamily(n.language, ref.language) &&
+          n.id !== ref.fromNodeId &&
+          (n.qualifiedName === ref.referenceName ||
+            n.qualifiedName.endsWith(`::${ref.referenceName}`))
+      );
+    if (scoped.length === 0) return null;
+    const sameFileScoped = scoped.filter((n) => n.filePath === ref.filePath);
+    const pool = sameFileScoped.length > 0 ? sameFileScoped : scoped;
+    if (sameFileScoped.length === 0 && scoped.length > 1) return null;
+    const target = pool.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
+    return {
+      original: ref,
+      targetNodeId: target.id,
+      confidence: 0.9,
+      resolvedBy: 'function-ref',
+    };
+  }
+
+  const candidates = context
+    .getNodesByName(ref.referenceName)
+    .filter(
+      (n) =>
+        (n.kind === 'function' || (!bareFnOnly && n.kind === 'method')) &&
+        sameLanguageFamily(n.language, ref.language) &&
+        n.id !== ref.fromNodeId // a function registering itself is not a dependency edge
+    );
+  if (candidates.length === 0) return null;
+
+  // Same-file definition wins — the extraction gate guarantees most survivors
+  // have one, and it's the dominant C pattern (static callback registered in
+  // a same-file ops struct).
+  const sameFile = candidates.filter((n) => n.filePath === ref.filePath);
+  if (sameFile.length > 0) {
+    // Swift: several same-named METHODS in one file is an API overload family
+    // (`Session.request(...)` × N), and a bare identifier hitting it is almost
+    // always a same-named parameter, not a method value (Alamofire A/B
+    // finding) — refuse rather than guess. A single method (SwiftUI's
+    // `action: handleTap`) still resolves.
+    if (
+      ref.language === 'swift' &&
+      sameFile.length > 1 &&
+      sameFile.every((n) => n.kind === 'method')
+    ) {
+      return null;
+    }
+    // Same-name overloads in one file are the same conceptual symbol; pick
+    // the first by position for determinism.
+    const target = sameFile.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
+    return {
+      original: ref,
+      targetNodeId: target.id,
+      confidence: sameFile.length === 1 ? 0.95 : 0.9,
+      resolvedBy: 'function-ref',
+    };
+  }
+
+  // Cross-file (imported names the import resolver didn't already claim):
+  // only an unambiguous match resolves.
+  if (candidates.length === 1) {
+    return {
+      original: ref,
+      targetNodeId: candidates[0]!.id,
+      confidence: 0.8,
+      resolvedBy: 'function-ref',
+    };
+  }
+  return null;
+}
+
 /**
  * Try to resolve a reference by exact name match
  */
@@ -1124,6 +1231,13 @@ export function matchReference(
   ref: UnresolvedRef,
   context: ResolutionContext
 ): ResolvedRef | null {
+  // Function-as-value refs (#756) resolve ONLY through the dedicated matcher —
+  // never the fuzzy/qualified fallthrough below (a wrong callback edge is
+  // worse than none).
+  if (ref.referenceKind === 'function_ref') {
+    return matchFunctionRef(ref, context);
+  }
+
   // Try strategies in order of confidence
   let result: ResolvedRef | null;
 
diff --git a/src/resolution/types.ts b/src/resolution/types.ts
index 8c2fc168f..ca08b634f 100644
--- a/src/resolution/types.ts
+++ b/src/resolution/types.ts
@@ -4,7 +4,7 @@
  * Types for the reference resolution system.
  */
 
-import { EdgeKind, Language, Node } from '../types';
+import { Language, Node, ReferenceKind } from '../types';
 
 /**
  * An unresolved reference from extraction
@@ -15,7 +15,7 @@ export interface UnresolvedRef {
   /** The name being referenced */
   referenceName: string;
   /** Type of reference */
-  referenceKind: EdgeKind;
+  referenceKind: ReferenceKind;
   /** Line where reference occurs */
   line: number;
   /** Column where reference occurs */
@@ -39,7 +39,7 @@ export interface ResolvedRef {
   /** Confidence score (0-1) */
   confidence: number;
   /** How it was resolved */
-  resolvedBy: 'exact-match' | 'import' | 'qualified-name' | 'framework' | 'fuzzy' | 'instance-method' | 'file-path';
+  resolvedBy: 'exact-match' | 'import' | 'qualified-name' | 'framework' | 'fuzzy' | 'instance-method' | 'file-path' | 'function-ref';
 }
 
 /**
diff --git a/src/types.ts b/src/types.ts
index 0ff4b7a5f..be3452d97 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -278,6 +278,14 @@ export interface ExtractionError {
   code?: string;
 }
 
+/**
+ * Kinds an unresolved reference can carry. `function_ref` is internal-only —
+ * a function name used as a VALUE (callback registration, #756). It never
+ * becomes an edge kind: resolution maps it to a `references` edge targeting
+ * function/method nodes only (see `matchFunctionRef`).
+ */
+export type ReferenceKind = EdgeKind | 'function_ref';
+
 /**
  * A reference that couldn't be resolved during extraction
  */
@@ -289,7 +297,7 @@ export interface UnresolvedReference {
   referenceName: string;
 
   /** Type of reference (call, type, import, etc.) */
-  referenceKind: EdgeKind;
+  referenceKind: ReferenceKind;
 
   /** Location of the reference */
   line: number;

From 38eb4e688c2029459a9875705c1671a26b092b72 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 14:48:11 -0500
Subject: [PATCH 36/51] =?UTF-8?q?fix(extraction):=20classify=20TS/JS=20cla?=
 =?UTF-8?q?ss=20fields=20by=20value=20=E2=80=94=20properties,=20not=20meth?=
 =?UTF-8?q?ods=20(#808)=20(#809)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every TS `public_field_definition` / JS `field_definition` extracted as a
method-kind node, so a plain field (`public fonts: Fonts;`) was reported
as callable: class shape was misrepresented, kind-based filtering was
defeated, and bare-name call resolution landed on data fields — typeorm's
boolean `ColumnMetadata::isArray` field was soaking up Array.isArray(...)
call edges (685 such wrong edges on typeorm alone).

Classification now follows the VALUE (classifyMethodNode hook, mirroring
resolveBody's callable detection): arrow-function / function-expression
fields and HOF-wrapped ones (`onScroll = throttle(() => {…})`) stay
methods with their bodies walked; everything else becomes a property that
keeps its type-annotation references edge, visibility, static-ness, and
decorators. Field initializers are now walked too (`history =
createHistory()` attributes the call to the property — previously
invisible), and JS class fields — whose name lives in the grammar's
`property` field, so they never extracted a symbol at all — now appear in
the graph (resolveName on the JS extractor).

With fields correctly kinded, `this.X` callback registration is re-enabled
for TS/JS (removed in #807 because field pseudo-methods made it mostly
wrong): `this.<member>` candidates resolve CLASS-SCOPED
(resolveThisMemberFnRef) — the target must be a function/method sharing
the from-symbol's qualified-name class prefix, same file, no fallback —
so `addEventListener("online", this.onOfflineStatusToggle)` and API-object
wiring (`{ mutateElement: this.mutateElement }`) produce registration
edges to the enclosing class's own method, while `this.fonts` (a
property) and inherited/unknown members yield no edge.

A/B (baseline = #807 main): excalidraw / typeorm / express — node counts
identical on all three; kinds shift method→property only (typeorm: exactly
7,406 swapped; excalidraw also corrects 5 anonymous-class mock fields that
were function-kind); every one of the 736 dropped call edges targeted a
node that is now a property (calls into data fields — verified 100%);
gains are retargets to real callables, initializer-call attributions, and
+74/+7 class-scoped this.X registration edges (sampled: addEventListener/
removeEventListener wiring, imperative-API method maps). Full suite green
(1386).

EXTRACTION_VERSION 19 → 20 (re-index to benefit).

Closes #808

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                              |   2 +
 __tests__/function-ref.test.ts            |  57 +++++++-
 __tests__/ts-field-classification.test.ts | 159 ++++++++++++++++++++++
 docs/design/function-ref-capture.md       |  33 +++--
 src/extraction/extraction-version.ts      |   2 +-
 src/extraction/function-ref.ts            |  26 +++-
 src/extraction/languages/javascript.ts    |  15 ++
 src/extraction/languages/typescript.ts    |  38 ++++++
 src/extraction/tree-sitter-types.ts       |   9 ++
 src/extraction/tree-sitter.ts             |  78 ++++++++---
 src/resolution/index.ts                   |  40 ++++++
 src/resolution/name-matcher.ts            |   4 +
 12 files changed, 420 insertions(+), 43 deletions(-)
 create mode 100644 __tests__/ts-field-classification.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9b3739f4e..e9f4b8720 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
+- TypeScript and JavaScript **class fields are now reported as properties instead of methods**. A plain field like `public fonts: Fonts;` previously extracted as a method, misrepresenting class shape and letting calls to same-named functions resolve to data fields (a boolean field named `isArray` was soaking up `Array.isArray(...)` call edges). Fields holding arrow functions or function expressions (`onClick = () => {…}`, including wrapped ones like `onScroll = throttle(() => {…})`) correctly remain methods and their bodies are still analyzed. Field initializers are analyzed too, so `history = createHistory()` records its call — and JavaScript class fields, which previously produced no symbol at all, now appear in the graph. Re-index a project to benefit. (#808) (TypeScript, JavaScript)
+- Callback registration through `this` now resolves precisely in TypeScript and JavaScript: `window.addEventListener("online", this.onOfflineStatusToggle)` or an API object like `{ mutateElement: this.mutateElement }` produces a reference edge to the **enclosing class's own method** — never a same-named method on an unrelated class, and never a data field. Builds on the callback-registration support below. (#808) (TypeScript, JavaScript)
 - CodeGraph now sees where a function is **registered as a callback**, not just where it's called. A function name passed as an argument (`signal(SIGINT, handler)`, `qsort(…, compare)`, `addEventListener(…, onBlur)`), assigned to a function pointer or field (`ops->recv_cb = my_cb`, `OnClick := Handler`), or placed in a struct initializer or handler table (`{ .recv_cb = my_cb }`, `{ "get", getCommand }`) now produces a reference edge from the registration site to the function — so `codegraph_callers` and `codegraph_impact` surface callback wiring that previously looked like dead code. Works across all supported languages, including the language-specific forms: C/C++ `&fn`, Java `Class::method`, Kotlin `::fn`, Swift `#selector`, Objective-C `@selector`, Ruby `method(:fn)`, Scala eta-expansion, and Delphi/Pascal `@Handler` and `OnClick := Handler` event wiring. Callers output labels these "via callback registration". Resolution is deliberately conservative: an ambiguous name produces no edge rather than a wrong one. Re-index a project to benefit. Thanks @zmcrazy. (#756)
 - The `codegraph_node` MCP tool can now **read a whole source file like the built-in Read tool — only faster, served from the index**. Pass a file path with no symbol and it returns that file's current source with line numbers (the same `<n>⇥<line>` shape Read produces, so an assistant can edit straight from it), narrowable with `offset`/`limit` exactly like Read, plus a one-line note of which files depend on it (the file's blast radius). Use it anywhere you'd reach for Read on an indexed source file. Pass `symbolsOnly: true` for just the file's structure. Configuration/data files (`.yml` / `.properties`) are summarized by key only, never dumped, so secrets in them are never surfaced. The agent-facing guidance was also retuned so assistants reach for codegraph while *implementing* a change (not only when answering questions), since one codegraph call returns the same bytes plus the blast radius, faster than re-reading the file.
 - New `codegraph upgrade` command updates CodeGraph to the latest release in place — it detects how you installed (the standalone `install.sh` / `install.ps1` bundle, npm, or npx) and does the right thing for each, on macOS, Linux, and Windows. Use `codegraph upgrade --check` to see whether an update is available without installing, or `codegraph upgrade <version>` to move to a specific version. After upgrading it reminds you to re-index your projects so they pick up the newer engine's improvements. (#679)
diff --git a/__tests__/function-ref.test.ts b/__tests__/function-ref.test.ts
index 95d5b1385..67dd348f2 100644
--- a/__tests__/function-ref.test.ts
+++ b/__tests__/function-ref.test.ts
@@ -144,10 +144,9 @@ describe('Function-as-value capture (#756)', () => {
         'objRegistrar',
         'timerRegistrar',
       ]);
-      // `this.handleClick` is deliberately NOT captured in TS/JS: class fields
-      // extract as method-kind nodes, so `this.X` value positions (mostly data
-      // reads in real code) produced wrong edges — see TS_JS_SPEC note.
-      expect(fnRefEdgesInto(cg, 'handleClick')).toHaveLength(0);
+      // `this.handleClick` resolves class-scoped (#808): the target must be a
+      // method of the ENCLOSING class, in the same file.
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'handleClick'))).toEqual(['wire']);
     } finally {
       cg.destroy();
       tmpDir = undefined;
@@ -408,6 +407,56 @@ describe('Function-as-value capture (#756)', () => {
     }
   });
 
+  it('THIS-MEMBER SCOPING: this.X resolves only to the enclosing class, never elsewhere', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-thisx-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.ts'),
+      [
+        'declare const bus: { on(ev: string, cb: () => void): void };',
+        // Decoy: a same-named method on an UNRELATED class.
+        'export class Decoy { refresh(): void {} }',
+        'export class Panel {',
+        '  views: number[] = [];', // property (post-#808), shares no name
+        '  refresh(): void {}',
+        '  wire(): void {',
+        '    bus.on("update", this.refresh);', // → Panel::refresh, not Decoy::refresh
+        '    bus.on("data", this.views as never);', // property → NO edge
+        '    bus.on("gone", this.missing as never);', // unknown member → NO edge
+        '  }',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      const refreshes = cg.getNodesByName('refresh');
+      const panelRefresh = refreshes.find((n) => n.qualifiedName.includes('Panel'))!;
+      const decoyRefresh = refreshes.find((n) => n.qualifiedName.includes('Decoy'))!;
+
+      const intoPanel = cg
+        .getIncomingEdges(panelRefresh.id)
+        .filter((e) => e.metadata?.fnRef === true);
+      expect(intoPanel).toHaveLength(1);
+      expect(cg.getNode(intoPanel[0]!.source)?.name).toBe('wire');
+      expect(
+        cg.getIncomingEdges(decoyRefresh.id).filter((e) => e.metadata?.fnRef === true)
+      ).toHaveLength(0);
+
+      // The property and the unknown member produce nothing.
+      const views = cg.getNodesByName('views').find((n) => n.kind === 'property');
+      if (views) {
+        expect(
+          cg.getIncomingEdges(views.id).filter((e) => e.metadata?.fnRef === true)
+        ).toHaveLength(0);
+      }
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
   it('C UNGATED TABLES: a command table names handlers defined in OTHER files (redis pattern)', async () => {
     tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-ctable-'));
     // Handler defined in its own file…
diff --git a/__tests__/ts-field-classification.test.ts b/__tests__/ts-field-classification.test.ts
new file mode 100644
index 000000000..82069a8d2
--- /dev/null
+++ b/__tests__/ts-field-classification.test.ts
@@ -0,0 +1,159 @@
+/**
+ * TS/JS class-field kind classification (#808).
+ *
+ * `public_field_definition` (TS) / `field_definition` (JS) previously
+ * extracted as method-kind nodes unconditionally, so a plain annotated field
+ * (`public fonts: Fonts;`) was reported as a method — misrepresenting class
+ * shape and defeating kind-based filtering (#756 had to work around it).
+ *
+ * Now classification follows the VALUE: arrow-function / function-expression
+ * fields (and HOF-wrapped ones, mirroring resolveBody) stay methods; every
+ * other field is a property. Parity requirements: the property keeps its
+ * type-annotation `references` edge, visibility, and static-ness; method
+ * fields keep walking their bodies (calls still attributed).
+ */
+
+import { describe, it, expect, beforeAll, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { CodeGraph } from '../src';
+import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';
+
+beforeAll(async () => {
+  await initGrammars();
+  await loadAllGrammars();
+});
+
+describe('TS/JS class field classification (#808)', () => {
+  let tmpDir: string | undefined;
+  afterEach(() => {
+    if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
+    tmpDir = undefined;
+  });
+
+  it('TS: plain fields are properties; function-valued fields are methods', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-808-ts-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'app.ts'),
+      [
+        'declare function throttle(f: unknown, ms: number): unknown;',
+        'class Fonts {}',
+        'class History {}',
+        'class App {',
+        '  public fonts: Fonts;', // plain annotated → property
+        '  private history: History = new History();', // annotated + initializer → property
+        '  interactiveCanvas: HTMLCanvasElement | null = null;', // union type → property
+        '  count = 0;', // plain value → property
+        '  static defaults = { a: 1 };', // object value → property
+        '  onClick = () => { this.run(); };', // arrow field → method
+        '  onScroll = throttle((e: Event) => { this.run(); }, 100);', // HOF-wrapped → method
+        '  handler = function namedFn() {};', // function expression → method
+        '  handleClick(): void {}', // real method
+        '  get value(): number { return 1; }', // getter stays method
+        '  run(): void {}',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      const kindOf = (name: string) =>
+        cg.getNodesByName(name).map((n) => n.kind).sort().join(',');
+
+      expect(kindOf('fonts')).toBe('property');
+      expect(kindOf('history')).toBe('property');
+      expect(kindOf('interactiveCanvas')).toBe('property');
+      expect(kindOf('count')).toBe('property');
+      expect(kindOf('defaults')).toBe('property');
+      expect(kindOf('onClick')).toBe('method');
+      expect(kindOf('onScroll')).toBe('method');
+      expect(kindOf('handler')).toBe('method');
+      expect(kindOf('handleClick')).toBe('method');
+      expect(kindOf('value')).toBe('method');
+
+      // Parity: the property keeps its type-annotation reference edge.
+      const fontsProp = cg.getNodesByName('fonts').find((n) => n.kind === 'property')!;
+      const fontsRefs = cg
+        .getOutgoingEdges(fontsProp.id)
+        .filter((e) => e.kind === 'references')
+        .map((e) => cg.getNode(e.target)?.name);
+      expect(fontsRefs).toContain('Fonts');
+
+      // Parity: visibility survives the property path.
+      expect(fontsProp.visibility).toBe('public');
+      const historyProp = cg.getNodesByName('history').find((n) => n.kind === 'property')!;
+      expect(historyProp.visibility).toBe('private');
+
+      // Parity: arrow-field bodies still walk — onClick calls run.
+      const onClick = cg.getNodesByName('onClick')[0]!;
+      const calls = cg
+        .getOutgoingEdges(onClick.id)
+        .filter((e) => e.kind === 'calls')
+        .map((e) => cg.getNode(e.target)?.name);
+      expect(calls).toContain('run');
+
+      // Signature carries the declared type, C#-style "Type name".
+      expect(fontsProp.signature).toBe('Fonts fonts');
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('JS: field_definition classifies the same way', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-808-js-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'app.js'),
+      [
+        'class App {',
+        '  count = 0;',
+        '  config = { retries: 3 };',
+        '  onClick = () => { this.run(); };',
+        '  run() {}',
+        '}',
+        'module.exports = App;',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      expect(cg.getNodesByName('count')[0]?.kind).toBe('property');
+      expect(cg.getNodesByName('config')[0]?.kind).toBe('property');
+      expect(cg.getNodesByName('onClick')[0]?.kind).toBe('method');
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('field initializers still register callbacks (fn-ref scan)', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-808-fnref-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.ts'),
+      [
+        'function onSave(): void {}',
+        'function onLoad(): void {}',
+        'export class Registry {',
+        '  static handlers = { save: onSave, load: onLoad };',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const onSave = cg.getNodesByName('onSave')[0]!;
+      const fnRefs = cg
+        .getIncomingEdges(onSave.id)
+        .filter((e) => e.metadata?.fnRef === true);
+      expect(fnRefs.length).toBeGreaterThan(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+});
diff --git a/docs/design/function-ref-capture.md b/docs/design/function-ref-capture.md
index 762bdfef1..159176cc5 100644
--- a/docs/design/function-ref-capture.md
+++ b/docs/design/function-ref-capture.md
@@ -44,7 +44,7 @@ custom `visitNode` hooks like Scala's val/var handler) get a candidates-only
 |---|---|---|---|---|---|
 | C / ObjC | `argument_list` | `assignment_expression.right` | `initializer_pair.value` | `initializer_list`, `init_declarator.value` | `&fn` (`pointer_expression`), `@selector(...)` (ObjC) |
 | C++ | **`&` forms only** in args/rhs/varinit | (same — explicit `&` only) | bare ids at FILE scope only | bare ids at FILE scope only | `&fn`, `&Cls::method` (resolved scoped to the class) |
-| TS / JS (tsx/jsx) | `arguments` | `assignment_expression.right` | `pair.value` | `array`, `variable_declarator.value` | — (see TS notes) |
+| TS / JS (tsx/jsx) | `arguments` | `assignment_expression.right` | `pair.value` | `array`, `variable_declarator.value` | `this.method` (`member_expression`, class-scoped — see rule 3) |
 | Python | `argument_list`, `keyword_argument.value` | `assignment.right` | `pair.value` | `list` | `self.method` (`attribute`) |
 | Go | `argument_list` | `assignment_statement` / `short_var_declaration` (`expression_list`) | `keyed_element` | `literal_value`, `var_spec.value` | — |
 | Rust | `arguments` | `assignment_expression.right` | `field_initializer.value` | `array_expression`, `static_item` / `let_declaration.value` | — |
@@ -77,16 +77,19 @@ custom `visitNode` hooks like Scala's val/var handler) get a candidates-only
    were ungated.
 3. **TS/JS/Python: bare ids resolve to `function` kind only.** A bare
    identifier can never be a method value in these languages (methods need a
-   receiver — `this.m` / `self.m`), and TS class FIELDS are extracted as
-   method-kind nodes (pre-existing extractor quirk), so allowing method
-   targets soaked up locals passed as arguments
-   (`new Set(selectedPointsIndices)` → a same-named "method" field;
-   docopt.py's `name`/`match` params). For the same reason `this.X` capture
-   is disabled for TS/JS — in real code `this.X` value positions are mostly
-   data reads (`setCursor(this.canvas)`). Python's `self.m` form keeps method
-   targets through its own capture shape. C#/Swift/Dart/Java/Kotlin keep
-   method targets (method groups, implicit-self, method references are real
-   method values).
+   receiver — `this.m` / `self.m`), so allowing method targets soaked up
+   locals passed as arguments (`new Set(selectedPointsIndices)`;
+   docopt.py's `name`/`match` params — excalidraw/fmt A/B findings).
+   TS/JS `this.X` values are captured as `this.`-PREFIXED candidates and
+   resolved CLASS-SCOPED (`resolveThisMemberFnRef` in
+   `src/resolution/index.ts`): the target must be a function/method whose
+   qualified name shares the from-symbol's class prefix, same file, no
+   fallback of any kind — `addEventListener(…, this.onResize)` hits the
+   enclosing class's method; `this.fonts` (a property, post-#808 field
+   classification) and inherited/unknown members yield no edge. Python's
+   `self.m` form keeps method targets through its own capture shape.
+   C#/Swift/Dart/Java/Kotlin keep method targets (method groups,
+   implicit-self, method references are real method values).
 4. **C++ is `&`-explicit** (`addressOfOnly`): bare identifiers qualify only in
    FILE-scope initializer tables; everywhere else (args, assignments, local
    braced-init lists `{begin, size}`) only `&fn` / `&Cls::method` count.
@@ -184,5 +187,9 @@ Index cost on redis: +6% time, +5% db size.
   imports, so cross-file bare callbacks only resolve when repo-unique.
 - **PHP string callables**, **Ruby bare symbols** outside `method(:sym)`,
   **`obj.method` member values** where `obj` isn't `this`/`self`: deferred.
-- **TS `this.X`**: disabled until TS class-field kind classification is fixed
-  (fields currently extract as method-kind nodes).
+- **TS/JS `this.X` to inherited members**: the class-scoped resolver matches
+  the enclosing class's OWN members only — `this.handleClick` defined on a
+  superclass yields no edge (would need the supertype walk; deliberate v1).
+  Reading a getter into a local (`const s = this.snapshot`) produces a
+  references edge to the getter — a true dependency with an imperfect
+  "registration" flavor.
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 4cfcfa6b7..d7a1250ff 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 19;
+export const EXTRACTION_VERSION = 20;
diff --git a/src/extraction/function-ref.ts b/src/extraction/function-ref.ts
index dd4f12c03..dfb9eb58e 100644
--- a/src/extraction/function-ref.ts
+++ b/src/extraction/function-ref.ts
@@ -158,12 +158,13 @@ function cFamilySpec(extra?: { special?: string[]; addressOfOnly?: boolean }): F
   };
 }
 
-// NOTE: deliberately NO `member_expression` (`this.handleClick`) capture for
-// TS/JS. Class fields with type annotations are extracted as method-kind
-// nodes (pre-existing extractor behavior), so `this.X` value positions —
-// which in real code are mostly DATA reads (`setCursor(this.canvas)`) —
-// resolved to those field nodes and produced wrong "registration" edges
-// (excalidraw A/B finding). Revisit if/when TS field classification is fixed.
+// `this.handleClick` capture (member_expression) emits a `this.`-PREFIXED
+// candidate name: resolution scopes it to the enclosing symbol's class
+// (qualified-name prefix), so `this.fonts` (a property, post-#808) and
+// inherited/unknown members yield no edge, while same-class methods —
+// `btn.on('click', this.handleClick)`, the observer-registration idiom —
+// resolve precisely. Bare identifiers stay function-kind-only (a bare id can
+// never be a method value in JS).
 const TS_JS_SPEC: FnRefSpec = {
   idTypes: new Set(['identifier']),
   dispatch: new Map<string, CaptureRule>([
@@ -173,6 +174,7 @@ const TS_JS_SPEC: FnRefSpec = {
     ['pair', { mode: 'value', field: 'value' }],
     ['array', { mode: 'list' }],
   ]),
+  special: new Set(['member_expression']),
 };
 
 const PYTHON_SPEC: FnRefSpec = {
@@ -613,6 +615,18 @@ function normalizeSpecial(
       return name ? [{ name, node: sym }] : [];
     }
 
+    // `this.handleClick` (TS/JS) — object must be EXACTLY `this`. The name
+    // keeps the `this.` prefix so resolution can scope it to the enclosing
+    // class (see resolveThisMemberFnRef) instead of bare name-matching.
+    case 'member_expression': {
+      const obj = getChildByField(node, 'object');
+      const prop = getChildByField(node, 'property');
+      if (obj && prop && obj.type === 'this' && prop.type === 'property_identifier') {
+        return [{ name: `this.${getNodeText(prop, source)}`, node: prop }];
+      }
+      return [];
+    }
+
     // `self.handle_click` (Python) — object must be EXACTLY `self`.
     case 'attribute': {
       const obj = getChildByField(node, 'object');
diff --git a/src/extraction/languages/javascript.ts b/src/extraction/languages/javascript.ts
index 0a0d67808..3b36348b3 100644
--- a/src/extraction/languages/javascript.ts
+++ b/src/extraction/languages/javascript.ts
@@ -1,10 +1,14 @@
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
+import { classifyTsClassMember } from './typescript';
 
 export const javascriptExtractor: LanguageExtractor = {
   functionTypes: ['function_declaration', 'arrow_function', 'function_expression'],
   classTypes: ['class_declaration'],
   methodTypes: ['method_definition', 'field_definition'],
+  // JS `field_definition` ≙ TS `public_field_definition`: plain fields are
+  // properties, function-valued fields are methods (#808).
+  classifyMethodNode: classifyTsClassMember,
   interfaceTypes: [],
   structTypes: [],
   enumTypes: [],
@@ -13,6 +17,17 @@ export const javascriptExtractor: LanguageExtractor = {
   callTypes: ['call_expression'],
   variableTypes: ['lexical_declaration', 'variable_declaration'],
   nameField: 'name',
+  // JS `field_definition` names its key the `property` field (TS's
+  // public_field_definition uses `name`). Without this, JS class fields —
+  // including arrow-function handler fields — extracted no name and produced
+  // no node at all (#808).
+  resolveName: (node, source) => {
+    if (node.type === 'field_definition') {
+      const prop = getChildByField(node, 'property');
+      if (prop) return getNodeText(prop, source);
+    }
+    return undefined;
+  },
   bodyField: 'body',
   resolveBody: (node, bodyField) => {
     // field_definition (arrow function class fields) nest the body inside
diff --git a/src/extraction/languages/typescript.ts b/src/extraction/languages/typescript.ts
index 9540dd940..72059114f 100644
--- a/src/extraction/languages/typescript.ts
+++ b/src/extraction/languages/typescript.ts
@@ -1,10 +1,48 @@
 import { getNodeText, getChildByField } from '../tree-sitter-helpers';
 import type { LanguageExtractor } from '../tree-sitter-types';
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+
+/**
+ * A TS/JS class field (`public_field_definition` / `field_definition`) is a
+ * METHOD only when its value is callable — an arrow function, a function
+ * expression, or a HOF call wrapping one (`onScroll = throttle(() => {…})`),
+ * exactly mirroring what `resolveBody` below knows how to walk. Everything
+ * else (`public fonts: Fonts;`, `count = 0`, `static defaults = {…}`) is a
+ * PROPERTY. Previously every field extracted as method-kind (#808), which
+ * misrepresented class shape and defeated kind-based filtering — the reason
+ * #756's function-ref resolution had to restrict TS/JS bare identifiers to
+ * function targets.
+ */
+export function classifyTsClassMember(node: SyntaxNode): 'method' | 'property' {
+  if (node.type !== 'public_field_definition' && node.type !== 'field_definition') {
+    return 'method'; // method_definition, getters/setters — untouched
+  }
+  for (let i = 0; i < node.namedChildCount; i++) {
+    const child = node.namedChild(i);
+    if (!child) continue;
+    if (child.type === 'arrow_function' || child.type === 'function_expression') {
+      return 'method';
+    }
+    if (child.type === 'call_expression') {
+      const args = getChildByField(child, 'arguments');
+      if (args) {
+        for (let j = 0; j < args.namedChildCount; j++) {
+          const arg = args.namedChild(j);
+          if (arg && (arg.type === 'arrow_function' || arg.type === 'function_expression')) {
+            return 'method';
+          }
+        }
+      }
+    }
+  }
+  return 'property';
+}
 
 export const typescriptExtractor: LanguageExtractor = {
   functionTypes: ['function_declaration', 'arrow_function', 'function_expression'],
   classTypes: ['class_declaration', 'abstract_class_declaration'],
   methodTypes: ['method_definition', 'public_field_definition'],
+  classifyMethodNode: classifyTsClassMember,
   interfaceTypes: ['interface_declaration'],
   structTypes: [],
   enumTypes: ['enum_declaration'],
diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts
index cecd54c02..28338b0ac 100644
--- a/src/extraction/tree-sitter-types.ts
+++ b/src/extraction/tree-sitter-types.ts
@@ -180,6 +180,15 @@ export interface LanguageExtractor {
    */
   classifyClassNode?: (node: SyntaxNode) => 'class' | 'struct' | 'enum' | 'interface' | 'trait';
 
+  /**
+   * Classify a methodTypes node when the grammar reuses one node type for
+   * both callable and data members (#808): TS/JS class FIELDS
+   * (`public_field_definition` / `field_definition`) are methods only when
+   * their value is callable (`onClick = () => {}`); a plain field
+   * (`public fonts: Fonts;`, `count = 0`) is a property. Default: 'method'.
+   */
+  classifyMethodNode?: (node: SyntaxNode) => 'method' | 'property';
+
   /**
    * Resolve the body node for a function/method/class when it's not a child field.
    * (e.g. Dart puts function_body as a sibling, not a child.)
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 3295df4e8..612681499 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -473,11 +473,14 @@ export class TreeSitterExtractor {
       // variable; see FnRefSpec.ungatedModes). Local initializers and
       // everything else require a same-file/import match.
       const skipGate = ungated?.has(c.mode) === true && atFileScope;
-      // Qualified C++ member-pointers (`Widget::on_click`) gate on the member
-      // name; everything else on the full name.
-      const gateName = c.name.includes('::')
-        ? c.name.slice(c.name.lastIndexOf('::') + 2)
-        : c.name;
+      // Qualified C++ member-pointers (`Widget::on_click`) and TS/JS
+      // `this.<member>` candidates gate on the member name; everything else
+      // on the full name.
+      const gateName = c.name.startsWith('this.')
+        ? c.name.slice(5)
+        : c.name.includes('::')
+          ? c.name.slice(c.name.lastIndexOf('::') + 2)
+          : c.name;
       if (!skipGate && !definedHere.has(gateName) && !importedNames.has(gateName)) {
         continue;
       }
@@ -564,8 +567,30 @@ export class TreeSitterExtractor {
     }
     // Check for method declarations (only if not already handled by functionTypes)
     else if (this.extractor.methodTypes.includes(nodeType)) {
-      this.extractMethod(node);
-      skipChildren = true; // extractMethod visits children via visitFunctionBody
+      // TS/JS class fields parse as a methodTypes node; only function-valued
+      // fields are methods — a plain field (`public fonts: Fonts;`) is a
+      // property (#808). classifyMethodNode is absent for other languages.
+      if (this.extractor.classifyMethodNode?.(node) === 'property') {
+        const propNode = this.extractProperty(node);
+        // Walk the initializer so its calls/instantiations attribute to the
+        // property (`history = createHistory()` → history calls
+        // createHistory). The old field-as-method path never walked these
+        // (resolveBody only resolves function bodies), so this is additive.
+        const valueNode = getChildByField(node, 'value');
+        if (propNode && valueNode) {
+          this.nodeStack.push(propNode.id);
+          this.visitFunctionBody(valueNode, '');
+          this.nodeStack.pop();
+        }
+        // A field initializer can also register callbacks
+        // (`static handlers = { click: onClick }`) — scan it for
+        // function-as-value candidates (capture-only, halts at functions).
+        this.scanFnRefSubtree(node, 0);
+        skipChildren = true;
+      } else {
+        this.extractMethod(node);
+        skipChildren = true; // extractMethod visits children via visitFunctionBody
+      }
     }
     // Check for interface/protocol/trait declarations
     else if (this.extractor.interfaceTypes.includes(nodeType)) {
@@ -1302,27 +1327,41 @@ export class TreeSitterExtractor {
    * Extract a class property declaration (e.g. C# `public string Name { get; set; }`).
    * Extracts as 'property' kind node inside the owning class.
    */
-  private extractProperty(node: SyntaxNode): void {
-    if (!this.extractor) return;
+  private extractProperty(node: SyntaxNode): Node | null {
+    if (!this.extractor) return null;
 
     const docstring = getPrecedingDocstring(node, this.source);
     const visibility = this.extractor.getVisibility?.(node);
     const isStatic = this.extractor.isStatic?.(node) ?? false;
 
     const hookName = this.extractor.extractPropertyName?.(node, this.source);
+    // JS `field_definition` names its key the `property` field (TS uses
+    // `name`) — try both before the generic identifier scan (#808).
     const nameNode = hookName
       ? null
-      : getChildByField(node, 'name') || node.namedChildren.find(c => c.type === 'identifier');
+      : getChildByField(node, 'name') ||
+        getChildByField(node, 'property') ||
+        node.namedChildren.find(c => c.type === 'identifier');
     const name = hookName ?? (nameNode ? getNodeText(nameNode, this.source) : null);
-    if (!name) return;
-
-    // Get property type from the type child (first named child that isn't modifier or identifier)
-    const typeNode = node.namedChildren.find(
-      c => c.type !== 'modifier' && c.type !== 'modifiers'
-        && c.type !== 'identifier' && c.type !== 'accessor_list'
-        && c.type !== 'accessors' && c.type !== 'equals_value_clause'
-    );
-    const typeText = typeNode ? getNodeText(typeNode, this.source) : undefined;
+    if (!name) return null;
+
+    // Get property type. TS/JS field definitions carry an explicit `type`
+    // field (a `type_annotation`); their other named children are the name
+    // and the initializer VALUE, which the generic finder below would
+    // wrongly pick — so fields use the type field only (#808). Other
+    // languages (C# property_declaration) keep the generic scan.
+    const isTsJsField =
+      node.type === 'public_field_definition' || node.type === 'field_definition';
+    const typeNode = isTsJsField
+      ? getChildByField(node, 'type')
+      : node.namedChildren.find(
+          c => c.type !== 'modifier' && c.type !== 'modifiers'
+            && c.type !== 'identifier' && c.type !== 'accessor_list'
+            && c.type !== 'accessors' && c.type !== 'equals_value_clause'
+        );
+    const typeText = typeNode
+      ? getNodeText(typeNode, this.source).replace(/^:\s*/, '')
+      : undefined;
     const signature = typeText ? `${typeText} ${name}` : name;
 
     const propNode = this.createNode('property', name, node, {
@@ -1341,6 +1380,7 @@ export class TreeSitterExtractor {
       // `type_annotation` children; the C# branch walks the `type` field.
       this.extractTypeAnnotations(node, propNode.id);
     }
+    return propNode;
   }
 
   /**
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index 4d537d106..c2b75617b 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -675,6 +675,11 @@ export class ReferenceResolver {
     // (same-file first, unique-only cross-file, function/method targets only).
     // They never reach the framework or fuzzy strategies below.
     if (ref.referenceKind === 'function_ref') {
+      // `this.<member>` values (TS/JS) resolve ONLY against the enclosing
+      // class's own members — never a same-named symbol elsewhere.
+      if (ref.referenceName.startsWith('this.')) {
+        return this.gateLanguage(this.resolveThisMemberFnRef(ref), ref);
+      }
       const viaImport = this.gateLanguage(resolveViaImport(ref, this.context), ref);
       if (viaImport) {
         const target = this.queries.getNodeById(viaImport.targetNodeId);
@@ -1184,6 +1189,41 @@ export class ReferenceResolver {
     return { original: ref, targetNodeId: target.id, confidence: 0.9, resolvedBy: 'import' };
   }
 
+  /**
+   * Resolve a `this.<member>` function-as-value reference (#756/#808) to the
+   * ENCLOSING CLASS's own member — never a same-named symbol elsewhere. The
+   * registration idiom (`btn.on('click', this.handleClick)`) names a member
+   * of the class being defined, so the only valid target shares the
+   * from-symbol's qualified-name scope. Function/method targets only — a
+   * property (a data field, post-#808 classification) yields no edge — same
+   * file required, no fallback of any kind.
+   */
+  private resolveThisMemberFnRef(ref: UnresolvedRef): ResolvedRef | null {
+    const member = ref.referenceName.slice('this.'.length);
+    if (!member) return null;
+    const fromNode = this.queries.getNodeById(ref.fromNodeId);
+    if (!fromNode) return null;
+    const sep = fromNode.qualifiedName.lastIndexOf('::');
+    if (sep <= 0) return null; // not inside a class scope
+    const classPrefix = fromNode.qualifiedName.slice(0, sep);
+    const candidates = this.context
+      .getNodesByQualifiedName(`${classPrefix}::${member}`)
+      .filter(
+        (n) =>
+          (n.kind === 'function' || n.kind === 'method') &&
+          n.filePath === ref.filePath &&
+          n.id !== ref.fromNodeId
+      );
+    if (candidates.length === 0) return null;
+    const target = candidates.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
+    return {
+      original: ref,
+      targetNodeId: target.id,
+      confidence: 0.95,
+      resolvedBy: 'function-ref',
+    };
+  }
+
   private gateLanguage(result: ResolvedRef | null, ref: UnresolvedRef): ResolvedRef | null {
     if (!result) return result;
     const tgt = this.getLanguageFromNodeId(result.targetNodeId);
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index c29ad41e0..0584fc9dd 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -180,6 +180,10 @@ export function matchFunctionRef(
   ref: UnresolvedRef,
   context: ResolutionContext
 ): ResolvedRef | null {
+  // `this.<member>` refs are resolved ONLY by the class-scoped resolver in
+  // resolveOne (resolveThisMemberFnRef) — never by name matching here.
+  if (ref.referenceName.startsWith('this.')) return null;
+
   // In JS/TS/Python a bare identifier can never be a method value (methods
   // are only reachable through a receiver — `this.m` / `self.m` /
   // `Cls.m`), so bare fn-refs match FUNCTIONS only. This also sidesteps the

From 38095aa95baacffde3576492ea04057454470a5d Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 15:09:01 -0500
Subject: [PATCH 37/51] feat(resolution): inherited this.X, Java/Kotlin
 cross-file method refs, Swift type scoping (#810)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three callback-registration shapes deferred from #756/#808, one arc:

1. INHERITED this.X (TS/JS + every this.-routed language): a `this.<member>`
   registration whose member isn't on the enclosing class defers to a second
   pass (resolveDeferredThisMemberRefs — in-memory like deferredChainRefs,
   runs after implements/extends edges persist, same lifecycle as the #750
   conformance pass) and resolves up the supertype chain, depth-capped BFS,
   validated targets only. `bus.on("submit", this.handleSubmit)` in a
   subclass links to FormBase::handleSubmit; same-named methods on unrelated
   classes never match. this.-prefixed candidates skip the extraction name
   gate (an inherited member can't be in definedHere).

2. JAVA/KOTLIN qualified method refs: `Handlers::onMessage` /
   `OtherClass::handle` emit QUALIFIED names resolved by the scoped
   suffix-matcher — cross-file capable, gated on the scope name being a
   same-file type or an imported name (dotted JVM imports now contribute
   their last segment). `this::m` and `super::m` route through the
   class-scoped resolver (super rides the supertype pass). References
   through a VARIABLE (`subscriber::onNext`) deliberately produce nothing —
   receiver type is unknowable; RxJava's baseline bare capture was resolving
   these to same-named same-file methods (a test method "registering" an
   anonymous class's onNext) — the rework drops 18 such wrong edges and
   keeps the 7 genuine Type::method refs RxJava's main tree actually has.

3. SWIFT enclosing-type scoping (implicit self): bare callback names match
   methods only of the from-symbol's own type (extension/nested scopes
   reconciled by suffix), and top-level code never matches methods.
   Alamofire: −44 wrong edges (parameters like `request`/`data`/`retrier`
   resolving to same-named methods on unrelated protocols), all verified;
   the same-class param collision (`task`) remains and is documented.

New ResolutionContext.getNodeById lets matchers derive the from-symbol's
class scope. Controls: redis/fmt fnref edges byte-identical; excalidraw
stable; typeorm +4 genuine inherited-getter dependencies; zero calls edges
changed on any of 7 A/B repos; nodes identical everywhere. Kotlin
companion-object members extract unqualified (pre-existing) so
`Type::companionFn` stays silent rather than guessing — documented.

Full suite 1389 passed. EXTRACTION_VERSION 20 → 21 (re-index to benefit).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |   1 +
 __tests__/function-ref.test.ts       | 116 +++++++++++++++++++++++++++
 docs/design/function-ref-capture.md  |  38 ++++++---
 src/extraction/extraction-version.ts |   2 +-
 src/extraction/function-ref.ts       |  49 ++++++++---
 src/extraction/tree-sitter.ts        |  65 +++++++++++----
 src/index.ts                         |   6 ++
 src/resolution/index.ts              |  91 ++++++++++++++++++++-
 src/resolution/name-matcher.ts       |  31 ++++++-
 src/resolution/types.ts              |   7 ++
 10 files changed, 365 insertions(+), 41 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e9f4b8720..ad5c569ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 - TypeScript and JavaScript **class fields are now reported as properties instead of methods**. A plain field like `public fonts: Fonts;` previously extracted as a method, misrepresenting class shape and letting calls to same-named functions resolve to data fields (a boolean field named `isArray` was soaking up `Array.isArray(...)` call edges). Fields holding arrow functions or function expressions (`onClick = () => {…}`, including wrapped ones like `onScroll = throttle(() => {…})`) correctly remain methods and their bodies are still analyzed. Field initializers are analyzed too, so `history = createHistory()` records its call — and JavaScript class fields, which previously produced no symbol at all, now appear in the graph. Re-index a project to benefit. (#808) (TypeScript, JavaScript)
 - Callback registration through `this` now resolves precisely in TypeScript and JavaScript: `window.addEventListener("online", this.onOfflineStatusToggle)` or an API object like `{ mutateElement: this.mutateElement }` produces a reference edge to the **enclosing class's own method** — never a same-named method on an unrelated class, and never a data field. Builds on the callback-registration support below. (#808) (TypeScript, JavaScript)
+- Callback-registration coverage deepened across four more shapes: a `this.<member>` registration whose method lives on a **base class** now resolves through the inheritance chain (`bus.on("submit", this.handleSubmit)` in a subclass links to the parent's `handleSubmit`); Java and Kotlin **method references to other classes** (`Handlers::onMessage`, `OtherClass::handle`) resolve across files, with `this::` and `super::` scoped to the defining class and references through a variable deliberately left out; and Swift bare callback names now match only the **enclosing type's** methods (implicit `self`), eliminating a class of wrong edges where a parameter like `request` linked to a same-named method on an unrelated type. (Java, Kotlin, Swift, TypeScript, JavaScript)
 - CodeGraph now sees where a function is **registered as a callback**, not just where it's called. A function name passed as an argument (`signal(SIGINT, handler)`, `qsort(…, compare)`, `addEventListener(…, onBlur)`), assigned to a function pointer or field (`ops->recv_cb = my_cb`, `OnClick := Handler`), or placed in a struct initializer or handler table (`{ .recv_cb = my_cb }`, `{ "get", getCommand }`) now produces a reference edge from the registration site to the function — so `codegraph_callers` and `codegraph_impact` surface callback wiring that previously looked like dead code. Works across all supported languages, including the language-specific forms: C/C++ `&fn`, Java `Class::method`, Kotlin `::fn`, Swift `#selector`, Objective-C `@selector`, Ruby `method(:fn)`, Scala eta-expansion, and Delphi/Pascal `@Handler` and `OnClick := Handler` event wiring. Callers output labels these "via callback registration". Resolution is deliberately conservative: an ambiguous name produces no edge rather than a wrong one. Re-index a project to benefit. Thanks @zmcrazy. (#756)
 - The `codegraph_node` MCP tool can now **read a whole source file like the built-in Read tool — only faster, served from the index**. Pass a file path with no symbol and it returns that file's current source with line numbers (the same `<n>⇥<line>` shape Read produces, so an assistant can edit straight from it), narrowable with `offset`/`limit` exactly like Read, plus a one-line note of which files depend on it (the file's blast radius). Use it anywhere you'd reach for Read on an indexed source file. Pass `symbolsOnly: true` for just the file's structure. Configuration/data files (`.yml` / `.properties`) are summarized by key only, never dumped, so secrets in them are never surfaced. The agent-facing guidance was also retuned so assistants reach for codegraph while *implementing* a change (not only when answering questions), since one codegraph call returns the same bytes plus the blast radius, faster than re-reading the file.
 - New `codegraph upgrade` command updates CodeGraph to the latest release in place — it detects how you installed (the standalone `install.sh` / `install.ps1` bundle, npm, or npx) and does the right thing for each, on macOS, Linux, and Windows. Use `codegraph upgrade --check` to see whether an update is available without installing, or `codegraph upgrade <version>` to move to a specific version. After upgrading it reminds you to re-index your projects so they pick up the newer engine's improvements. (#679)
diff --git a/__tests__/function-ref.test.ts b/__tests__/function-ref.test.ts
index 67dd348f2..d7f9b58c2 100644
--- a/__tests__/function-ref.test.ts
+++ b/__tests__/function-ref.test.ts
@@ -457,6 +457,122 @@ describe('Function-as-value capture (#756)', () => {
     }
   });
 
+  it('INHERITED this.X: resolves on a supertype via the second pass, never on unrelated classes', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-inherit-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'base.ts'),
+      'export class FormBase { handleSubmit(): void {} }\n'
+    );
+    fs.writeFileSync(
+      path.join(tmpDir, 'unrelated.ts'),
+      'export class Unrelated { handleSubmit(): void {} }\n'
+    );
+    fs.writeFileSync(
+      path.join(tmpDir, 'login.ts'),
+      [
+        "import { FormBase } from './base';",
+        'declare const bus: { on(ev: string, cb: () => void): void };',
+        'export class LoginForm extends FormBase {',
+        '  wire(): void { bus.on("submit", this.handleSubmit); }',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const handleSubmits = cg.getNodesByName('handleSubmit');
+      const baseM = handleSubmits.find((n) => n.qualifiedName.includes('FormBase'))!;
+      const unrelatedM = handleSubmits.find((n) => n.qualifiedName.includes('Unrelated'))!;
+
+      const intoBase = cg.getIncomingEdges(baseM.id).filter((e) => e.metadata?.fnRef === true);
+      expect(intoBase).toHaveLength(1);
+      expect(cg.getNode(intoBase[0]!.source)?.name).toBe('wire');
+      expect(
+        cg.getIncomingEdges(unrelatedM.id).filter((e) => e.metadata?.fnRef === true)
+      ).toHaveLength(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('JAVA: Type::method cross-file, this::/super:: scoped, variable:: yields nothing', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-java-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'Handlers.java'),
+      [
+        'package com.example;',
+        'public class Handlers {',
+        '    public static void onMessage(int x) { System.out.println(x); }',
+        '}',
+      ].join('\n')
+    );
+    fs.writeFileSync(
+      path.join(tmpDir, 'BaseForm.java'),
+      ['package com.example;', 'public class BaseForm {', '    void baseHandler(int x) {}', '}'].join('\n')
+    );
+    fs.writeFileSync(
+      path.join(tmpDir, 'Main.java'),
+      [
+        'package com.example;',
+        'import com.example.Handlers;',
+        'import java.util.function.IntConsumer;',
+        'public class Main extends BaseForm {',
+        '    static void registerHandler(IntConsumer cb) { cb.accept(1); }',
+        '    void run0() {}',
+        '    void crossFile() { registerHandler(Handlers::onMessage); }',
+        '    void thisRef() { registerHandler(this::run0); }',
+        '    void superRef() { registerHandler(super::baseHandler); }',
+        '    void varRef(Main m) { registerHandler(m::run0); }',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'onMessage'))).toEqual(['crossFile']);
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'baseHandler'))).toEqual(['superRef']);
+      // this::run0 resolves class-scoped; m::run0 (variable receiver) must NOT
+      // add a second edge — exactly one source.
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'run0'))).toEqual(['thisRef']);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('SWIFT SCOPING: bare ids hit only the enclosing type’s methods; top-level bare hits functions only', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-swiftscope-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.swift'),
+      [
+        'func register(_ cb: (Int) -> Void) { cb(1) }',
+        'class Monitor {',
+        '  func report(_ x: Int) {}',
+        '  func wire() { register(report) }', // implicit self → Monitor::report
+        '}',
+        'class Other {',
+        // `report` here is a PARAMETER; Monitor::report must not win.
+        '  func use(report: (Int) -> Void) { register(report) }',
+        '}',
+        'func topLevel() { register(report) }', // no implicit self → no method target
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const edges = fnRefEdgesInto(cg, 'report');
+      expect(sourceNames(cg, edges)).toEqual(['wire']);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
   it('C UNGATED TABLES: a command table names handlers defined in OTHER files (redis pattern)', async () => {
     tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-ctable-'));
     // Handler defined in its own file…
diff --git a/docs/design/function-ref-capture.md b/docs/design/function-ref-capture.md
index 159176cc5..bb325052d 100644
--- a/docs/design/function-ref-capture.md
+++ b/docs/design/function-ref-capture.md
@@ -171,25 +171,39 @@ Index cost on redis: +6% time, +5% db size.
   without local-scope tracking — the data-flow frontier deliberately left
   uncovered. ~1-2 per 20 sampled edges on callback-heavy repos; the file-level
   dependency is real in every observed case.
-- **Swift single same-named method collisions** (`request(self, didFailTask:
-  task…)` where one `task` method exists): the overload-family rule only
-  refuses when ≥2 same-named methods share the file. Alamofire-style
-  API-mirrored param naming keeps a residual; needs same-type scoping (v2).
+- **Swift same-class param collisions** (`eventMonitor?.request(self,
+  didFailTask: task…)` where the enclosing type ALSO has a `task` method):
+  enclosing-type scoping (implicit self — methods match only the from-symbol's
+  own type, top-level bare ids never match methods) eliminated the CROSS-class
+  collision class on Alamofire (−44 wrong edges), but a parameter named after
+  a method of the SAME type is statically indistinguishable from an
+  implicit-self method value. Residual, documented.
 - **Pascal paren-less calls** (`Result := DoInitialize`): captured as
   references (Pascal can't distinguish a procedure VALUE from a paren-less
   CALL without types). The dependency direction is correct and these calls
   were previously invisible entirely (#791) — strictly more truth, imperfect
   label.
-- **Java/Kotlin cross-file method refs** (`OtherClass::method` without the
-  defining class imported as a simple name): gated away; same-file and
-  `this::m` forms work.
+- **Java/Kotlin method refs through a VARIABLE** (`subscriber::onNext`,
+  `m::run0`): receiver type unknown statically — deliberately no edge (the
+  obj.method class). RxJava's baseline bare capture was resolving these to
+  same-named same-file methods (a test method "registering" an anonymous
+  class's `onNext`); the qualified rework drops them. `Type::method` resolves
+  cross-file (scope gated on same-file types ∪ imported names, incl. the last
+  segment of dotted JVM imports); `this::m` / `super::m` ride the
+  class-scoped + supertype path.
+- **Kotlin companion-object members** extract UNQUALIFIED (node `handle`, not
+  `KtHandlers::Companion::handle` — pre-existing extraction shape), so
+  `KtHandlers::handle` refs to companion members stay silent rather than
+  guess. Fix belongs in kotlin companion extraction.
 - **Swift cross-file bare references**: Swift sees module-wide symbols without
-  imports, so cross-file bare callbacks only resolve when repo-unique.
+  imports, so cross-file bare callbacks only resolve when repo-unique
+  (functions; methods are enclosing-type-only). Cross-TYPE `#selector`
+  targets (rare — target-action is normally self) are scoped away too.
 - **PHP string callables**, **Ruby bare symbols** outside `method(:sym)`,
   **`obj.method` member values** where `obj` isn't `this`/`self`: deferred.
-- **TS/JS `this.X` to inherited members**: the class-scoped resolver matches
-  the enclosing class's OWN members only — `this.handleClick` defined on a
-  superclass yields no edge (would need the supertype walk; deliberate v1).
-  Reading a getter into a local (`const s = this.snapshot`) produces a
+- **`this.X` inherited members resolve through the supertype pass**
+  (`resolveDeferredThisMemberRefs`, depth-capped BFS over implements/extends,
+  runs after edges persist — same lifecycle as the #750 conformance pass).
+  Reading a getter into a local (`const s = this.snapshot`) still produces a
   references edge to the getter — a true dependency with an imperfect
   "registration" flavor.
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index d7a1250ff..76e3f5758 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 20;
+export const EXTRACTION_VERSION = 21;
diff --git a/src/extraction/function-ref.ts b/src/extraction/function-ref.ts
index dfb9eb58e..2970cd4e3 100644
--- a/src/extraction/function-ref.ts
+++ b/src/extraction/function-ref.ts
@@ -553,35 +553,66 @@ function normalizeSpecial(
   source: string
 ): Array<{ name: string; node: SyntaxNode }> {
   switch (type) {
-    // Java `Main::targetCb` / `this::run0` — last identifier child is the method.
+    // Java method references. Receiver decides the resolution route (#808):
+    //   `this::run0` / `super::close` → `this.<m>` (class-scoped resolver;
+    //     super rides the inherited-member supertype pass)
+    //   `Type::method` (capitalized) → qualified `Type::method` (suffix-
+    //     matched against that type's members, cross-file capable)
+    //   `variable::method` → nothing (receiver type unknown statically —
+    //     the deferred obj.method class)
     case 'method_reference': {
       let last: SyntaxNode | null = null;
       for (let i = 0; i < node.namedChildCount; i++) {
         const child = node.namedChild(i);
         if (child && child.type === 'identifier') last = child;
       }
-      return last ? [{ name: getNodeText(last, source), node: last }] : [];
+      if (!last) return [];
+      const m = getNodeText(last, source);
+      const text = getNodeText(node, source);
+      if (text.startsWith('this::') || text.startsWith('super::')) {
+        return [{ name: `this.${m}`, node: last }];
+      }
+      const recv = text.match(/^([A-Z][A-Za-z0-9_]*)\s*::/);
+      if (recv) {
+        // `Type::method` — but `Type::new` (constructor ref) has no method
+        // node to land on; let the stoplist drop it via the bare name.
+        return m === 'new' ? [] : [{ name: `${recv[1]}::${m}`, node: last }];
+      }
+      return [];
     }
 
-    // Kotlin `::targetCb` — the simple_identifier child.
+    // Kotlin `::targetCb` (one part) / `OtherClass::handle` (two parts —
+    // receiver is a type_identifier; lowercase receivers are variables, the
+    // deferred obj.method class).
     case 'callable_reference': {
+      let receiver: SyntaxNode | null = null;
+      let member: SyntaxNode | null = null;
       for (let i = 0; i < node.namedChildCount; i++) {
         const child = node.namedChild(i);
-        if (child && child.type === 'simple_identifier') {
-          return [{ name: getNodeText(child, source), node: child }];
-        }
+        if (!child) continue;
+        if (child.type === 'type_identifier') receiver = child;
+        if (child.type === 'simple_identifier') member = child;
       }
-      return [];
+      if (!member) return [];
+      const m = getNodeText(member, source);
+      if (!receiver) return [{ name: m, node: member }]; // ::topLevelFn
+      const recvText = getNodeText(receiver, source);
+      return /^[A-Z]/.test(recvText)
+        ? [{ name: `${recvText}::${m}`, node: member }]
+        : []; // variable::method — unknown receiver type
     }
 
     // Kotlin `this::fire` parses as navigation_expression with a `::fire`
-    // navigation_suffix. Ordinary `a.b` navigation MUST yield nothing.
+    // navigation_suffix — route through the class-scoped `this.` resolver.
+    // Ordinary `a.b` navigation (and any non-`this` receiver) MUST yield
+    // nothing.
     case 'navigation_expression': {
+      if (!getNodeText(node, source).startsWith('this::')) return [];
       for (let i = 0; i < node.namedChildCount; i++) {
         const child = node.namedChild(i);
         if (child && child.type === 'navigation_suffix' && getNodeText(child, source).startsWith('::')) {
           const id = child.namedChild(child.namedChildCount - 1);
-          if (id) return [{ name: getNodeText(id, source), node: id }];
+          if (id) return [{ name: `this.${getNodeText(id, source)}`, node: id }];
         }
       }
       return [];
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 612681499..a3e827f56 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -435,19 +435,34 @@ export class TreeSitterExtractor {
     if (isGeneratedFile(this.filePath)) return;
 
     const definedHere = new Set<string>();
+    const definedTypes = new Set<string>();
     for (const n of this.nodes) {
       if (n.kind === 'function' || n.kind === 'method') definedHere.add(n.name);
+      if (
+        n.kind === 'class' || n.kind === 'struct' || n.kind === 'interface' ||
+        n.kind === 'enum' || n.kind === 'trait' || n.kind === 'protocol'
+      ) {
+        definedTypes.add(n.name);
+      }
     }
 
     // Import-binding names only (all binding emitters push kind 'imports').
     // Deliberately NOT 'references': those carry type-annotation and
     // interface-member names, which let local variables that share a type
-    // member's name slip through the gate (excalidraw A/B finding).
+    // member's name slip through the gate (excalidraw A/B finding). A dotted
+    // import (JVM `import com.example.OtherClass`) also contributes its LAST
+    // segment — the simple name Java/Kotlin code uses in `OtherClass::method`
+    // references.
     const SIMPLE_NAME = /^[A-Za-z_$][A-Za-z0-9_$]*$/;
+    const DOTTED_NAME = /^[A-Za-z_$][A-Za-z0-9_$.]*\.([A-Za-z_$][A-Za-z0-9_$]*)$/;
     const importedNames = new Set<string>();
     for (const r of this.unresolvedReferences) {
-      if (r.referenceKind === 'imports' && SIMPLE_NAME.test(r.referenceName)) {
+      if (r.referenceKind !== 'imports') continue;
+      if (SIMPLE_NAME.test(r.referenceName)) {
         importedNames.add(r.referenceName);
+      } else {
+        const dotted = r.referenceName.match(DOTTED_NAME);
+        if (dotted) importedNames.add(dotted[1]!);
       }
     }
 
@@ -468,21 +483,37 @@ export class TreeSitterExtractor {
       ) {
         continue;
       }
-      // C-family file-scope initializers skip the gate (constant-expression
-      // context — a bare identifier there is a function address, never a
-      // variable; see FnRefSpec.ungatedModes). Local initializers and
-      // everything else require a same-file/import match.
-      const skipGate = ungated?.has(c.mode) === true && atFileScope;
-      // Qualified C++ member-pointers (`Widget::on_click`) and TS/JS
-      // `this.<member>` candidates gate on the member name; everything else
-      // on the full name.
-      const gateName = c.name.startsWith('this.')
-        ? c.name.slice(5)
-        : c.name.includes('::')
-          ? c.name.slice(c.name.lastIndexOf('::') + 2)
-          : c.name;
-      if (!skipGate && !definedHere.has(gateName) && !importedNames.has(gateName)) {
-        continue;
+      // Gate policy by candidate shape:
+      //  - `this.<member>`: ALWAYS flush — the member may be inherited from a
+      //    class in another file (definedHere can't see it), volume is
+      //    naturally bounded by real `this.X` expressions, and resolution is
+      //    strictly class-scoped (own members or the validated supertype
+      //    pass), so nothing fuzzy can leak.
+      //  - `Scope::member` (C++ member-pointers, Java/Kotlin type-qualified
+      //    method refs): the SCOPE name must be a type defined here or an
+      //    imported name (covers `OtherClass::method` cross-file), or the
+      //    member matches the plain gate (back-compat for C++ same-file).
+      //  - C-family file-scope initializers skip the gate entirely
+      //    (constant-expression context — see FnRefSpec.ungatedModes).
+      //  - everything else: name ∈ same-file functions/methods ∪ imports.
+      if (!c.name.startsWith('this.')) {
+        const skipGate = ungated?.has(c.mode) === true && atFileScope;
+        if (!skipGate) {
+          if (c.name.includes('::')) {
+            const scopeName = c.name.slice(0, c.name.indexOf('::'));
+            const memberName = c.name.slice(c.name.lastIndexOf('::') + 2);
+            if (
+              !definedTypes.has(scopeName) &&
+              !importedNames.has(scopeName) &&
+              !definedHere.has(memberName) &&
+              !importedNames.has(memberName)
+            ) {
+              continue;
+            }
+          } else if (!definedHere.has(c.name) && !importedNames.has(c.name)) {
+            continue;
+          }
+        }
       }
       const key = `${c.fromNodeId}|${c.name}`;
       if (seen.has(key)) continue;
diff --git a/src/index.ts b/src/index.ts
index 2e2eef88c..35855e8b1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -382,6 +382,9 @@ export class CodeGraph {
           // interface). Needs the implements/extends edges the main pass just
           // built, so it runs after resolution (#750).
           this.resolver.resolveChainedCallsViaConformance();
+          // Same lifecycle for `this.<member>` callback registrations whose
+          // member is inherited from a supertype (#808).
+          this.resolver.resolveDeferredThisMemberRefs();
         }
 
         // Refresh planner stats + checkpoint the WAL after bulk writes.
@@ -503,6 +506,9 @@ export class CodeGraph {
           // receiver conforms to (protocol-extension / inherited). Needs the
           // implements/extends edges built above (#750).
           this.resolver.resolveChainedCallsViaConformance();
+          // Same lifecycle for `this.<member>` callback registrations whose
+          // member is inherited from a supertype (#808).
+          this.resolver.resolveDeferredThisMemberRefs();
         }
 
         // Refresh planner stats + checkpoint the WAL after bulk writes.
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index c2b75617b..1e4920208 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -207,6 +207,11 @@ export class ReferenceResolver {
   // once implements/extends edges exist, to resolve methods on a supertype the
   // receiver conforms to (#750).
   private deferredChainRefs: UnresolvedRef[] = [];
+  // `this.<member>` function-as-value refs whose member is NOT on the
+  // enclosing class itself — possibly inherited. Collected in-memory for the
+  // same reason as deferredChainRefs and drained by
+  // resolveDeferredThisMemberRefs once implements/extends edges exist (#808).
+  private deferredThisMemberRefs: UnresolvedRef[] = [];
   // Per-`.razor`/`.cshtml`-file `@using` namespace set (own directives + folder
   // `_Imports.razor`, cascading to the project root). Used to disambiguate a
   // markup type ref to the right C# namespace.
@@ -422,6 +427,10 @@ export class ReferenceResolver {
         return result;
       },
 
+      getNodeById: (id: string) => {
+        return this.queries.getNodeById(id);
+      },
+
       getSupertypes: (typeName: string, language) => {
         // Union the `implements`/`extends` targets of every same-named type node.
         // Matching by simple name (not id) reconciles a type declared in one node
@@ -1214,7 +1223,13 @@ export class ReferenceResolver {
           n.filePath === ref.filePath &&
           n.id !== ref.fromNodeId
       );
-    if (candidates.length === 0) return null;
+    if (candidates.length === 0) {
+      // Not on the class itself — possibly INHERITED. implements/extends
+      // edges don't exist yet in this pass, so retry in the supertype pass
+      // (resolveDeferredThisMemberRefs) instead of giving up.
+      this.deferredThisMemberRefs.push(ref);
+      return null;
+    }
     const target = candidates.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
     return {
       original: ref,
@@ -1224,6 +1239,80 @@ export class ReferenceResolver {
     };
   }
 
+  /**
+   * Second pass for `this.<member>` refs whose member wasn't on the enclosing
+   * class itself (#808): once implements/extends edges exist, walk the
+   * class's supertypes (transitively, depth-capped) and resolve the member on
+   * the nearest one that declares it — `this.handleSubmit` registered in a
+   * subclass resolves to `FormBase::handleSubmit`. Validated targets only
+   * (function/method kind, same language family); no match → no edge.
+   * Mirrors resolveChainedCallsViaConformance's lifecycle. Returns the number
+   * of newly-created edges.
+   */
+  resolveDeferredThisMemberRefs(): number {
+    const deferred = this.deferredThisMemberRefs;
+    this.deferredThisMemberRefs = [];
+    if (deferred.length === 0) return 0;
+
+    this.clearCaches();
+    const resolved: ResolvedRef[] = [];
+    for (const ref of deferred) {
+      const member = ref.referenceName.slice('this.'.length);
+      const fromNode = this.queries.getNodeById(ref.fromNodeId);
+      if (!fromNode || !member) continue;
+      const sep = fromNode.qualifiedName.lastIndexOf('::');
+      if (sep <= 0) continue;
+      const classPrefix = fromNode.qualifiedName.slice(0, sep);
+      const className = classPrefix.includes('::')
+        ? classPrefix.slice(classPrefix.lastIndexOf('::') + 2)
+        : classPrefix;
+
+      // BFS up the supertype graph by simple name.
+      const seen = new Set<string>([className]);
+      let frontier = this.context.getSupertypes?.(className, ref.language) ?? [];
+      let target: Node | null = null;
+      for (let depth = 0; depth < 5 && frontier.length > 0 && !target; depth++) {
+        const next: string[] = [];
+        for (const superName of frontier) {
+          if (seen.has(superName)) continue;
+          seen.add(superName);
+          const members = this.context
+            .getNodesByName(member)
+            .filter(
+              (n) =>
+                (n.kind === 'function' || n.kind === 'method') &&
+                sameLanguageFamily(n.language, ref.language) &&
+                (n.qualifiedName === `${superName}::${member}` ||
+                  n.qualifiedName.endsWith(`::${superName}::${member}`))
+            );
+          if (members.length > 0) {
+            target = members.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
+            break;
+          }
+          next.push(...(this.context.getSupertypes?.(superName, ref.language) ?? []));
+        }
+        frontier = next;
+      }
+
+      if (target) {
+        resolved.push({
+          original: ref,
+          targetNodeId: target.id,
+          confidence: 0.85,
+          resolvedBy: 'function-ref',
+        });
+      }
+    }
+    if (resolved.length === 0) return 0;
+
+    const edges = this.createEdges(resolved);
+    if (edges.length > 0) {
+      this.queries.insertEdges(edges);
+      this.clearCaches();
+    }
+    return edges.length;
+  }
+
   private gateLanguage(result: ResolvedRef | null, ref: UnresolvedRef): ResolvedRef | null {
     if (!result) return result;
     const tgt = this.getLanguageFromNodeId(result.targetNodeId);
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index 0584fc9dd..9b91aea22 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -227,7 +227,7 @@ export function matchFunctionRef(
     };
   }
 
-  const candidates = context
+  let candidates = context
     .getNodesByName(ref.referenceName)
     .filter(
       (n) =>
@@ -237,6 +237,35 @@ export function matchFunctionRef(
     );
   if (candidates.length === 0) return null;
 
+  // Swift implicit-self: a bare identifier can name a METHOD only of the
+  // ENCLOSING type (`Button(action: handleTap)` written inside that type) —
+  // a same-named method on any OTHER class is a parameter collision
+  // (Alamofire: a `request` parameter resolving to EventMonitor::request).
+  // Scope method candidates to the from-symbol's type; top-level code has no
+  // implicit self, so method targets are excluded there entirely. Free
+  // functions are unaffected.
+  if (ref.language === 'swift' && candidates.some((n) => n.kind === 'method')) {
+    const fromNode = context.getNodeById?.(ref.fromNodeId);
+    const sep = fromNode ? fromNode.qualifiedName.lastIndexOf('::') : -1;
+    const classPrefix = fromNode && sep > 0 ? fromNode.qualifiedName.slice(0, sep) : null;
+    candidates = candidates.filter((n) => {
+      if (n.kind !== 'method') return true;
+      if (!classPrefix) return false;
+      const mSep = n.qualifiedName.lastIndexOf('::');
+      if (mSep <= 0) return false;
+      const methodPrefix = n.qualifiedName.slice(0, mSep);
+      // Accept exact-scope matches plus suffix relationships either way, so
+      // extension-declared members (`Holder::m`) still match a nested
+      // from-scope (`Module::Holder::wire`) and vice versa.
+      return (
+        methodPrefix === classPrefix ||
+        methodPrefix.endsWith(`::${classPrefix}`) ||
+        classPrefix.endsWith(`::${methodPrefix}`)
+      );
+    });
+    if (candidates.length === 0) return null;
+  }
+
   // Same-file definition wins — the extraction gate guarantees most survivors
   // have one, and it's the dominant C pattern (static callback registered in
   // a same-file ops struct).
diff --git a/src/resolution/types.ts b/src/resolution/types.ts
index ca08b634f..71366a150 100644
--- a/src/resolution/types.ts
+++ b/src/resolution/types.ts
@@ -91,6 +91,13 @@ export interface ResolutionContext {
    * method). Optional so external/test contexts compile without it.
    */
   getSupertypes?(typeName: string, language: Language): string[];
+  /**
+   * Look up a node by its id. Lets matchers derive the FROM-symbol's
+   * enclosing-class scope (Swift implicit-self method scoping, `this.X`
+   * member resolution). Optional so external/test contexts compile
+   * without it.
+   */
+  getNodeById?(id: string): Node | null;
   /** Get cached import mappings for a file */
   getImportMappings(filePath: string, language: Language): ImportMapping[];
   /**

From 1f15f93febb74495a1f002621ce89ca4cdd82beb Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 15:30:29 -0500
Subject: [PATCH 38/51] feat(extraction): PHP string/array callables + Ruby
 lifecycle-hook symbols (#811)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The last two deferred callback-registration shapes from #756, each scoped
to positions where the reference is trustworthy:

PHP — a string is a callable ONLY in a known callable position:
  - string args of core HOFs (usort, array_map, array_filter,
    call_user_func*, preg_replace_callback, spl_autoload_register,
    set_error_handler, … — PHP_CALLABLE_HOFS): ungated (PHP globals are
    referenced cross-file without imports) + resolution unique-or-drop,
    function-kind only ('Cls::m' strings resolve qualified)
  - array callables anywhere in call args: [$this, 'method'] routes through
    the class-scoped this. resolver (parents included); [Foo::class,
    'method'] resolves qualified
  - strings to arbitrary functions: deliberately nothing

Ruby — hook-DSL symbols name a method of the enclosing class:
  (skip_)?(before|after|around)_* / validate / set_callback /
  helper_method / rescue_from(with:) symbols → class-scoped this.<sym>,
  riding the supertype pass so `before_action :authenticate` in a
  controller resolves to ApplicationController's method. `validates`
  (plural) excluded — its symbols name ATTRIBUTES. Class-body-level hooks
  attribute to the CLASS node (the scoped resolvers now accept class-like
  from-nodes).

Also hardened while validating: the this.X supertype pass is now
NODE-anchored — file-anchored class node → implements/extends edge targets
→ contains-anchored member lookup — replacing the name-keyed
getSupertypes walk, which unioned every same-named class's parents (rails
has a dozen `Engine`s) and produced a cross-class wrong edge.

A/B vs main: WordPress +556 (14/14 sampled genuine — [$this,'m'] wiring,
array_map('absint',…), sodium polyfill call_user_func_array dispatch);
rails/rails +385 after the node-anchored fix (16/16 sampled genuine, incl.
inherited hooks across real extends edges); controls byte-stable
(excalidraw 0-delta, redis identical, typeorm keeps its +4 inherited
getters). The only calls-edge deltas anywhere are pre-existing
minified-bundle resolution jitter (wp-tinymce.js single-letter symbols).

Full suite 1391 passed. EXTRACTION_VERSION 21 → 22 (re-index to benefit).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |   2 +
 __tests__/function-ref.test.ts       |  82 ++++++++++++
 docs/design/function-ref-capture.md  |  17 ++-
 src/extraction/extraction-version.ts |   2 +-
 src/extraction/function-ref.ts       | 178 +++++++++++++++++++++++++--
 src/extraction/tree-sitter.ts        |  13 +-
 src/resolution/index.ts              | 106 +++++++++++-----
 src/resolution/name-matcher.ts       |   9 +-
 8 files changed, 354 insertions(+), 55 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ad5c569ed..57c3a5ca8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,8 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - TypeScript and JavaScript **class fields are now reported as properties instead of methods**. A plain field like `public fonts: Fonts;` previously extracted as a method, misrepresenting class shape and letting calls to same-named functions resolve to data fields (a boolean field named `isArray` was soaking up `Array.isArray(...)` call edges). Fields holding arrow functions or function expressions (`onClick = () => {…}`, including wrapped ones like `onScroll = throttle(() => {…})`) correctly remain methods and their bodies are still analyzed. Field initializers are analyzed too, so `history = createHistory()` records its call — and JavaScript class fields, which previously produced no symbol at all, now appear in the graph. Re-index a project to benefit. (#808) (TypeScript, JavaScript)
 - Callback registration through `this` now resolves precisely in TypeScript and JavaScript: `window.addEventListener("online", this.onOfflineStatusToggle)` or an API object like `{ mutateElement: this.mutateElement }` produces a reference edge to the **enclosing class's own method** — never a same-named method on an unrelated class, and never a data field. Builds on the callback-registration support below. (#808) (TypeScript, JavaScript)
 - Callback-registration coverage deepened across four more shapes: a `this.<member>` registration whose method lives on a **base class** now resolves through the inheritance chain (`bus.on("submit", this.handleSubmit)` in a subclass links to the parent's `handleSubmit`); Java and Kotlin **method references to other classes** (`Handlers::onMessage`, `OtherClass::handle`) resolve across files, with `this::` and `super::` scoped to the defining class and references through a variable deliberately left out; and Swift bare callback names now match only the **enclosing type's** methods (implicit `self`), eliminating a class of wrong edges where a parameter like `request` linked to a same-named method on an unrelated type. (Java, Kotlin, Swift, TypeScript, JavaScript)
+- PHP **string and array callables** now register: a string passed to a callable-taking core function (`usort($items, 'cmp_items')`, `array_map('absint', …)`, `call_user_func`, `spl_autoload_register`, …) links to that function — including across files — and the array forms `[$this, 'method']` and `[Foo::class, 'method']` link to the named method (the `$this` form resolves through the class and its parents). Strings passed to arbitrary functions are deliberately ignored: only known callable positions are trusted. Validated on WordPress core (+556 edges, every sampled edge a genuine registration). (PHP)
+- Ruby **lifecycle-hook symbols** now register: `before_action :authenticate`, `after_save :reindex`, `around_create`, `validate :check`, `rescue_from(…, with: :handler)` and friends link the symbol to the method it names — on the class itself or **inherited from a parent** (`before_action :authenticate` in a controller resolves to `ApplicationController`'s method). `validates` (plural) is excluded since its symbols name attributes, not methods. Validated on rails/rails (+385 edges, every sampled edge genuine). (Ruby)
 - CodeGraph now sees where a function is **registered as a callback**, not just where it's called. A function name passed as an argument (`signal(SIGINT, handler)`, `qsort(…, compare)`, `addEventListener(…, onBlur)`), assigned to a function pointer or field (`ops->recv_cb = my_cb`, `OnClick := Handler`), or placed in a struct initializer or handler table (`{ .recv_cb = my_cb }`, `{ "get", getCommand }`) now produces a reference edge from the registration site to the function — so `codegraph_callers` and `codegraph_impact` surface callback wiring that previously looked like dead code. Works across all supported languages, including the language-specific forms: C/C++ `&fn`, Java `Class::method`, Kotlin `::fn`, Swift `#selector`, Objective-C `@selector`, Ruby `method(:fn)`, Scala eta-expansion, and Delphi/Pascal `@Handler` and `OnClick := Handler` event wiring. Callers output labels these "via callback registration". Resolution is deliberately conservative: an ambiguous name produces no edge rather than a wrong one. Re-index a project to benefit. Thanks @zmcrazy. (#756)
 - The `codegraph_node` MCP tool can now **read a whole source file like the built-in Read tool — only faster, served from the index**. Pass a file path with no symbol and it returns that file's current source with line numbers (the same `<n>⇥<line>` shape Read produces, so an assistant can edit straight from it), narrowable with `offset`/`limit` exactly like Read, plus a one-line note of which files depend on it (the file's blast radius). Use it anywhere you'd reach for Read on an indexed source file. Pass `symbolsOnly: true` for just the file's structure. Configuration/data files (`.yml` / `.properties`) are summarized by key only, never dumped, so secrets in them are never surfaced. The agent-facing guidance was also retuned so assistants reach for codegraph while *implementing* a change (not only when answering questions), since one codegraph call returns the same bytes plus the blast radius, faster than re-reading the file.
 - New `codegraph upgrade` command updates CodeGraph to the latest release in place — it detects how you installed (the standalone `install.sh` / `install.ps1` bundle, npm, or npx) and does the right thing for each, on macOS, Linux, and Windows. Use `codegraph upgrade --check` to see whether an update is available without installing, or `codegraph upgrade <version>` to move to a specific version. After upgrading it reminds you to re-index your projects so they pick up the newer engine's improvements. (#679)
diff --git a/__tests__/function-ref.test.ts b/__tests__/function-ref.test.ts
index d7f9b58c2..440bb82db 100644
--- a/__tests__/function-ref.test.ts
+++ b/__tests__/function-ref.test.ts
@@ -617,6 +617,88 @@ describe('Function-as-value capture (#756)', () => {
     }
   });
 
+  it('PHP: HOF string callables, [$this,…] and [Cls::class,…] arrays; non-HOF strings ignored', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-php-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'handlers.php'),
+      "<?php\nfunction cmp_items($a, $b) { return $a <=> $b; }\n"
+    );
+    fs.writeFileSync(
+      path.join(tmpDir, 'main.php'),
+      [
+        '<?php',
+        'class Saver {',
+        '    public function onSave($x) {}',
+        '    public function wire() {',
+        "        register_shutdown_function([$this, 'onSave']);",
+        '    }',
+        '}',
+        'class Loader {',
+        '    public static function load($cls) {}',
+        '}',
+        'function sorter($items) {',
+        "    usort($items, 'cmp_items');", // known HOF, cross-file string → edge
+        "    spl_autoload_register([Loader::class, 'load']);",
+        "    some_random_fn('cmp_items');", // NOT a known HOF → no edge
+        '    return $items;',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      // Exactly ONE source for cmp_items: the usort site, not some_random_fn.
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'cmp_items'))).toEqual(['sorter']);
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'onSave'))).toEqual(['wire']);
+      expect(sourceNames(cg, fnRefEdgesInto(cg, 'load'))).toEqual(['sorter']);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
+  it('RUBY HOOKS: before_action/rescue_from symbols resolve class-scoped incl. inherited; validates is excluded', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-rubyhooks-'));
+    fs.writeFileSync(
+      path.join(tmpDir, 'posts_controller.rb'),
+      [
+        'class ApplicationController',
+        '  def authenticate; end',
+        'end',
+        '',
+        'class PostsController < ApplicationController',
+        '  before_action :authenticate', // inherited → ApplicationController
+        '  after_save :reindex',
+        '  validates :title, presence: true', // attributes, NOT methods → no edge
+        '  rescue_from StandardError, with: :render_500',
+        '',
+        '  def reindex; end',
+        '  def render_500; end',
+        '  def title; end',
+        'end',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+
+      const auth = fnRefEdgesInto(cg, 'authenticate');
+      expect(auth).toHaveLength(1);
+      expect(cg.getNode(auth[0]!.target)?.qualifiedName).toContain('ApplicationController');
+
+      expect(fnRefEdgesInto(cg, 'reindex')).toHaveLength(1);
+      expect(fnRefEdgesInto(cg, 'render_500')).toHaveLength(1);
+      // `validates :title` names an attribute — the same-named METHOD must
+      // get no registration edge.
+      expect(fnRefEdgesInto(cg, 'title')).toHaveLength(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
   it('DRAIN: resolvable function_ref rows leave unresolved_refs; re-index is stable', async () => {
     tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-drain-'));
     fs.writeFileSync(
diff --git a/docs/design/function-ref-capture.md b/docs/design/function-ref-capture.md
index bb325052d..d08fcaf54 100644
--- a/docs/design/function-ref-capture.md
+++ b/docs/design/function-ref-capture.md
@@ -57,7 +57,8 @@ custom `visitNode` hooks like Scala's val/var handler) get a candidates-only
 | Dart | `arguments` (`argument`) | `assignment_expression.right` | `pair.value` | `list_literal`, `static_final_declaration` | — |
 | Lua / Luau | `arguments` | `assignment_statement` (`expression_list.value`) | `field.value` (keyed + positional) | (same) | — |
 | Pascal | `exprArgs` (via `visitPascalBlock`) | `assignment.rhs` (`OnFire := Handler`) | — | — | `@Handler` (`exprUnary.operand`) |
-| PHP | **skipped** | — | — | — | first-class callable `fn(...)` already extracts as a `calls` edge; string callables are a precision risk, deferred |
+| PHP | string callables ONLY as args of known core HOFs (`usort`, `array_map`, `call_user_func*`… — `PHP_CALLABLE_HOFS`), ungated + unique-or-drop (PHP globals aren't imported) | — | — | — | `[$this, 'm']` → class-scoped `this.m`; `[Foo::class, 'm']` → qualified; `'Cls::m'` → qualified; first-class callable `fn(...)` already extracts as `calls` |
+| Ruby hooks | `(skip_)?(before\|after\|around)_*` + `validate`/`set_callback`/`helper_method`/`rescue_from(with:)` symbols → class-scoped `this.<sym>` (rides the supertype pass: `before_action :authenticate` → ApplicationController). `validates` (plural) excluded — its symbols are ATTRIBUTES | — | — | — | symbols under any other call yield nothing |
 
 ## Precision rules (each one bought by a real-repo false positive)
 
@@ -199,8 +200,18 @@ Index cost on redis: +6% time, +5% db size.
   imports, so cross-file bare callbacks only resolve when repo-unique
   (functions; methods are enclosing-type-only). Cross-TYPE `#selector`
   targets (rare — target-action is normally self) are scoped away too.
-- **PHP string callables**, **Ruby bare symbols** outside `method(:sym)`,
-  **`obj.method` member values** where `obj` isn't `this`/`self`: deferred.
+- **`obj.method` member values** where `obj` isn't `this`/`self`: deferred —
+  the receiver's type is statically unknowable without local data-flow.
+- **PHP strings outside known-HOF positions** (a bare `'handler'` to an
+  arbitrary function; framework registries like WordPress `add_action`):
+  deliberately uncaptured — a string is only trustworthy as a callable in a
+  known callable position. Framework registries belong in a `frameworks/`
+  resolver if ever added. **Ruby symbols outside the hook DSLs** likewise.
+- **The supertype pass is NODE-anchored** (file-anchored class node →
+  implements/extends edge targets → `contains`-anchored member lookup): a
+  name-keyed `getSupertypes('Engine')` unioned every rails `Engine`'s parents
+  and produced a cross-class wrong edge; the node walk eliminated it
+  (rails +440 → +385, all sampled edges genuine).
 - **`this.X` inherited members resolve through the supertype pass**
   (`resolveDeferredThisMemberRefs`, depth-capped BFS over implements/extends,
   runs after edges persist — same lifecycle as the #750 conformance pass).
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 76e3f5758..4494e6c10 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 21;
+export const EXTRACTION_VERSION = 22;
diff --git a/src/extraction/function-ref.ts b/src/extraction/function-ref.ts
index 2970cd4e3..1bae970a4 100644
--- a/src/extraction/function-ref.ts
+++ b/src/extraction/function-ref.ts
@@ -25,9 +25,9 @@
  *
  * Deliberately NOT covered (resolving the *dispatch* — `o->cb(x)` → the
  * registered function — needs data-flow through struct fields; a wrong edge
- * is worse than none): indirect-call resolution, PHP string callables,
- * Ruby bare symbols outside `method(:sym)`, and `obj.method` member values
- * where `obj` isn't `this`/`self`.
+ * is worse than none): indirect-call resolution and `obj.method` member
+ * values where `obj` isn't `this`/`self` (the receiver's type is statically
+ * unknowable without local data-flow).
  */
 
 import type { Node as SyntaxNode } from 'web-tree-sitter';
@@ -45,6 +45,15 @@ export interface FnRefCandidate {
    * C++'s flush policy keys on it.
    */
   explicitRef: boolean;
+  /**
+   * Skip the same-file/import name gate for this candidate. Set for PHP
+   * string callables in known HOF positions: PHP global functions are
+   * referenced cross-file WITHOUT imports (global namespace), so the gate
+   * can't see them — the strong positional prior (a string argument to
+   * `usort`/`array_map`/…) plus resolution's unique-or-drop rule carry the
+   * precision instead.
+   */
+  skipGate?: boolean;
 }
 
 /** How to pull candidate value nodes out of a dispatched container node. */
@@ -252,16 +261,30 @@ const CSHARP_SPEC: FnRefSpec = {
 
 const RUBY_SPEC: FnRefSpec = {
   // Bare identifiers in Ruby args are method CALLS or locals, never function
-  // values — only the `method(:name)` idiom (and `&method(:name)`) qualifies.
+  // values — only the `method(:name)` idiom (and `&method(:name)`) plus
+  // hook-DSL symbols (`before_action :authenticate`) qualify.
   idTypes: new Set<string>(),
   dispatch: new Map<string, CaptureRule>([
     ['argument_list', { mode: 'args' }],
     ['pair', { mode: 'value', field: 'value' }],
   ]),
   layers: new Map<string, string | null>([['block_argument', null]]),
-  special: new Set(['call']),
+  special: new Set(['call', 'simple_symbol']),
 };
 
+/**
+ * Rails/ActiveSupport-style hook DSLs whose symbol arguments name a method of
+ * the enclosing class: lifecycle callbacks (`before_action`, `after_save`,
+ * `around_create`, `skip_before_action`…), `validate :method`, `set_callback`,
+ * `helper_method`, and `rescue_from(..., with: :handler)`. NOT `validates`
+ * (plural) — its symbols name ATTRIBUTES, not methods.
+ */
+const RUBY_HOOK_RE = /^(skip_)?(before|after|around)_[a-z_]+$/;
+const RUBY_HOOK_NAMES = new Set(['validate', 'set_callback', 'helper_method', 'rescue_from']);
+function isRubyHookCall(name: string): boolean {
+  return RUBY_HOOK_RE.test(name) || RUBY_HOOK_NAMES.has(name);
+}
+
 const SWIFT_SPEC: FnRefSpec = {
   idTypes: new Set(['simple_identifier']),
   dispatch: new Map<string, CaptureRule>([
@@ -316,9 +339,39 @@ const PASCAL_SPEC: FnRefSpec = {
 };
 
 /**
- * Capture specs by language. PHP is deliberately absent: its first-class
- * callable `fn(...)` already extracts as a `calls` edge, and string callables
- * (`'fn_name'`) are a precision risk left for a follow-up.
+ * PHP core functions whose string arguments are CALLABLES — the positional
+ * prior that makes a bare string trustworthy as a function reference.
+ * Deliberately core-PHP only; framework registries (WordPress `add_action`)
+ * belong in a frameworks/ resolver if ever added.
+ */
+const PHP_CALLABLE_HOFS = new Set([
+  'array_map', 'array_filter', 'array_walk', 'array_walk_recursive', 'array_reduce',
+  'usort', 'uasort', 'uksort',
+  'array_udiff', 'array_udiff_assoc', 'array_uintersect', 'array_uintersect_assoc',
+  'call_user_func', 'call_user_func_array',
+  'forward_static_call', 'forward_static_call_array',
+  'preg_replace_callback', 'preg_replace_callback_array',
+  'register_shutdown_function', 'register_tick_function',
+  'set_error_handler', 'set_exception_handler', 'spl_autoload_register',
+  'ob_start', 'iterator_apply', 'header_register_callback',
+  'is_callable',
+]);
+
+const PHP_SPEC: FnRefSpec = {
+  // PHP has no bare-identifier function values (the first-class callable
+  // `fn(...)` already extracts as a `calls` edge). What qualifies:
+  //  - a string argument to a known callable-taking core function
+  //    (`usort($a, 'cmp_items')`) — see PHP_CALLABLE_HOFS
+  //  - array callables: `[$this, 'method']` (class-scoped) and
+  //    `[Foo::class, 'method']` (qualified), in any call's arguments
+  idTypes: new Set<string>(),
+  dispatch: new Map<string, CaptureRule>([['arguments', { mode: 'args' }]]),
+  layers: new Map<string, string | null>([['argument', null]]),
+  special: new Set(['encapsed_string', 'string', 'array_creation_expression']),
+};
+
+/**
+ * Capture specs by language.
  */
 export const FN_REF_SPECS: Record<string, FnRefSpec | undefined> = {
   c: cFamilySpec(),
@@ -334,6 +387,7 @@ export const FN_REF_SPECS: Record<string, FnRefSpec | undefined> = {
   java: JAVA_SPEC,
   kotlin: KOTLIN_SPEC,
   csharp: CSHARP_SPEC,
+  php: PHP_SPEC,
   ruby: RUBY_SPEC,
   swift: SWIFT_SPEC,
   scala: SCALA_SPEC,
@@ -441,7 +495,7 @@ export function captureFnRefCandidates(
     // flush, where file scope is known) drops bare ids outside file-scope
     // initializer tables.
     const explicitRef = !spec.idTypes.has(v.type);
-    for (const { name, node } of normalizeValue(v, spec, source, 0)) {
+    for (const { name, node, skipGate } of normalizeValue(v, spec, source, 0)) {
       if (!name || NAME_STOPLIST.has(name)) continue;
       out.push({
         name,
@@ -449,12 +503,20 @@ export function captureFnRefCandidates(
         column: node.startPosition.column,
         mode: rule.mode,
         explicitRef,
+        skipGate,
       });
     }
   }
   return out;
 }
 
+/** One normalized function-value: its name, source node, and gate policy. */
+interface NormalizedRef {
+  name: string;
+  node: SyntaxNode;
+  skipGate?: boolean;
+}
+
 /**
  * Normalize one value expression to zero or more function names. Recursion is
  * bounded (wrapper layers only); anything that isn't a recognized
@@ -465,7 +527,7 @@ function normalizeValue(
   spec: FnRefSpec,
   source: string,
   depth: number
-): Array<{ name: string; node: SyntaxNode }> {
+): NormalizedRef[] {
   if (depth > 4) return [];
   const type = node.type;
 
@@ -497,7 +559,7 @@ function normalizeValue(
       const inner = getChildByField(node, layerField);
       return inner ? normalizeValue(inner, spec, source, depth + 1) : [];
     }
-    const results: Array<{ name: string; node: SyntaxNode }> = [];
+    const results: NormalizedRef[] = [];
     for (let i = 0; i < node.namedChildCount; i++) {
       const child = node.namedChild(i);
       if (child) results.push(...normalizeValue(child, spec, source, depth + 1));
@@ -551,7 +613,7 @@ function normalizeSpecial(
   node: SyntaxNode,
   type: string,
   source: string
-): Array<{ name: string; node: SyntaxNode }> {
+): NormalizedRef[] {
   switch (type) {
     // Java method references. Receiver decides the resolution route (#808):
     //   `this::run0` / `super::close` → `this.<m>` (class-scoped resolver;
@@ -683,7 +745,99 @@ function normalizeSpecial(
       return isThisReceiver ? [{ name: getNodeText(name, source), node: name }] : [];
     }
 
+    // PHP string callable — trustworthy ONLY as an argument to a known
+    // callable-taking core function (`usort($a, 'cmp_items')`). PHP global
+    // functions are referenced cross-file without imports, so these skip the
+    // name gate and rely on resolution's unique-or-drop rule. A
+    // `'Cls::method'` string becomes a qualified candidate.
+    case 'encapsed_string':
+    case 'string': {
+      const callee = phpEnclosingCallName(node);
+      if (!callee || !PHP_CALLABLE_HOFS.has(callee)) return [];
+      const content = phpStringContent(node, source);
+      if (!content) return [];
+      if (/^[A-Za-z_][A-Za-z0-9_]*$/.test(content)) {
+        return [{ name: content, node, skipGate: true }];
+      }
+      if (/^[A-Za-z_][A-Za-z0-9_]*::[A-Za-z_][A-Za-z0-9_]*$/.test(content)) {
+        return [{ name: content, node, skipGate: true }];
+      }
+      return [];
+    }
+
+    // PHP array callables, valid in ANY call's arguments (the shape itself is
+    // unambiguous): `[$this, 'method']` → class-scoped `this.method`;
+    // `[Foo::class, 'method']` → qualified `Foo::method`.
+    case 'array_creation_expression': {
+      if (node.namedChildCount !== 2) return [];
+      const recv = node.namedChild(0)?.namedChild(0);
+      const strEl = node.namedChild(1)?.namedChild(0);
+      if (!recv || !strEl) return [];
+      if (strEl.type !== 'encapsed_string' && strEl.type !== 'string') return [];
+      const member = phpStringContent(strEl, source);
+      if (!member || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(member)) return [];
+      if (recv.type === 'variable_name' && getNodeText(recv, source) === '$this') {
+        return [{ name: `this.${member}`, node: strEl }];
+      }
+      if (recv.type === 'class_constant_access_expression') {
+        const cls = recv.namedChild(0);
+        const kw = recv.namedChild(1);
+        if (cls && kw && getNodeText(kw, source) === 'class') {
+          return [{ name: `${getNodeText(cls, source)}::${member}`, node: strEl }];
+        }
+      }
+      return [];
+    }
+
+    // Ruby hook-DSL symbols (`before_action :authenticate`,
+    // `rescue_from E, with: :render_404`): the symbol names a method of the
+    // ENCLOSING class — route through the class-scoped `this.` resolver
+    // (which also walks superclasses, covering ApplicationController-style
+    // inheritance). Symbols under any other call yield nothing.
+    case 'simple_symbol': {
+      const call = rubyEnclosingCall(node);
+      if (!call) return [];
+      const method = getChildByField(call, 'method');
+      if (!method || !isRubyHookCall(getNodeText(method, source))) return [];
+      const sym = getNodeText(node, source).replace(/^:/, '');
+      if (!/^[A-Za-z_][A-Za-z0-9_?!]*$/.test(sym)) return [];
+      return [{ name: `this.${sym}`, node }];
+    }
+
     default:
       return [];
   }
 }
+
+/** Content of a static PHP string literal (single- or double-quoted); null if empty, interpolated or escaped. */
+function phpStringContent(node: SyntaxNode, source: string): string | null {
+  // Require exactly one `string_content` child: `"cmp_$type"` is dynamic, not a callable named `cmp_`.
+  if (node.namedChildCount !== 1) return null;
+  const only = node.namedChild(0);
+  if (!only || only.type !== 'string_content') return null;
+  return getNodeText(only, source).trim();
+}
+
+/** Name of the PHP function call whose arguments contain `node`, lowercased with a leading `\` stripped (PHP function names are case-insensitive; `\usort` is the global `usort`), or null. */
+function phpEnclosingCallName(node: SyntaxNode): string | null {
+  let cur: SyntaxNode | null = node.parent;
+  for (let hops = 0; cur && hops < 4; hops++, cur = cur.parent) {
+    if (cur.type === 'function_call_expression') {
+      const fn = getChildByField(cur, 'function');
+      return fn ? fn.text.replace(/^\\/, '').toLowerCase() : null;
+    }
+    if (cur.type === 'member_call_expression' || cur.type === 'scoped_call_expression') {
+      return null; // method calls aren't core HOFs
+    }
+  }
+  return null;
+}
+
+/** Nearest Ruby `call` ancestor within 4 parent hops of `node` (e.g. a direct argument or a keyword-pair value), else null. */
+function rubyEnclosingCall(node: SyntaxNode): SyntaxNode | null {
+  let cur: SyntaxNode | null = node.parent;
+  for (let hops = 0; cur && hops < 4; hops++, cur = cur.parent) {
+    if (cur.type === 'call') return cur;
+  }
+  return null;
+}
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index a3e827f56..f22ad37a2 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -454,15 +454,18 @@ export class TreeSitterExtractor {
     // segment — the simple name Java/Kotlin code uses in `OtherClass::method`
     // references.
     const SIMPLE_NAME = /^[A-Za-z_$][A-Za-z0-9_$]*$/;
-    const DOTTED_NAME = /^[A-Za-z_$][A-Za-z0-9_$.]*\.([A-Za-z_$][A-Za-z0-9_$]*)$/;
+    // JVM imports are dotted (`com.example.OtherClass`); PHP `use` imports
+    // are backslashed (`App\Services\Mailer`). Both contribute their last
+    // segment — the simple name code uses to reference them.
+    const QUALIFIED_IMPORT = /^[A-Za-z_$][A-Za-z0-9_$.\\]*[.\\]([A-Za-z_$][A-Za-z0-9_$]*)$/;
     const importedNames = new Set<string>();
     for (const r of this.unresolvedReferences) {
       if (r.referenceKind !== 'imports') continue;
       if (SIMPLE_NAME.test(r.referenceName)) {
         importedNames.add(r.referenceName);
       } else {
-        const dotted = r.referenceName.match(DOTTED_NAME);
-        if (dotted) importedNames.add(dotted[1]!);
+        const qualified = r.referenceName.match(QUALIFIED_IMPORT);
+        if (qualified) importedNames.add(qualified[1]!);
       }
     }
 
@@ -497,7 +500,9 @@ export class TreeSitterExtractor {
       //    (constant-expression context — see FnRefSpec.ungatedModes).
       //  - everything else: name ∈ same-file functions/methods ∪ imports.
       if (!c.name.startsWith('this.')) {
-        const skipGate = ungated?.has(c.mode) === true && atFileScope;
+        const skipGate =
+          (ungated?.has(c.mode) === true && atFileScope) ||
+          c.skipGate === true; // PHP HOF-position string callables (see FnRefCandidate.skipGate)
         if (!skipGate) {
           if (c.name.includes('::')) {
             const scopeName = c.name.slice(0, c.name.indexOf('::'));
diff --git a/src/resolution/index.ts b/src/resolution/index.ts
index 1e4920208..0d7ec4309 100644
--- a/src/resolution/index.ts
+++ b/src/resolution/index.ts
@@ -1212,9 +1212,17 @@ export class ReferenceResolver {
     if (!member) return null;
     const fromNode = this.queries.getNodeById(ref.fromNodeId);
     if (!fromNode) return null;
-    const sep = fromNode.qualifiedName.lastIndexOf('::');
-    if (sep <= 0) return null; // not inside a class scope
-    const classPrefix = fromNode.qualifiedName.slice(0, sep);
+    // A hook declared at class-body level (Ruby `before_action :authenticate`)
+    // attributes to the CLASS node itself — its qualified name IS the scope.
+    // For members, strip the member segment.
+    let classPrefix: string;
+    if (SUPERTYPE_BEARING_KINDS.has(fromNode.kind) || fromNode.kind === 'module') {
+      classPrefix = fromNode.qualifiedName;
+    } else {
+      const sep = fromNode.qualifiedName.lastIndexOf('::');
+      if (sep <= 0) return null; // not inside a class scope
+      classPrefix = fromNode.qualifiedName.slice(0, sep);
+    }
     const candidates = this.context
       .getNodesByQualifiedName(`${classPrefix}::${member}`)
       .filter(
@@ -1260,38 +1268,72 @@ export class ReferenceResolver {
       const member = ref.referenceName.slice('this.'.length);
       const fromNode = this.queries.getNodeById(ref.fromNodeId);
       if (!fromNode || !member) continue;
-      const sep = fromNode.qualifiedName.lastIndexOf('::');
-      if (sep <= 0) continue;
-      const classPrefix = fromNode.qualifiedName.slice(0, sep);
-      const className = classPrefix.includes('::')
-        ? classPrefix.slice(classPrefix.lastIndexOf('::') + 2)
-        : classPrefix;
-
-      // BFS up the supertype graph by simple name.
-      const seen = new Set<string>([className]);
-      let frontier = this.context.getSupertypes?.(className, ref.language) ?? [];
+      // Class-body-level hooks (Ruby) attribute to the CLASS node itself.
+      let className: string;
+      if (SUPERTYPE_BEARING_KINDS.has(fromNode.kind) || fromNode.kind === 'module') {
+        className = fromNode.name;
+      } else {
+        const sep = fromNode.qualifiedName.lastIndexOf('::');
+        if (sep <= 0) continue;
+        const classPrefix = fromNode.qualifiedName.slice(0, sep);
+        className = classPrefix.includes('::')
+          ? classPrefix.slice(classPrefix.lastIndexOf('::') + 2)
+          : classPrefix;
+      }
+
+      // NODE-anchored BFS up the supertype graph: start from the class node
+      // in the ref's own file (never a same-named class elsewhere — rails has
+      // a dozen `Engine`s), follow implements/extends EDGES to supertype
+      // NODES, and look members up through `contains` edges. No name-based
+      // unions anywhere — a name-keyed getSupertypes('Engine') merged every
+      // Engine's parents and produced a cross-class wrong edge on rails.
+      let frontierNodes = this.context
+        .getNodesByName(className)
+        .filter(
+          (n) =>
+            SUPERTYPE_BEARING_KINDS.has(n.kind) &&
+            n.filePath === ref.filePath
+        );
+      if (frontierNodes.length === 0) {
+        // The class itself may be declared in another file (partial/reopened
+        // classes); fall back to same-family nodes of that name.
+        frontierNodes = this.context
+          .getNodesByName(className)
+          .filter(
+            (n) =>
+              SUPERTYPE_BEARING_KINDS.has(n.kind) &&
+              sameLanguageFamily(n.language, ref.language)
+          );
+      }
+      const seenNodes = new Set<string>(frontierNodes.map((n) => n.id));
       let target: Node | null = null;
-      for (let depth = 0; depth < 5 && frontier.length > 0 && !target; depth++) {
-        const next: string[] = [];
-        for (const superName of frontier) {
-          if (seen.has(superName)) continue;
-          seen.add(superName);
-          const members = this.context
-            .getNodesByName(member)
-            .filter(
-              (n) =>
-                (n.kind === 'function' || n.kind === 'method') &&
-                sameLanguageFamily(n.language, ref.language) &&
-                (n.qualifiedName === `${superName}::${member}` ||
-                  n.qualifiedName.endsWith(`::${superName}::${member}`))
-            );
-          if (members.length > 0) {
-            target = members.reduce((a, b) => (a.startLine <= b.startLine ? a : b));
-            break;
+      for (let depth = 0; depth < 5 && frontierNodes.length > 0 && !target; depth++) {
+        const next: Node[] = [];
+        for (const typeNode of frontierNodes) {
+          for (const edge of this.queries.getOutgoingEdges(typeNode.id, ['implements', 'extends'])) {
+            const superNode = this.queries.getNodeById(edge.target);
+            if (!superNode || seenNodes.has(superNode.id)) continue;
+            seenNodes.add(superNode.id);
+            if (!SUPERTYPE_BEARING_KINDS.has(superNode.kind)) continue;
+            // Member lookup anchored on the supertype's contains edges.
+            for (const c of this.queries.getOutgoingEdges(superNode.id, ['contains'])) {
+              const m = this.queries.getNodeById(c.target);
+              if (
+                m &&
+                m.name === member &&
+                (m.kind === 'function' || m.kind === 'method') &&
+                sameLanguageFamily(m.language, ref.language)
+              ) {
+                target = m;
+                break;
+              }
+            }
+            if (target) break;
+            next.push(superNode);
           }
-          next.push(...(this.context.getSupertypes?.(superName, ref.language) ?? []));
+          if (target) break;
         }
-        frontier = next;
+        frontierNodes = next;
       }
 
       if (target) {
diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts
index 9b91aea22..9990d690d 100644
--- a/src/resolution/name-matcher.ts
+++ b/src/resolution/name-matcher.ts
@@ -192,12 +192,15 @@ export function matchFunctionRef(
   // A/B finding; same pattern in vendored docopt.py). Python's `self.m`
   // form keeps method targets via its own capture shape. C++ likewise: a
   // bare identifier can only be a FREE function (member values need
-  // `&Cls::method`). Other languages keep method targets: C# method groups,
-  // Swift/Dart implicit-self, Java/Kotlin method references.
+  // `&Cls::method`). PHP string callables name global FUNCTIONS (methods
+  // need the `[$obj, 'm']` array form, which carries its own shape). Other
+  // languages keep method targets: C# method groups, Swift/Dart
+  // implicit-self, Java/Kotlin method references.
   const bareFnOnly =
     ref.language === 'typescript' || ref.language === 'tsx' ||
     ref.language === 'javascript' || ref.language === 'jsx' ||
-    ref.language === 'cpp' || ref.language === 'python';
+    ref.language === 'cpp' || ref.language === 'python' ||
+    ref.language === 'php';
 
   // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"):
   // resolve the member ON THAT SCOPE — exempt from bareFnOnly (the `&Cls::m`

From dce61a5f4ae8e2cc58ce82918f002f8962788335 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 15:44:14 -0500
Subject: [PATCH 39/51] =?UTF-8?q?fix(extraction):=20qualified=20Type::memb?=
 =?UTF-8?q?er=20refs=20skip=20the=20name=20gate=20=E2=80=94=20no-import=20?=
 =?UTF-8?q?references=20resolve=20(#812)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`KtHandlers::handle` registered from another file produced no edge: the
extraction gate required the scope to be a same-file type or an IMPORTED
name, but Java/Kotlin same-package references and Kotlin companion members
need no import at all, so the gate could never see them. (The "companion
members extract unqualified" limit recorded during Arc A was a probe
artifact: a SINGLE-LINE `class X { companion object { … } }` is an
upstream tree-sitter-kotlin misparse (ERROR node); real multi-line
companions extract transparently as qualified methods of the class.)

Qualified `Type::member` candidates now skip the name gate the same way
`this.<member>` ones do: the explicit-ref syntax is self-selecting, and
resolution stays scope-suffix-anchored + unique-or-drop, so a
`Decoy::handle` can never match a `KtHandlers::handle` ref (tested).

A/B vs main: rxjava +4 (same-package `Maybe::just` / `Single::just`
method refs), fmt +3 (gtest `&Test::DeleteSelf_` /
`&TestSuite::RunSetUpTestSuite` cross-file member pointers), okio 0-delta,
redis byte-identical — every new edge verified genuine, zero `calls` edges
touched, node counts identical.

Full suite 1392 passed. EXTRACTION_VERSION 22 → 23 (re-index to benefit).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/function-ref.test.ts       | 45 ++++++++++++++++++++++++++++
 docs/design/function-ref-capture.md  | 14 ++++++---
 src/extraction/extraction-version.ts |  2 +-
 src/extraction/tree-sitter.ts        | 34 +++++----------------
 5 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 57c3a5ca8..67d387808 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Callback-registration coverage deepened across four more shapes: a `this.<member>` registration whose method lives on a **base class** now resolves through the inheritance chain (`bus.on("submit", this.handleSubmit)` in a subclass links to the parent's `handleSubmit`); Java and Kotlin **method references to other classes** (`Handlers::onMessage`, `OtherClass::handle`) resolve across files, with `this::` and `super::` scoped to the defining class and references through a variable deliberately left out; and Swift bare callback names now match only the **enclosing type's** methods (implicit `self`), eliminating a class of wrong edges where a parameter like `request` linked to a same-named method on an unrelated type. (Java, Kotlin, Swift, TypeScript, JavaScript)
 - PHP **string and array callables** now register: a string passed to a callable-taking core function (`usort($items, 'cmp_items')`, `array_map('absint', …)`, `call_user_func`, `spl_autoload_register`, …) links to that function — including across files — and the array forms `[$this, 'method']` and `[Foo::class, 'method']` link to the named method (the `$this` form resolves through the class and its parents). Strings passed to arbitrary functions are deliberately ignored: only known callable positions are trusted. Validated on WordPress core (+556 edges, every sampled edge a genuine registration). (PHP)
 - Ruby **lifecycle-hook symbols** now register: `before_action :authenticate`, `after_save :reindex`, `around_create`, `validate :check`, `rescue_from(…, with: :handler)` and friends link the symbol to the method it names — on the class itself or **inherited from a parent** (`before_action :authenticate` in a controller resolves to `ApplicationController`'s method). `validates` (plural) is excluded since its symbols name attributes, not methods. Validated on rails/rails (+385 edges, every sampled edge genuine). (Ruby)
+- Method references to a type that needed **no import** now resolve: Java/Kotlin same-package references (`.concatMapMaybe(Maybe::just, …)`), **Kotlin companion-object members** (`KtHandlers::handle`), and cross-file C++ member pointers (`&TestSuite::RunSetUpTestSuite`). Resolution stays anchored to the named type, so a same-named member on a different class never matches. (Java, Kotlin, C++)
 - CodeGraph now sees where a function is **registered as a callback**, not just where it's called. A function name passed as an argument (`signal(SIGINT, handler)`, `qsort(…, compare)`, `addEventListener(…, onBlur)`), assigned to a function pointer or field (`ops->recv_cb = my_cb`, `OnClick := Handler`), or placed in a struct initializer or handler table (`{ .recv_cb = my_cb }`, `{ "get", getCommand }`) now produces a reference edge from the registration site to the function — so `codegraph_callers` and `codegraph_impact` surface callback wiring that previously looked like dead code. Works across all supported languages, including the language-specific forms: C/C++ `&fn`, Java `Class::method`, Kotlin `::fn`, Swift `#selector`, Objective-C `@selector`, Ruby `method(:fn)`, Scala eta-expansion, and Delphi/Pascal `@Handler` and `OnClick := Handler` event wiring. Callers output labels these "via callback registration". Resolution is deliberately conservative: an ambiguous name produces no edge rather than a wrong one. Re-index a project to benefit. Thanks @zmcrazy. (#756)
 - The `codegraph_node` MCP tool can now **read a whole source file like the built-in Read tool — only faster, served from the index**. Pass a file path with no symbol and it returns that file's current source with line numbers (the same `<n>⇥<line>` shape Read produces, so an assistant can edit straight from it), narrowable with `offset`/`limit` exactly like Read, plus a one-line note of which files depend on it (the file's blast radius). Use it anywhere you'd reach for Read on an indexed source file. Pass `symbolsOnly: true` for just the file's structure. Configuration/data files (`.yml` / `.properties`) are summarized by key only, never dumped, so secrets in them are never surfaced. The agent-facing guidance was also retuned so assistants reach for codegraph while *implementing* a change (not only when answering questions), since one codegraph call returns the same bytes plus the blast radius, faster than re-reading the file.
 - New `codegraph upgrade` command updates CodeGraph to the latest release in place — it detects how you installed (the standalone `install.sh` / `install.ps1` bundle, npm, or npx) and does the right thing for each, on macOS, Linux, and Windows. Use `codegraph upgrade --check` to see whether an update is available without installing, or `codegraph upgrade <version>` to move to a specific version. After upgrading it reminds you to re-index your projects so they pick up the newer engine's improvements. (#679)
diff --git a/__tests__/function-ref.test.ts b/__tests__/function-ref.test.ts
index 440bb82db..993b68614 100644
--- a/__tests__/function-ref.test.ts
+++ b/__tests__/function-ref.test.ts
@@ -544,6 +544,51 @@ describe('Function-as-value capture (#756)', () => {
     }
   });
 
+  it('KOTLIN: companion-object refs resolve cross-file without imports; decoy companion untouched', async () => {
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-ktcomp-'));
+    // Same package, no imports — the Java/Kotlin reality the name gate can't
+    // see, which is why qualified `Type::member` candidates skip it.
+    fs.writeFileSync(
+      path.join(tmpDir, 'Handlers.kt'),
+      [
+        'class KtHandlers {',
+        '  companion object {',
+        '    fun handle(x: Int) {}',
+        '  }',
+        '}',
+        'class Decoy {',
+        '  companion object {',
+        '    fun handle(x: Int) {}',
+        '  }',
+        '}',
+      ].join('\n')
+    );
+    fs.writeFileSync(
+      path.join(tmpDir, 'Wirer.kt'),
+      [
+        'fun register(cb: Any) {}',
+        'class Wirer {',
+        '  fun wire() { register(KtHandlers::handle) }',
+        '}',
+      ].join('\n')
+    );
+
+    const cg = CodeGraph.initSync(tmpDir);
+    try {
+      await cg.indexAll();
+      const handles = cg.getNodesByName('handle');
+      const target = handles.find((n) => n.qualifiedName.includes('KtHandlers'))!;
+      const decoy = handles.find((n) => n.qualifiedName.includes('Decoy'))!;
+      const into = cg.getIncomingEdges(target.id).filter((e) => e.metadata?.fnRef === true);
+      expect(into).toHaveLength(1);
+      expect(cg.getNode(into[0]!.source)?.name).toBe('wire');
+      expect(cg.getIncomingEdges(decoy.id).filter((e) => e.metadata?.fnRef === true)).toHaveLength(0);
+    } finally {
+      cg.destroy();
+      tmpDir = undefined;
+    }
+  });
+
   it('SWIFT SCOPING: bare ids hit only the enclosing type’s methods; top-level bare hits functions only', async () => {
     tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fnref-swiftscope-'));
     fs.writeFileSync(
diff --git a/docs/design/function-ref-capture.md b/docs/design/function-ref-capture.md
index d08fcaf54..7c8ef733f 100644
--- a/docs/design/function-ref-capture.md
+++ b/docs/design/function-ref-capture.md
@@ -192,10 +192,16 @@ Index cost on redis: +6% time, +5% db size.
   cross-file (scope gated on same-file types ∪ imported names, incl. the last
   segment of dotted JVM imports); `this::m` / `super::m` ride the
   class-scoped + supertype path.
-- **Kotlin companion-object members** extract UNQUALIFIED (node `handle`, not
-  `KtHandlers::Companion::handle` — pre-existing extraction shape), so
-  `KtHandlers::handle` refs to companion members stay silent rather than
-  guess. Fix belongs in kotlin companion extraction.
+- **Qualified `Type::member` candidates skip the name gate** (like `this.X`):
+  Java/Kotlin same-package references and Kotlin companions need NO import,
+  so the gate could never see their scope — and the explicit-ref syntax is
+  self-selecting while resolution stays scope-suffix-anchored +
+  unique-or-drop (a `Decoy::handle` can't match a `KtHandlers::handle` ref).
+  This is also what resolves companion-member refs: companions extract
+  TRANSPARENTLY (`KtHandlers::handle`, method of the class) in real
+  multi-line code. (A single-line `class X { companion object { … } }` is an
+  upstream tree-sitter-kotlin misparse — ERROR node — and only ever appeared
+  in our own probe fixture; don't chase it.)
 - **Swift cross-file bare references**: Swift sees module-wide symbols without
   imports, so cross-file bare callbacks only resolve when repo-unique
   (functions; methods are enclosing-type-only). Cross-TYPE `#selector`
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 4494e6c10..7ce49929b 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 22;
+export const EXTRACTION_VERSION = 23;
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index f22ad37a2..7b6ed02ce 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -435,15 +435,8 @@ export class TreeSitterExtractor {
     if (isGeneratedFile(this.filePath)) return;
 
     const definedHere = new Set<string>();
-    const definedTypes = new Set<string>();
     for (const n of this.nodes) {
       if (n.kind === 'function' || n.kind === 'method') definedHere.add(n.name);
-      if (
-        n.kind === 'class' || n.kind === 'struct' || n.kind === 'interface' ||
-        n.kind === 'enum' || n.kind === 'trait' || n.kind === 'protocol'
-      ) {
-        definedTypes.add(n.name);
-      }
     }
 
     // Import-binding names only (all binding emitters push kind 'imports').
@@ -493,31 +486,20 @@ export class TreeSitterExtractor {
       //    strictly class-scoped (own members or the validated supertype
       //    pass), so nothing fuzzy can leak.
       //  - `Scope::member` (C++ member-pointers, Java/Kotlin type-qualified
-      //    method refs): the SCOPE name must be a type defined here or an
-      //    imported name (covers `OtherClass::method` cross-file), or the
-      //    member matches the plain gate (back-compat for C++ same-file).
+      //    method refs, PHP `'Cls::m'`): ALWAYS flush — the explicit-ref
+      //    syntax is self-selecting, the referenced type often needs NO
+      //    import (Java/Kotlin same-package, Kotlin companions), and
+      //    resolution is scope-suffix-anchored + unique-or-drop, so a
+      //    same-named member on another class can't match.
       //  - C-family file-scope initializers skip the gate entirely
       //    (constant-expression context — see FnRefSpec.ungatedModes).
       //  - everything else: name ∈ same-file functions/methods ∪ imports.
-      if (!c.name.startsWith('this.')) {
+      if (!c.name.startsWith('this.') && !c.name.includes('::')) {
         const skipGate =
           (ungated?.has(c.mode) === true && atFileScope) ||
           c.skipGate === true; // PHP HOF-position string callables (see FnRefCandidate.skipGate)
-        if (!skipGate) {
-          if (c.name.includes('::')) {
-            const scopeName = c.name.slice(0, c.name.indexOf('::'));
-            const memberName = c.name.slice(c.name.lastIndexOf('::') + 2);
-            if (
-              !definedTypes.has(scopeName) &&
-              !importedNames.has(scopeName) &&
-              !definedHere.has(memberName) &&
-              !importedNames.has(memberName)
-            ) {
-              continue;
-            }
-          } else if (!definedHere.has(c.name) && !importedNames.has(c.name)) {
-            continue;
-          }
+        if (!skipGate && !definedHere.has(c.name) && !importedNames.has(c.name)) {
+          continue;
         }
       }
       const key = `${c.fromNodeId}|${c.name}`;

From 222af6b87ceb4b89669b67dbb580540ef6ccf714 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 16:24:22 -0500
Subject: [PATCH 40/51] fix(mcp+resolution): stop conflating same-named symbols
 across monorepo apps (#764) (#813)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A NestJS-style monorepo has one UserService/UserModule/UserRepository per
app; with no package concept for TS they share one global name scope and
agents visibly warned that CodeGraph was mixing unrelated classes.

Two distinct problems, two fixes:

1. TOOL AGGREGATION. callers/callees returned one merged list across every
   same-named match, and impact merged all their blast radii into a single
   overstated subgraph. Now: matches group into DISTINCT DEFINITIONS
   (filePath + qualifiedName — same-file overloads still merge, that's the
   overload feature) and render one file-labeled section per definition;
   a new `file` argument (path or suffix, like codegraph_node's) narrows
   to one definition, suppressing the stale aggregation note; a
   non-matching `file` falls back to all definitions with a note.
   server-instructions documents the behavior.

2. RESOLUTION WRONG EDGES. Auditing a real monorepo (amplication, 54k
   nodes) found 1,036 cross-package `references` edges into duplicated
   names. Root cause: the React framework resolver ran PascalCase
   component resolution on refs from PLAIN .ts FILES (a GraphQL types
   file's own `Account` type alias lost to an arbitrary same-named CLASS
   in another package — the resolver's blind `components[0]` fallback at
   confidence 0.8 outranked the name-matcher's proximity-correct 0.7).
   Component resolution is now gated to JSX-capable refs (tsx/jsx) and
   never guesses among multiple candidates without a positional signal
   (same-dir / component-dir / unique). Cross-package wrong edges:
   1,036 -> 40 (-96%; the remainder are genuine shared-model imports and
   codegen template scaffolds), with the freed refs re-resolving to the
   correct same-file/same-package targets. excalidraw (a real React repo)
   is a zero-delta control — legitimate component refs all carry
   same-dir/component-dir signals.

Graph-level separation was verified correct on a fixture before any
changes (import + proximity resolution keeps apps apart) — the conflation
was tool-level plus the react-resolver edge class.

Tests: 6-test e2e suite (grouped callers/callees, per-definition impact
radii, file narrowing, fallback note, cross-app edge isolation) + react
resolver unit tests updated to production reality (tsx refs resolve,
plain-ts refs decline). Full suite 1398 passed. EXTRACTION_VERSION
23 -> 24 (re-index to drop the wrong cross-package edges).

Closes #764

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                               |   2 +
 __tests__/resolution.test.ts               |  13 +-
 __tests__/same-name-disambiguation.test.ts | 138 ++++++++++++
 src/extraction/extraction-version.ts       |   2 +-
 src/mcp/server-instructions.ts             |   2 +-
 src/mcp/tools.ts                           | 248 ++++++++++++++++-----
 src/resolution/frameworks/react.ts         |  20 +-
 7 files changed, 365 insertions(+), 60 deletions(-)
 create mode 100644 __tests__/same-name-disambiguation.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 67d387808..9db49ec0d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
+- Same-named symbols across a monorepo's apps are no longer conflated. In a NestJS-style workspace with one `UserService` per app, `codegraph_callers`, `codegraph_callees`, and `codegraph_impact` now report **one section per distinct definition** — each app's callers and blast radius under its own file-labeled heading — instead of a single merged list, and accept a `file` argument to focus exactly the definition you mean (like `codegraph_node` already did). Impact in particular no longer overstates a change's blast radius by merging unrelated same-named classes. Thanks @Igorgro. (#764)
+- Fixed a related source of cross-package wrong edges: PascalCase **type references from plain `.ts` files were being resolved as React components**, which could link a file's own type alias to an arbitrary same-named class in another package (on one large monorepo this produced over a thousand wrong cross-package reference edges; 96% are now gone, and the remainder are genuine shared-model imports). Component resolution now applies only to references from JSX-capable files and never guesses between multiple candidates without a positional signal. Re-index a project to benefit. (#764) (TypeScript, React)
 - TypeScript and JavaScript **class fields are now reported as properties instead of methods**. A plain field like `public fonts: Fonts;` previously extracted as a method, misrepresenting class shape and letting calls to same-named functions resolve to data fields (a boolean field named `isArray` was soaking up `Array.isArray(...)` call edges). Fields holding arrow functions or function expressions (`onClick = () => {…}`, including wrapped ones like `onScroll = throttle(() => {…})`) correctly remain methods and their bodies are still analyzed. Field initializers are analyzed too, so `history = createHistory()` records its call — and JavaScript class fields, which previously produced no symbol at all, now appear in the graph. Re-index a project to benefit. (#808) (TypeScript, JavaScript)
 - Callback registration through `this` now resolves precisely in TypeScript and JavaScript: `window.addEventListener("online", this.onOfflineStatusToggle)` or an API object like `{ mutateElement: this.mutateElement }` produces a reference edge to the **enclosing class's own method** — never a same-named method on an unrelated class, and never a data field. Builds on the callback-registration support below. (#808) (TypeScript, JavaScript)
 - Callback-registration coverage deepened across four more shapes: a `this.<member>` registration whose method lives on a **base class** now resolves through the inheritance chain (`bus.on("submit", this.handleSubmit)` in a subclass links to the parent's `handleSubmit`); Java and Kotlin **method references to other classes** (`Handlers::onMessage`, `OtherClass::handle`) resolve across files, with `this::` and `super::` scoped to the defining class and references through a variable deliberately left out; and Swift bare callback names now match only the **enclosing type's** methods (implicit `self`), eliminating a class of wrong edges where a parameter like `request` linked to a same-named method on an unrelated type. (Java, Kotlin, Swift, TypeScript, JavaScript)
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 12131f3cc..47c6b9220 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -581,12 +581,23 @@ from ..services import auth_service
         line: 10,
         column: 5,
         filePath: 'src/App.tsx',
-        language: 'typescript' as const,
+        // Refs extracted from .tsx files carry language 'tsx' — component
+        // resolution is gated to JSX-capable refs (#764: PascalCase TYPE refs
+        // from plain .ts files were resolving to arbitrary same-named classes).
+        language: 'tsx' as const,
       };
 
       const result = reactResolver!.resolve(ref, context);
       expect(result).not.toBeNull();
       expect(result?.targetNodeId).toBe('component:src/Button.tsx:Button:5');
+
+      // The same PascalCase name referenced from a plain .ts file is a TYPE
+      // reference, not a component usage — component resolution must decline
+      // and leave it to proximity-aware name matching (#764: a .ts GraphQL
+      // types file's own `Account` alias was losing to an arbitrary same-named
+      // class in another monorepo package).
+      const tsRef = { ...ref, filePath: 'src/models.ts', language: 'typescript' as const };
+      expect(reactResolver!.resolve(tsRef, context)).toBeNull();
     });
 
     it('should resolve custom hook references', () => {
diff --git a/__tests__/same-name-disambiguation.test.ts b/__tests__/same-name-disambiguation.test.ts
new file mode 100644
index 000000000..5c1ae4f1c
--- /dev/null
+++ b/__tests__/same-name-disambiguation.test.ts
@@ -0,0 +1,138 @@
+/**
+ * Same-named symbols across monorepo apps (#764).
+ *
+ * A NestJS-style monorepo has one `UserService` (and friends) per app. The
+ * graph keeps them as distinct nodes (import + proximity resolution), but the
+ * MCP tools used to AGGREGATE them: callers/callees returned one merged list
+ * and impact merged both blast radii — the conflation agents warned about.
+ *
+ * Now: multiple DISTINCT definitions (different file/qualified-name) render
+ * one section per definition, and `file` narrows to a single definition.
+ * Same-file overloads still merge (that's the overload feature).
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { CodeGraph } from '../src';
+import { ToolHandler } from '../src/mcp/tools';
+import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';
+
+let tmpDir: string;
+let cg: CodeGraph;
+let handler: ToolHandler;
+
+const text = async (tool: string, args: Record<string, unknown>): Promise<string> => {
+  const res = await handler.execute(tool, args);
+  return res.content?.[0]?.text ?? '';
+};
+
+beforeAll(async () => {
+  await initGrammars();
+  await loadAllGrammars();
+
+  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-764-'));
+  const mk = (rel: string, content: string) => {
+    const p = path.join(tmpDir, rel);
+    fs.mkdirSync(path.dirname(p), { recursive: true });
+    fs.writeFileSync(p, content);
+  };
+
+  for (const app of ['billing', 'admin']) {
+    mk(
+      `apps/${app}/src/users/user.service.ts`,
+      [
+        "import { UserRepository } from './user.repository';",
+        'export class UserService {',
+        '  constructor(private readonly repo: UserRepository) {}',
+        '  findAll(): string[] {',
+        `    return this.repo.load_${app}();`,
+        '  }',
+        '}',
+      ].join('\n')
+    );
+    mk(
+      `apps/${app}/src/users/user.repository.ts`,
+      `export class UserRepository {\n  load_${app}(): string[] { return []; }\n}\n`
+    );
+    mk(
+      `apps/${app}/src/users/user.controller.ts`,
+      [
+        "import { UserService } from './user.service';",
+        'export class UserController {',
+        '  constructor(private readonly users: UserService) {}',
+        '  list(): string[] { return this.users.findAll(); }',
+        '}',
+      ].join('\n')
+    );
+  }
+
+  cg = CodeGraph.initSync(tmpDir);
+  await cg.indexAll();
+  handler = new ToolHandler(cg);
+}, 120_000);
+
+afterAll(() => {
+  cg?.destroy();
+  if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
+});
+
+describe('same-named symbols across apps (#764)', () => {
+  it('graph keeps the apps apart: no cross-app edges at all', () => {
+    const billing = new Set(
+      cg.getNodesByName('findAll').filter((n) => n.filePath.includes('billing')).map((n) => n.id)
+    );
+    for (const id of billing) {
+      for (const e of cg.getIncomingEdges(id)) {
+        const src = cg.getNode(e.source);
+        expect(src?.filePath.includes('admin')).toBe(false);
+      }
+    }
+  });
+
+  it('callers: one section per distinct definition, each with only its own callers', async () => {
+    const out = await text('codegraph_callers', { symbol: 'findAll' });
+    expect(out).toContain('2 distinct definitions');
+    // Section per definition…
+    expect(out).toContain('apps/admin/src/users/user.service.ts');
+    expect(out).toContain('apps/billing/src/users/user.service.ts');
+    // …and the billing section must list the billing controller, not admin's.
+    const billingSection = out.slice(out.indexOf('apps/billing/src/users/user.service.ts'));
+    const billingBody = billingSection.slice(0, billingSection.indexOf('###', 3) > 0 ? billingSection.indexOf('###', 3) : undefined);
+    expect(billingBody).toContain('apps/billing/src/users/user.controller.ts');
+    expect(billingBody).not.toContain('apps/admin/src/users/user.controller.ts');
+  });
+
+  it('callers: `file` narrows to one definition (flat list, no stale aggregation note)', async () => {
+    const out = await text('codegraph_callers', {
+      symbol: 'findAll',
+      file: 'apps/billing/src/users/user.service.ts',
+    });
+    expect(out).not.toContain('distinct definitions');
+    expect(out).toContain('apps/billing/src/users/user.controller.ts');
+    expect(out).not.toContain('apps/admin/');
+    expect(out).not.toContain('Aggregated results');
+  });
+
+  it('callers: a non-matching `file` falls back to all definitions with a note', async () => {
+    const out = await text('codegraph_callers', { symbol: 'findAll', file: 'apps/nonexistent/x.ts' });
+    expect(out).toContain('no definition of "findAll" matches file');
+    expect(out).toContain('2 distinct definitions');
+  });
+
+  it('impact: separate blast radius per definition, never a merged one', async () => {
+    const out = await text('codegraph_impact', { symbol: 'UserService' });
+    expect(out).toContain('2 distinct definitions');
+    // Each section's count covers ONE app (service + ctor + findAll +
+    // controller side), not the union of both.
+    const counts = [...out.matchAll(/affects (\d+) symbols/g)].map((m) => Number(m[1]));
+    expect(counts).toHaveLength(2);
+    for (const c of counts) expect(c).toBeLessThanOrEqual(7);
+  });
+
+  it('callees: grouped the same way', async () => {
+    const out = await text('codegraph_callees', { symbol: 'list' });
+    expect(out).toContain('2 distinct definitions');
+  });
+});
diff --git a/src/extraction/extraction-version.ts b/src/extraction/extraction-version.ts
index 7ce49929b..618a1b1c3 100644
--- a/src/extraction/extraction-version.ts
+++ b/src/extraction/extraction-version.ts
@@ -21,4 +21,4 @@
  * turns the re-index hint into noise — keep it honest (see CLAUDE.md, "Honesty
  * in the product is load-bearing").
  */
-export const EXTRACTION_VERSION = 23;
+export const EXTRACTION_VERSION = 24;
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
index cfa730eaf..3e40d2c8d 100644
--- a/src/mcp/server-instructions.ts
+++ b/src/mcp/server-instructions.ts
@@ -47,7 +47,7 @@ typically one to a few calls; a grep/read exploration is dozens.
 - **Almost any question — "how does X work", architecture, a bug, "what/where is X", or surveying an area** → \`codegraph_explore\` (PRIMARY — call FIRST; ONE capped call returns the verbatim source of the relevant symbols grouped by file; most often the ONLY call you need)
 - **"How does X reach/become Y? / the flow / the path from X to Y"** → \`codegraph_explore\`, naming the symbols that span the flow (e.g. \`mutateElement renderScene\`) — it surfaces the call path among them, including dynamic-dispatch hops (callbacks, React re-render, JSX children) grep can't follow
 - **"What is the symbol named X?" (just its location)** → \`codegraph_search\`
-- **"What calls this?" / "What does this call?" / "What would changing this break?"** → \`codegraph_callers\` / \`codegraph_callees\` / \`codegraph_impact\`. Callers includes where a function is **registered as a callback** (passed as an argument, assigned to a function pointer/field, listed in a handler table) — labeled "via callback registration" — so a function with no direct calls is NOT dead if it's wired up somewhere
+- **"What calls this?" / "What does this call?" / "What would changing this break?"** → \`codegraph_callers\` / \`codegraph_callees\` / \`codegraph_impact\`. Callers includes where a function is **registered as a callback** (passed as an argument, assigned to a function pointer/field, listed in a handler table) — labeled "via callback registration" — so a function with no direct calls is NOT dead if it's wired up somewhere. When several UNRELATED symbols share a name (one \`UserService\` per monorepo app), these tools report **one section per definition** (never a merged list) — pass \`file\` to focus the definition you mean
 - **Reading a source FILE (any time you'd use the \`Read\` tool)** → \`codegraph_node\` with a \`file\` path and no \`symbol\`. It returns the file's **current source with line numbers — the same \`<n>\\t<line>\` shape \`Read\` gives you, safe to \`Edit\` from** — narrowable with \`offset\`/\`limit\` exactly like \`Read\`, PLUS a one-line note of which files depend on it. Same bytes as \`Read\`, faster (served from the index), with the blast radius attached. Use it **instead of \`Read\`** for indexed source files; fall back to \`Read\` only for what codegraph doesn't index (configs, docs). Pass \`symbolsOnly: true\` for just the file's structure.
 - **About to read or edit a symbol you can name** → \`codegraph_node\` with that \`symbol\` (SECONDARY — the after-explore depth tool): the verbatim source (\`includeCode: true\`) PLUS its caller/callee trail, so before changing it you see what calls it and what your edit would break. For an OVERLOADED name it returns EVERY matching definition's body in one call, so you never Read a file to find the right overload
 - **"What's in directory X?"** → \`codegraph_files\`
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 7351ca55d..0d5a9a26c 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -411,6 +411,10 @@ export const tools: ToolDefinition[] = [
           type: 'string',
           description: 'Name of the function, method, or class to find callers for',
         },
+        file: {
+          type: 'string',
+          description: 'Narrow to the definition in this file (path or suffix) when several same-named symbols exist (e.g. one UserService per app in a monorepo)',
+        },
         limit: {
           type: 'number',
           description: 'Maximum number of callers to return (default: 20)',
@@ -431,6 +435,10 @@ export const tools: ToolDefinition[] = [
           type: 'string',
           description: 'Name of the function, method, or class to find callees for',
         },
+        file: {
+          type: 'string',
+          description: 'Narrow to the definition in this file (path or suffix) when several same-named symbols exist',
+        },
         limit: {
           type: 'number',
           description: 'Maximum number of callees to return (default: 20)',
@@ -451,6 +459,10 @@ export const tools: ToolDefinition[] = [
           type: 'string',
           description: 'Name of the symbol to analyze impact for',
         },
+        file: {
+          type: 'string',
+          description: 'Narrow to the definition in this file (path or suffix) when several same-named symbols exist',
+        },
         depth: {
           type: 'number',
           description: 'How many levels of dependencies to traverse (default: 2)',
@@ -1095,6 +1107,47 @@ export class ToolHandler {
     return this.textResult(this.truncateOutput(formatted));
   }
 
+  /**
+   * Group symbol matches into DISTINCT DEFINITIONS — one group per
+   * (filePath, qualifiedName), so same-file overloads stay together while
+   * unrelated same-named classes across a monorepo's apps (#764: one
+   * `UserService` per NestJS app) are kept apart. Optionally narrowed by a
+   * `file` path/suffix first.
+   */
+  private groupDefinitions(
+    nodes: Node[],
+    fileFilter: string | undefined
+  ): { groups: Node[][]; filteredOut: boolean } {
+    let pool = nodes;
+    let filteredOut = false;
+    if (fileFilter) {
+      const wanted = fileFilter.replace(/^\.\//, '');
+      const narrowed = pool.filter(
+        (n) => n.filePath === wanted || n.filePath.endsWith(wanted) || n.filePath.endsWith(`/${wanted}`)
+      );
+      if (narrowed.length > 0) {
+        pool = narrowed;
+      } else {
+        filteredOut = true;
+      }
+    }
+    const byDef = new Map<string, Node[]>();
+    for (const n of pool) {
+      const key = `${n.filePath}|${n.qualifiedName}`;
+      const group = byDef.get(key);
+      if (group) group.push(n);
+      else byDef.set(key, [n]);
+    }
+    return { groups: [...byDef.values()], filteredOut };
+  }
+
+  /** Section heading for one distinct definition in grouped output. */
+  private definitionHeading(group: Node[]): string {
+    const head = group[0]!;
+    const line = head.startLine ? `:${head.startLine}` : '';
+    return `### ${head.qualifiedName} (${head.kind}) — ${head.filePath}${line}`;
+  }
+
   /**
    * Handle codegraph_callers
    */
@@ -1104,33 +1157,68 @@ export class ToolHandler {
 
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
     const limit = clamp((args.limit as number) || 20, 1, 100);
+    const fileFilter = typeof args.file === 'string' ? args.file : undefined;
 
     const allMatches = this.findAllSymbols(cg, symbol);
     if (allMatches.nodes.length === 0) {
       return this.textResult(`Symbol "${symbol}" not found in the codebase`);
     }
 
-    // Aggregate callers across all matching symbols
-    const seen = new Set<string>();
-    const allCallers: Node[] = [];
-    const labels = new Map<string, string>();
-    for (const node of allMatches.nodes) {
-      for (const c of cg.getCallers(node.id)) {
-        if (!seen.has(c.node.id)) {
-          seen.add(c.node.id);
-          allCallers.push(c.node);
-          const label = this.edgeLabel(c.edge);
-          if (label) labels.set(c.node.id, label);
+    const { groups, filteredOut } = this.groupDefinitions(allMatches.nodes, fileFilter);
+    const filterNote = filteredOut
+      ? `\n\n> **Note:** no definition of "${symbol}" matches file "${fileFilter}" — showing all definitions instead.`
+      : '';
+
+    const collect = (defNodes: Node[]) => {
+      const seen = new Set<string>();
+      const callers: Node[] = [];
+      const labels = new Map<string, string>();
+      for (const node of defNodes) {
+        for (const c of cg.getCallers(node.id)) {
+          if (!seen.has(c.node.id)) {
+            seen.add(c.node.id);
+            callers.push(c.node);
+            const label = this.edgeLabel(c.edge);
+            if (label) labels.set(c.node.id, label);
+          }
         }
       }
-    }
+      return { callers, labels };
+    };
 
-    if (allCallers.length === 0) {
-      return this.textResult(`No callers found for "${symbol}"${allMatches.note}`);
+    // Single definition (or same-file overloads): the familiar flat list.
+    if (groups.length === 1) {
+      const { callers, labels } = collect(groups[0]!);
+      if (callers.length === 0) {
+        return this.textResult(`No callers found for "${symbol}"${allMatches.note}${filterNote}`);
+      }
+      // A successful `file` narrowing makes the multi-symbol aggregation note
+      // stale — suppress it.
+      const note = fileFilter && !filteredOut ? '' : allMatches.note;
+      const formatted = this.formatNodeList(callers.slice(0, limit), `Callers of ${symbol}`, labels) + note + filterNote;
+      return this.textResult(this.truncateOutput(formatted));
     }
 
-    const formatted = this.formatNodeList(allCallers.slice(0, limit), `Callers of ${symbol}`, labels) + allMatches.note;
-    return this.textResult(this.truncateOutput(formatted));
+    // Multiple DISTINCT definitions (#764): one section per definition so an
+    // agent never mistakes one app's callers for another's. Narrow with
+    // `file` to focus a single definition.
+    const lines: string[] = [
+      `## Callers of ${symbol} — ${groups.length} distinct definitions (narrow with \`file\`)`,
+    ];
+    for (const group of groups) {
+      const { callers, labels } = collect(group);
+      lines.push('', this.definitionHeading(group));
+      if (callers.length === 0) {
+        lines.push('- (no callers)');
+        continue;
+      }
+      for (const node of callers.slice(0, limit)) {
+        const location = node.startLine ? `:${node.startLine}` : '';
+        const label = labels.get(node.id);
+        lines.push(`- ${node.name} (${node.kind}) - ${node.filePath}${location}${label ? ` — via ${label}` : ''}`);
+      }
+    }
+    return this.textResult(this.truncateOutput(lines.join('\n') + filterNote));
   }
 
   /**
@@ -1142,33 +1230,65 @@ export class ToolHandler {
 
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
     const limit = clamp((args.limit as number) || 20, 1, 100);
+    const fileFilter = typeof args.file === 'string' ? args.file : undefined;
 
     const allMatches = this.findAllSymbols(cg, symbol);
     if (allMatches.nodes.length === 0) {
       return this.textResult(`Symbol "${symbol}" not found in the codebase`);
     }
 
-    // Aggregate callees across all matching symbols
-    const seen = new Set<string>();
-    const allCallees: Node[] = [];
-    const labels = new Map<string, string>();
-    for (const node of allMatches.nodes) {
-      for (const c of cg.getCallees(node.id)) {
-        if (!seen.has(c.node.id)) {
-          seen.add(c.node.id);
-          allCallees.push(c.node);
-          const label = this.edgeLabel(c.edge);
-          if (label) labels.set(c.node.id, label);
+    const { groups, filteredOut } = this.groupDefinitions(allMatches.nodes, fileFilter);
+    const filterNote = filteredOut
+      ? `\n\n> **Note:** no definition of "${symbol}" matches file "${fileFilter}" — showing all definitions instead.`
+      : '';
+
+    const collect = (defNodes: Node[]) => {
+      const seen = new Set<string>();
+      const callees: Node[] = [];
+      const labels = new Map<string, string>();
+      for (const node of defNodes) {
+        for (const c of cg.getCallees(node.id)) {
+          if (!seen.has(c.node.id)) {
+            seen.add(c.node.id);
+            callees.push(c.node);
+            const label = this.edgeLabel(c.edge);
+            if (label) labels.set(c.node.id, label);
+          }
         }
       }
-    }
+      return { callees, labels };
+    };
 
-    if (allCallees.length === 0) {
-      return this.textResult(`No callees found for "${symbol}"${allMatches.note}`);
+    if (groups.length === 1) {
+      const { callees, labels } = collect(groups[0]!);
+      if (callees.length === 0) {
+        return this.textResult(`No callees found for "${symbol}"${allMatches.note}${filterNote}`);
+      }
+      // A successful `file` narrowing makes the multi-symbol aggregation note
+      // stale — suppress it.
+      const note = fileFilter && !filteredOut ? '' : allMatches.note;
+      const formatted = this.formatNodeList(callees.slice(0, limit), `Callees of ${symbol}`, labels) + note + filterNote;
+      return this.textResult(this.truncateOutput(formatted));
     }
 
-    const formatted = this.formatNodeList(allCallees.slice(0, limit), `Callees of ${symbol}`, labels) + allMatches.note;
-    return this.textResult(this.truncateOutput(formatted));
+    // Multiple DISTINCT definitions (#764): per-definition sections.
+    const lines: string[] = [
+      `## Callees of ${symbol} — ${groups.length} distinct definitions (narrow with \`file\`)`,
+    ];
+    for (const group of groups) {
+      const { callees, labels } = collect(group);
+      lines.push('', this.definitionHeading(group));
+      if (callees.length === 0) {
+        lines.push('- (no callees)');
+        continue;
+      }
+      for (const node of callees.slice(0, limit)) {
+        const location = node.startLine ? `:${node.startLine}` : '';
+        const label = labels.get(node.id);
+        lines.push(`- ${node.name} (${node.kind}) - ${node.filePath}${location}${label ? ` — via ${label}` : ''}`);
+      }
+    }
+    return this.textResult(this.truncateOutput(lines.join('\n') + filterNote));
   }
 
   /**
@@ -1180,39 +1300,59 @@ export class ToolHandler {
 
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
     const depth = clamp((args.depth as number) || 2, 1, 10);
+    const fileFilter = typeof args.file === 'string' ? args.file : undefined;
 
     const allMatches = this.findAllSymbols(cg, symbol);
     if (allMatches.nodes.length === 0) {
       return this.textResult(`Symbol "${symbol}" not found in the codebase`);
     }
 
-    // Aggregate impact across all matching symbols
-    const mergedNodes = new Map<string, Node>();
-    const mergedEdges: Edge[] = [];
-    const seenEdges = new Set<string>();
+    const { groups, filteredOut } = this.groupDefinitions(allMatches.nodes, fileFilter);
+    const filterNote = filteredOut
+      ? `\n\n> **Note:** no definition of "${symbol}" matches file "${fileFilter}" — showing all definitions instead.`
+      : '';
 
-    for (const node of allMatches.nodes) {
-      const impact = cg.getImpactRadius(node.id, depth);
-      for (const [id, n] of impact.nodes) {
-        mergedNodes.set(id, n);
-      }
-      for (const e of impact.edges) {
-        const key = `${e.source}->${e.target}:${e.kind}`;
-        if (!seenEdges.has(key)) {
-          seenEdges.add(key);
-          mergedEdges.push(e);
+    const impactOf = (defNodes: Node[]) => {
+      const mergedNodes = new Map<string, Node>();
+      const mergedEdges: Edge[] = [];
+      const seenEdges = new Set<string>();
+      for (const node of defNodes) {
+        const impact = cg.getImpactRadius(node.id, depth);
+        for (const [id, n] of impact.nodes) {
+          mergedNodes.set(id, n);
+        }
+        for (const e of impact.edges) {
+          const key = `${e.source}->${e.target}:${e.kind}`;
+          if (!seenEdges.has(key)) {
+            seenEdges.add(key);
+            mergedEdges.push(e);
+          }
         }
       }
-    }
-
-    const mergedImpact = {
-      nodes: mergedNodes,
-      edges: mergedEdges,
-      roots: allMatches.nodes.map(n => n.id),
+      return { nodes: mergedNodes, edges: mergedEdges, roots: defNodes.map((n) => n.id) };
     };
 
-    const formatted = this.formatImpact(symbol, mergedImpact) + allMatches.note;
-    return this.textResult(this.truncateOutput(formatted));
+    // Single definition (or same-file overloads): the familiar merged report.
+    if (groups.length === 1) {
+      const formatted = this.formatImpact(symbol, impactOf(groups[0]!)) + (fileFilter && !filteredOut ? "" : allMatches.note) + filterNote;
+      return this.textResult(this.truncateOutput(formatted));
+    }
+
+    // Multiple DISTINCT definitions (#764): a blast radius PER definition —
+    // merging unrelated same-named classes (one UserService per monorepo app)
+    // overstated impact and confused agents. Narrow with `file`.
+    const sections: string[] = [
+      `## Impact of ${symbol} — ${groups.length} distinct definitions (each with its own blast radius; narrow with \`file\`)`,
+    ];
+    for (const group of groups) {
+      const head = group[0]!;
+      const line = head.startLine ? `:${head.startLine}` : '';
+      sections.push(
+        '',
+        this.formatImpact(`${head.qualifiedName} (${head.filePath}${line})`, impactOf(group))
+      );
+    }
+    return this.textResult(this.truncateOutput(sections.join('\n') + filterNote));
   }
 
   /**
diff --git a/src/resolution/frameworks/react.ts b/src/resolution/frameworks/react.ts
index d60aef40f..05b0d8288 100644
--- a/src/resolution/frameworks/react.ts
+++ b/src/resolution/frameworks/react.ts
@@ -32,8 +32,19 @@ export const reactResolver: FrameworkResolver = {
   },
 
   resolve(ref: UnresolvedRef, context: ResolutionContext): ResolvedRef | null {
-    // Pattern 1: Component references (PascalCase)
-    if (isPascalCase(ref.referenceName) && !isBuiltInType(ref.referenceName)) {
+    // Pattern 1: Component references (PascalCase). Only from JSX-capable
+    // files — a component is USED in markup, which only parses in .tsx/.jsx.
+    // Without this gate, every PascalCase TYPE reference in plain .ts files
+    // went through component resolution: in a monorepo with same-named
+    // classes per package (#764, amplication), a `.ts` GraphQL-types file's
+    // own `Account` type alias lost to an arbitrary `Account` CLASS in
+    // another package (the framework's 0.8 outranked the name-matcher's
+    // proximity-correct 0.7).
+    if (
+      (ref.language === 'tsx' || ref.language === 'jsx') &&
+      isPascalCase(ref.referenceName) &&
+      !isBuiltInType(ref.referenceName)
+    ) {
       const result = resolveComponent(ref.referenceName, ref.filePath, context);
       if (result) {
         return {
@@ -305,7 +316,10 @@ function resolveComponent(
   );
   if (preferred.length > 0) return preferred[0]!.id;
 
-  return components[0]!.id;
+  // No positional signal: only an UNAMBIGUOUS name may resolve. Returning
+  // components[0] here picked an arbitrary same-named class anywhere in the
+  // repo (#764) — let the name-matcher's proximity scoring decide instead.
+  return components.length === 1 ? components[0]!.id : null;
 }
 
 /**

From 763ee9c82567645cbd6eeaff8dce73e48bcfa5d7 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 16:31:18 -0500
Subject: [PATCH 41/51] fix(resolution): Svelte/Vue component resolvers get the
 #764 ambiguity rule (#814)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to #813: the React resolver's blind components[0] fallback was
the demonstrated wrong-edge source, but the Svelte and Vue resolvers had
the same flaw in their own shape:

- svelte: resolveComponent fell back to components[0] across the whole
  repo when no same-directory match existed — an arbitrary pick among
  same-named components in a multi-app monorepo.
- vue: resolveComponent returned the FIRST basename-matching .vue file
  found anywhere in the tree; its same-directory pass below was
  unreachable dead code. apps/a/Button.vue vs apps/b/Button.vue was a
  file-enumeration-order coin flip.

Both now follow the #764 rule: a candidate under the referencing file's
directory wins first (a path-prefix match, so nested subdirectories count
too); otherwise only an UNAMBIGUOUS name resolves — ambiguity falls through
to the name-matcher's proximity scoring instead of guessing.

Safety: zero-delta A/B on the README's own framework benchmark repos
(sveltejs/realworld — the 100% Svelte coverage repo — and nuxt/movies,
93.5% Vue coverage) plus the excalidraw control: node counts identical,
no `calls` or `references` edges changed. Single-app repos typically have
unique component names, so the rule only bites where the old behavior was
already a coin flip. Full suite: 1398 passed.

Also verified the #813 per-definition tool grouping is language-agnostic
(probed Go same-named functions across packages — grouped identically to
the TS fixture).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                        |  2 +-
 src/resolution/frameworks/svelte.ts |  6 +++-
 src/resolution/frameworks/vue.ts    | 49 +++++++++++++----------------
 3 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9db49ec0d..74e63e738 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,7 +17,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### New Features
 
 - Same-named symbols across a monorepo's apps are no longer conflated. In a NestJS-style workspace with one `UserService` per app, `codegraph_callers`, `codegraph_callees`, and `codegraph_impact` now report **one section per distinct definition** — each app's callers and blast radius under its own file-labeled heading — instead of a single merged list, and accept a `file` argument to focus exactly the definition you mean (like `codegraph_node` already did). Impact in particular no longer overstates a change's blast radius by merging unrelated same-named classes. Thanks @Igorgro. (#764)
-- Fixed a related source of cross-package wrong edges: PascalCase **type references from plain `.ts` files were being resolved as React components**, which could link a file's own type alias to an arbitrary same-named class in another package (on one large monorepo this produced over a thousand wrong cross-package reference edges; 96% are now gone, and the remainder are genuine shared-model imports). Component resolution now applies only to references from JSX-capable files and never guesses between multiple candidates without a positional signal. Re-index a project to benefit. (#764) (TypeScript, React)
+- Fixed a related source of cross-package wrong edges: PascalCase **type references from plain `.ts` files were being resolved as React components**, which could link a file's own type alias to an arbitrary same-named class in another package (on one large monorepo this produced over a thousand wrong cross-package reference edges; 96% are now gone, and the remainder are genuine shared-model imports). Component resolution now applies only to references from JSX-capable files and never guesses between multiple candidates without a positional signal. The **Svelte and Vue component resolvers had the same arbitrary-pick flaw** (Vue resolved the first same-named `.vue` file found anywhere in the tree) and now follow the same rule: same-directory first, otherwise only an unambiguous name resolves. Re-index a project to benefit. (#764) (TypeScript, React, Svelte, Vue)
 - TypeScript and JavaScript **class fields are now reported as properties instead of methods**. A plain field like `public fonts: Fonts;` previously extracted as a method, misrepresenting class shape and letting calls to same-named functions resolve to data fields (a boolean field named `isArray` was soaking up `Array.isArray(...)` call edges). Fields holding arrow functions or function expressions (`onClick = () => {…}`, including wrapped ones like `onScroll = throttle(() => {…})`) correctly remain methods and their bodies are still analyzed. Field initializers are analyzed too, so `history = createHistory()` records its call — and JavaScript class fields, which previously produced no symbol at all, now appear in the graph. Re-index a project to benefit. (#808) (TypeScript, JavaScript)
 - Callback registration through `this` now resolves precisely in TypeScript and JavaScript: `window.addEventListener("online", this.onOfflineStatusToggle)` or an API object like `{ mutateElement: this.mutateElement }` produces a reference edge to the **enclosing class's own method** — never a same-named method on an unrelated class, and never a data field. Builds on the callback-registration support below. (#808) (TypeScript, JavaScript)
 - Callback-registration coverage deepened across four more shapes: a `this.<member>` registration whose method lives on a **base class** now resolves through the inheritance chain (`bus.on("submit", this.handleSubmit)` in a subclass links to the parent's `handleSubmit`); Java and Kotlin **method references to other classes** (`Handlers::onMessage`, `OtherClass::handle`) resolve across files, with `this::` and `super::` scoped to the defining class and references through a variable deliberately left out; and Swift bare callback names now match only the **enclosing type's** methods (implicit `self`), eliminating a class of wrong edges where a parameter like `request` linked to a same-named method on an unrelated type. (Java, Kotlin, Swift, TypeScript, JavaScript)
diff --git a/src/resolution/frameworks/svelte.ts b/src/resolution/frameworks/svelte.ts
index 8848c8576..684852957 100644
--- a/src/resolution/frameworks/svelte.ts
+++ b/src/resolution/frameworks/svelte.ts
@@ -220,7 +220,11 @@ function resolveComponent(
   const sameDir = components.filter((n) => n.filePath.startsWith(fromDir));
   if (sameDir.length > 0) return sameDir[0]!.id;
 
-  return components[0]!.id;
+  // No positional signal: only an UNAMBIGUOUS name may resolve — picking
+  // components[0] chose an arbitrary same-named component in a multi-app
+  // monorepo (#764). Ambiguity falls through to the name-matcher, whose
+  // proximity scoring decides.
+  return components.length === 1 ? components[0]!.id : null;
 }
 
 /**
diff --git a/src/resolution/frameworks/vue.ts b/src/resolution/frameworks/vue.ts
index 14b7760bd..c830885be 100644
--- a/src/resolution/frameworks/vue.ts
+++ b/src/resolution/frameworks/vue.ts
@@ -279,39 +279,32 @@ function resolveComponent(
   fromFile: string,
   context: ResolutionContext
 ): string | null {
-  const allFiles = context.getAllFiles();
-  const vueFiles = allFiles.filter((f) => f.endsWith('.vue'));
-
-  // Check for exact name match (Button -> Button.vue)
-  for (const file of vueFiles) {
+  // Collect ALL basename matches first. The previous version returned the
+  // FIRST `Button.vue` found anywhere in the tree (its same-directory pass
+  // below was unreachable), so a multi-app monorepo with one `Button.vue`
+  // per app resolved to an arbitrary one (#764).
+  const matches: string[] = [];
+  for (const file of context.getAllFiles()) {
+    if (!file.endsWith('.vue')) continue;
     const fileName = file.split(/[/\\]/).pop() || '';
-    const componentName = fileName.replace(/\.vue$/, '');
-    if (componentName === name) {
-      const nodes = context.getNodesInFile(file);
-      const component = nodes.find((n) => n.kind === 'component' && n.name === name);
-      if (component) {
-        return component.id;
-      }
-    }
+    if (fileName.replace(/\.vue$/, '') === name) matches.push(file);
   }
+  if (matches.length === 0) return null;
+
+  const componentIn = (file: string): string | null => {
+    const nodes = context.getNodesInFile(file);
+    const component = nodes.find((n) => n.kind === 'component' && n.name === name);
+    return component ? component.id : null;
+  };
 
-  // Check same directory first for better specificity
+  // Same directory first for specificity
   const fromDir = fromFile.substring(0, fromFile.lastIndexOf('/'));
-  for (const file of vueFiles) {
-    if (file.startsWith(fromDir)) {
-      const fileName = file.split(/[/\\]/).pop() || '';
-      const componentName = fileName.replace(/\.vue$/, '');
-      if (componentName === name) {
-        const nodes = context.getNodesInFile(file);
-        const component = nodes.find((n) => n.kind === 'component');
-        if (component) {
-          return component.id;
-        }
-      }
-    }
-  }
+  const sameDir = matches.filter((f) => f.startsWith(fromDir));
+  if (sameDir.length > 0) return componentIn(sameDir[0]!);
 
-  return null;
+  // No positional signal: only an UNAMBIGUOUS basename may resolve;
+  // ambiguity falls through to the name-matcher's proximity scoring.
+  return matches.length === 1 ? componentIn(matches[0]!) : null;
 }
 
 /**

From 823ffd1c3d909180360c2c88d21fb13fabb26fb4 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 18:50:11 -0500
Subject: [PATCH 42/51] =?UTF-8?q?feat(extraction+resolution):=20Astro=20su?=
 =?UTF-8?q?pport=20=E2=80=94=20frontmatter/template=20extraction=20+=20src?=
 =?UTF-8?q?/pages=20routes=20(#768)=20(#815)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.astro files were not indexed at all, leaving a typical Astro site mostly
invisible to search/impact/explore. New AstroExtractor (Svelte/Vue SFC
pattern): component node per file, TS frontmatter + <script> blocks
delegated to the TypeScript extractor, template {fn(...)} calls (incl. the
multiline `{posts.map((post) => (` opening line), PascalCase component-tag
references. New astroResolver: Astro global + astro:* virtual modules as
framework-provided, component resolution with the #764 ambiguity rule,
src/pages/ file-based routes ([param]→:param, [...rest]→*rest, _-prefixed
and *.config.* excluded). SFC languages now preload the TS/JS grammars
their extractors delegate to (a pure-SFC file set previously had none
loaded). Also fixes a pre-existing Svelte/Vue script-block off-by-one that
reported every script symbol one line below its real position (a function
on line 3 was reported at line 4).

Validated per the playbook: stalux (the issue's repro) 54/54 .astro files
indexed, getIconNode found at its exact line, 14/14 routes, 93.0% fair
cross-file coverage; AstroPaper 27/27 components, 13/13 routes (underscore
dirs correctly excluded), explore connects page→Card→Datetime through the
jsx-render synthesizer; node/edge counts stable across re-syncs.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                       |   2 +
 README.md                          |   9 +-
 __tests__/extraction.test.ts       | 212 +++++++++++++++++
 __tests__/frameworks.test.ts       |  72 ++++++
 __tests__/resolution.test.ts       |  41 ++++
 src/extraction/astro-extractor.ts  | 365 +++++++++++++++++++++++++++++
 src/extraction/grammars.ts         |  17 +-
 src/extraction/svelte-extractor.ts |   9 +-
 src/extraction/tree-sitter.ts      |   5 +
 src/extraction/vue-extractor.ts    |   9 +-
 src/mcp/tools.ts                   |   4 +-
 src/resolution/frameworks/astro.ts | 195 +++++++++++++++
 src/resolution/frameworks/index.ts |   3 +
 src/resolution/import-resolver.ts  |   6 +-
 src/types.ts                       |   1 +
 15 files changed, 934 insertions(+), 16 deletions(-)
 create mode 100644 src/extraction/astro-extractor.ts
 create mode 100644 src/resolution/frameworks/astro.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 74e63e738..570b17537 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
+- **Astro projects are now indexed.** `.astro` files previously weren't parsed at all — on a typical Astro site that left most of the codebase invisible to search, impact, and `codegraph_explore`. CodeGraph now extracts the TypeScript frontmatter (functions, imports, `getStaticPaths`, …) and client-side `<script>` blocks, captures function calls and `<Component>` usages in template markup so cross-component dependencies trace end-to-end, resolves the `Astro` global and `astro:*` module imports as framework-provided, and maps `src/pages/` file-based routing to route nodes (`.astro` pages and `.ts` endpoints, including `[param]` and `[...rest]` dynamic segments, with underscore-prefixed files correctly excluded). Validated on two real-world Astro sites with 93% measured cross-file coverage and every page mapping to its route. Thanks @xingwangzhe. (#768) (Astro)
 - Same-named symbols across a monorepo's apps are no longer conflated. In a NestJS-style workspace with one `UserService` per app, `codegraph_callers`, `codegraph_callees`, and `codegraph_impact` now report **one section per distinct definition** — each app's callers and blast radius under its own file-labeled heading — instead of a single merged list, and accept a `file` argument to focus exactly the definition you mean (like `codegraph_node` already did). Impact in particular no longer overstates a change's blast radius by merging unrelated same-named classes. Thanks @Igorgro. (#764)
 - Fixed a related source of cross-package wrong edges: PascalCase **type references from plain `.ts` files were being resolved as React components**, which could link a file's own type alias to an arbitrary same-named class in another package (on one large monorepo this produced over a thousand wrong cross-package reference edges; 96% are now gone, and the remainder are genuine shared-model imports). Component resolution now applies only to references from JSX-capable files and never guesses between multiple candidates without a positional signal. The **Svelte and Vue component resolvers had the same arbitrary-pick flaw** (Vue resolved the first same-named `.vue` file found anywhere in the tree) and now follow the same rule: same-directory first, otherwise only an unambiguous name resolves. Re-index a project to benefit. (#764) (TypeScript, React, Svelte, Vue)
 - TypeScript and JavaScript **class fields are now reported as properties instead of methods**. A plain field like `public fonts: Fonts;` previously extracted as a method, misrepresenting class shape and letting calls to same-named functions resolve to data fields (a boolean field named `isArray` was soaking up `Array.isArray(...)` call edges). Fields holding arrow functions or function expressions (`onClick = () => {…}`, including wrapped ones like `onScroll = throttle(() => {…})`) correctly remain methods and their bodies are still analyzed. Field initializers are analyzed too, so `history = createHistory()` records its call — and JavaScript class fields, which previously produced no symbol at all, now appear in the graph. Re-index a project to benefit. (#808) (TypeScript, JavaScript)
@@ -38,6 +39,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- Symbols defined in Svelte and Vue `<script>` blocks were reported one line below where they actually are — a function on line 3 was reported at line 4 — which offset every script-block symbol's location in search, `codegraph_node`, and explore output. Line numbers now match the file exactly. Re-index a project to benefit. (Svelte, Vue)
 - Doc comments are now captured for exported, `const`-assigned, and decorated declarations, and the documentation a symbol carries is now clean across every supported language. Previously a comment above `export class X`, `export const fn = () => …`, a plain `const fn = () => …`, or a decorated Python `def`/`class` (`@app.route(...)`, `@dataclass`) was dropped entirely — only comments directly above a plain declaration were kept. CodeGraph now finds the comment through the `export` / `const` / decorator wrapper. Comment-marker cleanup was also rounded out for every language CodeGraph supports: Rust/Swift/Kotlin doc lines (`///`, `//!`), Python/Ruby/shell `#`, Lua/Luau (`--` and `--[[ ]]`), and Pascal (`{ }` and `(* *)`) no longer leave stray markers in the stored text — validated end-to-end across all 19 code languages plus Svelte/Vue `<script>` blocks. (#780). Thanks @caleb-kaiser.
 - Go method calls made through a chained factory function now resolve to the correct type. A call like `New().Method()` used to drop the receiver, so the chained method attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Go return types (a pointer `*Foo` resolves to `Foo`, and a multi-return `(*Foo, error)` to its first result), infers the chained receiver's type from what the factory function returns, and resolves the method on it — including methods promoted from an embedded struct — creating the edge only when the type or an embedded type genuinely has the method. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Go)
 - Scala method calls made through a companion-object factory, a fluent chain, or a case-class `apply` now resolve to the correct type. A call like `Foo.create().bar()` or `Builder(cfg).bar()` used to drop the receiver, so the chained method silently attached to a same-named method on an unrelated type — most often mis-attributing a standard-library `Option` / `Iterator` `.map` / `.flatMap` / `.foreach` onto your own same-named class. CodeGraph now captures Scala return types (a generic `List[Foo]` resolves to its container `List`, a qualified `pkg.Foo` to `Foo`), infers the chained receiver's type from what the inner call returns or constructs, and resolves the method on it — including methods inherited from a trait the type extends — creating the edge only when that type or one of its traits genuinely has the method (so a wrong inference produces no edge instead of a misleading one). Existing Scala indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Scala)
diff --git a/README.md b/README.md
index 11c379b48..e5e0088ae 100644
--- a/README.md
+++ b/README.md
@@ -225,8 +225,8 @@ CodeGraph cuts **tokens, tool calls, and wall-clock time on every repo** — acr
 | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 |
 | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes |
 | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config |
-| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, Svelte, Vue, Liquid, Pascal/Delphi |
-| **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 16 frameworks |
+| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, Svelte, Vue, Astro, Liquid, Pascal/Delphi |
+| **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 17 frameworks |
 | **Mixed iOS / React Native / Expo** | Closes cross-language flows that static parsing misses: Swift ↔ ObjC bridging, React Native legacy bridge + TurboModules + Fabric view components, native → JS event emitters, Expo Modules |
 | **100% Local** | No data leaves your machine. No API keys. No external services. SQLite database only |
 
@@ -281,6 +281,7 @@ CodeGraph detects web-framework routing files and emits `route` nodes linked by
 | **Vapor** | `app.get("x", use: handler)` |
 | **React Router** / **SvelteKit** | Route component nodes |
 | **Vue Router** / **Nuxt** | `pages/` file-based routes, `server/api/` endpoints, route middleware |
+| **Astro** | `src/pages/` file-based routes (`.astro` pages + `.ts` endpoints, `[param]`/`[...rest]` syntax) |
 
 ---
 
@@ -636,6 +637,7 @@ is written):
 | Dart | `.dart` | Full support |
 | Svelte | `.svelte` | Full support (script extraction, Svelte 5 runes, SvelteKit routes) |
 | Vue | `.vue` | Full support (script + script-setup extraction, Nuxt page/API/middleware routes) |
+| Astro | `.astro` | Full support (frontmatter + script extraction, template component/call references, `src/pages/` routes) |
 | Liquid | `.liquid` | Full support |
 | Pascal / Delphi | `.pas`, `.dpr`, `.dpk`, `.lpr` | Full support (classes, records, interfaces, enums, DFM/FMX form files) |
 | Lua | `.lua` | Full support (functions, methods with receivers, local variables, `require` imports, call edges) |
@@ -664,12 +666,13 @@ Impact and blast-radius queries are only as good as the dependency graph behind
 | Dart | flutter/packages | 92.4% |
 | Svelte / SvelteKit | sveltejs/realworld | 100% |
 | Vue / Nuxt | nuxt/movies | 93.5% |
+| Astro | xingwangzhe/stalux | 93.0% |
 | Lua | nvim-telescope/telescope.nvim | 84.2% |
 | Luau | dphfox/Fusion | 92.2% |
 | Liquid | Shopify/dawn | 73.8% |
 | Pascal / Delphi | PascalCoin | 77.4% |
 
-Framework routing is validated the same way, on a canonical app per framework: Express 100%, FastAPI 98%, Flask 100%, NestJS 96.8%, Gin 96.5%, Axum 100%, Rocket 93.8%, Vapor 100%, Laravel 92%, Rails 89.6%, React Router 100% — and the convention/reflection-heavy ones at their honest static-analysis ceiling: ASP.NET 83.9%, Spring 83.3%, Drupal 78.9%, Play 76.3%, Django 74.1%. SvelteKit and Vue/Nuxt use file-based routing, so their page/endpoint coverage is the Svelte/SvelteKit (100%) and Vue/Nuxt (93.5%) figures in the table above.
+Framework routing is validated the same way, on a canonical app per framework: Express 100%, FastAPI 98%, Flask 100%, NestJS 96.8%, Gin 96.5%, Axum 100%, Rocket 93.8%, Vapor 100%, Laravel 92%, Rails 89.6%, React Router 100% — and the convention/reflection-heavy ones at their honest static-analysis ceiling: ASP.NET 83.9%, Spring 83.3%, Drupal 78.9%, Play 76.3%, Django 74.1%. SvelteKit, Vue/Nuxt, and Astro use file-based routing, so their page/endpoint coverage is the Svelte/SvelteKit (100%), Vue/Nuxt (93.5%), and Astro (93.0% — every `src/pages/` file maps to a route node on the two validation repos) figures in the table above.
 
 ## Troubleshooting
 
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index 907b46658..7f2d13f5f 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -5895,6 +5895,218 @@ const value = 42;
   });
 });
 
+describe('Astro Extraction', () => {
+  it('should detect Astro files', () => {
+    expect(detectLanguage('src/pages/index.astro')).toBe('astro');
+    expect(detectLanguage('Layout.astro')).toBe('astro');
+    expect(isLanguageSupported('astro')).toBe(true);
+  });
+
+  it('should extract component node from an .astro file', () => {
+    const code = `---
+const title = 'Hello';
+---
+<h1>{title}</h1>
+`;
+    const result = extractFromSource('Card.astro', code);
+
+    const componentNode = result.nodes.find((n) => n.kind === 'component');
+    expect(componentNode).toBeDefined();
+    expect(componentNode?.name).toBe('Card');
+    expect(componentNode?.language).toBe('astro');
+    expect(componentNode?.isExported).toBe(true);
+  });
+
+  it('should extract frontmatter symbols with correct line numbers (#768)', () => {
+    const code = `---
+import { formatDate } from '../utils/format';
+
+function getIconNode(name: string): string {
+  return name;
+}
+
+const { title } = Astro.props;
+---
+<span>{title}</span>
+`;
+    const result = extractFromSource('navs.astro', code);
+
+    // The #768 repro: a function defined in frontmatter must be found
+    const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'getIconNode');
+    expect(fn).toBeDefined();
+    expect(fn?.language).toBe('astro');
+    expect(fn?.startLine).toBe(4);
+
+    const imp = result.nodes.find((n) => n.kind === 'import');
+    expect(imp).toBeDefined();
+    expect(imp?.startLine).toBe(2);
+  });
+
+  it('should extract exported getStaticPaths from frontmatter', () => {
+    const code = `---
+export async function getStaticPaths() {
+  return [];
+}
+const { slug } = Astro.params;
+---
+<p>{slug}</p>
+`;
+    const result = extractFromSource('[slug].astro', code);
+
+    const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'getStaticPaths');
+    expect(fn).toBeDefined();
+    expect(fn?.isExported).toBe(true);
+  });
+
+  it('should extract calls from template expressions', () => {
+    const code = `---
+import { formatDate } from '../utils/format';
+const date = new Date();
+---
+<time>{formatDate(date)}</time>
+`;
+    const result = extractFromSource('Stamp.astro', code);
+
+    const call = result.unresolvedReferences.find(
+      (ref) => ref.referenceKind === 'calls' && ref.referenceName === 'formatDate' && ref.line === 5
+    );
+    expect(call).toBeDefined();
+  });
+
+  it('should extract calls from a multiline expression opening line', () => {
+    const code = `---
+const posts = [];
+---
+<ul>
+  {posts.map((post) => (
+    <li>{render(post)}</li>
+  ))}
+</ul>
+`;
+    const result = extractFromSource('List.astro', code);
+
+    const mapCall = result.unresolvedReferences.find(
+      (ref) => ref.referenceKind === 'calls' && ref.referenceName === 'posts.map'
+    );
+    expect(mapCall).toBeDefined();
+    const innerCall = result.unresolvedReferences.find(
+      (ref) => ref.referenceKind === 'calls' && ref.referenceName === 'render'
+    );
+    expect(innerCall).toBeDefined();
+  });
+
+  it('should extract PascalCase component usages from the template', () => {
+    const code = `---
+import Layout from '../layouts/Layout.astro';
+import PostCard from '../components/PostCard.astro';
+---
+<Layout title="Home">
+  <PostCard />
+  <Fragment slot="head" />
+  <div class="plain-html" />
+</Layout>
+`;
+    const result = extractFromSource('index.astro', code);
+
+    const refs = result.unresolvedReferences.filter((r) => r.referenceKind === 'references');
+    const names = refs.map((r) => r.referenceName);
+    expect(names).toContain('Layout');
+    expect(names).toContain('PostCard');
+    // Astro built-ins and lowercase HTML are not component references
+    expect(names).not.toContain('Fragment');
+    expect(names).not.toContain('div');
+  });
+
+  it('should not extract template patterns from frontmatter, script, or style content', () => {
+    const code = `---
+// <FakeComponent /> inside frontmatter comment
+const x = { y: maybeCall(1) };
+---
+<div>real</div>
+<script>
+  const z = { w: scriptCall(2) };
+</script>
+<style>
+  .a { color: red; }
+</style>
+`;
+    const result = extractFromSource('Guard.astro', code);
+
+    const templateRefs = result.unresolvedReferences.filter(
+      (r) => r.referenceKind === 'references' && r.referenceName === 'FakeComponent'
+    );
+    expect(templateRefs).toHaveLength(0);
+
+    // maybeCall/scriptCall come from the delegated TS extraction (once),
+    // not double-counted by the template scanner
+    const maybeCalls = result.unresolvedReferences.filter(
+      (r) => r.referenceName === 'maybeCall' && r.referenceKind === 'calls'
+    );
+    expect(maybeCalls.length).toBeLessThanOrEqual(1);
+  });
+
+  it('should extract <script> block symbols with correct line numbers', () => {
+    const code = `---
+const a = 1;
+---
+<div>hi</div>
+<script>
+function trackView(page: string) {
+  console.log(page);
+}
+</script>
+`;
+    const result = extractFromSource('Tracker.astro', code);
+
+    const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'trackView');
+    expect(fn).toBeDefined();
+    expect(fn?.startLine).toBe(6);
+    expect(fn?.language).toBe('astro');
+  });
+
+  it('should create component node for a frontmatter-less template-only file', () => {
+    const code = `<div>Static content</div>
+`;
+    const result = extractFromSource('Static.astro', code);
+
+    const componentNode = result.nodes.find((n) => n.kind === 'component');
+    expect(componentNode).toBeDefined();
+    expect(componentNode?.name).toBe('Static');
+    expect(componentNode?.language).toBe('astro');
+  });
+
+  it('should treat an unclosed frontmatter fence as no frontmatter', () => {
+    const code = `---
+const broken = true;
+<div>never closed</div>
+`;
+    const result = extractFromSource('Broken.astro', code);
+
+    // No TS delegation happened (the fence never closes), but the component
+    // node still exists and nothing throws.
+    const componentNode = result.nodes.find((n) => n.kind === 'component');
+    expect(componentNode).toBeDefined();
+    expect(result.nodes.find((n) => n.name === 'broken')).toBeUndefined();
+  });
+
+  it('should create containment edges from component to frontmatter nodes', () => {
+    const code = `---
+const value = 42;
+---
+<div>{value}</div>
+`;
+    const result = extractFromSource('Contained.astro', code);
+
+    const componentNode = result.nodes.find((n) => n.kind === 'component');
+    expect(componentNode).toBeDefined();
+
+    const containEdges = result.edges.filter(
+      (e) => e.source === componentNode!.id && e.kind === 'contains'
+    );
+    expect(containEdges.length).toBeGreaterThan(0);
+  });
+});
+
 describe('Instantiates + Decorates edge extraction', () => {
   it('emits an instantiates ref for `new Foo()`', () => {
     const code = `
diff --git a/__tests__/frameworks.test.ts b/__tests__/frameworks.test.ts
index c0e874908..ff1abb57b 100644
--- a/__tests__/frameworks.test.ts
+++ b/__tests__/frameworks.test.ts
@@ -1373,6 +1373,7 @@ func boot(routes: RoutesBuilder) throws {
 
 import { reactResolver } from '../src/resolution/frameworks/react';
 import { svelteResolver } from '../src/resolution/frameworks/svelte';
+import { astroResolver } from '../src/resolution/frameworks/astro';
 
 describe('reactResolver.extract — React Router', () => {
   it('extracts a v6 <Route path element={<Comp/>}>', () => {
@@ -1428,6 +1429,77 @@ describe('svelteResolver.extract (smoke)', () => {
   });
 });
 
+describe('astroResolver.extract — src/pages file-based routing', () => {
+  const routeNames = (filePath: string): string[] =>
+    astroResolver.extract!(filePath, '').nodes.filter((n) => n.kind === 'route').map((n) => n.name);
+
+  it('maps index.astro to /', () => {
+    expect(routeNames('src/pages/index.astro')).toEqual(['/']);
+  });
+
+  it('maps nested index and plain pages', () => {
+    expect(routeNames('src/pages/blog/index.astro')).toEqual(['/blog']);
+    expect(routeNames('src/pages/about.astro')).toEqual(['/about']);
+  });
+
+  it('converts [param] and [...rest] syntax', () => {
+    expect(routeNames('src/pages/blog/[slug].astro')).toEqual(['/blog/:slug']);
+    expect(routeNames('src/pages/[...path].astro')).toEqual(['/*path']);
+  });
+
+  it('maps .ts endpoints under src/pages to routes', () => {
+    expect(routeNames('src/pages/api/posts.ts')).toEqual(['/api/posts']);
+    expect(routeNames('src/pages/rss.xml.js')).toEqual(['/rss.xml']);
+  });
+
+  it('excludes underscore-prefixed segments and config files', () => {
+    expect(routeNames('src/pages/_partial.astro')).toEqual([]);
+    expect(routeNames('src/pages/blog/_components/Card.astro')).toEqual([]);
+    expect(routeNames('src/pages/vite.config.ts')).toEqual([]);
+  });
+
+  it('ignores .astro files outside src/pages', () => {
+    expect(routeNames('src/components/Button.astro')).toEqual([]);
+    expect(routeNames('docs/pages/guide.astro')).toEqual([]);
+  });
+});
+
+describe('astroResolver.resolve — Astro global and virtual modules', () => {
+  const ctx = {} as never;
+  const baseRef = {
+    fromNodeId: 'component:a',
+    line: 1,
+    column: 0,
+    filePath: 'src/pages/index.astro',
+    language: 'astro',
+  };
+
+  it('claims Astro.* global references as framework-provided', () => {
+    const res = astroResolver.resolve(
+      { ...baseRef, referenceName: 'Astro.props', referenceKind: 'references' } as never,
+      ctx
+    );
+    expect(res?.resolvedBy).toBe('framework');
+    expect(res?.confidence).toBe(1.0);
+  });
+
+  it('claims astro:content virtual module imports', () => {
+    const res = astroResolver.resolve(
+      { ...baseRef, referenceName: 'astro:content', referenceKind: 'imports' } as never,
+      ctx
+    );
+    expect(res?.resolvedBy).toBe('framework');
+  });
+
+  it('leaves ordinary names alone', () => {
+    const res = astroResolver.resolve(
+      { ...baseRef, referenceName: 'astrolabe', referenceKind: 'calls' } as never,
+      { getNodesByName: () => [] } as never
+    );
+    expect(res).toBeNull();
+  });
+});
+
 // Regression tests: commented-out and docstring route examples must NOT
 // surface as phantom route nodes. These would have failed before the
 // strip-comments wiring (the regex would happily scan comments/docstrings).
diff --git a/__tests__/resolution.test.ts b/__tests__/resolution.test.ts
index 47c6b9220..3059392d4 100644
--- a/__tests__/resolution.test.ts
+++ b/__tests__/resolution.test.ts
@@ -1438,6 +1438,47 @@ func main() {
       expect(callers.some((c) => c.node.filePath === 'src/Bar.svelte')).toBe(true);
     });
 
+    it('links an .astro page to the component and TS util it uses (#768)', async () => {
+      // The canonical Astro shape: a page imports a layout/component in
+      // frontmatter and uses it as a template tag; the component's template
+      // calls an imported .ts util. Both hops must produce graph edges or
+      // an Astro project is invisible to callers/impact.
+      fs.mkdirSync(path.join(tempDir, 'src/components'), { recursive: true });
+      fs.mkdirSync(path.join(tempDir, 'src/utils'), { recursive: true });
+      fs.mkdirSync(path.join(tempDir, 'src/pages'), { recursive: true });
+      fs.writeFileSync(
+        path.join(tempDir, 'src/utils/format.ts'),
+        `export function formatDate(d: Date): string { return d.toISOString(); }\n`
+      );
+      fs.writeFileSync(
+        path.join(tempDir, 'src/components/PostCard.astro'),
+        `---\nimport { formatDate } from '../utils/format';\nconst { date } = Astro.props;\n---\n<time>{formatDate(date)}</time>\n`
+      );
+      fs.writeFileSync(
+        path.join(tempDir, 'src/pages/index.astro'),
+        `---\nimport PostCard from '../components/PostCard.astro';\n---\n<PostCard date={new Date()} />\n`
+      );
+
+      cg = await CodeGraph.init(tempDir, { index: true });
+      cg.resolveReferences();
+
+      // Hop 1: page → component (template tag through the frontmatter import)
+      const cardNode = cg
+        .getNodesByKind('component')
+        .find((n) => n.name === 'PostCard' && n.filePath === 'src/components/PostCard.astro');
+      expect(cardNode).toBeDefined();
+      const cardCallers = cg.getCallers(cardNode!.id);
+      expect(cardCallers.some((c) => c.node.filePath === 'src/pages/index.astro')).toBe(true);
+
+      // Hop 2: component template call → .ts util
+      const fmtNode = cg
+        .getNodesByKind('function')
+        .find((n) => n.name === 'formatDate' && n.filePath === 'src/utils/format.ts');
+      expect(fmtNode).toBeDefined();
+      const fmtCallers = cg.getCallers(fmtNode!.id);
+      expect(fmtCallers.some((c) => c.node.filePath === 'src/components/PostCard.astro')).toBe(true);
+    });
+
     it('resolves a bare directory import (import { x } from "." / "./") to index.ts (#629)', async () => {
       // `import { helper } from '.'` (or './') must map to the
       // directory's index.ts before the re-export chase can run. The
diff --git a/src/extraction/astro-extractor.ts b/src/extraction/astro-extractor.ts
new file mode 100644
index 000000000..e38989375
--- /dev/null
+++ b/src/extraction/astro-extractor.ts
@@ -0,0 +1,365 @@
+import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference } from '../types';
+import { generateNodeId } from './tree-sitter-helpers';
+import { TreeSitterExtractor } from './tree-sitter';
+import { isLanguageSupported } from './grammars';
+
+/**
+ * Astro built-in components — compiler-provided (`<Fragment>`) or shipped by
+ * `astro:components` (`<Code>`, `<Debug>`), not user code.
+ */
+const ASTRO_BUILTIN_COMPONENTS = new Set(['Fragment', 'Code', 'Debug']);
+
+/**
+ * AstroExtractor - Extracts code relationships from Astro component files
+ *
+ * Astro files are multi-language: a TypeScript frontmatter block fenced by
+ * `---` lines, a JSX-like HTML template, and optional <script>/<style> blocks.
+ * Rather than parsing a full Astro grammar, we extract the frontmatter and
+ * <script> contents and delegate them to the TypeScript TreeSitterExtractor
+ * (Astro processes both as TypeScript by default — no `lang` attr needed).
+ *
+ * Also extracts function calls from template expressions (`{fn(...)}`) and
+ * component usages (`<PascalCase>`) so cross-file edges are captured even
+ * when the only reference lives in markup.
+ *
+ * Every .astro file produces a component node (Astro components are always
+ * importable).
+ */
+export class AstroExtractor {
+  private filePath: string;
+  private source: string;
+  private nodes: Node[] = [];
+  private edges: Edge[] = [];
+  private unresolvedReferences: UnresolvedReference[] = [];
+  private errors: ExtractionError[] = [];
+
+  constructor(filePath: string, source: string) {
+    this.filePath = filePath;
+    this.source = source;
+  }
+
+  /**
+   * Extract from Astro source; a thrown error is caught and reported in `errors`.
+   */
+  extract(): ExtractionResult {
+    const startTime = Date.now();
+
+    try {
+      // Create component node for the .astro file itself
+      const componentNode = this.createComponentNode();
+
+      // Extract and process the frontmatter block (--- fenced, TypeScript)
+      const frontmatter = this.extractFrontmatter();
+      if (frontmatter) {
+        this.processScriptContent(frontmatter, componentNode.id, 'frontmatter');
+      }
+
+      // Extract and process <script> blocks (client-side, TypeScript-capable)
+      for (const block of this.extractScriptBlocks()) {
+        this.processScriptContent(block, componentNode.id, 'script');
+      }
+
+      // Ranges the template scans must skip: frontmatter + <script>/<style>
+      const coveredRanges = this.getCoveredRanges(frontmatter);
+
+      // Extract function calls from template expressions ({fn(...)})
+      this.extractTemplateCalls(componentNode.id, coveredRanges);
+
+      // Extract component usages from template (<ComponentName>)
+      this.extractTemplateComponents(componentNode.id, coveredRanges);
+    } catch (error) {
+      this.errors.push({
+        message: `Astro extraction error: ${error instanceof Error ? error.message : String(error)}`,
+        severity: 'error',
+        code: 'parse_error',
+      });
+    }
+
+    return {
+      nodes: this.nodes,
+      edges: this.edges,
+      unresolvedReferences: this.unresolvedReferences,
+      errors: this.errors,
+      durationMs: Date.now() - startTime,
+    };
+  }
+
+  /**
+   * Register the component node that stands for the whole .astro file: named
+   * after the file (minus `.astro`), spanning line 1 through the last line.
+   */
+  private createComponentNode(): Node {
+    const sourceLines = this.source.split('\n');
+    const lastLine = sourceLines[sourceLines.length - 1];
+    const baseName = this.filePath.split(/[/\\]/).pop() || this.filePath;
+    const name = baseName.replace(/\.astro$/, '');
+
+    const componentNode: Node = {
+      id: generateNodeId(this.filePath, 'component', name, 1),
+      kind: 'component',
+      name,
+      qualifiedName: `${this.filePath}::${name}`,
+      filePath: this.filePath,
+      language: 'astro',
+      startLine: 1,
+      endLine: sourceLines.length,
+      startColumn: 0,
+      endColumn: lastLine?.length || 0,
+      isExported: true, // every .astro file can be imported as a component
+      updatedAt: Date.now(),
+    };
+    this.nodes.push(componentNode);
+    return componentNode;
+  }
+
+  /**
+   * Locate the frontmatter block: the text between an opening `---` fence,
+   * which must be the first non-blank line of the file, and the next `---`
+   * line after it. An opening fence that is never closed counts as "no
+   * frontmatter" so the template is not swallowed as TypeScript.
+   *
+   * @returns the block's content, the 0-indexed line where that content
+   *   starts (`startLine`), and the 0-indexed line of the closing fence
+   *   (`endLine`); or null when there is no closed frontmatter block.
+   */
+  private extractFrontmatter(): { content: string; startLine: number; endLine: number } | null {
+    const lines = this.source.split('\n');
+    const isFence = (text: string): boolean => text.trim() === '---';
+
+    // The first non-blank line decides whether a frontmatter block exists:
+    // anything other than a `---` fence there means no frontmatter.
+    const openIdx = lines.findIndex((text) => text.trim() !== '');
+    if (openIdx === -1 || !isFence(lines[openIdx]!)) {
+      return null;
+    }
+
+    // Walk forward to the closing fence, strictly after the opening one
+    let closeIdx = openIdx + 1;
+    while (closeIdx < lines.length && !isFence(lines[closeIdx]!)) {
+      closeIdx++;
+    }
+    if (closeIdx >= lines.length) {
+      return null; // unclosed fence
+    }
+
+    // Hand back the fenced text plus where it sits in the file
+    return {
+      content: lines.slice(openIdx + 1, closeIdx).join('\n'),
+      startLine: openIdx + 1, // first content line, 0-indexed
+      endLine: closeIdx, // closing fence line, 0-indexed
+    };
+  }
+
+  /**
+   * Collect every `<script>…</script>` block in the file together with the
+   * 0-indexed line its content starts on (the line holding the tag's `>`).
+   */
+  private extractScriptBlocks(): Array<{ content: string; startLine: number }> {
+    const countNewlines = (text: string): number => text.split('\n').length - 1;
+    const scriptRegex = /<script(\s[^>]*)?>(?<content>[\s\S]*?)<\/script>/g;
+    const found: Array<{ content: string; startLine: number }> = [];
+
+    let hit: RegExpExecArray | null;
+    while ((hit = scriptRegex.exec(this.source)) !== null) {
+      const wholeMatch = hit[0];
+      const openingTag = wholeMatch.substring(0, wholeMatch.indexOf('>') + 1);
+
+      // Content begins immediately after the opening tag's `>`, so its own
+      // leading `\n` belongs to the content: content line 1 is the tag's
+      // closing line. Adding 1 here would count that newline twice.
+      const linesBeforeTag = countNewlines(this.source.substring(0, hit.index));
+      const linesInsideTag = countNewlines(openingTag);
+
+      found.push({
+        content: hit.groups?.content || hit[2] || '',
+        startLine: linesBeforeTag + linesInsideTag, // 0-indexed
+      });
+    }
+    return found;
+  }
+
+  /**
+   * Parse a frontmatter / script block as TypeScript (Astro's default for both)
+   * and merge the result into this extractor: line numbers are shifted by
+   * `block.startLine` and each node gets a `contains` edge from the component.
+   */
+  private processScriptContent(
+    block: { content: string; startLine: number },
+    componentNodeId: string,
+    label: 'frontmatter' | 'script'
+  ): void {
+    if (!isLanguageSupported('typescript')) {
+      this.errors.push({
+        message: `Parser for typescript not available, cannot parse Astro ${label} block`,
+        severity: 'warning',
+      });
+      return;
+    }
+
+    const extractor = new TreeSitterExtractor(this.filePath, block.content, 'typescript');
+    const result = extractor.extract();
+
+    // Offset line numbers from the block back to .astro file positions
+    for (const node of result.nodes) {
+      node.startLine += block.startLine;
+      node.endLine += block.startLine;
+      node.language = 'astro'; // Mark as astro, not TS
+
+      this.nodes.push(node);
+
+      // Add containment edge from component to this node
+      this.edges.push({
+        source: componentNodeId,
+        target: node.id,
+        kind: 'contains',
+      });
+    }
+
+    // Offset edges that carry a line number, then keep all of them
+    for (const edge of result.edges) {
+      if (edge.line) {
+        edge.line += block.startLine;
+      }
+      this.edges.push(edge);
+    }
+
+    // Offset unresolved references and re-attribute them to the .astro file
+    for (const ref of result.unresolvedReferences) {
+      ref.line += block.startLine;
+      ref.filePath = this.filePath;
+      ref.language = 'astro';
+      this.unresolvedReferences.push(ref);
+    }
+
+    // Carry over errors, shifting the line of those that have one
+    for (const error of result.errors) {
+      if (error.line) {
+        error.line += block.startLine;
+      }
+      this.errors.push(error);
+    }
+  }
+
+  /**
+   * Compute the 0-indexed, inclusive line ranges that template scanning has
+   * to ignore: the frontmatter (fences included) and every <script>/<style>.
+   */
+  private getCoveredRanges(
+    frontmatter: { startLine: number; endLine: number } | null
+  ): Array<[number, number]> {
+    const newlineCount = (text: string): number => text.split('\n').length - 1;
+    // Frontmatter content starts one line below the opening fence, so step
+    // back one line to cover the fence itself; endLine is the closing fence.
+    const skipped: Array<[number, number]> =
+      frontmatter ? [[frontmatter.startLine - 1, frontmatter.endLine]] : [];
+
+    const blockRegex = /<(script|style)(\s[^>]*)?>[\s\S]*?<\/\1>/g;
+    for (
+      let block = blockRegex.exec(this.source);
+      block !== null;
+      block = blockRegex.exec(this.source)
+    ) {
+      const firstLine = newlineCount(this.source.substring(0, block.index));
+      skipped.push([firstLine, firstLine + newlineCount(block[0])]);
+    }
+    return skipped;
+  }
+
+  /**
+   * Extract function calls from Astro template expressions.
+   *
+   * Calls often live in markup (`{formatDate(post.date)}`, `class:list={cn(...)}`),
+   * so every template line is scanned for `{expression}` groups and each
+   * `name(` / `obj.method(` inside becomes an unresolved `calls` reference.
+   * A `{` group left open at end-of-line (`{posts.map((post) => (`) contributes
+   * the calls on its opening line.
+   *
+   * @param componentNodeId - id of the component node the calls originate from
+   * @param coveredRanges - 0-indexed inclusive line ranges to skip
+   */
+  private extractTemplateCalls(
+    componentNodeId: string,
+    coveredRanges: Array<[number, number]>
+  ): void {
+    const lines = this.source.split('\n');
+    // Complete groups: {...} — excluding JSX comments ({/* ... */})
+    const exprRegex = /\{([^}/][^}]*)\}/g;
+    // A group opened but not closed on this line
+    const openExprRegex = /\{([^}/][^}]*)$/;
+
+    for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
+      if (coveredRanges.some(([start, end]) => lineIdx >= start && lineIdx <= end)) continue;
+      const line = lines[lineIdx]!;
+      // `offset` is the 0-indexed column of the expression text (just past its `{`)
+      const exprs: Array<{ text: string; offset: number }> = [];
+      let exprMatch;
+      while ((exprMatch = exprRegex.exec(line)) !== null) {
+        exprs.push({ text: exprMatch[1]!, offset: exprMatch.index + 1 });
+      }
+      // An open group runs to end-of-line, so its text is a suffix of `line`
+      const openMatch = openExprRegex.exec(line.replace(exprRegex, ''));
+      if (openMatch) {
+        exprs.push({ text: openMatch[1]!, offset: line.length - openMatch[1]!.length });
+      }
+
+      for (const expr of exprs) {
+        // Identifiers followed by `(`: cn(...), formatDate(...), obj.method(...)
+        const callRegex = /\b([a-zA-Z_$][\w$.]*)\s*\(/g;
+        let callMatch;
+        while ((callMatch = callRegex.exec(expr.text)) !== null) {
+          const calleeName = callMatch[1]!;
+          // Skip reserved words that may precede `(` — they are never callable symbols
+          if (/^(?:if|for|while|switch|catch|return|typeof|await|function)$/.test(calleeName)) continue;
+          this.unresolvedReferences.push({
+            fromNodeId: componentNodeId,
+            referenceName: calleeName,
+            referenceKind: 'calls',
+            line: lineIdx + 1, // 1-indexed
+            column: expr.offset + callMatch.index, // 0-indexed, at the callee name
+            filePath: this.filePath,
+            language: 'astro',
+          });
+        }
+      }
+    }
+  }
+
+  /**
+   * Record template component usages as unresolved `references`.
+   *
+   * A tag whose name starts with an uppercase letter (`<Layout>`,
+   * `<PostCard />`) is treated as a component instantiation, comparable to
+   * a call in imperative code. Lowercase tags are plain HTML or custom
+   * elements (Astro has no kebab-case component registration like Vue)
+   * and are ignored, as are Astro's built-in components.
+   */
+  private extractTemplateComponents(
+    componentNodeId: string,
+    coveredRanges: Array<[number, number]>
+  ): void {
+    // `<` directly followed by a PascalCase name; `</Foo>` never matches
+    const tagPattern = /<([A-Z][a-zA-Z0-9_$]*)\b/g;
+    const isCovered = (idx: number): boolean =>
+      coveredRanges.some(([first, last]) => idx >= first && idx <= last);
+
+    this.source.split('\n').forEach((text, idx) => {
+      if (isCovered(idx)) return;
+
+      let tag: RegExpExecArray | null;
+      while ((tag = tagPattern.exec(text)) !== null) {
+        const tagName = tag[1]!;
+        if (!ASTRO_BUILTIN_COMPONENTS.has(tagName)) {
+          this.unresolvedReferences.push({
+            fromNodeId: componentNodeId,
+            referenceName: tagName,
+            referenceKind: 'references',
+            line: idx + 1, // 1-indexed
+            column: tag.index + 1,
+            filePath: this.filePath,
+            language: 'astro',
+          });
+        }
+      }
+    });
+  }
+}
diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts
index 5f4937400..eabdb598e 100644
--- a/src/extraction/grammars.ts
+++ b/src/extraction/grammars.ts
@@ -10,7 +10,7 @@ import * as path from 'path';
 import { Parser, Language as WasmLanguage } from 'web-tree-sitter';
 import { Language } from '../types';
 
-export type GrammarLanguage = Exclude<Language, 'svelte' | 'vue' | 'liquid' | 'razor' | 'yaml' | 'twig' | 'xml' | 'properties' | 'unknown'>;
+export type GrammarLanguage = Exclude<Language, 'svelte' | 'vue' | 'astro' | 'liquid' | 'razor' | 'yaml' | 'twig' | 'xml' | 'properties' | 'unknown'>;
 
 /**
  * WASM filename map — maps each language to its .wasm grammar file
@@ -93,6 +93,7 @@ export const EXTENSION_MAP: Record<string, Language> = {
   '.liquid': 'liquid',
   '.svelte': 'svelte',
   '.vue': 'vue',
+  '.astro': 'astro',
   '.pas': 'pascal',
   '.dpr': 'pascal',
   '.dpk': 'pascal',
@@ -183,6 +184,14 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise<v
     await initGrammars();
   }
 
+  // SFC languages (svelte/vue/astro) have no grammar of their own — their
+  // extractors delegate <script>/frontmatter content to the TS/JS extractor,
+  // so those grammars must be loaded even when no plain .ts/.js file is in
+  // the index set (e.g. a pure-.astro content site).
+  if (languages.some((l) => l === 'svelte' || l === 'vue' || l === 'astro')) {
+    languages = [...languages, 'typescript', 'javascript'];
+  }
+
   // Deduplicate and filter to languages that have WASM grammars and aren't already loaded
   const toLoad = [...new Set(languages)].filter(
     (lang): lang is GrammarLanguage =>
@@ -300,6 +309,7 @@ function looksLikeObjc(source: string): boolean {
 export function isLanguageSupported(language: Language): boolean {
   if (language === 'svelte') return true; // custom extractor (script block delegation)
   if (language === 'vue') return true; // custom extractor (script block delegation)
+  if (language === 'astro') return true; // custom extractor (frontmatter/script block delegation)
   if (language === 'liquid') return true; // custom regex extractor
   if (language === 'razor') return true; // custom RazorExtractor (.cshtml/.razor markup)
   if (language === 'yaml') return true; // file-level tracking only; Drupal routing extraction via framework resolver
@@ -314,7 +324,7 @@ export function isLanguageSupported(language: Language): boolean {
  * Check if a grammar has been loaded and is ready for parsing.
  */
 export function isGrammarLoaded(language: Language): boolean {
-  if (language === 'svelte' || language === 'vue' || language === 'liquid' || language === 'razor') return true;
+  if (language === 'svelte' || language === 'vue' || language === 'astro' || language === 'liquid' || language === 'razor') return true;
   if (language === 'yaml' || language === 'twig') return true; // no WASM grammar needed
   if (language === 'xml' || language === 'properties') return true; // no WASM grammar needed
   return languageCache.has(language);
@@ -337,7 +347,7 @@ export function isFileLevelOnlyLanguage(language: Language): boolean {
  * Get all supported languages (those with grammar definitions).
  */
 export function getSupportedLanguages(): Language[] {
-  return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'vue', 'liquid'];
+  return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'vue', 'astro', 'liquid'];
 }
 
 /**
@@ -403,6 +413,7 @@ export function getLanguageDisplayName(language: Language): string {
     dart: 'Dart',
     svelte: 'Svelte',
     vue: 'Vue',
+    astro: 'Astro',
     liquid: 'Liquid',
     pascal: 'Pascal / Delphi',
     scala: 'Scala',
diff --git a/src/extraction/svelte-extractor.ts b/src/extraction/svelte-extractor.ts
index 5586ee343..ab0b93921 100644
--- a/src/extraction/svelte-extractor.ts
+++ b/src/extraction/svelte-extractor.ts
@@ -135,13 +135,16 @@ export class SvelteExtractor {
       // Detect module script
       const isModule = /context\s*=\s*["']module["']/.test(attrs);
 
-      // Calculate start line of the script content (line after <script>)
+      // Calculate the 0-indexed line where the content begins. The content
+      // starts right after the opening tag's `>` — its leading `\n` is part
+      // of the content, so relative line 1 sits ON the tag's closing line
+      // (adding 1 here double-counted the embedded newline and shifted every
+      // script-block symbol down a line).
       const beforeScript = this.source.substring(0, match.index);
       const scriptTagLine = (beforeScript.match(/\n/g) || []).length;
-      // The content starts on the line after the opening <script> tag
       const openingTag = match[0].substring(0, match[0].indexOf('>') + 1);
       const openingTagLines = (openingTag.match(/\n/g) || []).length;
-      const contentStartLine = scriptTagLine + openingTagLines + 1; // 0-indexed line
+      const contentStartLine = scriptTagLine + openingTagLines; // 0-indexed line
 
       blocks.push({
         content,
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 7b6ed02ce..e62f97578 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -24,6 +24,7 @@ import { EXTRACTORS } from './languages';
 import { LiquidExtractor } from './liquid-extractor';
 import { RazorExtractor } from './razor-extractor';
 import { SvelteExtractor } from './svelte-extractor';
+import { AstroExtractor } from './astro-extractor';
 import { DfmExtractor } from './dfm-extractor';
 import { VueExtractor } from './vue-extractor';
 import { MyBatisExtractor } from './mybatis-extractor';
@@ -4752,6 +4753,10 @@ export function extractFromSource(
     // Use custom extractor for Vue
     const extractor = new VueExtractor(filePath, source);
     result = extractor.extract();
+  } else if (detectedLanguage === 'astro') {
+    // Use custom extractor for Astro (frontmatter + template delegation)
+    const extractor = new AstroExtractor(filePath, source);
+    result = extractor.extract();
   } else if (detectedLanguage === 'liquid') {
     // Use custom extractor for Liquid
     const extractor = new LiquidExtractor(filePath, source);
diff --git a/src/extraction/vue-extractor.ts b/src/extraction/vue-extractor.ts
index cb5ca1da4..862f17c79 100644
--- a/src/extraction/vue-extractor.ts
+++ b/src/extraction/vue-extractor.ts
@@ -143,13 +143,16 @@ export class VueExtractor {
       // Detect <script setup>
       const isSetup = /\bsetup\b/.test(attrs);
 
-      // Calculate start line of the script content (line after <script>)
+      // Calculate the 0-indexed line where the content begins. The content
+      // starts right after the opening tag's `>` — its leading `\n` is part
+      // of the content, so relative line 1 sits ON the tag's closing line
+      // (adding 1 here double-counted the embedded newline and shifted every
+      // script-block symbol down a line).
       const beforeScript = this.source.substring(0, match.index);
       const scriptTagLine = (beforeScript.match(/\n/g) || []).length;
-      // The content starts on the line after the opening <script> tag
       const openingTag = match[0].substring(0, match[0].indexOf('>') + 1);
       const openingTagLines = (openingTag.match(/\n/g) || []).length;
-      const contentStartLine = scriptTagLine + openingTagLines + 1; // 0-indexed line
+      const contentStartLine = scriptTagLine + openingTagLines; // 0-indexed line
 
       blocks.push({
         content,
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 0d5a9a26c..9923ab505 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -1446,7 +1446,7 @@ export class ToolHandler {
       // names (Class.method / Class::method) — the agent's most precise input,
       // resolved exactly by findAllSymbols. (The old strip mangled Class.method
       // into Class, throwing the method away.)
-      const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
+      const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte|astro)$/i;
       const tokens = [...new Set(
         query.split(/[\s,()[\]]+/)
           .map((t) => t.replace(FILE_EXT, '').trim())
@@ -1794,7 +1794,7 @@ export class ToolHandler {
     // agent explicitly named is in the subgraph and its file is scored.
     const namedSeedIds = new Set<string>();
     {
-      const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
+      const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte|astro)$/i;
       const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
       const isTestPath = (p: string) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p);
       const bodyLines = (n: Node) => Math.max(0, (n.endLine ?? n.startLine) - n.startLine);
diff --git a/src/resolution/frameworks/astro.ts b/src/resolution/frameworks/astro.ts
new file mode 100644
index 000000000..63e1af132
--- /dev/null
+++ b/src/resolution/frameworks/astro.ts
@@ -0,0 +1,195 @@
+/**
+ * Astro Framework Resolver
+ *
+ * Handles Astro component references, the `Astro` global, `astro:*` virtual
+ * module imports, and Astro's `src/pages/` file-based routing.
+ */
+
+import { Node } from '../../types';
+import { FrameworkResolver, UnresolvedRef, ResolvedRef, ResolutionContext } from '../types';
+
+/**
+ * Astro virtual module prefixes — framework-provided, not user code
+ */
+const ASTRO_VIRTUAL_MODULES = [
+  'astro:content',
+  'astro:assets',
+  'astro:actions',
+  'astro:env',
+  'astro:i18n',
+  'astro:middleware',
+  'astro:transitions',
+  'astro:components',
+  'astro:schema',
+];
+
+export const astroResolver: FrameworkResolver = {
+  name: 'astro',
+
+  detect(context: ResolutionContext): boolean {
+    // True when `raw` parses as JSON whose dependencies/devDependencies
+    // include a truthy `astro` entry; malformed input counts as "no".
+    const declaresAstro = (raw: string): boolean => {
+      try {
+        const pkg = JSON.parse(raw);
+        return Boolean({ ...pkg.dependencies, ...pkg.devDependencies }.astro);
+      } catch {
+        return false;
+      }
+    };
+
+    // Signal 1: the project's package.json depends on astro.
+    const packageJson = context.readFile('package.json');
+    if (packageJson && declaresAstro(packageJson)) return true;
+
+    // Signal 2: at least one known project file ends in `.astro`.
+    return context.getAllFiles().some((file) => file.endsWith('.astro'));
+  },
+
+  resolve(ref: UnresolvedRef, context: ResolutionContext): ResolvedRef | null {
+    // Short aliases for the two fields every pattern below inspects.
+    const { referenceName: name, referenceKind: kind } = ref;
+
+    // Framework-provided symbols have no user-code definition to point at,
+    // so they resolve back onto the referencing node itself, at full
+    // confidence.
+    const providedByAstro = (): ResolvedRef => ({
+      original: ref,
+      targetNodeId: ref.fromNodeId,
+      confidence: 1.0,
+      resolvedBy: 'framework',
+    });
+
+    // Pattern 1: the `Astro` global (Astro.props, Astro.url, Astro.params, …)
+    // — runtime-provided in every component's frontmatter. Claiming it here
+    // keeps it from name-matching a user symbol named Astro.
+    if (name === 'Astro' || name.startsWith('Astro.')) return providedByAstro();
+
+    // Pattern 2: imports of a known astro:* virtual module (astro:content,
+    // astro:assets, …). Other `astro:` specifiers are not claimed here.
+    const isVirtualModuleImport =
+      kind === 'imports' &&
+      name.startsWith('astro:') &&
+      ASTRO_VIRTUAL_MODULES.some((prefix) => name.startsWith(prefix));
+    if (isVirtualModuleImport) return providedByAstro();
+
+    // Pattern 3: Component references (PascalCase) — resolve to component
+    // nodes. Template tags arrive as `references`, frontmatter expression
+    // usages as `calls`.
+    if (isPascalCase(name) && (kind === 'references' || kind === 'calls')) {
+      const targetNodeId = resolveComponent(name, ref.filePath, context);
+      if (targetNodeId) {
+        return {
+          original: ref,
+          targetNodeId,
+          confidence: 0.8,
+          resolvedBy: 'framework',
+        };
+      }
+    }
+
+    // Nothing Astro-specific matched.
+    return null;
+  },
+
+  extract(filePath: string, _content: string) {
+    const nodes: Node[] = [];
+    const now = Date.now();
+
+    // Normalize Windows separators so the path checks below only see `/`.
+    const normalized = filePath.replace(/\\/g, '/');
+
+    // Astro file-based routing lives under src/pages/ — .astro files are
+    // pages, .ts/.js/.mjs files are API endpoints. (.md/.mdx pages exist too
+    // but aren't indexed as source.)
+    const pagesMatch = /(?:^|\/)src\/pages\//.exec(normalized);
+    if (pagesMatch && /\.(astro|ts|js|mjs)$/.test(normalized)) {
+      const afterPages = normalized.substring(pagesMatch.index + pagesMatch[0].length);
+      const base = afterPages.split('/').pop() || '';
+
+      // Underscore-prefixed segments are excluded from routing by Astro;
+      // a stray `*.config.*` in a pages dir is never a route.
+      if (
+        !afterPages.split('/').some((segment) => segment.startsWith('_')) &&
+        !/\.config\.[a-z]+$/.test(base)
+      ) {
+        const routePath = filePathToAstroRoute(afterPages);
+        // `.js`/`.mjs` endpoints are JavaScript — only `.ts` is TypeScript.
+        const scriptLanguage = normalized.endsWith('.ts') ? 'typescript' : 'javascript';
+        nodes.push({
+          id: `route:${filePath}:${routePath}:1`,
+          kind: 'route',
+          name: routePath,
+          qualifiedName: `${filePath}::route:${routePath}`,
+          filePath,
+          startLine: 1,
+          endLine: 1,
+          startColumn: 0,
+          endColumn: 0,
+          language: normalized.endsWith('.astro') ? 'astro' : scriptLanguage,
+          updatedAt: now,
+        });
+      }
+    }
+
+    return { nodes, references: [] };
+  },
+};
+
+/** Check if string is PascalCase: uppercase ASCII lead, then ASCII letters/digits only. */
+function isPascalCase(str: string): boolean {
+  const pascalCaseName = /^[A-Z][a-zA-Z0-9]*$/;
+
+  return pascalCaseName.test(str);
+}
+
+/**
+ * Resolve an Astro component reference by name, preferring a candidate under
+ * the referencing file's directory. Returns its node id, or null when none or ambiguous.
+ */
+function resolveComponent(
+  name: string,
+  fromFile: string,
+  context: ResolutionContext
+): string | null {
+  const components = context.getNodesByName(name).filter((n) => n.kind === 'component');
+  if (components.length === 0) return null;
+
+  // Prefer the referencing file's directory, matched WITH its trailing slash: a
+  // bare prefix also matched siblings (`src/ui` vs `src/ui-old/`), and a
+  // root-level file's empty prefix matched every candidate.
+  const dirPrefix = fromFile.substring(0, fromFile.lastIndexOf('/') + 1);
+  const sameDir = dirPrefix ? components.filter((n) => n.filePath.startsWith(dirPrefix)) : [];
+  if (sameDir.length > 0) return sameDir[0]!.id;
+
+  // No positional signal: only an UNAMBIGUOUS name may resolve — picking
+  // components[0] would choose an arbitrary same-named component in a
+  // multi-app monorepo (#764). Ambiguity falls through to the name-matcher,
+  // whose proximity scoring decides.
+  return components.length === 1 ? components[0]!.id : null;
+}
+
+/**
+ * Translate a path relative to src/pages/ into the route it serves.
+ *
+ * blog/[slug].astro        -> /blog/:slug
+ * blog/[...path].astro     -> /blog/*path
+ * blog/index.astro         -> /blog
+ * api/posts.ts             -> /api/posts
+ * index.astro              -> /
+ */
+function filePathToAstroRoute(afterPages: string): string {
+  // Drop the extension, then fold a trailing `index` into its parent
+  // directory (index -> '', blog/index -> blog).
+  const stem = afterPages.replace(/\.(astro|ts|js|mjs)$/, '');
+  const parent = stem.replace(/(^|\/)index$/, '$1').replace(/\/$/, '');
+
+  // Rewrite bracket params. Catch-alls go first: the single-param pattern
+  // would otherwise also match `[...rest]` and emit `:...rest`.
+  const catchAll = /\[\.\.\.([^\]]+)\]/g; // [...rest] -> *rest
+  const singleParam = /\[([^\]]+)\]/g; // [param] -> :param
+  const route = `/${parent.replace(catchAll, '*$1').replace(singleParam, ':$1')}`;
+
+  // The site root keeps its slash; any other route sheds a trailing one.
+  return route === '/' ? route : route.replace(/\/$/, '');
+}
diff --git a/src/resolution/frameworks/index.ts b/src/resolution/frameworks/index.ts
index 88bf205e6..4fc3c3a5b 100644
--- a/src/resolution/frameworks/index.ts
+++ b/src/resolution/frameworks/index.ts
@@ -13,6 +13,7 @@ import { nestjsResolver } from './nestjs';
 import { reactResolver } from './react';
 import { svelteResolver } from './svelte';
 import { vueResolver } from './vue';
+import { astroResolver } from './astro';
 import { djangoResolver, flaskResolver, fastapiResolver } from './python';
 import { railsResolver } from './ruby';
 import { springResolver } from './java';
@@ -39,6 +40,7 @@ const FRAMEWORK_RESOLVERS: FrameworkResolver[] = [
   reactResolver,
   svelteResolver,
   vueResolver,
+  astroResolver,
   // Python
   djangoResolver,
   flaskResolver,
@@ -128,6 +130,7 @@ export { nestjsResolver } from './nestjs';
 export { reactResolver } from './react';
 export { svelteResolver } from './svelte';
 export { vueResolver } from './vue';
+export { astroResolver } from './astro';
 export { djangoResolver, flaskResolver, fastapiResolver } from './python';
 export { railsResolver } from './ruby';
 export { springResolver } from './java';
diff --git a/src/resolution/import-resolver.ts b/src/resolution/import-resolver.ts
index 64930ddf0..716e10880 100644
--- a/src/resolution/import-resolver.ts
+++ b/src/resolution/import-resolver.ts
@@ -24,6 +24,7 @@ const EXTENSION_RESOLUTION: Record<string, string[]> = {
   // `.svelte`/`.vue` file resolve to nothing, so barrel callers vanish (#629).
   svelte: ['.ts', '.js', '.svelte', '.tsx', '.jsx', '/index.ts', '/index.js', '/index.svelte'],
   vue: ['.ts', '.js', '.vue', '.tsx', '.jsx', '/index.ts', '/index.js', '/index.vue'],
+  astro: ['.ts', '.js', '.astro', '.tsx', '.jsx', '/index.ts', '/index.js', '/index.astro'],
   python: ['.py', '/__init__.py'],
   go: ['.go'],
   rust: ['.rs', '/mod.rs'],
@@ -582,9 +583,10 @@ export function extractImportMappings(
 
   if (language === 'typescript' || language === 'javascript' || language === 'tsx' || language === 'jsx') {
     mappings.push(...extractJSImports(content));
-  } else if (language === 'svelte' || language === 'vue') {
+  } else if (language === 'svelte' || language === 'vue' || language === 'astro') {
     // Svelte/Vue single-file components import via plain ES6 inside their
-    // `<script>` block. Without this, a `.svelte`/`.vue` consumer produces
+    // `<script>` block (Astro: the `---` frontmatter). Without this, a
+    // `.svelte`/`.vue`/`.astro` consumer produces
     // zero import mappings, so `resolveViaImport` can't run and a barrel
     // import (`import { Foo } from './lib'`) falls back to name-matching —
     // which silently fails whenever the re-export alias differs from the
diff --git a/src/types.ts b/src/types.ts
index be3452d97..e57a74229 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -83,6 +83,7 @@ export const LANGUAGES = [
   'dart',
   'svelte',
   'vue',
+  'astro',
   'liquid',
   'pascal',
   'scala',

From 0682681175b82178a3bcc789bd6a186fab38ba58 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 19:39:16 -0500
Subject: [PATCH 43/51] =?UTF-8?q?chore(agent-eval):=20standing=20A/B=20mod?=
 =?UTF-8?q?el=20policy=20=E2=80=94=20sonnet=20+=20high=20effort,=20never?=
 =?UTF-8?q?=20Opus/Fable=20(#816)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All agent A/B arms now run claude --model sonnet --effort high by default
(MODEL/EFFORT env overrides exist). Sonnet is the deliberate floor model:
codegraph's users attach whatever host they already run (Cursor Composer,
Gemini, ...), and a stronger model's tool-use masks the salience problems a
weaker one exposes — what lands on Sonnet generalizes up; Opus/Fable-only
wins don't generalize down. Policy recorded in CLAUDE.md's validation
methodology; 11 hardcoded --model opus call sites across 9 eval scripts
switched to the env-overridable default.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 CLAUDE.md                                | 1 +
 scripts/agent-eval/ab-adoption.sh        | 2 +-
 scripts/agent-eval/ab-hook.sh            | 2 +-
 scripts/agent-eval/ab-impl.sh            | 2 +-
 scripts/agent-eval/ab-new-vs-baseline.sh | 2 +-
 scripts/agent-eval/ab-sufficiency.sh     | 2 +-
 scripts/agent-eval/bench-why-repo.sh     | 2 +-
 scripts/agent-eval/run-agent.sh          | 2 +-
 scripts/agent-eval/run-all.sh            | 8 +++++---
 scripts/agent-eval/run-arms.sh           | 2 +-
 10 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index bad199f20..18310031c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -137,6 +137,7 @@ For each **language × framework**, validate on **small, medium, and large** rea
 1. **Pick the canonical flow** for the framework ("how does X reach Y": state→render, request→handler→view, query→SQL, action→reducer→store…).
 2. **Deterministic probes** (`scripts/agent-eval/probe-{node,explore}.mjs` against the built `dist/`): `codegraph_explore` with the flow's symbol names connects from→to end-to-end with no break (its Flow section shows the path); **no node explosion** (`select count(*) from nodes` stable before/after re-index); synthesized-edge **precision** spot-check (`select … where provenance='heuristic'`).
 3. **Agent A/B** (`scripts/agent-eval/run-all.sh <repo> "<Q>"`): with vs without codegraph, **≥2 runs/arm** (run-to-run variance is large — never conclude from n=1). Record **duration, total tool calls, Read, Grep**. Optional forced-Read-0 sufficiency proof via the block-read hook (`scripts/agent-eval/hook-settings.json`).
+   - **Model policy — every A/B arm runs Claude with `--model sonnet --effort high`. Always. Never Opus/Fable.** All `scripts/agent-eval/*.sh` default to this (`MODEL`/`EFFORT` env override exists — don't raise it without an explicit reason from the maintainer). Two reasons, and the second matters more than cost: (a) Sonnet doesn't burn tokens; (b) **Sonnet is the deliberate floor model** — codegraph's real users attach it to whatever agent they already run (Cursor Composer, Gemini, etc.), so we validate on a "dumber" model on purpose: a stronger model's tool-use covers up the salience/sufficiency problems a weaker one exposes. An affordance that lands on Sonnet generalizes up to every host; one that only works on Opus/Fable doesn't generalize down to the agents most users actually have. Both arms always use the same model.
    - **MCP attach is a startup-latency issue, not a hard block.** On a multi-step task the agent dives into Read/grep before codegraph finishes its ~2-3s startup (worse when the eval is itself run nested inside a Claude session, under CPU contention), so it runs with no codegraph. Fix: **pre-warm a persistent daemon** for the target (`CODEGRAPH_DAEMON_IDLE_TIMEOUT_MS` high; spawn `serve --mcp --path <target> </dev/null &`; wait for `.codegraph/daemon.sock`) **and skip the startup re-exec** (`CODEGRAPH_WASM_RELAUNCHED=1`) so claude connects before the agent's first turn. Don't trust claude's `init` snapshot — it can read `status:"pending"` / 0 tools even when it then connects; judge by actual codegraph usage in `parse-run.mjs`'s `by type`. To isolate a change — **new-build vs baseline-build, both codegraph-on** (vs run-all.sh's with-vs-without) — use `scripts/agent-eval/ab-new-vs-baseline.sh <indexed-repo> "<task>" [baseline-ref]` (it bakes in the pre-warm).
 4. **Pass bar:** a normal flow question reaches **~0 Read/Grep within the repo's explore-call budget**, runs **faster** than without-codegraph, and shows **no regression on a control repo**. Record the numbers in `docs/design/dynamic-dispatch-coverage-playbook.md` (the coverage matrix).
 
diff --git a/scripts/agent-eval/ab-adoption.sh b/scripts/agent-eval/ab-adoption.sh
index eabf802c3..d5c6dc222 100644
--- a/scripts/agent-eval/ab-adoption.sh
+++ b/scripts/agent-eval/ab-adoption.sh
@@ -71,7 +71,7 @@ run_arm() { # label, N
     echo "----- [$label] run $i -----"
     ( cd "$tgt" && claude -p "$TASK" \
         --output-format stream-json --verbose --permission-mode bypassPermissions \
-        --model opus --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" \
+        --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" \
         </dev/null > "$OUT/run-$label-$i.jsonl" 2>"$OUT/run-$label-$i.err" )
     count "$OUT/run-$label-$i.jsonl"
     pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
diff --git a/scripts/agent-eval/ab-hook.sh b/scripts/agent-eval/ab-hook.sh
index 8c1af32a3..0e46fccbd 100644
--- a/scripts/agent-eval/ab-hook.sh
+++ b/scripts/agent-eval/ab-hook.sh
@@ -72,7 +72,7 @@ run_one() { # arm-label, run-index, use-hook(0|1)
   # array expansion otherwise, which would skip the no-hook arm's claude run.
   ( cd "$tgt" && claude -p "$TASK" \
       --output-format stream-json --verbose --permission-mode bypassPermissions \
-      --model opus --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" ${extra[@]+"${extra[@]}"} \
+      --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" ${extra[@]+"${extra[@]}"} \
       </dev/null > "$OUT/run-$label-$idx.jsonl" 2>"$OUT/run-$label-$idx.err" )
   node "$PARSE" "$OUT/run-$label-$idx.jsonl" 2>&1 | grep -E "by type|Result" || echo "  (parse failed — see $OUT/run-$label-$idx.jsonl)"
   pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
diff --git a/scripts/agent-eval/ab-impl.sh b/scripts/agent-eval/ab-impl.sh
index c5c23b58e..b6f219b14 100644
--- a/scripts/agent-eval/ab-impl.sh
+++ b/scripts/agent-eval/ab-impl.sh
@@ -64,7 +64,7 @@ run(){ # label, withCodegraph(0/1)
       prewarm "$tgt"
     else cp "$OUT/mcp-empty.json" "$cfg"; fi
     ( cd "$tgt" && claude -p "$Q" --output-format stream-json --verbose \
-        --permission-mode bypassPermissions --model opus --max-budget-usd 4 \
+        --permission-mode bypassPermissions --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 \
         --strict-mcp-config --mcp-config "$cfg" </dev/null > "$OUT/$label-$i.jsonl" 2>"$OUT/$label-$i.err" )
     echo "[$label] run $i:"; analyze "$OUT/$label-$i.jsonl"
     if [ -n "$BUILD_CMD" ]; then ( cd "$tgt" && eval "$BUILD_CMD" >/dev/null 2>&1 && echo "      build: PASS" || echo "      build: FAIL" ); fi
diff --git a/scripts/agent-eval/ab-new-vs-baseline.sh b/scripts/agent-eval/ab-new-vs-baseline.sh
index 7e5cc84e5..b7fe4e768 100755
--- a/scripts/agent-eval/ab-new-vs-baseline.sh
+++ b/scripts/agent-eval/ab-new-vs-baseline.sh
@@ -75,7 +75,7 @@ run_arm() { # label, target-copy
   echo "############## ARM [$label] ##############"
   ( cd "$tgt" && claude -p "$TASK" \
       --output-format stream-json --verbose --permission-mode bypassPermissions \
-      --model opus --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" \
+      --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 --strict-mcp-config --mcp-config "$c" \
       </dev/null > "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
   node "$PARSE" "$OUT/run-$label.jsonl" 2>&1 | grep -E "by type|Result" || echo "  (parse failed — see $OUT/run-$label.jsonl)"
   pkill -9 -f "serve --mcp --path $tgt" 2>/dev/null
diff --git a/scripts/agent-eval/ab-sufficiency.sh b/scripts/agent-eval/ab-sufficiency.sh
index 066253657..3db4ff1f6 100644
--- a/scripts/agent-eval/ab-sufficiency.sh
+++ b/scripts/agent-eval/ab-sufficiency.sh
@@ -66,7 +66,7 @@ run(){ # label, cfg, prewarm(0/1)
   for i in $(seq 1 "$RUNS"); do
     [ "$pw" = "1" ] && prewarm
     ( cd "$TGT" && claude -p "$Q" --output-format stream-json --verbose \
-        --permission-mode bypassPermissions --model opus --max-budget-usd 4 \
+        --permission-mode bypassPermissions --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 \
         --strict-mcp-config --mcp-config "$cfg" </dev/null > "$OUT/$label-$i.jsonl" 2>"$OUT/$label-$i.err" )
     echo "[$label] run $i:"; analyze "$OUT/$label-$i.jsonl"
   done
diff --git a/scripts/agent-eval/bench-why-repo.sh b/scripts/agent-eval/bench-why-repo.sh
index 2bbedf8fc..2e26a2ffc 100644
--- a/scripts/agent-eval/bench-why-repo.sh
+++ b/scripts/agent-eval/bench-why-repo.sh
@@ -15,7 +15,7 @@ printf '{"mcpServers":{"codegraph":{"command":"%s","args":["serve","--mcp","--pa
 for i in $(seq 1 "$N"); do
   pkill -f "serve --mcp" 2>/dev/null; sleep 1; rm -f "$REPO/.codegraph/daemon.sock"
   ( cd "$REPO" && claude -p "$Q$WHY" --output-format stream-json --verbose \
-      --permission-mode bypassPermissions --model opus --effort "${EFFORT:-high}" --max-budget-usd 4 \
+      --permission-mode bypassPermissions --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4 \
       --strict-mcp-config --mcp-config "$OUT/cg.json" > "$OUT/with$i.jsonl" 2>"$OUT/with$i.err" )
   echo "WITH run $i: exit $? ($(wc -l < "$OUT/with$i.jsonl" | tr -d ' ') lines)"
 done
diff --git a/scripts/agent-eval/run-agent.sh b/scripts/agent-eval/run-agent.sh
index b599c43b3..abbee8726 100755
--- a/scripts/agent-eval/run-agent.sh
+++ b/scripts/agent-eval/run-agent.sh
@@ -25,7 +25,7 @@ cd "$REPO" || exit 1
 claude -p "$PROMPT" \
   --output-format stream-json --verbose \
   --permission-mode bypassPermissions \
-  --model opus \
+  --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" \
   --max-budget-usd 2 \
   --strict-mcp-config --mcp-config "$MCP_CONFIG" \
   > "$OUT" 2>"$OUT_DIR/run-${LABEL}.err"
diff --git a/scripts/agent-eval/run-all.sh b/scripts/agent-eval/run-all.sh
index 4b40dce9c..b68292591 100755
--- a/scripts/agent-eval/run-all.sh
+++ b/scripts/agent-eval/run-all.sh
@@ -7,6 +7,8 @@
 # Usage: run-all.sh <repo-path> "<question>" [headless|tmux|all]
 # Env:   CG_BIN          codegraph binary (default: command -v codegraph)
 #        AGENT_EVAL_OUT  output dir (default: /tmp/agent-eval)
+#        MODEL / EFFORT  claude model/effort (default: sonnet / high — the
+#                        standing A/B policy; see CLAUDE.md, don't raise)
 set -uo pipefail
 
 REPO="${1:?usage: run-all.sh <repo-path> \"<question>\" [headless|tmux|all]}"
@@ -39,7 +41,7 @@ headless() {
   ( cd "$REPO" && claude -p "$Q" \
       --output-format stream-json --verbose \
       --permission-mode bypassPermissions \
-      --model opus \
+      --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" \
       --max-budget-usd 4 \
       --strict-mcp-config --mcp-config "$cfg" \
       > "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
@@ -56,11 +58,11 @@ fi
 
 if [ "$MODE" = tmux ] || [ "$MODE" = all ]; then
   echo "############################## INTERACTIVE [with] ##############################"
-  CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $OUT/mcp-codegraph.json" \
+  CLAUDE_EXTRA_ARGS="--model ${MODEL:-sonnet} --effort ${EFFORT:-high} --strict-mcp-config --mcp-config $OUT/mcp-codegraph.json" \
     bash "$HARNESS/itrun.sh" "$REPO" "int-with" "$Q" 2>&1 || echo "[itrun WITH failed]"
   echo
   echo "############################## INTERACTIVE [without] ##############################"
-  CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $OUT/mcp-empty.json" \
+  CLAUDE_EXTRA_ARGS="--model ${MODEL:-sonnet} --effort ${EFFORT:-high} --strict-mcp-config --mcp-config $OUT/mcp-empty.json" \
     bash "$HARNESS/itrun.sh" "$REPO" "int-without" "$Q" 2>&1 || echo "[itrun WITHOUT failed]"
   echo
 fi
diff --git a/scripts/agent-eval/run-arms.sh b/scripts/agent-eval/run-arms.sh
index af3da6dc5..48d4cf856 100755
--- a/scripts/agent-eval/run-arms.sh
+++ b/scripts/agent-eval/run-arms.sh
@@ -48,7 +48,7 @@ fi
 
 LOG="$OUT/$ARM-r$RID.jsonl"; ERR="$OUT/$ARM-r$RID.err"
 ARGS=( -p "$Q" --output-format stream-json --verbose
-       --permission-mode bypassPermissions --model opus --max-budget-usd 4
+       --permission-mode bypassPermissions --model "${MODEL:-sonnet}" --effort "${EFFORT:-high}" --max-budget-usd 4
        --strict-mcp-config --mcp-config "$CFG" )
 [ -n "$STEERING" ] && ARGS+=( --append-system-prompt "$STEERING" )
 

From f9fcc2cd6a49aa5fc12664f7fb438561cd45daa0 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 20:03:26 -0500
Subject: [PATCH 44/51] =?UTF-8?q?feat(mcp):=20unindexed=20sessions=20go=20?=
 =?UTF-8?q?quiet=20=E2=80=94=20empty=20tools/list=20+=20inactive=20instruc?=
 =?UTF-8?q?tions,=20no-error=20policy=20(#769)=20(#817)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An MCP session in a workspace with no .codegraph/ previously got the full
"lean on codegraph for everything" playbook plus all 8 tools, then every
call returned isError — and one or two early errors teach an agent to
abandon codegraph for the whole session (maintainer-observed). Now the
initialize response picks an instructions variant by index state (cheap
sync walk-up, #172 respond-fast contract holds) and tools/list serves an
EMPTY list when unindexed: absence is the one signal an agent can't
misread. Indexing is deliberately the user's call — the inactive note
tells the agent not to run init itself.

No-error policy in the tool handler: expected/recoverable conditions
(NotIndexedError — cross-project query to an unindexed path, default-
project detection miss) return SUCCESS-shaped guidance instead of
isError; security refusals (PathRefusalError) stay hard errors without
retry encouragement; genuine internal failures keep isError but add a
retry-once note so a transient blip doesn't convert to permanent
abandonment. Principle recorded in CLAUDE.md.

Also: codegraph_search kind:"type" (advertised by its own schema enum)
silently matched nothing — now maps to type_alias; codegraph_explore's
query param no longer tells agents to run codegraph_search first
(contradicted explore's call-FIRST design); server-instructions
§Limitations rewords the unindexed case to stay-out-for-the-session.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                    |   2 +
 CLAUDE.md                       |   1 +
 __tests__/mcp-unindexed.test.ts | 212 ++++++++++++++++++++++++++++++++
 src/mcp/server-instructions.ts  |  23 +++-
 src/mcp/session.ts              |  26 +++-
 src/mcp/tools.ts                |  60 +++++++--
 6 files changed, 312 insertions(+), 12 deletions(-)
 create mode 100644 __tests__/mcp-unindexed.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 570b17537..0c0289850 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
+- **CodeGraph now goes quiet instead of failing loudly in unindexed projects.** When an AI agent's session starts in a workspace that has no CodeGraph index, the MCP server now announces itself as inactive with a short note and lists no tools at all — instead of presenting the full toolset and erroring on every call, which taught agents to distrust CodeGraph even where it works. Querying another project that isn't indexed likewise returns clear guidance (use your regular tools for that codebase; the user can run `codegraph init` there to enable CodeGraph) instead of an error, and genuine internal errors now tell the agent to retry once rather than give up on CodeGraph entirely. Indexing stays your decision — agents are told not to run it themselves. (#769)
 - **Astro projects are now indexed.** `.astro` files previously weren't parsed at all — on a typical Astro site that left most of the codebase invisible to search, impact, and `codegraph_explore`. CodeGraph now extracts the TypeScript frontmatter (functions, imports, `getStaticPaths`, …) and client-side `<script>` blocks, captures function calls and `<Component>` usages in template markup so cross-component dependencies trace end-to-end, resolves the `Astro` global and `astro:*` module imports as framework-provided, and maps `src/pages/` file-based routing to route nodes (`.astro` pages and `.ts` endpoints, including `[param]` and `[...rest]` dynamic segments, with underscore-prefixed files correctly excluded). Validated on two real-world Astro sites with 93% measured cross-file coverage and every page mapping to its route. Thanks @xingwangzhe. (#768) (Astro)
 - Same-named symbols across a monorepo's apps are no longer conflated. In a NestJS-style workspace with one `UserService` per app, `codegraph_callers`, `codegraph_callees`, and `codegraph_impact` now report **one section per distinct definition** — each app's callers and blast radius under its own file-labeled heading — instead of a single merged list, and accept a `file` argument to focus exactly the definition you mean (like `codegraph_node` already did). Impact in particular no longer overstates a change's blast radius by merging unrelated same-named classes. Thanks @Igorgro. (#764)
 - Fixed a related source of cross-package wrong edges: PascalCase **type references from plain `.ts` files were being resolved as React components**, which could link a file's own type alias to an arbitrary same-named class in another package (on one large monorepo this produced over a thousand wrong cross-package reference edges; 96% are now gone, and the remainder are genuine shared-model imports). Component resolution now applies only to references from JSX-capable files and never guesses between multiple candidates without a positional signal. The **Svelte and Vue component resolvers had the same arbitrary-pick flaw** (Vue resolved the first same-named `.vue` file found anywhere in the tree) and now follow the same rule: same-directory first, otherwise only an unambiguous name resolves. Re-index a project to benefit. (#764) (TypeScript, React, Svelte, Vue)
@@ -39,6 +40,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Fixes
 
+- The `codegraph_search` tool's `kind: "type"` filter — a value its own schema advertises — silently matched nothing; it now correctly finds type aliases. The `codegraph_explore` tool's parameter guidance also no longer suggests running `codegraph_search` first, which contradicted explore's call-it-first design and cost agents an extra round-trip.
 - Symbols defined in Svelte and Vue `<script>` blocks were reported one line below where they actually are — a function on line 3 was reported at line 4 — which offset every script-block symbol's location in search, `codegraph_node`, and explore output. Line numbers now match the file exactly. Re-index a project to benefit. (Svelte, Vue)
 - Doc comments are now captured for exported, `const`-assigned, and decorated declarations, and the documentation a symbol carries is now clean across every supported language. Previously a comment above `export class X`, `export const fn = () => …`, a plain `const fn = () => …`, or a decorated Python `def`/`class` (`@app.route(...)`, `@dataclass`) was dropped entirely — only comments directly above a plain declaration were kept. CodeGraph now finds the comment through the `export` / `const` / decorator wrapper. Comment-marker cleanup was also rounded out for every language CodeGraph supports: Rust/Swift/Kotlin doc lines (`///`, `//!`), Python/Ruby/shell `#`, Lua/Luau (`--` and `--[[ ]]`), and Pascal (`{ }` and `(* *)`) no longer leave stray markers in the stored text — validated end-to-end across all 19 code languages plus Svelte/Vue `<script>` blocks. (#780). Thanks @caleb-kaiser.
 - Go method calls made through a chained factory function now resolve to the correct type. A call like `New().Method()` used to drop the receiver, so the chained method attached to a same-named method on an unrelated type — or didn't resolve. CodeGraph now captures Go return types (a pointer `*Foo` resolves to `Foo`, and a multi-return `(*Foo, error)` to its first result), infers the chained receiver's type from what the factory function returns, and resolves the method on it — including methods promoted from an embedded struct — creating the edge only when the type or an embedded type genuinely has the method. Existing Go indexes should be re-indexed (`codegraph index -f`) to benefit. (#750) (Go)
diff --git a/CLAUDE.md b/CLAUDE.md
index 18310031c..6fe116de0 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -104,6 +104,7 @@ CodeGraph's only channels to influence the agent are low-salience: the MCP `init
 What works is meeting the agent where it already is:
 - **explore-flow** — `codegraph_explore` is the PRIMARY tool the agent reliably calls; its query is a precise bag of symbol names (incl. qualified `Class.method`) spanning the flow the agent is after; explore finds the call path _among those named symbols_ (riding synthesized edges) and leads its output with it. (`buildFlowFromNamedSymbols`: segment/co-naming disambiguation; ≤1 unnamed bridge so it never wanders a god-function's fan-out. Overload-aware: a PascalCase type token in the query biases an overloaded name to that type's own def — `DataRequest task` → DataRequest's `task`, not the abstract base; named-symbol files sort first.)
 - **Sufficiency** — make the tool's output complete enough that the agent stops. `codegraph_node` returns the full body + the caller/callee trail, and for an AMBIGUOUS name returns **every overload's body in one call** (so the agent never Reads a file to find the right overload — validated on Alamofire/gin). This is the after-explore depth tool (labeled SECONDARY).
+- **Errors teach abandonment** — one or two `isError: true` responses early in a session and the agent stops calling codegraph entirely (maintainer-observed, repeatedly). `isError` is reserved for genuine "stop trying" cases: security refusals (`PathRefusalError`) and real malfunctions (which carry a retry-once note). Every expected/recoverable condition — project not indexed, symbol not found, file not in the index — returns a **SUCCESS-shaped response carrying the guidance** (`NotIndexedError` → `textResult`, see `ToolHandler.execute`'s catch). The same principle session-wide: an **unindexed workspace serves an empty `tools/list` + a short two-paragraph "inactive" instructions variant** instead of 8 tools that all fail — absence is the one signal an agent can't misread, and indexing is deliberately the user's call, never the agent's.
 
 What fails is the inverse — folding a precise answer into a **fuzzy-input** tool: the now-removed `codegraph_context` took a description, not symbols, so it couldn't disambiguate a flow's endpoints and surfaced the _wrong feature_ (which is why it was cut). Precise output needs precise input — explore takes a symbol bag for exactly this reason. (`codegraph_trace` was likewise removed: explore-flow does its job and the agent under-picked it.)
 
diff --git a/__tests__/mcp-unindexed.test.ts b/__tests__/mcp-unindexed.test.ts
new file mode 100644
index 000000000..52b4d1ccb
--- /dev/null
+++ b/__tests__/mcp-unindexed.test.ts
@@ -0,0 +1,212 @@
+/**
+ * Unindexed-workspace session policy tests.
+ *
+ * An MCP session attached to a workspace with no .codegraph/ must go quiet
+ * rather than fail loudly: `initialize` returns the short "inactive"
+ * instructions variant (not the full playbook), `tools/list` returns an
+ * EMPTY list, and a tool call that still arrives (cross-project
+ * `projectPath`, or a host that skips tools/list) answers with a
+ * SUCCESS-shaped guidance message — never `isError: true`. One or two early
+ * isError responses teach an agent to abandon codegraph for the whole
+ * session; that observed failure mode is what this suite guards against.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { spawn, ChildProcessWithoutNullStreams } from 'child_process';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { CodeGraph } from '../src';
+import { ToolHandler } from '../src/mcp/tools';
+
+const BIN = path.resolve(__dirname, '../dist/bin/codegraph.js');
+
+function spawnServer(cwd: string): ChildProcessWithoutNullStreams {
+  return spawn(process.execPath, [BIN, 'serve', '--mcp'], {
+    cwd,
+    stdio: ['pipe', 'pipe', 'pipe'],
+    // Direct (in-process) mode — the unindexed path never has a daemon
+    // anyway (the daemon socket lives in .codegraph/), and this keeps the
+    // suite from leaking a detached daemon in the indexed test.
+    env: { ...process.env, CODEGRAPH_NO_DAEMON: '1' },
+  }) as ChildProcessWithoutNullStreams;
+}
+
+/** Send a JSON-RPC request and resolve with the response matching its id. */
+function request(
+  child: ChildProcessWithoutNullStreams,
+  msg: { id: number; method: string; params?: unknown },
+  timeoutMs = 15000
+): Promise<Record<string, unknown>> {
+  return new Promise((resolve, reject) => {
+    let buf = '';
+    const timer = setTimeout(() => {
+      child.stdout.off('data', onData);
+      reject(new Error(`timeout waiting for response id=${msg.id}`));
+    }, timeoutMs);
+    const onData = (chunk: Buffer) => {
+      buf += chunk.toString();
+      let idx: number;
+      while ((idx = buf.indexOf('\n')) !== -1) {
+        const line = buf.slice(0, idx).trim();
+        buf = buf.slice(idx + 1);
+        if (!line) continue;
+        try {
+          const parsed = JSON.parse(line) as Record<string, unknown>;
+          if (parsed.id === msg.id) {
+            clearTimeout(timer);
+            child.stdout.off('data', onData);
+            resolve(parsed);
+            return;
+          }
+        } catch {
+          // non-JSON noise on stdout — ignore
+        }
+      }
+    };
+    child.stdout.on('data', onData);
+    child.stdin.write(JSON.stringify({ jsonrpc: '2.0', ...msg }) + '\n');
+  });
+}
+
+function initializeParams(projectPath: string) {
+  return {
+    protocolVersion: '2025-11-25',
+    capabilities: {},
+    clientInfo: { name: 'test', version: '0.0.0' },
+    rootUri: `file://${projectPath}`,
+  };
+}
+
+describe('Unindexed-workspace session policy', () => {
+  let tempDir: string;
+  let child: ChildProcessWithoutNullStreams | null = null;
+
+  beforeEach(() => {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-unindexed-'));
+  });
+
+  afterEach(() => {
+    if (child) {
+      child.kill('SIGKILL');
+      child = null;
+    }
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  });
+
+  it('initialize returns the short "inactive" instructions, not the playbook', async () => {
+    fs.writeFileSync(path.join(tempDir, 'index.ts'), 'export const x = 1;\n');
+    child = spawnServer(tempDir);
+
+    const res = await request(child, { id: 0, method: 'initialize', params: initializeParams(tempDir) });
+    const instructions = (res.result as { instructions: string }).instructions;
+
+    expect(instructions).toMatch(/inactive/i);
+    expect(instructions).toMatch(/codegraph init/);
+    // The full playbook must NOT be sent into a session where every call fails
+    expect(instructions).not.toMatch(/Tool selection by intent/);
+    expect(instructions).not.toMatch(/codegraph_explore/);
+  });
+
+  it('tools/list returns an EMPTY list when the workspace has no index', async () => {
+    child = spawnServer(tempDir);
+    await request(child, { id: 0, method: 'initialize', params: initializeParams(tempDir) });
+
+    const res = await request(child, { id: 1, method: 'tools/list' });
+    expect((res.result as { tools: unknown[] }).tools).toEqual([]);
+  });
+
+  it('an INDEXED workspace still gets the full playbook and all tools', async () => {
+    fs.writeFileSync(path.join(tempDir, 'index.ts'), 'export function hello(): string { return "hi"; }\n');
+    const cg = await CodeGraph.init(tempDir, { index: true });
+    cg.close();
+
+    child = spawnServer(tempDir);
+    const init = await request(child, { id: 0, method: 'initialize', params: initializeParams(tempDir) });
+    const instructions = (init.result as { instructions: string }).instructions;
+    expect(instructions).toMatch(/Tool selection by intent/);
+    expect(instructions).not.toMatch(/inactive/i);
+
+    const list = await request(child, { id: 1, method: 'tools/list' });
+    const tools = (list.result as { tools: Array<{ name: string }> }).tools;
+    // A 1-file project triggers the pre-existing tiny-repo tool gating (a
+    // reduced core set) — the contract under test is "indexed → tools are
+    // PRESENT", in contrast to the unindexed empty list above.
+    expect(tools.length).toBeGreaterThanOrEqual(3);
+    expect(tools.map((t) => t.name)).toContain('codegraph_explore');
+  });
+});
+
+describe('No-error policy on expected conditions', () => {
+  let tempDir: string;
+
+  beforeEach(() => {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noerror-'));
+  });
+
+  afterEach(() => {
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  });
+
+  it('cross-project query to an unindexed path is SUCCESS-shaped guidance, not isError', async () => {
+    const res = await new ToolHandler(null).execute('codegraph_search', {
+      query: 'anything',
+      projectPath: tempDir,
+    });
+
+    expect(res.isError).toBeUndefined();
+    expect(res.content[0]!.text).toMatch(/isn't indexed/);
+    expect(res.content[0]!.text).toMatch(/codegraph init/);
+    expect(res.content[0]!.text).toMatch(/built-in tools/);
+  });
+
+  it('no-default-project (working-directory detection miss) is SUCCESS-shaped guidance', async () => {
+    const res = await new ToolHandler(null).execute('codegraph_search', { query: 'anything' });
+
+    expect(res.isError).toBeUndefined();
+    expect(res.content[0]!.text).toMatch(/No CodeGraph project is loaded/);
+    expect(res.content[0]!.text).toMatch(/projectPath/);
+  });
+
+  it.runIf(process.platform !== 'win32')(
+    'sensitive-path refusal stays a hard error (no retry encouragement)',
+    async () => {
+      const res = await new ToolHandler(null).execute('codegraph_search', {
+        query: 'anything',
+        projectPath: '/etc',
+      });
+
+      expect(res.isError).toBe(true);
+      expect(res.content[0]!.text).not.toMatch(/retry the call once/);
+    }
+  );
+});
+
+describe('search kind filter', () => {
+  let tempDir: string;
+  let cg: CodeGraph;
+
+  beforeEach(async () => {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-kind-'));
+    fs.writeFileSync(
+      path.join(tempDir, 'types.ts'),
+      'export type PaymentMethod = { id: string };\nexport function pay(): void {}\n'
+    );
+    cg = await CodeGraph.init(tempDir, { index: true });
+  });
+
+  afterEach(() => {
+    cg.close();
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  });
+
+  it("kind: 'type' (the advertised enum value) finds type aliases", async () => {
+    const res = await new ToolHandler(cg).execute('codegraph_search', {
+      query: 'PaymentMethod',
+      kind: 'type',
+    });
+
+    expect(res.isError).toBeUndefined();
+    expect(res.content[0]!.text).toMatch(/PaymentMethod/);
+    expect(res.content[0]!.text).not.toMatch(/No results found/);
+  });
+});
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
index 3e40d2c8d..1e5503614 100644
--- a/src/mcp/server-instructions.ts
+++ b/src/mcp/server-instructions.ts
@@ -71,8 +71,29 @@ typically one to a few calls; a grep/read exploration is dozens.
 
 ## Limitations
 
-- If a tool reports the project isn't initialized, \`.codegraph/\` doesn't exist yet — offer to run \`codegraph init -i\` to build the index.
+- If a tool reports a project isn't indexed (no \`.codegraph/\`), stop calling codegraph tools for that project for the rest of the session and use your built-in tools there instead. Indexing is the user's decision — mention they can run \`codegraph init\` if it comes up, but don't run it yourself.
 - Index lags file writes by ~1 second.
 - Cross-file resolution is best-effort name matching; ambiguous calls may return multiple candidates.
 - No live correctness validation — that's still the TypeScript compiler / test suite / linter's job. Codegraph supplements those with structural context they don't have.
 `;
+
+/**
+ * Instructions variant sent when the workspace has NO codegraph index.
+ *
+ * Sending the full playbook ("lean on codegraph for everything") into a
+ * session where every call would fail wastes the agent's calls and — worse —
+ * the failures teach it codegraph is broken. The unindexed variant is a
+ * short, unambiguous "inactive this session" note; `tools/list` is gated to
+ * empty in the same state, so the agent has nothing to mis-call. Indexing is
+ * deliberately left to the user: the agent is told NOT to run init itself.
+ */
+export const SERVER_INSTRUCTIONS_UNINDEXED = `# Codegraph — inactive (workspace not indexed)
+
+This workspace has no codegraph index (no \`.codegraph/\` directory), so no
+codegraph tools are available this session. Work with your built-in tools as
+usual.
+
+Indexing is the user's decision — do not run it yourself. If the user asks
+about codegraph, they can enable it by running \`codegraph init\` in the
+project root and starting a new session.
+`;
diff --git a/src/mcp/session.ts b/src/mcp/session.ts
index 3b83e24a5..f2b193c12 100644
--- a/src/mcp/session.ts
+++ b/src/mcp/session.ts
@@ -16,8 +16,9 @@ import * as path from 'path';
 import { JsonRpcRequest, JsonRpcNotification, JsonRpcTransport, ErrorCodes } from './transport';
 import { MCPEngine } from './engine';
 import { tools } from './tools';
-import { SERVER_INSTRUCTIONS } from './server-instructions';
+import { SERVER_INSTRUCTIONS, SERVER_INSTRUCTIONS_UNINDEXED } from './server-instructions';
 import { CodeGraphPackageVersion } from './version';
+import { findNearestCodeGraphRoot } from '../directory';
 
 /**
  * MCP Server Info — kept on the session because some clients log it. The
@@ -178,12 +179,24 @@ export class MCPSession {
       explicitPath = this.explicitProjectPath;
     }
 
+    // Pick the instructions variant by the workspace's index state — a cheap
+    // synchronous walk-up (existsSync loop only, no DB open, so the #172
+    // respond-fast contract holds). An unindexed workspace gets the short
+    // "inactive this session" note instead of the full playbook: the playbook
+    // tells the agent to lean on tools that would all fail, and early failures
+    // teach the agent to abandon codegraph entirely. `tools/list` is gated the
+    // same way (empty list when unindexed). When no explicit path is known yet
+    // (roots/list dance pending), cwd is the best predictor of where the
+    // default project will resolve — and on a mismatch the worst case is the
+    // optimistic full playbook backstopped by the empty tool list.
+    const indexed = findNearestCodeGraphRoot(explicitPath ?? process.cwd()) !== null;
+
     // Respond to the handshake BEFORE doing any heavy init — see issue #172.
     this.transport.sendResult(request.id, {
       protocolVersion: PROTOCOL_VERSION,
       capabilities: { tools: {} },
       serverInfo: SERVER_INFO,
-      instructions: SERVER_INSTRUCTIONS,
+      instructions: indexed ? SERVER_INSTRUCTIONS : SERVER_INSTRUCTIONS_UNINDEXED,
     });
 
     if (explicitPath) {
@@ -196,8 +209,15 @@ export class MCPSession {
 
   private async handleToolsList(request: JsonRpcRequest): Promise<void> {
     await this.retryInitIfNeeded();
+    // An unindexed workspace serves an EMPTY tool list: absence is the one
+    // signal an agent can't misread. Listing 8 tools that all fail wastes the
+    // agent's calls and teaches it codegraph is broken (observed: one or two
+    // early isError responses and the agent stops calling codegraph for the
+    // whole session). A `codegraph init` run after the server started is
+    // picked up on the next tools/list — retryInitIfNeeded re-walks — though
+    // most hosts only request the list once per connection.
     this.transport.sendResult(request.id, {
-      tools: this.engine.getToolHandler().getTools(),
+      tools: this.engine.hasDefaultCodeGraph() ? this.engine.getToolHandler().getTools() : [],
     });
   }
 
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 9923ab505..87287829b 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -28,6 +28,25 @@ import {
 } from 'fs';
 import { clamp, validatePathWithinRoot, validateProjectPath, isConfigLeafNode, CONFIG_LEAF_LANGUAGES } from '../utils';
 import { isGeneratedFile } from '../extraction/generated-detection';
+
+/**
+ * An expected, recoverable "codegraph can't serve this" condition — most
+ * importantly a project with no index. The dispatch catch converts these to
+ * SUCCESS-shaped responses (guidance text, NO isError): an `isError: true`
+ * early in a session teaches the agent the toolset is broken and it stops
+ * calling codegraph entirely (observed repeatedly), which is exactly wrong
+ * for conditions the agent can simply work around (use built-in tools for
+ * that codebase / pass projectPath). isError is reserved for "stop trying"
+ * cases: security refusals ({@link PathRefusalError}) and genuine
+ * malfunctions.
+ */
+export class NotIndexedError extends Error {}
+
+/**
+ * A security refusal (sensitive system path). Stays `isError: true` WITHOUT
+ * retry guidance — abandoning this path is the desired agent reaction.
+ */
+export class PathRefusalError extends Error {}
 import { resolve as resolvePath } from 'path';
 
 /** Maximum output length to prevent context bloat (characters) */
@@ -522,7 +541,7 @@ export const tools: ToolDefinition[] = [
       properties: {
         query: {
           type: 'string',
-          description: 'Symbol names, file names, or short code terms to explore (e.g., "AuthService loginUser session-manager", "GraphTraverser BFS impact traversal.ts"). Use codegraph_search first to find relevant names.',
+          description: 'Symbol names, file names, or short code terms to explore (e.g., "AuthService loginUser session-manager", "GraphTraverser BFS impact traversal.ts"). For a flow question, name the symbols spanning the flow (e.g. "mutateElement renderScene"). A natural-language question works too — no prior codegraph_search needed.',
         },
         maxFiles: {
           type: 'number',
@@ -752,14 +771,16 @@ export class ToolHandler {
     if (!projectPath) {
       if (!this.cg) {
         const searched = this.defaultProjectHint ?? process.cwd();
-        throw new Error(
+        throw new NotIndexedError(
           'No CodeGraph project is loaded for this session.\n' +
           `Searched for a .codegraph/ directory starting from: ${searched}\n` +
-          'The index is likely fine — this is a working-directory detection issue: ' +
+          'If this project IS indexed, this is a working-directory detection issue: ' +
           "the MCP client launched the server outside your project and didn't report the " +
           'workspace root. Fix it either way:\n' +
           '  • Pass projectPath to the tool call, e.g. projectPath: "/absolute/path/to/your/project"\n' +
-          '  • Or add --path to the server\'s MCP config args: ["serve", "--mcp", "--path", "/absolute/path/to/your/project"]'
+          '  • Or add --path to the server\'s MCP config args: ["serve", "--mcp", "--path", "/absolute/path/to/your/project"]\n' +
+          'If the project simply has no index, continue with your built-in tools (Read/Grep/Glob) ' +
+          "and don't call codegraph again this session — the user can run 'codegraph init' to enable it."
         );
       }
       return this.cg;
@@ -778,7 +799,7 @@ export class ToolHandler {
     if (existsSync(projectPath)) {
       const pathError = validateProjectPath(projectPath);
       if (pathError) {
-        throw new Error(pathError);
+        throw new PathRefusalError(pathError);
       }
     }
 
@@ -786,7 +807,12 @@ export class ToolHandler {
     const resolvedRoot = findNearestCodeGraphRoot(projectPath);
 
     if (!resolvedRoot) {
-      throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`);
+      throw new NotIndexedError(
+        `The project at ${projectPath} isn't indexed with codegraph (no .codegraph/ directory found ` +
+        'walking up from it), so codegraph cannot query it. Use your built-in tools (Read/Grep/Glob) ' +
+        "for that codebase instead, and don't call codegraph for it again this session. " +
+        "Indexing is the user's decision — they can run 'codegraph init' in that project to enable it."
+      );
     }
 
     // If the path resolves to the default project, reuse the already-open
@@ -1069,7 +1095,21 @@ export class ToolHandler {
       const withWorktree = this.withWorktreeNotice(result, args.projectPath as string | undefined);
       return this.withStalenessNotice(withWorktree, args.projectPath as string | undefined);
     } catch (err) {
-      return this.errorResult(`Tool execution failed: ${err instanceof Error ? err.message : String(err)}`);
+      // Expected condition, not a malfunction: answer as a SUCCESS so the
+      // agent keeps trusting the toolset for projects that ARE indexed.
+      // (An isError here teaches session-long abandonment — see NotIndexedError.)
+      if (err instanceof NotIndexedError) {
+        return this.textResult(err.message);
+      }
+      // Security refusal: a clean error, no retry encouragement.
+      if (err instanceof PathRefusalError) {
+        return this.errorResult(err.message);
+      }
+      return this.errorResult(
+        `Tool execution failed: ${err instanceof Error ? err.message : String(err)}. ` +
+        'This is an internal codegraph error — retry the call once; if it persists, ' +
+        'continue without codegraph for this task.'
+      );
     }
   }
 
@@ -1081,7 +1121,11 @@ export class ToolHandler {
     if (typeof query !== 'string') return query;
 
     const cg = this.getCodeGraph(args.projectPath as string | undefined);
-    const kind = args.kind as string | undefined;
+    const rawKind = args.kind as string | undefined;
+    // The schema enum says 'type' (what agents naturally reach for); the
+    // NodeKind is 'type_alias'. Without the mapping, kind: "type" silently
+    // matched nothing — a filter value we advertise must work.
+    const kind = rawKind === 'type' ? 'type_alias' : rawKind;
     const rawLimit = Number(args.limit) || 10;
     const limit = clamp(rawLimit, 1, 100);
 

From c450fd95b73cc99db050ec707359383c2ec010ac Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 20:50:22 -0500
Subject: [PATCH 45/51] =?UTF-8?q?feat(mcp):=20default=20tool=20surface=20t?=
 =?UTF-8?q?rimmed=20to=204=20=E2=80=94=20explore,=20node,=20search,=20call?=
 =?UTF-8?q?ers=20(#818)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

callees/impact/files/status stay fully functional (handlers, CLI, library
API untouched; CODEGRAPH_MCP_TOOLS re-enables any) but are no longer
LISTED by default. Evidence: codegraph_impact appears in zero recorded
eval runs ever; its blast-radius info already arrives inline on explore
(Blast radius section) and node (dependents note). callees is redundant
by construction (a symbol's body IS its callee list). files/status
"reduce to one grep" per the tiny-repo audit, and staleness banners
already inline pending-sync. callers stays: exhaustive call-site
enumeration (incl. callback registrations, per-definition sections) is
the one job explore/node don't replicate. Fewer tools = fewer mis-picks
and ~300 schema tokens saved per session; presence itself steers.

server-instructions rewritten around the 4-tool surface ("what does X
call" → node body+trail; "what breaks" → callers + inline blast radius).
Tiny-repo gate unchanged (its trio ⊆ the default set); stale gate
comment corrected (context/trace are long gone — its "5 core tools" are
today's trio).

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                         |  1 +
 __tests__/mcp-tool-allowlist.test.ts | 27 +++++++++++++------
 src/mcp/server-instructions.ts       | 11 ++++----
 src/mcp/tools.ts                     | 40 ++++++++++++++++++++++++----
 4 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c0289850..9981cfd3b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
+- **The MCP tool list is now a focused default of four** — `codegraph_explore`, `codegraph_node`, `codegraph_search`, and `codegraph_callers`. The other four (`codegraph_callees`, `codegraph_impact`, `codegraph_files`, `codegraph_status`) remain fully functional — the CLI and library API are unchanged, and `CODEGRAPH_MCP_TOOLS` re-enables any of them — but they're no longer listed to agents by default: measured agent behavior shows they're never or rarely picked, and the information they carry already arrives inline on the tools agents do use (explore's blast-radius section, node's dependents note, a symbol's own body as its callee list). A leaner list saves context tokens every session and steers agents to the right tool by presence alone.
 - **CodeGraph now goes quiet instead of failing loudly in unindexed projects.** When an AI agent's session starts in a workspace that has no CodeGraph index, the MCP server now announces itself as inactive with a short note and lists no tools at all — instead of presenting the full toolset and erroring on every call, which taught agents to distrust CodeGraph even where it works. Querying another project that isn't indexed likewise returns clear guidance (use your regular tools for that codebase; the user can run `codegraph init` there to enable CodeGraph) instead of an error, and genuine internal errors now tell the agent to retry once rather than give up on CodeGraph entirely. Indexing stays your decision — agents are told not to run it themselves. (#769)
 - **Astro projects are now indexed.** `.astro` files previously weren't parsed at all — on a typical Astro site that left most of the codebase invisible to search, impact, and `codegraph_explore`. CodeGraph now extracts the TypeScript frontmatter (functions, imports, `getStaticPaths`, …) and client-side `<script>` blocks, captures function calls and `<Component>` usages in template markup so cross-component dependencies trace end-to-end, resolves the `Astro` global and `astro:*` module imports as framework-provided, and maps `src/pages/` file-based routing to route nodes (`.astro` pages and `.ts` endpoints, including `[param]` and `[...rest]` dynamic segments, with underscore-prefixed files correctly excluded). Validated on two real-world Astro sites with 93% measured cross-file coverage and every page mapping to its route. Thanks @xingwangzhe. (#768) (Astro)
 - Same-named symbols across a monorepo's apps are no longer conflated. In a NestJS-style workspace with one `UserService` per app, `codegraph_callers`, `codegraph_callees`, and `codegraph_impact` now report **one section per distinct definition** — each app's callers and blast radius under its own file-labeled heading — instead of a single merged list, and accept a `file` argument to focus exactly the definition you mean (like `codegraph_node` already did). Impact in particular no longer overstates a change's blast radius by merging unrelated same-named classes. Thanks @Igorgro. (#764)
diff --git a/__tests__/mcp-tool-allowlist.test.ts b/__tests__/mcp-tool-allowlist.test.ts
index a9e7aff65..08067c918 100644
--- a/__tests__/mcp-tool-allowlist.test.ts
+++ b/__tests__/mcp-tool-allowlist.test.ts
@@ -17,13 +17,23 @@ describe('CODEGRAPH_MCP_TOOLS allowlist', () => {
 
   const listed = () => new ToolHandler(null).getTools().map(t => t.name).sort();
 
-  it('exposes the full tool surface when unset', () => {
+  it('exposes the default 4-tool surface when unset', () => {
     delete process.env[ENV];
-    const all = listed();
-    expect(all).toContain('codegraph_explore');
-    expect(all).not.toContain('codegraph_context');
-    expect(all).not.toContain('codegraph_trace');
-    expect(all.length).toBeGreaterThanOrEqual(8);
+    // The default set (see DEFAULT_MCP_TOOLS): explore + node are the
+    // validated workhorses, search the cheap lookup, callers the one
+    // irreplaceable enumerator. callees/impact/files/status stay defined
+    // and executable but unlisted — impact appeared in ZERO recorded runs.
+    expect(listed()).toEqual([
+      'codegraph_callers',
+      'codegraph_explore',
+      'codegraph_node',
+      'codegraph_search',
+    ]);
+  });
+
+  it('re-enables an unlisted tool via the allowlist (impact)', () => {
+    process.env[ENV] = 'explore,impact';
+    expect(listed()).toEqual(['codegraph_explore', 'codegraph_impact']);
   });
 
   it('filters ListTools to the allowlisted short names', () => {
@@ -36,9 +46,10 @@ describe('CODEGRAPH_MCP_TOOLS allowlist', () => {
     expect(listed()).toEqual(['codegraph_explore', 'codegraph_search']);
   });
 
-  it('treats an empty/whitespace value as unset (full surface)', () => {
+  it('treats an empty/whitespace value as unset (default surface)', () => {
     process.env[ENV] = '   ';
-    expect(listed().length).toBeGreaterThanOrEqual(8);
+    expect(listed()).toHaveLength(4);
+    expect(listed()).toContain('codegraph_explore');
   });
 
   it('rejects a disabled tool on execute (defense in depth)', async () => {
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
index 1e5503614..c0cb8ea6e 100644
--- a/src/mcp/server-instructions.ts
+++ b/src/mcp/server-instructions.ts
@@ -47,18 +47,17 @@ typically one to a few calls; a grep/read exploration is dozens.
 - **Almost any question — "how does X work", architecture, a bug, "what/where is X", or surveying an area** → \`codegraph_explore\` (PRIMARY — call FIRST; ONE capped call returns the verbatim source of the relevant symbols grouped by file; most often the ONLY call you need)
 - **"How does X reach/become Y? / the flow / the path from X to Y"** → \`codegraph_explore\`, naming the symbols that span the flow (e.g. \`mutateElement renderScene\`) — it surfaces the call path among them, including dynamic-dispatch hops (callbacks, React re-render, JSX children) grep can't follow
 - **"What is the symbol named X?" (just its location)** → \`codegraph_search\`
-- **"What calls this?" / "What does this call?" / "What would changing this break?"** → \`codegraph_callers\` / \`codegraph_callees\` / \`codegraph_impact\`. Callers includes where a function is **registered as a callback** (passed as an argument, assigned to a function pointer/field, listed in a handler table) — labeled "via callback registration" — so a function with no direct calls is NOT dead if it's wired up somewhere. When several UNRELATED symbols share a name (one \`UserService\` per monorepo app), these tools report **one section per definition** (never a merged list) — pass \`file\` to focus the definition you mean
+- **"What calls this?" / "What would changing this break?"** → \`codegraph_callers\` — EVERY call site with file:line, including where a function is **registered as a callback** (passed as an argument, assigned to a function pointer/field, listed in a handler table) — labeled "via callback registration" — so a function with no direct calls is NOT dead if it's wired up somewhere. When several UNRELATED symbols share a name (one \`UserService\` per monorepo app), it reports **one section per definition** (never a merged list) — pass \`file\` to focus the definition you mean. The wider blast radius arrives automatically on \`codegraph_explore\` (its "Blast radius" section) and \`codegraph_node\` (the dependents note)
+- **"What does this call?"** → \`codegraph_node\` with that symbol and \`includeCode: true\` — the body IS the callee list, and the caller/callee trail comes with it
 - **Reading a source FILE (any time you'd use the \`Read\` tool)** → \`codegraph_node\` with a \`file\` path and no \`symbol\`. It returns the file's **current source with line numbers — the same \`<n>\\t<line>\` shape \`Read\` gives you, safe to \`Edit\` from** — narrowable with \`offset\`/\`limit\` exactly like \`Read\`, PLUS a one-line note of which files depend on it. Same bytes as \`Read\`, faster (served from the index), with the blast radius attached. Use it **instead of \`Read\`** for indexed source files; fall back to \`Read\` only for what codegraph doesn't index (configs, docs). Pass \`symbolsOnly: true\` for just the file's structure.
 - **About to read or edit a symbol you can name** → \`codegraph_node\` with that \`symbol\` (SECONDARY — the after-explore depth tool): the verbatim source (\`includeCode: true\`) PLUS its caller/callee trail, so before changing it you see what calls it and what your edit would break. For an OVERLOADED name it returns EVERY matching definition's body in one call, so you never Read a file to find the right overload
-- **"What's in directory X?"** → \`codegraph_files\`
-- **"Is the index ready / what's its size?"** → \`codegraph_status\`
 
 ## Common chains
 
 - **Flow / "how does X reach Y"**: ONE \`codegraph_explore\` with the symbol names spanning the flow — it surfaces the call path among them (riding dynamic-dispatch hops) AND returns their source. No need to reconstruct the path with \`codegraph_search\` + \`codegraph_callers\`.
 - **Onboarding / understanding any area**: ONE \`codegraph_explore\` is usually the whole answer. Only follow up — \`codegraph_node\` for a specific symbol — if something is still unclear.
-- **Refactor planning**: \`codegraph_search\` → \`codegraph_callers\` → \`codegraph_impact\`. The blast-radius answer comes from impact, not from walking callers manually.
-- **Debugging a regression**: \`codegraph_callers\` of the suspected symbol; widen with \`codegraph_impact\` if an unexpected call appears.
+- **Refactor planning**: \`codegraph_callers\` for the complete call-site list to update; the wider blast radius is already attached to \`codegraph_explore\` / \`codegraph_node\` output.
+- **Debugging a regression**: \`codegraph_callers\` of the suspected symbol; \`codegraph_node\` on anything unexpected that appears.
 
 ## Anti-patterns
 
@@ -67,7 +66,7 @@ typically one to a few calls; a grep/read exploration is dozens.
 - **Don't chain \`codegraph_search\` + \`codegraph_node\`** to understand an area — ONE \`codegraph_explore\` returns the relevant symbols' source together in a single round-trip.
 - **Don't loop \`codegraph_node\` over many symbols** — one \`codegraph_explore\` call returns them all grouped by file, while each separate call re-reads the whole context and costs far more. Use \`codegraph_node\` for a single symbol.
 - **Don't reach for the \`Read\` tool on an indexed source file** — \`codegraph_node\` with a \`file\` reads it for you (same \`<n>\\t<line>\` source, \`offset\`/\`limit\` like Read, faster, with its blast radius), and with a \`symbol\` it returns the source plus the caller/callee trail. Reach for raw \`Read\` only for what codegraph doesn't index (configs, docs) or when the staleness banner flags a file as pending re-index.
-- **After editing, check the staleness banner.** When a tool response starts with "⚠️ Some files referenced below were edited since the last index sync…", the listed files are pending re-index — Read those specific files for accurate content. Every file NOT in that banner is fresh, so still trust codegraph. \`codegraph_status\` also lists pending files under "Pending sync".
+- **After editing, check the staleness banner.** When a tool response starts with "⚠️ Some files referenced below were edited since the last index sync…", the listed files are pending re-index — Read those specific files for accurate content. Every file NOT in that banner is fresh, so still trust codegraph.
 
 ## Limitations
 
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 87287829b..9468d8d64 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -606,11 +606,37 @@ export const tools: ToolDefinition[] = [
  */
 export function getStaticTools(): ToolDefinition[] {
   const raw = process.env.CODEGRAPH_MCP_TOOLS;
-  if (!raw || !raw.trim()) return tools;
+  if (!raw || !raw.trim()) {
+    return tools.filter(t => DEFAULT_MCP_TOOLS.has(t.name.replace(/^codegraph_/, '')));
+  }
   const allow = new Set(raw.split(',').map(s => s.trim().replace(/^codegraph_/, '')).filter(Boolean));
   return allow.size ? tools.filter(t => allow.has(t.name.replace(/^codegraph_/, ''))) : tools;
 }
 
+/**
+ * The MCP tools served by DEFAULT (short names). The other defined tools
+ * (callees, impact, files, status) remain fully functional — handlers stay,
+ * the library API and CLI are untouched, and `CODEGRAPH_MCP_TOOLS` re-enables
+ * any of them — they just aren't LISTED to agents anymore.
+ *
+ * Evidence for the cut (the "adapt the tool to the agent" principle —
+ * fewer tools = fewer mis-picks, and presence itself steers):
+ * - `codegraph_impact` appears in ZERO recorded eval runs ever — its
+ *   blast-radius info already arrives inline on explore (the "Blast radius"
+ *   section) and node (the dependents note), so agents never need the
+ *   standalone tool.
+ * - `codegraph_callees` is redundant by construction: a symbol's body (which
+ *   node returns) IS its callee list, plus the caller/callee trail.
+ * - `codegraph_files` / `codegraph_status`: the tiny-repo audit (see
+ *   getTools) found they "reduce to one grep"; staleness banners already
+ *   inline the pending-sync info on every read tool, and the CLI covers
+ *   diagnostics.
+ * - `codegraph_callers` stays: exhaustive call-site enumeration (every
+ *   caller with file:line, callback registrations labeled, one section per
+ *   same-named definition) is the one job explore/node don't replicate.
+ */
+const DEFAULT_MCP_TOOLS = new Set(['explore', 'node', 'search', 'callers']);
+
 /**
  * Tool handler that executes tools against a CodeGraph instance
  *
@@ -703,9 +729,12 @@ export class ToolHandler {
    */
   getTools(): ToolDefinition[] {
     const allow = this.toolAllowlist();
+    // No explicit allowlist → the default 4-tool surface (see
+    // DEFAULT_MCP_TOOLS for the evidence). An allowlist replaces the
+    // default entirely, so any defined tool can be re-enabled.
     let visible = allow
       ? tools.filter(t => allow.has(t.name.replace(/^codegraph_/, '')))
-      : tools;
+      : tools.filter(t => DEFAULT_MCP_TOOLS.has(t.name.replace(/^codegraph_/, '')));
     if (!this.cg) return visible;
 
     try {
@@ -713,9 +742,10 @@ export class ToolHandler {
       const budget = getExploreBudget(stats.fileCount);
 
       // Tiny-repo tool gating: on projects under TINY_REPO_FILE_THRESHOLD
-      // files, only expose the 5 core tools (search, context, node,
-      // explore, trace). The 5 omitted tools (callers, callees, impact,
-      // status, files) reduce to one grep at this scale.
+      // files, only expose the core trio (search, node, explore) — one
+      // below even the 4-tool default: at this scale callers, too, reduces
+      // to one grep. (Historical note: the audit below ran when context and
+      // trace still existed; its "5 core tools" are today's trio.)
       //
       // n=2 audits ruled out cutting below 5 tools:
       // - 3-tool gate (search + context + trace): cost regressed on

From 8170d181f258358927f82514adbc1b5b6ae9ce42 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Thu, 11 Jun 2026 21:50:41 -0500
Subject: [PATCH 46/51] =?UTF-8?q?feat(cli+installer):=20codegraph=20explor?=
 =?UTF-8?q?e/node=20CLI=20+=20instructions-file=20block=20=E2=80=94=20suba?=
 =?UTF-8?q?gent=20&=20non-MCP=20reach=20(#704)=20(#819)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Task-tool subagents never see the MCP initialize instructions and hold
the MCP tools only as deferred names they rarely think to load — so
delegated work bypassed codegraph almost entirely (measured ~1 of 9
forced-delegation runs touched it; the rest did 30-50 grep/read calls).
Two additions close the gap:

- CLI: `codegraph explore` and `codegraph node` call the same ToolHandler
  as the MCP tools and print identical output — the graph for any agent
  with a shell (subagents, Gemini CLI, raw Codex, humans).
- Installer: each agent target (claude/codex/gemini/opencode) writes a
  short marker-fenced CodeGraph section into its instructions file —
  the one channel subagents DO receive — naming both surfaces. Upsert
  self-heals the stale pre-#529 long block; uninstall strips it; re-runs
  are byte-equal and report unchanged. (#529's duplication argument
  bounded the size: four lines, commands only.)

A/B (excalidraw, sonnet/high, forced Explore-agent delegation): without
the block, subagent codegraph usage ~1/9 runs; with it, 4/4 — subagents
ToolSearch-load the MCP tools and run explore 5-7x, best runs with ZERO
Read/grep (80-95s vs 150-197s baseline). The block's mechanism: the
parent relays the note into the task prompt, making the deferred tool
names salient.

Contract tests updated to the new expectations (write + self-heal
replace the #529 strip-only behavior); README install/guidance sections
refreshed (they also still described the pre-#817/#818 tool surface).

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                           |  1 +
 README.md                              | 12 ++--
 __tests__/installer-targets.test.ts    | 71 +++++++++++--------
 src/bin/codegraph.ts                   | 97 ++++++++++++++++++++++++++
 src/installer/instructions-template.ts | 44 +++++++++---
 src/installer/targets/claude.ts        | 16 ++---
 src/installer/targets/codex.ts         | 10 +--
 src/installer/targets/gemini.ts        | 10 +--
 src/installer/targets/opencode.ts      | 10 +--
 src/installer/targets/shared.ts        | 25 +++++++
 10 files changed, 226 insertions(+), 70 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9981cfd3b..fbf4c5d4e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### New Features
 
+- **Subagents and non-MCP agents can now reach CodeGraph.** Two new CLI commands — `codegraph explore "<symbols or question>"` and `codegraph node <symbol-or-file>` — print exactly what the matching MCP tools return (relevant symbols' source + call paths; one symbol's source + callers; file reads with line numbers), so any agent with a shell can use the graph. And `codegraph install` now writes a small marker-fenced CodeGraph section into each agent's instructions file (`CLAUDE.md` / `AGENTS.md` / `GEMINI.md`) pointing at both surfaces — that file is what Task-tool subagents actually see, where the MCP server's own guidance only reaches the main agent. Measured on a delegated code-exploration task: subagents went from almost never using CodeGraph (~1 in 9 runs) to using it in every run, including runs with zero grep/file-reading fallback. The section is small, survives your own content, upgrades cleanly from the old long block, and `codegraph uninstall` removes it. Thanks @liuyao37511. (#704)
 - **The MCP tool list is now a focused default of four** — `codegraph_explore`, `codegraph_node`, `codegraph_search`, and `codegraph_callers`. The other four (`codegraph_callees`, `codegraph_impact`, `codegraph_files`, `codegraph_status`) remain fully functional — the CLI and library API are unchanged, and `CODEGRAPH_MCP_TOOLS` re-enables any of them — but they're no longer listed to agents by default: measured agent behavior shows they're never or rarely picked, and the information they carry already arrives inline on the tools agents do use (explore's blast-radius section, node's dependents note, a symbol's own body as its callee list). A leaner list saves context tokens every session and steers agents to the right tool by presence alone.
 - **CodeGraph now goes quiet instead of failing loudly in unindexed projects.** When an AI agent's session starts in a workspace that has no CodeGraph index, the MCP server now announces itself as inactive with a short note and lists no tools at all — instead of presenting the full toolset and erroring on every call, which taught agents to distrust CodeGraph even where it works. Querying another project that isn't indexed likewise returns clear guidance (use your regular tools for that codebase; the user can run `codegraph init` there to enable CodeGraph) instead of an error, and genuine internal errors now tell the agent to retry once rather than give up on CodeGraph entirely. Indexing stays your decision — agents are told not to run it themselves. (#769)
 - **Astro projects are now indexed.** `.astro` files previously weren't parsed at all — on a typical Astro site that left most of the codebase invisible to search, impact, and `codegraph_explore`. CodeGraph now extracts the TypeScript frontmatter (functions, imports, `getStaticPaths`, …) and client-side `<script>` blocks, captures function calls and `<Component>` usages in template markup so cross-component dependencies trace end-to-end, resolves the `Astro` global and `astro:*` module imports as framework-provided, and maps `src/pages/` file-based routing to route nodes (`.astro` pages and `.ts` endpoints, including `[param]` and `[...rest]` dynamic segments, with underscore-prefixed files correctly excluded). Validated on two real-world Astro sites with 93% measured cross-file coverage and every page mapping to its route. Thanks @xingwangzhe. (#768) (Astro)
diff --git a/README.md b/README.md
index e5e0088ae..08ef5bc35 100644
--- a/README.md
+++ b/README.md
@@ -326,7 +326,7 @@ The installer will:
 - Ask which agent(s) to configure — auto-detects installed ones from: **Claude Code**, **Cursor**, **Codex CLI**, **opencode**, **Hermes Agent**, **Gemini CLI**, **Antigravity IDE**, **Kiro**
 - Prompt to install `codegraph` on your PATH (so agents can launch the MCP server)
 - Ask whether configs apply to all your projects or just this one
-- Write each chosen agent's MCP server config (the codegraph usage guide is delivered by the MCP server itself, so no instructions file is added to `CLAUDE.md` / `AGENTS.md` / etc.)
+- Write each chosen agent's MCP server config, plus a small marker-fenced CodeGraph section in the agent's instructions file (`CLAUDE.md` / `AGENTS.md` / `GEMINI.md`) — that's how subagents and non-MCP agents learn the `codegraph explore` / `codegraph node` commands, since the MCP server's own guidance only reaches the main agent. Removed cleanly by `codegraph uninstall`.
 - Set up auto-allow permissions when Claude Code is one of the targets
 - Initialize your current project (local installs only)
 
@@ -406,14 +406,14 @@ npm install -g @colbymchenry/codegraph
 <details>
 <summary><strong>Agent Tool Guidance</strong></summary>
 
-CodeGraph's MCP server delivers its usage guidance to your agent **automatically**, in the MCP `initialize` response — there's no instructions file to manage and nothing is added to your `CLAUDE.md` / `AGENTS.md` / `GEMINI.md`. In short, it tells the agent to:
+CodeGraph's MCP server delivers its usage guidance to your agent **automatically**, in the MCP `initialize` response. In short, it tells the agent to:
 
 - **Answer structural questions directly with CodeGraph** — it *is* the pre-built index, so a grep/read loop just repeats work it already did. Treat the returned source as already read.
-- **Pick the tool by intent:** `codegraph_explore` for almost anything — "how does X work", a flow/"how does X reach Y", or surveying an area (one call returns the relevant symbols' source grouped by file); `codegraph_search` to just locate a symbol; `codegraph_callers`/`codegraph_callees` to walk call flow; `codegraph_impact` before editing; `codegraph_node` for one specific symbol's full source (it returns every overload for an ambiguous name).
+- **Pick the tool by intent:** `codegraph_explore` for almost anything — "how does X work", a flow/"how does X reach Y", or surveying an area (one call returns the relevant symbols' source grouped by file); `codegraph_search` to just locate a symbol; `codegraph_callers` for every call site (including callback registrations); `codegraph_node` for one symbol's full source + callers, or to read a file like the Read tool.
 - **Trust the results — don't re-verify with grep**, and check the staleness banner after edits.
-- If `.codegraph/` doesn't exist yet, offer to run `codegraph init -i`.
+- In a workspace with no index, CodeGraph announces itself inactive and serves no tools — indexing stays your decision.
 
-The exact text is `src/mcp/server-instructions.ts` — the single source of truth.
+The exact text is `src/mcp/server-instructions.ts` — the single source of truth for the main agent. Because subagents and non-MCP harnesses never see the MCP guidance, the installer also writes a four-line marker-fenced section into the agent's instructions file pointing at the `codegraph explore` / `codegraph node` CLI equivalents.
 
 </details>
 
@@ -464,6 +464,8 @@ codegraph index [path]            # Full index (--force to re-index, --quiet for
 codegraph sync [path]             # Incremental update
 codegraph status [path]           # Show statistics
 codegraph query <search>          # Search symbols (--kind, --limit, --json)
+codegraph explore <query>         # Relevant symbols' source + call paths in one shot (same output as the codegraph_explore MCP tool)
+codegraph node <symbol|file>      # One symbol's source + callers, or read a file with line numbers (same output as codegraph_node)
 codegraph files [path]            # Show file structure (--format, --filter, --max-depth, --json)
 codegraph callers <symbol>        # Find what calls a function/method (--limit, --json)
 codegraph callees <symbol>        # Find what a function/method calls (--limit, --json)
diff --git a/__tests__/installer-targets.test.ts b/__tests__/installer-targets.test.ts
index 27fcbd6e8..eed76515e 100644
--- a/__tests__/installer-targets.test.ts
+++ b/__tests__/installer-targets.test.ts
@@ -192,20 +192,23 @@ describe('Installer targets — partial-state idempotency', () => {
     fs.rmSync(tmpCwd, { recursive: true, force: true });
   });
 
-  it('codex: install writes config.toml but never an AGENTS.md instructions file (#529)', () => {
+  it('codex: install writes config.toml AND the AGENTS.md codegraph block (#704)', () => {
     const codex = getTarget('codex')!;
     const first = codex.install('global', { autoAllow: false });
     const agentsMd = path.join(tmpHome, '.codex', 'AGENTS.md');
-    // No instructions file is created, and no file action references it.
-    expect(fs.existsSync(agentsMd)).toBe(false);
-    expect(first.files.some((f) => f.path.endsWith('AGENTS.md'))).toBe(false);
     expect(first.files.some((f) => f.path.endsWith('config.toml'))).toBe(true);
-    // Re-install is fully unchanged (config.toml only, nothing to strip).
+    // The short instructions block IS written (subagents / non-MCP
+    // harnesses read AGENTS.md but never the MCP initialize instructions).
+    expect(fs.existsSync(agentsMd)).toBe(true);
+    const body = fs.readFileSync(agentsMd, 'utf-8');
+    expect(body).toContain('## CodeGraph');
+    expect(body).toContain('codegraph explore');
+    // Re-install is fully unchanged (byte-equal block → idempotent).
     const second = codex.install('global', { autoAllow: false });
     for (const f of second.files) expect(f.action).toBe('unchanged');
   });
 
-  it('codex: install strips a legacy AGENTS.md codegraph block, keeping user content (#529)', () => {
+  it('codex: install replaces a legacy AGENTS.md codegraph block with the current one, keeping user content', () => {
     const codex = getTarget('codex')!;
     const dir = path.join(tmpHome, '.codex');
     fs.mkdirSync(dir, { recursive: true });
@@ -217,10 +220,11 @@ describe('Installer targets — partial-state idempotency', () => {
     const body = fs.readFileSync(agentsMd, 'utf-8');
     expect(body).toContain('# My codex notes');
     expect(body).toContain('Be terse.');
-    expect(body).not.toContain('CODEGRAPH_START');
-    // The strip is reported as a 'removed' action on AGENTS.md.
+    // Self-heal: the stale pre-#529 body is gone, the current block is in.
+    expect(body).not.toContain('Prefer `codegraph_search`');
+    expect(body).toContain('codegraph explore');
     const mdEntry = result.files.find((f) => f.path.endsWith('AGENTS.md'));
-    expect(mdEntry?.action).toBe('removed');
+    expect(mdEntry?.action).toBe('updated');
   });
 
   it('opencode: prefers .jsonc when both .json and .jsonc exist', () => {
@@ -290,15 +294,16 @@ describe('Installer targets — partial-state idempotency', () => {
     expect(fs.readFileSync(file, 'utf-8')).toBe(afterInstall);
   });
 
-  it('opencode: install does NOT write an AGENTS.md instructions file (#529)', () => {
+  it('opencode: install writes the AGENTS.md codegraph block (#704)', () => {
     const opencode = getTarget('opencode')!;
     const result = opencode.install('global', { autoAllow: true });
     const agentsMd = path.join(tmpHome, '.config', 'opencode', 'AGENTS.md');
-    expect(fs.existsSync(agentsMd)).toBe(false);
-    expect(result.files.some((f) => f.path.endsWith('AGENTS.md'))).toBe(false);
+    expect(fs.existsSync(agentsMd)).toBe(true);
+    expect(fs.readFileSync(agentsMd, 'utf-8')).toContain('codegraph explore');
+    expect(result.files.find((f) => f.path.endsWith('AGENTS.md'))?.action).toBe('created');
   });
 
-  it('opencode: install strips a legacy AGENTS.md codegraph block, preserving user content (#529)', () => {
+  it('opencode: install replaces a legacy AGENTS.md codegraph block, preserving user content', () => {
     const opencode = getTarget('opencode')!;
     const dir = path.join(tmpHome, '.config', 'opencode');
     fs.mkdirSync(dir, { recursive: true });
@@ -310,8 +315,9 @@ describe('Installer targets — partial-state idempotency', () => {
     const body = fs.readFileSync(agentsMd, 'utf-8');
     expect(body).toContain('# My personal opencode instructions');
     expect(body).toContain('Always respond in pirate.');
-    expect(body).not.toContain('CODEGRAPH_START');
-    expect(result.files.find((f) => f.path.endsWith('AGENTS.md'))?.action).toBe('removed');
+    expect(body).not.toContain('Prefer `codegraph_search`');
+    expect(body).toContain('codegraph explore');
+    expect(result.files.find((f) => f.path.endsWith('AGENTS.md'))?.action).toBe('updated');
   });
 
   it('opencode: uninstall strips a leftover codegraph block from AGENTS.md, keeping user content', () => {
@@ -329,24 +335,25 @@ describe('Installer targets — partial-state idempotency', () => {
     expect(body).not.toContain('CODEGRAPH_START');
   });
 
-  it('opencode: local install writes ./opencode.jsonc and never an ./AGENTS.md (#529)', () => {
+  it('opencode: local install writes ./opencode.jsonc and the ./AGENTS.md block (#704)', () => {
     const opencode = getTarget('opencode')!;
     const result = opencode.install('local', { autoAllow: true });
     const paths = result.files.map((f) => f.path.replace(/\\/g, '/'));
     // macOS realpath shenanigans (/var vs /private/var) — suffix match.
     expect(paths.some((p) => p.endsWith('/opencode.jsonc'))).toBe(true);
-    expect(paths.some((p) => p.endsWith('/AGENTS.md'))).toBe(false);
-    expect(fs.existsSync(path.join(process.cwd(), 'AGENTS.md'))).toBe(false);
+    expect(paths.some((p) => p.endsWith('/AGENTS.md'))).toBe(true);
+    expect(fs.existsSync(path.join(process.cwd(), 'AGENTS.md'))).toBe(true);
   });
 
-  it('gemini: install writes settings.json (mcpServers.codegraph) and no GEMINI.md (#529)', () => {
+  it('gemini: install writes settings.json (mcpServers.codegraph) and the GEMINI.md block (#704)', () => {
     const gemini = getTarget('gemini')!;
     const result = gemini.install('global', { autoAllow: true });
     const settings = path.join(tmpHome, '.gemini', 'settings.json');
     const geminiMd = path.join(tmpHome, '.gemini', 'GEMINI.md');
     expect(result.files.some((f) => f.path === settings)).toBe(true);
-    expect(result.files.some((f) => f.path === geminiMd)).toBe(false);
-    expect(fs.existsSync(geminiMd)).toBe(false);
+    expect(result.files.some((f) => f.path === geminiMd)).toBe(true);
+    expect(fs.existsSync(geminiMd)).toBe(true);
+    expect(fs.readFileSync(geminiMd, 'utf-8')).toContain('codegraph explore');
 
     const cfg = JSON.parse(fs.readFileSync(settings, 'utf-8'));
     expect(cfg.mcpServers.codegraph).toEqual({ type: 'stdio', command: 'codegraph', args: ['serve', '--mcp'] });
@@ -383,13 +390,13 @@ describe('Installer targets — partial-state idempotency', () => {
     expect(after.mcpServers).toBeUndefined();
   });
 
-  it('gemini: local install writes ./.gemini/settings.json and never a ./GEMINI.md (#529)', () => {
+  it('gemini: local install writes ./.gemini/settings.json and the project-root ./GEMINI.md block (#704)', () => {
     const gemini = getTarget('gemini')!;
     const result = gemini.install('local', { autoAllow: true });
     const paths = result.files.map((f) => f.path.replace(/\\/g, '/'));
     expect(paths.some((p) => p.endsWith('/.gemini/settings.json'))).toBe(true);
-    expect(paths.some((p) => p.endsWith('/GEMINI.md'))).toBe(false);
-    expect(fs.existsSync(path.join(process.cwd(), 'GEMINI.md'))).toBe(false);
+    expect(paths.some((p) => p.endsWith('/GEMINI.md'))).toBe(true);
+    expect(fs.existsSync(path.join(process.cwd(), 'GEMINI.md'))).toBe(true);
   });
 
   it('gemini: uninstall strips a leftover GEMINI.md codegraph block, keeping user content', () => {
@@ -880,15 +887,18 @@ describe('Installer targets — partial-state idempotency', () => {
     expect(cfg.mcpServers.codegraph).toBeDefined();
   });
 
-  it('claude: install does NOT create a CLAUDE.md instructions file (#529)', () => {
+  it('claude: install creates the CLAUDE.md codegraph block (#704)', () => {
     const claude = getTarget('claude')!;
     const result = claude.install('local', { autoAllow: false });
     const claudeMd = path.join(tmpCwd, '.claude', 'CLAUDE.md');
-    expect(fs.existsSync(claudeMd)).toBe(false);
-    expect(result.files.some((f) => f.path.endsWith('CLAUDE.md'))).toBe(false);
+    expect(fs.existsSync(claudeMd)).toBe(true);
+    const body = fs.readFileSync(claudeMd, 'utf-8');
+    expect(body).toContain('## CodeGraph');
+    expect(body).toContain('codegraph explore');
+    expect(result.files.find((f) => f.path.endsWith('CLAUDE.md'))?.action).toBe('created');
   });
 
-  it('claude: install strips a legacy CLAUDE.md codegraph block, keeping user content (#529)', () => {
+  it('claude: install replaces a legacy CLAUDE.md codegraph block, keeping user content', () => {
     const claude = getTarget('claude')!;
     const claudeMd = path.join(tmpCwd, '.claude', 'CLAUDE.md');
     fs.mkdirSync(path.dirname(claudeMd), { recursive: true });
@@ -899,8 +909,9 @@ describe('Installer targets — partial-state idempotency', () => {
     const body = fs.readFileSync(claudeMd, 'utf-8');
     expect(body).toContain('# My project rules');
     expect(body).toContain('Use tabs.');
-    expect(body).not.toContain('CODEGRAPH_START');
-    expect(result.files.find((f) => f.path.endsWith('CLAUDE.md'))?.action).toBe('removed');
+    expect(body).not.toContain('Prefer `codegraph_search`');
+    expect(body).toContain('codegraph explore');
+    expect(result.files.find((f) => f.path.endsWith('CLAUDE.md'))?.action).toBe('updated');
   });
 
   it('claude: global install targets ~/.claude.json (user scope)', () => {
diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts
index 879dbb078..a4fcbde6a 100644
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -896,6 +896,103 @@ program
     }
   });
 
+/**
+ * codegraph explore <query...>
+ *
+ * The CLI face of the MCP codegraph_explore tool — same handler, same
+ * output (source of the relevant symbols grouped by file + the call path
+ * among them). Exists so agents WITHOUT the MCP tools — Task-tool
+ * subagents (which don't inherit MCP tools, #704) and non-MCP harnesses —
+ * can reach the graph through a plain shell command.
+ */
+program
+  .command('explore <query...>')
+  .description('Explore an area: relevant symbols\' source + call paths in one shot (same output as the codegraph_explore MCP tool)')
+  .option('-p, --path <path>', 'Project path')
+  .option('--max-files <number>', 'Maximum number of files to include source from')
+  .action(async (queryParts: string[], options: { path?: string; maxFiles?: string }) => {
+    const projectPath = resolveProjectPath(options.path);
+
+    try {
+      if (!isInitialized(projectPath)) {
+        error(`CodeGraph not initialized in ${projectPath} — run 'codegraph init' first.`);
+        process.exit(1);
+      }
+
+      const { default: CodeGraph } = await loadCodeGraph();
+      const cg = await CodeGraph.open(projectPath);
+      const { ToolHandler } = await import('../mcp/tools');
+      const handler = new ToolHandler(cg);
+
+      const args: Record<string, unknown> = { query: queryParts.join(' ') };
+      if (options.maxFiles) args.maxFiles = parseInt(options.maxFiles, 10);
+      const result = await handler.execute('codegraph_explore', args);
+
+      console.log(result.content[0]?.text ?? '');
+      cg.destroy();
+      if (result.isError) process.exit(1);
+    } catch (err) {
+      error(`Explore failed: ${err instanceof Error ? err.message : String(err)}`);
+      process.exit(1);
+    }
+  });
+
+/**
+ * codegraph node <name>
+ *
+ * The CLI face of the MCP codegraph_node tool: one symbol's source +
+ * caller/callee trail, or a whole file with line numbers + dependents
+ * (Read-parity). Same subagent/non-MCP rationale as `explore`.
+ */
+program
+  .command('node <name>')
+  .description('One symbol\'s source + caller/callee trail, or read a file with line numbers + dependents (same output as the codegraph_node MCP tool)')
+  .option('-p, --path <path>', 'Project path')
+  .option('-f, --file <file>', 'Treat as file mode (or disambiguate a symbol to this file)')
+  .option('--offset <number>', 'File mode: 1-based start line')
+  .option('--limit <number>', 'File mode: maximum lines')
+  .option('--symbols-only', 'File mode: just the symbol map + dependents')
+  .action(async (name: string, options: { path?: string; file?: string; offset?: string; limit?: string; symbolsOnly?: boolean }) => {
+    const projectPath = resolveProjectPath(options.path);
+
+    try {
+      if (!isInitialized(projectPath)) {
+        error(`CodeGraph not initialized in ${projectPath} — run 'codegraph init' first.`);
+        process.exit(1);
+      }
+
+      const { default: CodeGraph } = await loadCodeGraph();
+      const cg = await CodeGraph.open(projectPath);
+      const { ToolHandler } = await import('../mcp/tools');
+      const handler = new ToolHandler(cg);
+
+      // A name with a path separator is a file read; otherwise a symbol
+      // (use --file for basename-only file reads or to pin an overload).
+      const args: Record<string, unknown> = {};
+      if (options.file) {
+        args.file = options.file;
+        if (name && name !== options.file) args.symbol = name;
+      } else if (name.includes('/')) {
+        args.file = name;
+      } else {
+        args.symbol = name;
+        args.includeCode = true;
+      }
+      if (options.offset) args.offset = parseInt(options.offset, 10);
+      if (options.limit) args.limit = parseInt(options.limit, 10);
+      if (options.symbolsOnly) args.symbolsOnly = true;
+
+      const result = await handler.execute('codegraph_node', args);
+
+      console.log(result.content[0]?.text ?? '');
+      cg.destroy();
+      if (result.isError) process.exit(1);
+    } catch (err) {
+      error(`Node lookup failed: ${err instanceof Error ? err.message : String(err)}`);
+      process.exit(1);
+    }
+  });
+
 /**
  * codegraph files [path]
  */
diff --git a/src/installer/instructions-template.ts b/src/installer/instructions-template.ts
index e4040927c..74fa7ae09 100644
--- a/src/installer/instructions-template.ts
+++ b/src/installer/instructions-template.ts
@@ -1,18 +1,40 @@
 /**
- * Marker constants for the legacy agent-instructions block.
+ * The marker-fenced agent-instructions block the installer writes into each
+ * agent's instructions file (CLAUDE.md / AGENTS.md / GEMINI.md).
  *
- * Codegraph used to write a `## CodeGraph` usage guide into each
- * agent's instructions file (CLAUDE.md / AGENTS.md / GEMINI.md /
- * codegraph.mdc / Kiro steering doc). That duplicated the guidance the
- * MCP server already emits in its `initialize` response — every agent
- * read the same playbook twice each turn (issue #529). The installer no
- * longer writes an instructions file; the MCP server instructions in
- * `mcp/server-instructions.ts` are the single source of truth.
+ * History: pre-#529 the installer wrote a full usage playbook here, which
+ * duplicated the MCP `initialize` instructions for the main agent — so it
+ * was removed and `mcp/server-instructions.ts` became the single source of
+ * truth. A much smaller block returned for #704, because the MCP
+ * instructions cannot reach two audiences that the instructions FILE does
+ * reach:
  *
- * These markers are retained so install (self-heal on upgrade) and
- * uninstall can find and strip the block a previous install wrote.
+ *  - **Task-tool subagents** — they receive the project instructions file
+ *    in their context but NOT the MCP initialize instructions. They hold
+ *    the codegraph MCP tools only as deferred names and rarely think to
+ *    load them: measured on a forced-delegation flow question (excalidraw,
+ *    sonnet, high effort), subagents loaded + used codegraph in ~1 of 9
+ *    runs without this block, and consistently with it — including runs
+ *    with zero Read/grep fallback.
+ *  - **Non-MCP harnesses** — agents with no MCP client at all can still
+ *    run the `codegraph explore` / `codegraph node` CLI, which prints the
+ *    same output as the MCP tools.
+ *
+ * Keep this block SHORT. The main agent reads it every turn on top of the
+ * server instructions — the #529 duplication-cost argument still bounds
+ * its size. Command names and the two surfaces, nothing more.
  */
 
-/** Markers used by the marker-based section removal. */
+/** Markers used by the marker-based section write/removal. */
 export const CODEGRAPH_SECTION_START = '<!-- CODEGRAPH_START -->';
 export const CODEGRAPH_SECTION_END = '<!-- CODEGRAPH_END -->';
+
+/** The full block, markers included, exactly as written to disk. */
+export const CODEGRAPH_INSTRUCTIONS_BLOCK = `${CODEGRAPH_SECTION_START}
+## CodeGraph
+
+This repository is indexed by CodeGraph — a pre-built code knowledge graph. Reach for it BEFORE grep/find or reading files when you need to understand or locate code:
+
+- **MCP tools** (when available): \`codegraph_explore\` answers most code questions in one call — the relevant symbols' verbatim source plus the call paths between them. \`codegraph_node\` returns one symbol's source + callers, or reads a whole file with line numbers. If the tools are listed but deferred, load them by name via tool search.
+- **Shell** (always works): \`codegraph explore "<symbol names or question>"\` and \`codegraph node <symbol-or-file>\` print the same output.
+${CODEGRAPH_SECTION_END}`;
diff --git a/src/installer/targets/claude.ts b/src/installer/targets/claude.ts
index 3259dea1b..bd65d9274 100644
--- a/src/installer/targets/claude.ts
+++ b/src/installer/targets/claude.ts
@@ -34,6 +34,7 @@ import {
   readJsonFile,
   removeMarkedSection,
   writeJsonFile,
+  upsertInstructionsEntry,
 } from './shared';
 import {
   CODEGRAPH_SECTION_END,
@@ -120,15 +121,12 @@ class ClaudeCodeTarget implements AgentTarget {
     const hookCleanup = cleanupLegacyHooks(loc);
     if (hookCleanup.action === 'removed') files.push(hookCleanup);
 
-    // 3. CLAUDE.md instructions — no longer written. The codegraph
-    // usage guidance now ships solely in the MCP server's `initialize`
-    // response (see `mcp/server-instructions.ts`), which Claude Code
-    // surfaces in the system prompt automatically. Writing it into
-    // CLAUDE.md as well meant the agent read the same playbook twice
-    // every turn (issue #529). Strip any block a previous install left
-    // behind so an upgrade self-heals — same idiom as the hook cleanup.
-    const instrCleanup = removeInstructionsEntry(loc);
-    if (instrCleanup.action === 'removed') files.push(instrCleanup);
+    // 3. CLAUDE.md instructions — the short marker-fenced CodeGraph
+    // block (#704). The MCP initialize instructions reach only the main
+    // agent; CLAUDE.md is what Task-tool subagents (and non-MCP
+    // harnesses) actually see, so the block carries the codegraph
+    // pointers there. Upsert self-heals a stale pre-#529 long block.
+    files.push(upsertInstructionsEntry(instructionsPath(loc)));
 
     return { files };
   }
diff --git a/src/installer/targets/codex.ts b/src/installer/targets/codex.ts
index ccd9bf64e..0a2b8c9c8 100644
--- a/src/installer/targets/codex.ts
+++ b/src/installer/targets/codex.ts
@@ -28,6 +28,7 @@ import {
   atomicWriteFileSync,
   getMcpServerConfig,
   removeMarkedSection,
+  upsertInstructionsEntry,
 } from './shared';
 import {
   CODEGRAPH_SECTION_END,
@@ -83,11 +84,10 @@ class CodexTarget implements AgentTarget {
 
     files.push(writeMcpEntry());
 
-    // AGENTS.md is no longer written — the codegraph usage guidance
-    // ships in the MCP server's `initialize` response (issue #529).
-    // Strip a block a previous install left so an upgrade self-heals.
-    const instrCleanup = removeInstructionsEntry();
-    if (instrCleanup.action === 'removed') files.push(instrCleanup);
+    // AGENTS.md gets the short marker-fenced CodeGraph block (#704):
+    // subagents and non-MCP harnesses read AGENTS.md but never the MCP
+    // initialize instructions. Upsert self-heals a stale pre-#529 block.
+    files.push(upsertInstructionsEntry(instructionsPath()));
 
     return { files };
   }
diff --git a/src/installer/targets/gemini.ts b/src/installer/targets/gemini.ts
index b6cc3bdd5..6db543f39 100644
--- a/src/installer/targets/gemini.ts
+++ b/src/installer/targets/gemini.ts
@@ -38,6 +38,7 @@ import {
   readJsonFile,
   removeMarkedSection,
   writeJsonFile,
+  upsertInstructionsEntry,
 } from './shared';
 import {
   CODEGRAPH_SECTION_END,
@@ -84,11 +85,10 @@ class GeminiTarget implements AgentTarget {
     const files: WriteResult['files'] = [];
     files.push(writeMcpEntry(loc));
 
-    // GEMINI.md is no longer written — the codegraph usage guidance
-    // ships in the MCP server's `initialize` response (issue #529).
-    // Strip a block a previous install left so an upgrade self-heals.
-    const instrCleanup = removeInstructionsEntry(loc);
-    if (instrCleanup.action === 'removed') files.push(instrCleanup);
+    // GEMINI.md gets the short marker-fenced CodeGraph block (#704):
+    // subagents and non-MCP harnesses read GEMINI.md but never the MCP
+    // initialize instructions. Upsert self-heals a stale pre-#529 block.
+    files.push(upsertInstructionsEntry(instructionsPath(loc)));
 
     return { files };
   }
diff --git a/src/installer/targets/opencode.ts b/src/installer/targets/opencode.ts
index 5ec97436d..2db070960 100644
--- a/src/installer/targets/opencode.ts
+++ b/src/installer/targets/opencode.ts
@@ -41,6 +41,7 @@ import {
   atomicWriteFileSync,
   jsonDeepEqual,
   removeMarkedSection,
+  upsertInstructionsEntry,
 } from './shared';
 import {
   CODEGRAPH_SECTION_END,
@@ -127,11 +128,10 @@ class OpencodeTarget implements AgentTarget {
     const files: WriteResult['files'] = [];
     files.push(writeMcpEntry(loc));
 
-    // AGENTS.md is no longer written — the codegraph usage guidance
-    // ships in the MCP server's `initialize` response (issue #529).
-    // Strip a block a previous install left so an upgrade self-heals.
-    const instrCleanup = removeInstructionsEntry(loc);
-    if (instrCleanup.action === 'removed') files.push(instrCleanup);
+    // AGENTS.md gets the short marker-fenced CodeGraph block (#704):
+    // subagents and non-MCP harnesses read AGENTS.md but never the MCP
+    // initialize instructions. Upsert self-heals a stale pre-#529 block.
+    files.push(upsertInstructionsEntry(instructionsPath(loc)));
 
     return { files };
   }
diff --git a/src/installer/targets/shared.ts b/src/installer/targets/shared.ts
index 342e85049..0c854682a 100644
--- a/src/installer/targets/shared.ts
+++ b/src/installer/targets/shared.ts
@@ -10,6 +10,11 @@
 
 import * as fs from 'fs';
 import * as path from 'path';
+import {
+  CODEGRAPH_INSTRUCTIONS_BLOCK,
+  CODEGRAPH_SECTION_START,
+  CODEGRAPH_SECTION_END,
+} from '../instructions-template';
 
 /**
  * The MCP-server config block codegraph injects. Same shape across
@@ -167,6 +172,26 @@ export function replaceOrAppendMarkedSection(
   return 'appended';
 }
 
+/**
+ * Upsert the CodeGraph instructions block into an agent instructions
+ * file (CLAUDE.md / AGENTS.md / GEMINI.md). The one write shared by
+ * every target: self-heals a stale pre-#529 long block (markers match →
+ * replaced by the current short one), appends after existing user
+ * content otherwise, and reports `unchanged` on byte-equal re-runs so
+ * install stays idempotent. See `instructions-template.ts` for why this
+ * block exists (#704: subagents + non-MCP harnesses never see the MCP
+ * initialize instructions).
+ */
+export function upsertInstructionsEntry(file: string): { path: string; action: 'created' | 'updated' | 'unchanged' } {
+  const action = replaceOrAppendMarkedSection(
+    file,
+    CODEGRAPH_INSTRUCTIONS_BLOCK,
+    CODEGRAPH_SECTION_START,
+    CODEGRAPH_SECTION_END,
+  );
+  return { path: file, action: action === 'appended' ? 'updated' : action };
+}
+
 /**
  * Inverse of `replaceOrAppendMarkedSection`. Strips the marker
  * block from `filePath` if present. If the file becomes empty after

From e2f1fe4b2a549118ccc79af63545d60670f85f5e Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Fri, 12 Jun 2026 00:20:29 -0500
Subject: [PATCH 47/51] =?UTF-8?q?fix(installer):=20instructions=20block=20?=
 =?UTF-8?q?is=20scope-neutral=20=E2=80=94=20conditional=20on=20.codegraph/?=
 =?UTF-8?q?=20existing=20(#820)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A global install writes the block into user-scope files
(~/.claude/CLAUDE.md, ~/.codex/AGENTS.md) that apply to EVERY repo the
user opens — the unconditional "This repository is indexed" claim was
false in unindexed ones and would send subagents into failing codegraph
calls, the exact noise the unindexed-session policy (#817) eliminates.
Now: "In repositories indexed by CodeGraph (a .codegraph/ directory
exists) …" plus an explicit skip-entirely line for the no-index case.

Residual risk: the delegation A/B validated only the assertive
project-scoped wording. The conditional form keeps the same active
ingredients (the codegraph name + both command surfaces in the
relay-able slot) but has not been re-measured — fold a re-check into
the next delegation A/B run.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/installer/instructions-template.ts | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/installer/instructions-template.ts b/src/installer/instructions-template.ts
index 74fa7ae09..54f066ea2 100644
--- a/src/installer/instructions-template.ts
+++ b/src/installer/instructions-template.ts
@@ -29,12 +29,23 @@
 export const CODEGRAPH_SECTION_START = '<!-- CODEGRAPH_START -->';
 export const CODEGRAPH_SECTION_END = '<!-- CODEGRAPH_END -->';
 
-/** The full block, markers included, exactly as written to disk. */
+/**
+ * The full block, markers included, exactly as written to disk.
+ *
+ * The wording is deliberately CONDITIONAL ("in repositories indexed by…"):
+ * a global install writes this into a user-scope file (~/.claude/CLAUDE.md,
+ * ~/.codex/AGENTS.md) that applies to every project the user opens —
+ * including unindexed ones, where an unconditional "this repository is
+ * indexed" claim would send subagents into failing codegraph calls (the
+ * noise the unindexed-session policy exists to prevent).
+ */
 export const CODEGRAPH_INSTRUCTIONS_BLOCK = `${CODEGRAPH_SECTION_START}
 ## CodeGraph
 
-This repository is indexed by CodeGraph — a pre-built code knowledge graph. Reach for it BEFORE grep/find or reading files when you need to understand or locate code:
+In repositories indexed by CodeGraph (a \`.codegraph/\` directory exists at the repo root), reach for it BEFORE grep/find or reading files when you need to understand or locate code:
 
 - **MCP tools** (when available): \`codegraph_explore\` answers most code questions in one call — the relevant symbols' verbatim source plus the call paths between them. \`codegraph_node\` returns one symbol's source + callers, or reads a whole file with line numbers. If the tools are listed but deferred, load them by name via tool search.
 - **Shell** (always works): \`codegraph explore "<symbol names or question>"\` and \`codegraph node <symbol-or-file>\` print the same output.
+
+If there is no \`.codegraph/\` directory, skip CodeGraph entirely — indexing is the user's decision.
 ${CODEGRAPH_SECTION_END}`;

From adcb862f8e93b2f3c9ab0a91dcb94f0a68ebbd8f Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Fri, 12 Jun 2026 00:24:08 -0500
Subject: [PATCH 48/51] fix(cli): explore/node not-indexed error stops agents
 from running init themselves (#821)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The message said "run 'codegraph init' first" — an instruction-shaped
error that invites an agent hitting it (e.g. a subagent following the
global instructions block into an unindexed repo) to index the project
uninvited: minutes of CPU and a surprise .codegraph/ the user never
asked for. Every other layer already encodes indexing-is-the-user's-
decision (the MCP NotIndexedError guidance, the inactive instructions,
the conditional block); the CLI now matches: continue with your usual
tools, do not run init yourself, the project owner can enable it.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/bin/codegraph.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts
index a4fcbde6a..b6a3262a5 100644
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -915,7 +915,7 @@ program
 
     try {
       if (!isInitialized(projectPath)) {
-        error(`CodeGraph not initialized in ${projectPath} — run 'codegraph init' first.`);
+        error(`CodeGraph isn't available here — no .codegraph/ index exists in ${projectPath}. If you are an AI agent: continue with your usual tools; indexing is the user's decision, do not run it yourself. (The project owner can enable CodeGraph with 'codegraph init'.)`);
         process.exit(1);
       }
 
@@ -957,7 +957,7 @@ program
 
     try {
       if (!isInitialized(projectPath)) {
-        error(`CodeGraph not initialized in ${projectPath} — run 'codegraph init' first.`);
+        error(`CodeGraph isn't available here — no .codegraph/ index exists in ${projectPath}. If you are an AI agent: continue with your usual tools; indexing is the user's decision, do not run it yourself. (The project owner can enable CodeGraph with 'codegraph init'.)`);
         process.exit(1);
       }
 

From 01717854f5e973701aa97fa6b4b781a831846245 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Fri, 12 Jun 2026 00:30:56 -0500
Subject: [PATCH 49/51] fix(cli): codegraph node accepts Windows backslash
 paths in file mode (#822)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The file-vs-symbol heuristic only matched '/' — `codegraph node
src\auth\session.ts` on Windows fell through to symbol mode and found
nothing. Both separators now route to file mode, normalized to forward
slashes (the form the index stores). Symbols never contain either
separator in any indexed language.

Validated: macOS smoke (explore/node symbol/node file/unindexed
refusal) + Linux Docker (same smoke + full suite, 1428 passed).
Windows VM validation queued — the Parallels guest is currently
unreachable (control commands are Pro-gated; needs a manual start).

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/bin/codegraph.ts | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts
index b6a3262a5..1dbbca210 100644
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -968,12 +968,15 @@ program
 
       // A name with a path separator is a file read; otherwise a symbol
       // (use --file for basename-only file reads or to pin an overload).
+      // Both separators: Windows users type src\auth\session.ts. Symbols
+      // never contain either ('/' isn't an identifier char anywhere we
+      // index; C++ scope is '::', JS members '.').
       const args: Record<string, unknown> = {};
       if (options.file) {
         args.file = options.file;
         if (name && name !== options.file) args.symbol = name;
-      } else if (name.includes('/')) {
-        args.file = name;
+      } else if (name.includes('/') || name.includes('\\')) {
+        args.file = name.replace(/\\/g, '/');
       } else {
         args.symbol = name;
         args.includeCode = true;

From 7ef2ea9c11a6a477212751cea2760fd76ddc9440 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Fri, 12 Jun 2026 00:36:20 -0500
Subject: [PATCH 50/51] docs(readme): MCP Tools table reflects the 4-tool
 default surface (#818) (#823)

The table still listed all 8 tools; it now shows the default four
(explore/node/search/callers, with node's Read-parity file mode), the
CODEGRAPH_MCP_TOOLS re-enable path + CLI equivalents for the unlisted
four, and the inactive-when-unindexed behavior (#817).

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 README.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 08ef5bc35..bb86a697b 100644
--- a/README.md
+++ b/README.md
@@ -507,18 +507,18 @@ fi
 
 ## MCP Tools
 
-When running as an MCP server, CodeGraph exposes these tools to Claude Code:
+When running as an MCP server, CodeGraph exposes a focused set of four tools — measured agent behavior showed a leaner list steers agents to the right tool and saves context every session:
 
 | Tool | Purpose |
 |------|---------|
 | `codegraph_explore` | **Primary.** Answer almost any question in one call — "how does X work", a flow ("how does X reach Y"), or surveying an area — returning the relevant symbols' verbatim source grouped by file, plus a relationship map and blast radius. Surfaces dynamic-dispatch hops (callbacks, React re-render, interface→impl) grep can't follow. |
+| `codegraph_node` | One symbol's full source + caller/callee trail (every overload for an ambiguous name) — or pass a file path to **read a whole file like the Read tool** (same line-numbered output, `offset`/`limit`), with its dependents attached. |
 | `codegraph_search` | Find symbols by name across the codebase |
-| `codegraph_callers` | Find what calls a function |
-| `codegraph_callees` | Find what a function calls |
-| `codegraph_impact` | Analyze what code is affected by changing a symbol |
-| `codegraph_node` | Get one specific symbol's details + full source (returns every overload for an ambiguous name) |
-| `codegraph_files` | Get indexed file structure (faster than filesystem scanning) |
-| `codegraph_status` | Check index health and statistics |
+| `codegraph_callers` | Every call site of a function — including where it's registered as a callback — with one section per definition when several share a name |
+
+Four more tools (`codegraph_callees`, `codegraph_impact`, `codegraph_files`, `codegraph_status`) stay fully functional but unlisted by default — measured across eval runs, agents never or rarely picked them, and their information already arrives inline on the four above (explore's blast-radius section, node's dependents note, a symbol's body as its callee list). Re-enable any of them with the `CODEGRAPH_MCP_TOOLS` environment variable (e.g. `CODEGRAPH_MCP_TOOLS=explore,node,search,callers,impact`), or use their CLI equivalents (`codegraph callees` / `impact` / `files` / `status`).
+
+In a workspace with no `.codegraph/` index, the server announces itself inactive and lists **no** tools — agents work normally with their built-in tools, and indexing stays your decision.
 
 ---
 

From 7db4c1d2f868fe4ee306dbe86234a706d0f30c66 Mon Sep 17 00:00:00 2001
From: Colby Mchenry <me@colbymchenry.com>
Date: Fri, 12 Jun 2026 00:43:32 -0500
Subject: [PATCH 51/51] test(mcp): unindexed suite teardown survives Windows
 file locking (#824)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The spawn-based tests failed on Windows with EPERM in afterEach — the
SIGKILL'd server child briefly holds the temp cwd/SQLite handles when
rmSync runs (the documented class that fails mcp-initialize/mcp-roots
teardowns). Three changes: spawn with CODEGRAPH_WASM_RELAUNCHED=1 so the
server skips the re-exec and is not a grandchild that survives
child.kill(); await the child's exit (3s cap); and retry the removal
(maxRetries: 10, retryDelay: 200). Assertions were already passing on
Windows.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 __tests__/mcp-unindexed.test.ts | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/__tests__/mcp-unindexed.test.ts b/__tests__/mcp-unindexed.test.ts
index 52b4d1ccb..2b0019d6d 100644
--- a/__tests__/mcp-unindexed.test.ts
+++ b/__tests__/mcp-unindexed.test.ts
@@ -27,7 +27,12 @@ function spawnServer(cwd: string): ChildProcessWithoutNullStreams {
     // Direct (in-process) mode — the unindexed path never has a daemon
     // anyway (the daemon socket lives in .codegraph/), and this keeps the
     // suite from leaking a detached daemon in the indexed test.
-    env: { ...process.env, CODEGRAPH_NO_DAEMON: '1' },
+    // CODEGRAPH_WASM_RELAUNCHED skips the --liftoff-only re-exec: without
+    // it the server runs as a GRANDCHILD that survives child.kill() on
+    // Windows and holds the temp cwd/SQLite handles, failing teardown with
+    // EPERM no matter how long rmSync retries (the class documented for
+    // the mcp-initialize/mcp-roots suites).
+    env: { ...process.env, CODEGRAPH_NO_DAEMON: '1', CODEGRAPH_WASM_RELAUNCHED: '1' },
   }) as ChildProcessWithoutNullStreams;
 }
 
@@ -85,12 +90,20 @@ describe('Unindexed-workspace session policy', () => {
     tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-unindexed-'));
   });
 
-  afterEach(() => {
+  afterEach(async () => {
     if (child) {
+      // Wait for the child to actually exit before removing its cwd — on
+      // Windows a just-killed process briefly holds the directory/SQLite
+      // handles, and an immediate rmSync fails the teardown with EPERM
+      // (the documented file-locking class that fails the sibling
+      // mcp-initialize/mcp-roots suites). kill + await exit + retried
+      // removal keeps this suite green on Windows.
+      const exited = new Promise<void>((resolve) => child!.once('exit', () => resolve()));
       child.kill('SIGKILL');
+      await Promise.race([exited, new Promise((r) => setTimeout(r, 3000))]);
       child = null;
     }
-    fs.rmSync(tempDir, { recursive: true, force: true });
+    fs.rmSync(tempDir, { recursive: true, force: true, maxRetries: 10, retryDelay: 200 });
   });
 
   it('initialize returns the short "inactive" instructions, not the playbook', async () => {