From 6837f22e071f3ce71b555878d9cf706c15f9bf0f Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Tue, 23 Jun 2026 02:18:08 +0000 Subject: [PATCH 1/4] fix(agent/agentcontext): match codex instruction-file discovery Instruction-file resolution over-injected context: the resolver walked the working directory recursively, matched filenames case-insensitively, and emitted symlinked duplicates. Resolving the repo root produced six instruction sources (AGENTS.md plus its CLAUDE.md and .cursorrules symlinks, a nested site/AGENTS.md and site/CLAUDE.md, and the lower-case docs/reference/api/agents.md API doc) that amounted to one file's worth of guidance. Align with codex, which finds the project root by walking up to .git and reads at most one AGENTS.md per ancestor directory rather than descending into subdirectories: - Recognize instruction files only at a scan root's top level. Skills and .mcp.json keep recursive discovery, since they are a Coder extension with no codex equivalent. - Match instruction basenames case-sensitively, so a lower-case agents.md is not mistaken for an instruction file. - Attribute each resource to its resolved symlink target so CLAUDE.md and .cursorrules symlinked to AGENTS.md collapse into one resource via the existing ID-based dedup instead of shipping identical content multiple times. On resolve failure the original path is kept so the error points at the offending link. Resolving the repo root now yields a single instruction resource. --- agent/agentcontext/doc.go | 6 +- agent/agentcontext/resolve.go | 60 +++++++++++--- agent/agentcontext/resolve_test.go | 129 +++++++++++++++++++++-------- 3 files changed, 151 insertions(+), 44 deletions(-) diff --git a/agent/agentcontext/doc.go b/agent/agentcontext/doc.go index 5ed7ebf043af5..03bbdd6b98d91 100644 --- a/agent/agentcontext/doc.go +++ b/agent/agentcontext/doc.go @@ -9,7 +9,11 @@ // built-in defaults. // - A resolver that classifies files under each scan root into // typed Resources (instruction files, skills, MCP configs, -// MCP servers). +// MCP servers). Instruction files (AGENTS.md, CLAUDE.md, +// .cursorrules) are recognized only at the top level of a +// scan root, mirroring codex, which does not descend into +// subdirectories to collect nested instruction files. Skills +// and MCP configs are discovered recursively. // - A unified recursive fsnotify watcher that signals a // re-resolve when any recognized file changes. // - An HTTP API at /api/v0/context/sources for source CRUD diff --git a/agent/agentcontext/resolve.go b/agent/agentcontext/resolve.go index 86d3f4002dade..e9bbd5a7e975f 100644 --- a/agent/agentcontext/resolve.go +++ b/agent/agentcontext/resolve.go @@ -45,8 +45,12 @@ const ( // File-name conventions recognized by the v1 resolver. var ( - // instructionFileNames are picked up from any scan root. - // Matching is case-insensitive on the basename. + // instructionFileNames are picked up from the top level of a + // scan root. Matching is case-sensitive on the basename, + // mirroring codex: it keys on the exact name "AGENTS.md" and + // never case-folds, so a lower-case agents.md (for example a + // generated API reference doc) is not mistaken for an + // instruction file. instructionFileNames = []string{ "AGENTS.md", "CLAUDE.md", @@ -77,16 +81,27 @@ var skipDirNames = map[string]struct{}{ } // recognizedInstructionFile reports whether name is one of the -// instruction-file conventions, case-insensitively. +// instruction-file conventions. Matching is case-sensitive: +// codex keys on the exact basename "AGENTS.md", so a lower-case +// agents.md is intentionally not recognized. func recognizedInstructionFile(name string) bool { for _, candidate := range instructionFileNames { - if strings.EqualFold(name, candidate) { + if name == candidate { return true } } return false } +// atScanRoot reports whether path sits directly at scanRoot: +// either path is the scan root itself (a single-file root) or +// its parent directory is the scan root. Instruction files are +// recognized only at this top level, never deeper in the tree, +// mirroring codex's lack of downward recursion. +func atScanRoot(path, scanRoot string) bool { + return path == scanRoot || filepath.Dir(path) == scanRoot +} + // Resolver walks one or more scan roots and produces a snapshot // of every recognized resource it finds. The Resolver is // stateless; the Manager owns the scan-root list and orchestrates @@ -246,7 +261,11 @@ func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Reso continue } if !info.IsDir() { - // Single-file roots are classified directly. + // Single-file roots are classified directly. A + // file that is itself the scan root counts as + // top-level (see atScanRoot), so instruction + // files added as an explicit source are still + // recognized. if res, ok := r.classifyFile(root.Path, root.Path, info, root.UserSource); ok { if _, dup := seenID[res.ID]; !dup { seenID[res.ID] = struct{}{} @@ -278,10 +297,11 @@ func (r *Resolver) walkDir(ctx context.Context, root ScanRoot, out *[]Resource, if err != nil { // Surface the error as Unreadable when we can // associate it with a single recognized file; - // otherwise let the walk continue. + // otherwise let the walk continue. Instruction + // files only count at the scan-root top level. if d != nil && !d.IsDir() { kind, recognized := kindFromFilename(d.Name()) - if recognized { + if recognized && (kind != KindInstructionFile || atScanRoot(path, root.Path)) { res := Resource{ ID: resourceID(kind, path), Kind: kind, @@ -403,10 +423,18 @@ func resolveReadTarget(path string, info fs.FileInfo, scanRoot string) (readPath // classifyFile inspects a single file path and produces a // Resource when the basename matches a recognized convention. +// Instruction files are recognized only when path sits directly +// at the scan root (see atScanRoot), because codex does not +// descend into subdirectories to collect nested AGENTS.md files. +// Skills and MCP configs are a Coder extension and remain +// discoverable at any depth. func (r *Resolver) classifyFile(scanRoot, path string, info fs.FileInfo, userSource string) (Resource, bool) { name := info.Name() switch { case recognizedInstructionFile(name): + if !atScanRoot(path, scanRoot) { + return Resource{}, false + } return r.readInstructionFile(scanRoot, path, info, userSource), true case name == mcpConfigFileName: return r.readMCPConfig(scanRoot, path, info, userSource), true @@ -529,14 +557,26 @@ func validateMCPConfig(data []byte) error { // post-processing (e.g. firstLine for instruction files) by // inspecting Status==StatusOK. func (r *Resolver) readFileResource(kind ResourceKind, scanRoot, path string, info fs.FileInfo, userSource string) Resource { + readPath, readInfo, ok, status, errMsg := resolveReadTarget(path, info, scanRoot) + // Attribute the resource to the resolved target rather than + // the path we walked. When several names point at the same + // file (e.g. CLAUDE.md and .cursorrules symlinked to + // AGENTS.md), they share an ID and collapse to a single + // resource via the walk's ID-based dedup, instead of shipping + // identical content multiple times. On resolve failure the + // original path is kept so the error points at the offending + // link. + idPath := path + if ok { + idPath = readPath + } res := Resource{ - ID: resourceID(kind, path), + ID: resourceID(kind, idPath), Kind: kind, - Source: path, + Source: idPath, SizeBytes: safeUint64(info.Size()), SourcePath: userSource, } - readPath, readInfo, ok, status, errMsg := resolveReadTarget(path, info, scanRoot) if !ok { res.Status = status res.Error = errMsg diff --git a/agent/agentcontext/resolve_test.go b/agent/agentcontext/resolve_test.go index 9c6213d21bedc..9aa4cdb1a3fe5 100644 --- a/agent/agentcontext/resolve_test.go +++ b/agent/agentcontext/resolve_test.go @@ -54,7 +54,11 @@ func TestResolver_ProjectAGENTSFile(t *testing.T) { require.NotEqual(t, [32]byte{}, got.ContentHash) } -func TestResolver_CaseInsensitiveInstructionNames(t *testing.T) { +// TestResolver_InstructionNamesAreCaseSensitive verifies the +// resolver matches instruction filenames exactly, mirroring +// codex. A lower-case agents.md (for example a generated API +// reference doc) must not be mistaken for an instruction file. +func TestResolver_InstructionNamesAreCaseSensitive(t *testing.T) { t.Parallel() dir := t.TempDir() mustWriteFile(t, filepath.Join(dir, "agents.md"), "lower\n") @@ -63,7 +67,8 @@ func TestResolver_CaseInsensitiveInstructionNames(t *testing.T) { r := &agentcontext.Resolver{} snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) - require.Len(t, snap.Resources, 2) + require.Len(t, snap.Resources, 1) + require.Equal(t, filepath.Join(dir, "CLAUDE.md"), snap.Resources[0].Source) } func TestResolver_SkillsContainerEmitsEachSubdir(t *testing.T) { @@ -150,10 +155,11 @@ func TestResolver_MCPConfigEmitted(t *testing.T) { } // TestResolver_SymlinkInsideScanRootAllowed exercises the -// monorepo case where AGENTS.md is symlinked to shared content -// inside the same workspace tree. The target lives under the -// scan root, so the resolver follows the symlink and emits the -// target bytes as if the symlink were a regular file. +// monorepo case where a top-level AGENTS.md is symlinked to +// shared content elsewhere inside the same workspace tree. The +// target lives under the scan root, so the resolver follows the +// symlink, emits the target bytes, and attributes the resource +// to the resolved target path. func TestResolver_SymlinkInsideScanRootAllowed(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("symlinks require admin privileges on Windows runners") @@ -169,15 +175,59 @@ func TestResolver_SymlinkInsideScanRootAllowed(t *testing.T) { r := &agentcontext.Resolver{} snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) - require.Len(t, snap.Resources, 2) - var linked agentcontext.Resource - for _, res := range snap.Resources { - if res.Source == link { - linked = res - } + // The nested target is not independently recognized (only the + // top-level symlink is), so exactly one resource is emitted, + // carrying the target bytes and attributed to the target. + require.Len(t, snap.Resources, 1) + got := snap.Resources[0] + require.Equal(t, agentcontext.StatusOK, got.Status) + require.Equal(t, target, got.Source) + require.Equal(t, "shared monorepo guidance", string(got.Payload)) +} + +// TestResolver_SymlinkedInstructionFilesDeduplicated reproduces +// the common repo layout where CLAUDE.md and .cursorrules are +// symlinks to a single AGENTS.md. All three resolve to the same +// file, so the resolver must emit one instruction resource +// attributed to the real AGENTS.md rather than three copies of +// identical content. +func TestResolver_SymlinkedInstructionFilesDeduplicated(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("symlinks require admin privileges on Windows runners") } - require.Equal(t, agentcontext.StatusOK, linked.Status) - require.Equal(t, "shared monorepo guidance", string(linked.Payload)) + t.Parallel() + dir := t.TempDir() + agents := filepath.Join(dir, "AGENTS.md") + mustWriteFile(t, agents, "the one true guidance") + require.NoError(t, os.Symlink(agents, filepath.Join(dir, "CLAUDE.md"))) + require.NoError(t, os.Symlink(agents, filepath.Join(dir, ".cursorrules"))) + + r := &agentcontext.Resolver{} + snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + + require.Len(t, snap.Resources, 1) + got := snap.Resources[0] + require.Equal(t, agentcontext.KindInstructionFile, got.Kind) + require.Equal(t, agents, got.Source) + require.Equal(t, "the one true guidance", string(got.Payload)) +} + +// TestResolver_InstructionFilesOnlyAtScanRoot verifies the +// resolver does not descend into subdirectories to collect +// nested instruction files, mirroring codex. A nested +// site/AGENTS.md is ignored while the top-level one is kept. +func TestResolver_InstructionFilesOnlyAtScanRoot(t *testing.T) { + t.Parallel() + dir := t.TempDir() + mustWriteFile(t, filepath.Join(dir, "AGENTS.md"), "root") + mustWriteFile(t, filepath.Join(dir, "site", "AGENTS.md"), "nested") + + r := &agentcontext.Resolver{} + snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + + require.Len(t, snap.Resources, 1) + require.Equal(t, filepath.Join(dir, "AGENTS.md"), snap.Resources[0].Source) + require.Equal(t, "root", string(snap.Resources[0].Payload)) } // TestResolver_SymlinkOutsideScanRootRejected guards the @@ -250,18 +300,23 @@ func TestResolver_OversizeInstructionFile(t *testing.T) { func TestResolver_AggregateCapExcludes(t *testing.T) { t.Parallel() - dir := t.TempDir() - mustWriteFile(t, filepath.Join(dir, "AGENTS.md"), "small") - subA := filepath.Join(dir, "a") - subB := filepath.Join(dir, "b") - mustWriteFile(t, filepath.Join(subA, "AGENTS.md"), "AAAA") - mustWriteFile(t, filepath.Join(subB, "AGENTS.md"), "BBBB") - - // Aggregate cap of 9 bytes lets the first two through but - // excludes the third regardless of which order they - // appear. + // Instruction files are only read at a scan root's top level, + // so each contributing file lives at its own scan root. + dirRoot := t.TempDir() + dirA := t.TempDir() + dirB := t.TempDir() + mustWriteFile(t, filepath.Join(dirRoot, "AGENTS.md"), "small") + mustWriteFile(t, filepath.Join(dirA, "AGENTS.md"), "AAAA") + mustWriteFile(t, filepath.Join(dirB, "AGENTS.md"), "BBBB") + + // Aggregate cap of 9 bytes lets two of the three (5+4) bytes + // through but excludes the third regardless of order. r := &agentcontext.Resolver{MaxSnapshotBytes: 9} - snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + snap := r.Resolve([]agentcontext.ScanRoot{ + {Path: dirRoot}, + {Path: dirA}, + {Path: dirB}, + }) var excluded int for _, res := range snap.Resources { @@ -274,14 +329,17 @@ func TestResolver_AggregateCapExcludes(t *testing.T) { func TestResolver_CountCapExcludes(t *testing.T) { t.Parallel() - dir := t.TempDir() + // Instruction files are only read at a scan root's top level, + // so spread the five files across five scan roots. + roots := make([]agentcontext.ScanRoot, 0, 5) for i := 0; i < 5; i++ { - sub := filepath.Join(dir, "dir", string('a'+rune(i))) - mustWriteFile(t, filepath.Join(sub, "AGENTS.md"), "x") + d := t.TempDir() + mustWriteFile(t, filepath.Join(d, "AGENTS.md"), "x") + roots = append(roots, agentcontext.ScanRoot{Path: d}) } r := &agentcontext.Resolver{MaxResources: 3} - snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + snap := r.Resolve(roots) require.Len(t, snap.Resources, 5) var excluded int @@ -296,15 +354,20 @@ func TestResolver_CountCapExcludes(t *testing.T) { func TestResolver_SkipsVendorAndNodeModules(t *testing.T) { t.Parallel() dir := t.TempDir() - mustWriteFile(t, filepath.Join(dir, "AGENTS.md"), "root") - mustWriteFile(t, filepath.Join(dir, "node_modules", "deep", "AGENTS.md"), "should not appear") - mustWriteFile(t, filepath.Join(dir, "vendor", "AGENTS.md"), "should not appear either") + // MCP configs are discovered recursively, so they exercise + // the skip-dir logic that instruction files (top-level only) + // no longer reach. Configs under node_modules/ and vendor/ + // must be ignored while one in an ordinary subdirectory is + // still found. + mustWriteFile(t, filepath.Join(dir, "sub", ".mcp.json"), `{"mcpServers": {}}`) + mustWriteFile(t, filepath.Join(dir, "node_modules", "deep", ".mcp.json"), `{"mcpServers": {}}`) + mustWriteFile(t, filepath.Join(dir, "vendor", ".mcp.json"), `{"mcpServers": {}}`) r := &agentcontext.Resolver{} snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) require.Len(t, snap.Resources, 1) - require.Equal(t, filepath.Join(dir, "AGENTS.md"), snap.Resources[0].Source) + require.Equal(t, filepath.Join(dir, "sub", ".mcp.json"), snap.Resources[0].Source) } func TestResolver_UserSourceAttribution(t *testing.T) { From 84b27cbc74b74669b4e0eb65bd5ee3382491bbe0 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Tue, 23 Jun 2026 03:16:34 +0000 Subject: [PATCH 2/4] refactor(agent/agentcontext): replace recursive scan with codex-style discovery The resolver previously walked the working directory recursively to depth 8 and matched any nested instruction file, .mcp.json, or skills/ directory. codex never walks the working tree downward: it reads fixed locations and walks up to the project root. Replace the recursive engine with codex-style discovery. - Manager walks up from the working dir to the nearest .git ancestor (a directory, or a worktree/submodule .git file) and feeds the root->cwd chain as separate scan roots. - Resolver inspects only each scan root's top level for instruction files and .mcp.json, and discovers skills from fixed container locations (skills, .agents/skills, .claude/skills, .codex/skills), one skill per immediate subdirectory. The recursive walkDir, skipDirNames, MaxScanDepth, and isSkillsContainer are removed. - Watcher mirrors the same fixed-location set instead of recursively watching every scan root, so it no longer walks node_modules and other large trees. .mcp.json is kept (no codex config.toml support). Resolving the repo root now yields AGENTS.md, .mcp.json, and the .agents/skills and .claude/skills skills, with no nested instruction-file noise. --- agent/agentcontext/doc.go | 22 ++- agent/agentcontext/export_test.go | 4 + agent/agentcontext/manager.go | 57 +++++- agent/agentcontext/manager_test.go | 65 +++++++ agent/agentcontext/resolve.go | 294 +++++++++++------------------ agent/agentcontext/resolve_test.go | 43 ++++- agent/agentcontext/watcher.go | 90 ++++----- 7 files changed, 321 insertions(+), 254 deletions(-) diff --git a/agent/agentcontext/doc.go b/agent/agentcontext/doc.go index 03bbdd6b98d91..3d775854c39fb 100644 --- a/agent/agentcontext/doc.go +++ b/agent/agentcontext/doc.go @@ -6,16 +6,18 @@ // "RFC: Workspace Context Sources for Coder Agents". It owns: // // - User-declared scan roots (Sources) layered on top of -// built-in defaults. -// - A resolver that classifies files under each scan root into -// typed Resources (instruction files, skills, MCP configs, -// MCP servers). Instruction files (AGENTS.md, CLAUDE.md, -// .cursorrules) are recognized only at the top level of a -// scan root, mirroring codex, which does not descend into -// subdirectories to collect nested instruction files. Skills -// and MCP configs are discovered recursively. -// - A unified recursive fsnotify watcher that signals a -// re-resolve when any recognized file changes. +// built-in defaults, plus the project ancestor chain from the +// git root down to the working directory. +// - A resolver that classifies files at fixed locations under +// each scan root into typed Resources (instruction files, +// skills, MCP configs, MCP servers). Discovery mirrors codex: +// instruction files (AGENTS.md, CLAUDE.md, .cursorrules) and +// .mcp.json are read only at a scan root's top level, skills +// only from fixed container directories (skills, .agents/skills, +// .claude/skills, .codex/skills), and the resolver never walks +// the working directory tree downward. +// - A fixed-location fsnotify watcher that signals a re-resolve +// when any recognized file changes. // - An HTTP API at /api/v0/context/sources for source CRUD // and /api/v0/context/resync for synchronous push barriers. // - A Pusher abstraction so the latest Snapshot can be shipped diff --git a/agent/agentcontext/export_test.go b/agent/agentcontext/export_test.go index 3d7da1e96cbba..1b969ecc4d77e 100644 --- a/agent/agentcontext/export_test.go +++ b/agent/agentcontext/export_test.go @@ -5,3 +5,7 @@ package agentcontext // this signal; the agent calls Run synchronously after wiring // the Manager. Tests use it to coordinate without polling. func ManagerStarted(m *Manager) <-chan struct{} { return m.started() } + +// ProjectChainForTest exposes the unexported walk-up helper so +// external _test packages can assert the ancestor chain directly. +func ProjectChainForTest(workingDir string) []string { return projectChain(workingDir) } diff --git a/agent/agentcontext/manager.go b/agent/agentcontext/manager.go index e61367bdc7178..cd9765570c63b 100644 --- a/agent/agentcontext/manager.go +++ b/agent/agentcontext/manager.go @@ -2,6 +2,9 @@ package agentcontext import ( "context" + "os" + "path/filepath" + "slices" "strings" "sync" "time" @@ -219,7 +222,6 @@ func (m *Manager) Run(ctx context.Context) error { Logger: m.logger.Named("watcher"), Clock: m.clock, Debounce: m.debounce, - MaxDepth: m.resolver.MaxDepth, OnChange: m.signal, }) if err != nil { @@ -554,6 +556,50 @@ func (m *Manager) Trigger() { m.signal() } +// projectMarker is the directory entry that bounds the +// instruction-file walk-up. Matching codex, discovery climbs from +// the working directory to the nearest ancestor that contains a +// .git entry and treats that as the project root. The entry may be +// a directory (a normal clone) or a file (a worktree or submodule +// gitdir pointer), so both are accepted. +const projectMarker = ".git" + +// projectChain returns the directories to scan for project-scoped +// context, ordered from the project root down to workingDir +// inclusive. The project root is the nearest ancestor of +// workingDir (inclusive) that contains a .git entry. When no such +// ancestor exists the chain is just workingDir, so discovery never +// climbs above the workspace into unrelated parent directories. +func projectChain(workingDir string) []string { + workingDir = filepath.Clean(workingDir) + root := workingDir + for dir := workingDir; ; { + if _, err := os.Lstat(filepath.Join(dir, projectMarker)); err == nil { + root = dir + break + } + parent := filepath.Dir(dir) + if parent == dir { + // Reached the filesystem root without a marker; + // scan only the working directory. + return []string{workingDir} + } + dir = parent + } + // Collect cwd -> root, then reverse to root -> cwd so callers + // see ancestors before descendants. + var chain []string + for dir := workingDir; ; { + chain = append(chain, dir) + if dir == root { + break + } + dir = filepath.Dir(dir) + } + slices.Reverse(chain) + return chain +} + // scanRootsLocked returns the list of ScanRoots to feed the // resolver and watcher. The Manager's mutex must be held. func (m *Manager) scanRootsLocked() []ScanRoot { @@ -561,7 +607,14 @@ func (m *Manager) scanRootsLocked() []ScanRoot { out := make([]ScanRoot, 0, 1+len(builtinRoots)+len(m.sources)) if m.workingDir != nil { if wd := strings.TrimSpace(m.workingDir()); wd != "" { - out = append(out, ScanRoot{Path: wd}) + // Walk up to the project root and scan each + // directory from the root down to the working dir, + // mirroring codex. Each chain dir is its own scan + // root, so the resolver reads instruction files and + // .mcp.json at every ancestor without descending. + for _, dir := range projectChain(wd) { + out = append(out, ScanRoot{Path: dir}) + } } } for _, r := range builtinRoots { diff --git a/agent/agentcontext/manager_test.go b/agent/agentcontext/manager_test.go index 08ce803ead9e9..eda62dc7448f6 100644 --- a/agent/agentcontext/manager_test.go +++ b/agent/agentcontext/manager_test.go @@ -437,3 +437,68 @@ func TestManager_MCPResourcesAppliesToSnapshot(t *testing.T) { } require.True(t, found, "expected MCP server resource in snapshot") } + +func TestProjectChain_WalksUpToGitRoot(t *testing.T) { + t.Parallel() + root := testutil.TempDirResolved(t) + require.NoError(t, os.MkdirAll(filepath.Join(root, ".git"), 0o755)) + cwd := filepath.Join(root, "a", "b") + require.NoError(t, os.MkdirAll(cwd, 0o755)) + + require.Equal(t, []string{ + root, + filepath.Join(root, "a"), + cwd, + }, agentcontext.ProjectChainForTest(cwd)) +} + +func TestProjectChain_GitFileBoundaryIsHonored(t *testing.T) { + t.Parallel() + // A worktree or submodule has .git as a file, not a directory. + root := testutil.TempDirResolved(t) + require.NoError(t, os.WriteFile(filepath.Join(root, ".git"), []byte("gitdir: elsewhere"), 0o600)) + cwd := filepath.Join(root, "sub") + require.NoError(t, os.MkdirAll(cwd, 0o755)) + + require.Equal(t, []string{root, cwd}, agentcontext.ProjectChainForTest(cwd)) +} + +func TestProjectChain_NoGitFallsBackToWorkingDir(t *testing.T) { + t.Parallel() + cwd := filepath.Join(testutil.TempDirResolved(t), "x", "y") + require.NoError(t, os.MkdirAll(cwd, 0o755)) + + require.Equal(t, []string{cwd}, agentcontext.ProjectChainForTest(cwd)) +} + +// TestManager_WalkUpReadsAncestorInstructionFiles confirms the +// Manager scans every directory from the git root down to the +// working directory, and ignores instruction files in sibling +// subtrees that are not on the chain. +func TestManager_WalkUpReadsAncestorInstructionFiles(t *testing.T) { + t.Parallel() + root := testutil.TempDirResolved(t) + require.NoError(t, os.MkdirAll(filepath.Join(root, ".git"), 0o755)) + mustWriteFile(t, filepath.Join(root, "AGENTS.md"), "root rules") + cwd := filepath.Join(root, "service") + require.NoError(t, os.MkdirAll(cwd, 0o755)) + mustWriteFile(t, filepath.Join(cwd, "AGENTS.md"), "service rules") + // A sibling subtree is not on the root->cwd chain. + mustWriteFile(t, filepath.Join(root, "other", "AGENTS.md"), "other rules") + + m := newTestManager(t, agentcontext.ManagerOptions{ + WorkingDir: func() string { return cwd }, + }) + + snap := m.Snapshot() + var sources []string + for _, r := range snap.Resources { + if r.Kind == agentcontext.KindInstructionFile { + sources = append(sources, r.Source) + } + } + require.ElementsMatch(t, []string{ + filepath.Join(root, "AGENTS.md"), + filepath.Join(cwd, "AGENTS.md"), + }, sources) +} diff --git a/agent/agentcontext/resolve.go b/agent/agentcontext/resolve.go index e9bbd5a7e975f..6bd26051b9050 100644 --- a/agent/agentcontext/resolve.go +++ b/agent/agentcontext/resolve.go @@ -5,7 +5,6 @@ import ( "context" "crypto/sha256" "encoding/json" - "errors" "fmt" "io" "io/fs" @@ -36,11 +35,6 @@ const ( // DefaultMaxResources is the resource count cap. Resources // past this cap are emitted with Status == StatusExcluded. DefaultMaxResources = 500 - // DefaultMaxScanDepth bounds how deep the recursive walk - // descends from each scan root. The default avoids runaway - // scans in node_modules / vendor / .git trees while still - // covering realistic monorepo layouts. - DefaultMaxScanDepth = 8 ) // File-name conventions recognized by the v1 resolver. @@ -56,28 +50,24 @@ var ( "CLAUDE.md", ".cursorrules", } - // mcpConfigFileName is recognized at any depth under a - // scan root. + // mcpConfigFileName is recognized at the top level of a scan + // root only, not at arbitrary depth. mcpConfigFileName = ".mcp.json" // skillMetaFileName is the file inside a skill directory // that carries the skill front-matter. skillMetaFileName = "SKILL.md" ) -// skipDirNames are directory basenames that the recursive walk -// never descends into. The list mirrors what most language -// tool-chains treat as opaque. -var skipDirNames = map[string]struct{}{ - ".git": {}, - ".hg": {}, - ".svn": {}, - "node_modules": {}, - "vendor": {}, - "target": {}, - "dist": {}, - "build": {}, - ".venv": {}, - "__pycache__": {}, +// skillContainerRelPaths are the directories, relative to a scan +// root, under which skills are discovered. A skill is an immediate +// subdirectory of a container that holds a SKILL.md. The list +// covers the cross-tool conventions Coder supports; codex itself +// uses .agents/skills and .codex/skills. +var skillContainerRelPaths = []string{ + "skills", + filepath.Join(".agents", "skills"), + filepath.Join(".claude", "skills"), + filepath.Join(".codex", "skills"), } // recognizedInstructionFile reports whether name is one of the @@ -93,13 +83,24 @@ func recognizedInstructionFile(name string) bool { return false } -// atScanRoot reports whether path sits directly at scanRoot: -// either path is the scan root itself (a single-file root) or -// its parent directory is the scan root. Instruction files are -// recognized only at this top level, never deeper in the tree, -// mirroring codex's lack of downward recursion. -func atScanRoot(path, scanRoot string) bool { - return path == scanRoot || filepath.Dir(path) == scanRoot +// skillContainersFor returns the existing skill-container +// directories reachable from rootPath without recursing the tree: +// rootPath itself when it is already a "skills" directory, plus +// each skillContainerRelPaths entry that exists. Skills live in +// the immediate children of a container, so the resolver and the +// watcher both stop here. +func skillContainersFor(rootPath string) []string { + var out []string + if filepath.Base(rootPath) == "skills" { + out = append(out, rootPath) + } + for _, rel := range skillContainerRelPaths { + container := filepath.Join(rootPath, rel) + if info, err := os.Stat(container); err == nil && info.IsDir() { + out = append(out, container) + } + } + return out } // Resolver walks one or more scan roots and produces a snapshot @@ -116,9 +117,6 @@ type Resolver struct { // MaxResources caps the resource count. Use // DefaultMaxResources if zero. MaxResources int - // MaxDepth caps the directory walk depth. Use - // DefaultMaxScanDepth if zero. - MaxDepth int // MCPResources, when non-nil, is consulted after the // filesystem pass and returns the KindMCPServer resources // for live MCP servers. It must not block: the resolver @@ -217,15 +215,20 @@ func (r *Resolver) normalize() *Resolver { if out.MaxResources == 0 { out.MaxResources = DefaultMaxResources } - if out.MaxDepth == 0 { - out.MaxDepth = DefaultMaxScanDepth - } return &out } -// walk traverses every scan root and produces an unordered -// resource list. Aggregate caps are applied separately. The ctx -// is checked between roots so callers can bail out promptly. +// walk visits every scan root and produces an unordered resource +// list. Aggregate caps are applied separately. The ctx is checked +// between roots so callers can bail out promptly. +// +// Discovery is deliberately shallow. codex never walks the working +// directory downward; it reads fixed locations and walks up to the +// project root. The Manager performs the walk-up by feeding the +// root->cwd chain as separate scan roots, and for each root the +// resolver inspects only that directory's top level (instruction +// files and .mcp.json) plus a fixed set of skill-container +// locations. func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Resource, snapErrs []string) { // Dedup roots by canonical path. The first occurrence // wins so user-added roots that overlap with a built-in @@ -243,136 +246,89 @@ func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Reso dedup = append(dedup, root) } - // Deduplicate resources across roots by ID. Without this, - // a built-in root and a user root that both cover the - // same project tree would double-count AGENTS.md. + // Deduplicate resources across roots by ID so two roots that + // resolve to the same file (e.g. overlapping ancestors, or a + // built-in root nested under a project root) do not + // double-count it. seenID := make(map[string]struct{}) for _, root := range dedup { if err := ctx.Err(); err != nil { return nil, []string{err.Error()} } - info, err := os.Stat(root.Path) - if err != nil { - // Missing roots silently fall through. The user - // either added a path that does not exist yet or - // removed it later. The watcher will surface - // re-creation as a change event. - continue - } - if !info.IsDir() { - // Single-file roots are classified directly. A - // file that is itself the scan root counts as - // top-level (see atScanRoot), so instruction - // files added as an explicit source are still - // recognized. - if res, ok := r.classifyFile(root.Path, root.Path, info, root.UserSource); ok { - if _, dup := seenID[res.ID]; !dup { - seenID[res.ID] = struct{}{} - resources = append(resources, res) - } - } - continue - } - walkErr := r.walkDir(ctx, root, &resources, seenID) - if walkErr != nil { - snapErrs = append(snapErrs, fmt.Sprintf("walk %q: %s", root.Path, walkErr)) - } + r.discoverIn(root, &resources, seenID) } return resources, snapErrs } -// walkDir performs the recursive descent for a single scan -// directory. It honors r.MaxDepth and skipDirNames. The ctx is -// checked inside the WalkDir callback so cancellation -// terminates the walk even mid-root. -func (r *Resolver) walkDir(ctx context.Context, root ScanRoot, out *[]Resource, seenID map[string]struct{}) error { - rootDepth := strings.Count(filepath.Clean(root.Path), string(os.PathSeparator)) - maxDepth := rootDepth + r.MaxDepth - - return filepath.WalkDir(root.Path, func(path string, d fs.DirEntry, err error) error { - if ctxErr := ctx.Err(); ctxErr != nil { - return ctxErr - } - if err != nil { - // Surface the error as Unreadable when we can - // associate it with a single recognized file; - // otherwise let the walk continue. Instruction - // files only count at the scan-root top level. - if d != nil && !d.IsDir() { - kind, recognized := kindFromFilename(d.Name()) - if recognized && (kind != KindInstructionFile || atScanRoot(path, root.Path)) { - res := Resource{ - ID: resourceID(kind, path), - Kind: kind, - Source: path, - SizeBytes: 0, - Status: StatusUnreadable, - Error: err.Error(), - SourcePath: root.UserSource, - } - if _, dup := seenID[res.ID]; !dup { - seenID[res.ID] = struct{}{} - *out = append(*out, res) - } - } - } - if errors.Is(err, fs.ErrPermission) { - // Permission errors on a directory: skip the - // subtree but continue walking siblings. - if d != nil && d.IsDir() { - return fs.SkipDir - } - } - return nil +// discoverIn inspects a single scan root. A root that points at a +// file is classified directly. A directory root contributes its +// top-level instruction files and .mcp.json plus skills from the +// fixed container locations under it. The walk goes no deeper. +func (r *Resolver) discoverIn(root ScanRoot, out *[]Resource, seenID map[string]struct{}) { + info, err := os.Stat(root.Path) + if err != nil { + // Missing roots silently fall through. The user either + // added a path that does not exist yet or removed it + // later; the watcher surfaces re-creation as a change. + return + } + if !info.IsDir() { + if res, ok := r.classifyFile(root.Path, root.Path, info, root.UserSource); ok { + appendResource(out, seenID, res) } + return + } + r.discoverTopLevelFiles(root, out, seenID) + for _, container := range skillContainersFor(root.Path) { + r.emitSkillsFromContainer(container, root, out, seenID) + } +} - if d.IsDir() { - if strings.Count(path, string(os.PathSeparator)) > maxDepth { - return fs.SkipDir - } - if _, skip := skipDirNames[d.Name()]; skip && path != root.Path { - return fs.SkipDir - } - // If we are entering a "skills container" - // directory (".agents/skills", "~/.coder/skills", - // "plugins//skills"), eagerly emit skill - // resources for its immediate subdirectories. - if isSkillsContainer(path) { - r.emitSkillsFromContainer(path, root, out, seenID) - } - return nil +// discoverTopLevelFiles classifies the instruction files and +// .mcp.json that sit directly in root.Path. Nested files are +// ignored: instruction files and MCP configs are recognized only +// at a scan root's top level. +func (r *Resolver) discoverTopLevelFiles(root ScanRoot, out *[]Resource, seenID map[string]struct{}) { + entries, err := os.ReadDir(root.Path) + if err != nil { + return + } + for _, e := range entries { + name := e.Name() + isInstruction := recognizedInstructionFile(name) + if !isInstruction && name != mcpConfigFileName { + continue } - - // Regular file. - info, statErr := d.Info() - if statErr != nil { - return nil + // A directory that happens to share a recognized basename + // is not a resource. resolveReadTarget separately rejects + // symlinks whose targets are not regular files. + if e.IsDir() { + continue } - if res, ok := r.classifyFile(root.Path, path, info, root.UserSource); ok { - if _, dup := seenID[res.ID]; dup { - return nil - } - seenID[res.ID] = struct{}{} - *out = append(*out, res) + info, err := e.Info() + if err != nil { + continue } - return nil - }) + path := filepath.Join(root.Path, name) + var res Resource + if isInstruction { + res = r.readInstructionFile(root.Path, path, info, root.UserSource) + } else { + res = r.readMCPConfig(root.Path, path, info, root.UserSource) + } + appendResource(out, seenID, res) + } } -// kindFromFilename maps a file basename to its ResourceKind. -// recognized=false when the name matches no convention. -func kindFromFilename(name string) (kind ResourceKind, recognized bool) { - switch { - case recognizedInstructionFile(name): - return KindInstructionFile, true - case name == mcpConfigFileName: - return KindMCPConfig, true - case name == skillMetaFileName: - return KindSkill, true - default: - return 0, false +// appendResource adds res to out unless an earlier resource +// already claimed its ID. +func appendResource(out *[]Resource, seenID map[string]struct{}, res Resource) { + if _, dup := seenID[res.ID]; dup { + return } + seenID[res.ID] = struct{}{} + *out = append(*out, res) } // resolveReadTarget produces the path and FileInfo that should @@ -421,31 +377,22 @@ func resolveReadTarget(path string, info fs.FileInfo, scanRoot string) (readPath return target, tgtInfo, true, StatusOK, "" } -// classifyFile inspects a single file path and produces a +// classifyFile inspects a single-file scan root and produces a // Resource when the basename matches a recognized convention. -// Instruction files are recognized only when path sits directly -// at the scan root (see atScanRoot), because codex does not -// descend into subdirectories to collect nested AGENTS.md files. -// Skills and MCP configs are a Coder extension and remain -// discoverable at any depth. +// Directory roots are handled by discoverIn; this is reached only +// for sources that point directly at a file. func (r *Resolver) classifyFile(scanRoot, path string, info fs.FileInfo, userSource string) (Resource, bool) { name := info.Name() switch { case recognizedInstructionFile(name): - if !atScanRoot(path, scanRoot) { - return Resource{}, false - } return r.readInstructionFile(scanRoot, path, info, userSource), true case name == mcpConfigFileName: return r.readMCPConfig(scanRoot, path, info, userSource), true case name == skillMetaFileName: - // SKILL.md outside a skills container is still a - // valid skill if its parent directory name matches - // the front-matter name. emitSkillsFromContainer - // already handles the common case; here we cover - // "user adds a single SKILL.md file as a source". - res, ok := r.readSkillMeta(scanRoot, path, info, userSource) - return res, ok + // SKILL.md as an explicit single-file source is still a + // valid skill when its parent directory name matches the + // front-matter name. + return r.readSkillMeta(scanRoot, path, info, userSource) default: return Resource{}, false } @@ -689,11 +636,7 @@ func (r *Resolver) emitSkillsFromContainer(container string, root ScanRoot, out if !ok { continue } - if _, dup := seenID[res.ID]; dup { - continue - } - seenID[res.ID] = struct{}{} - *out = append(*out, res) + appendResource(out, seenID, res) } } @@ -785,15 +728,6 @@ func excluded(r Resource, reason string) Resource { return r } -// isSkillsContainer reports whether dir is a recognized skills -// container directory whose immediate children carry SKILL.md -// files. Both bare "skills" and nested "/skills" -// directories qualify (e.g. ".agents/skills", -// "plugins/foo/skills"). -func isSkillsContainer(dir string) bool { - return filepath.Base(dir) == "skills" -} - // resourceID builds a stable resource ID. Kind plus canonical // source path is enough; sources never collide across kinds for // v1 because each kind owns a distinct file-name pattern. diff --git a/agent/agentcontext/resolve_test.go b/agent/agentcontext/resolve_test.go index 9aa4cdb1a3fe5..0254a113c4d1c 100644 --- a/agent/agentcontext/resolve_test.go +++ b/agent/agentcontext/resolve_test.go @@ -351,23 +351,48 @@ func TestResolver_CountCapExcludes(t *testing.T) { require.Equal(t, 2, excluded) } -func TestResolver_SkipsVendorAndNodeModules(t *testing.T) { +// TestResolver_MCPConfigOnlyAtScanRoot verifies that .mcp.json is +// recognized only at a scan root's top level. A nested config is +// ignored because the resolver no longer walks the tree. +func TestResolver_MCPConfigOnlyAtScanRoot(t *testing.T) { t.Parallel() dir := t.TempDir() - // MCP configs are discovered recursively, so they exercise - // the skip-dir logic that instruction files (top-level only) - // no longer reach. Configs under node_modules/ and vendor/ - // must be ignored while one in an ordinary subdirectory is - // still found. + mustWriteFile(t, filepath.Join(dir, ".mcp.json"), `{"mcpServers": {}}`) mustWriteFile(t, filepath.Join(dir, "sub", ".mcp.json"), `{"mcpServers": {}}`) - mustWriteFile(t, filepath.Join(dir, "node_modules", "deep", ".mcp.json"), `{"mcpServers": {}}`) - mustWriteFile(t, filepath.Join(dir, "vendor", ".mcp.json"), `{"mcpServers": {}}`) r := &agentcontext.Resolver{} snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) require.Len(t, snap.Resources, 1) - require.Equal(t, filepath.Join(dir, "sub", ".mcp.json"), snap.Resources[0].Source) + require.Equal(t, agentcontext.KindMCPConfig, snap.Resources[0].Kind) + require.Equal(t, filepath.Join(dir, ".mcp.json"), snap.Resources[0].Source) +} + +// TestResolver_SkillsOnlyFromFixedContainers verifies skills are +// discovered from the fixed container locations (skills, +// .agents/skills, .claude/skills, .codex/skills) and never from an +// arbitrary skills/ directory nested elsewhere in the tree. +func TestResolver_SkillsOnlyFromFixedContainers(t *testing.T) { + t.Parallel() + dir := t.TempDir() + mustWriteSkill(t, filepath.Join(dir, "skills"), "water-plants", "p") + mustWriteSkill(t, filepath.Join(dir, ".agents", "skills"), "make-coffee", "c") + mustWriteSkill(t, filepath.Join(dir, ".claude", "skills"), "fold-laundry", "l") + mustWriteSkill(t, filepath.Join(dir, ".codex", "skills"), "walk-dog", "d") + // A skills/ directory buried under an arbitrary path is not a + // fixed container location and must be ignored. + mustWriteSkill(t, filepath.Join(dir, "pkg", "skills"), "buried", "b") + + r := &agentcontext.Resolver{} + snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + + var names []string + for _, res := range snap.Resources { + require.Equal(t, agentcontext.KindSkill, res.Kind) + names = append(names, filepath.Base(res.Source)) + } + require.ElementsMatch(t, + []string{"water-plants", "make-coffee", "fold-laundry", "walk-dog"}, names) } func TestResolver_UserSourceAttribution(t *testing.T) { diff --git a/agent/agentcontext/watcher.go b/agent/agentcontext/watcher.go index 4a4836e4adb73..ec1e4bb5a8c0b 100644 --- a/agent/agentcontext/watcher.go +++ b/agent/agentcontext/watcher.go @@ -3,10 +3,8 @@ package agentcontext import ( "context" "errors" - "io/fs" "os" "path/filepath" - "strings" "sync" "syscall" "time" @@ -24,33 +22,27 @@ import ( // uses so behavior is consistent across the agent. const DefaultWatchDebounce = 250 * time.Millisecond -// WatcherOptions parameterizes the recursive watcher. +// WatcherOptions parameterizes the watcher. type WatcherOptions struct { Logger slog.Logger Clock quartz.Clock Debounce time.Duration - // MaxDepth caps the recursion depth when discovering - // subdirectories to watch. Zero defaults to - // DefaultMaxScanDepth. Callers wiring the watcher to a - // Resolver should pass the resolver's MaxDepth so the - // watcher never misses edits below the scan horizon. - MaxDepth int // OnChange runs at most once per debounce window. The // caller must not block; the recommended pattern is a // non-blocking send on a re-resolve trigger channel. OnChange func() } -// Watcher is a recursive fsnotify wrapper. fsnotify does not -// support recursive watches natively on Linux, so we walk every -// scan root at sync time and register each subdirectory -// individually. Inotify ENOSPC degrades the watcher into a -// poll-only mode that still re-resolves on Sync calls. +// Watcher is a fixed-location fsnotify wrapper. It watches only +// the directories that can hold recognized resources (each scan +// root plus its skill containers and immediate skill dirs) rather +// than walking the tree, mirroring the resolver's fixed-location +// discovery. Inotify ENOSPC degrades the watcher into a poll-only +// mode that still re-resolves on Sync calls. type Watcher struct { logger slog.Logger clock quartz.Clock debounce time.Duration - maxDepth int onChange func() mu sync.Mutex @@ -77,10 +69,6 @@ func NewWatcher(opts WatcherOptions) (*Watcher, error) { if clock == nil { clock = quartz.NewReal() } - maxDepth := opts.MaxDepth - if maxDepth <= 0 { - maxDepth = DefaultMaxScanDepth - } w, err := fsnotify.NewWatcher() if err != nil { @@ -92,7 +80,6 @@ func NewWatcher(opts WatcherOptions) (*Watcher, error) { logger: opts.Logger, clock: clock, debounce: debounce, - maxDepth: maxDepth, onChange: opts.OnChange, watched: make(map[string]struct{}), degraded: "fsnotify init failed: " + err.Error(), @@ -106,7 +93,6 @@ func NewWatcher(opts WatcherOptions) (*Watcher, error) { logger: opts.Logger, clock: clock, debounce: debounce, - maxDepth: maxDepth, onChange: opts.OnChange, watcher: w, watched: make(map[string]struct{}), @@ -135,17 +121,18 @@ func (w *Watcher) Degraded() string { return w.degraded } -// Sync replaces the set of watched directories with a fresh -// recursive walk of every scan root. Files are not watched -// directly; watching the parent directory catches creates, -// renames, removes, and writes that touch any recognized -// basename. Files that are themselves scan roots are handled by -// watching their parent. +// Sync replaces the set of watched directories with the fixed +// locations that can hold recognized resources: each scan root, +// its skill containers, and the immediate skill subdirectories. +// Files are not watched directly; watching the parent directory +// catches creates, renames, removes, and writes that touch any +// recognized basename. Files that are themselves scan roots are +// handled by watching their parent. // // Sync is idempotent and safe to call repeatedly. The lock is -// released around the recursive directory walk so concurrent -// Close, schedule, and the run goroutine are not blocked by a -// slow filesystem. +// released around the directory scan so concurrent Close, +// schedule, and the run goroutine are not blocked by a slow +// filesystem. func (w *Watcher) Sync(ctx context.Context, roots []ScanRoot) { w.mu.Lock() if w.closed { @@ -168,9 +155,9 @@ func (w *Watcher) Sync(ctx context.Context, roots []ScanRoot) { } w.mu.Unlock() - // collectDirs touches the filesystem (filepath.WalkDir on - // every scan root). Compute the desired set outside the - // mutex so a slow walk does not block the run goroutine, + // collectDirs touches the filesystem (stat/ReadDir on every + // scan root and skill container). Compute the desired set + // outside the mutex so it does not block the run goroutine, // Close, or schedule. desired := w.collectDirs(roots) @@ -324,10 +311,13 @@ func (w *Watcher) schedule() { w.mu.Unlock() } -// collectDirs walks every scan root and returns the set of -// directories to watch. The maximum depth uses the watcher's -// configured maxDepth so it mirrors the resolver's horizon. -func (w *Watcher) collectDirs(roots []ScanRoot) map[string]struct{} { +// collectDirs returns the set of directories to watch. Discovery +// is fixed-location, mirroring the resolver: for each scan root we +// watch the root directory itself (catching top-level instruction +// and .mcp.json changes), plus every existing skill container and +// its immediate skill subdirectories (catching skill add/remove +// and SKILL.md writes). The watcher never recurses the tree. +func (*Watcher) collectDirs(roots []ScanRoot) map[string]struct{} { out := make(map[string]struct{}) for _, root := range roots { if root.Path == "" { @@ -346,25 +336,19 @@ func (w *Watcher) collectDirs(roots []ScanRoot) map[string]struct{} { out[filepath.Dir(root.Path)] = struct{}{} continue } - // Walk the directory and collect every descendant - // directory up to the depth cap. - rootDepth := strings.Count(filepath.Clean(root.Path), string(os.PathSeparator)) - _ = filepath.WalkDir(root.Path, func(path string, d fs.DirEntry, err error) error { + out[root.Path] = struct{}{} + for _, container := range skillContainersFor(root.Path) { + out[container] = struct{}{} + entries, err := os.ReadDir(container) if err != nil { - return nil - } - if !d.IsDir() { - return nil - } - if _, skip := skipDirNames[d.Name()]; skip && path != root.Path { - return fs.SkipDir + continue } - if strings.Count(path, string(os.PathSeparator))-rootDepth > w.maxDepth { - return fs.SkipDir + for _, e := range entries { + if e.IsDir() { + out[filepath.Join(container, e.Name())] = struct{}{} + } } - out[path] = struct{}{} - return nil - }) + } } return out } From cda7f002dc98f267bdab69f72f1b1a455e0b3d00 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Tue, 23 Jun 2026 03:22:06 +0000 Subject: [PATCH 3/4] test(agent/agentcontext): cover user sources not walking up to git root --- agent/agentcontext/manager_test.go | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/agent/agentcontext/manager_test.go b/agent/agentcontext/manager_test.go index eda62dc7448f6..b16aca95bd3b6 100644 --- a/agent/agentcontext/manager_test.go +++ b/agent/agentcontext/manager_test.go @@ -502,3 +502,39 @@ func TestManager_WalkUpReadsAncestorInstructionFiles(t *testing.T) { filepath.Join(cwd, "AGENTS.md"), }, sources) } + +// TestManager_UserSourceDoesNotWalkUp confirms an explicitly added +// source is scanned at exactly that directory. Walk-up to the +// project (.git) root applies only to the working directory, so +// adding a subdirectory such as ./site inside a repo scans ./site +// itself and does not pull in the repo-root instruction files. +func TestManager_UserSourceDoesNotWalkUp(t *testing.T) { + t.Parallel() + // repo is a git project with a root AGENTS.md and a nested + // site/AGENTS.md. The user adds the site subdirectory directly. + repo := testutil.TempDirResolved(t) + require.NoError(t, os.MkdirAll(filepath.Join(repo, ".git"), 0o755)) + mustWriteFile(t, filepath.Join(repo, "AGENTS.md"), "root rules") + site := filepath.Join(repo, "site") + mustWriteFile(t, filepath.Join(site, "AGENTS.md"), "site rules") + + // The working directory is unrelated and contributes nothing, + // isolating the user-source behavior under test. + wd := testutil.TempDirResolved(t) + + m := newTestManager(t, agentcontext.ManagerOptions{ + WorkingDir: func() string { return wd }, + InitialSources: []agentcontext.Source{{Path: site}}, + }) + + snap := m.Snapshot() + var sources []string + for _, r := range snap.Resources { + if r.Kind == agentcontext.KindInstructionFile { + sources = append(sources, r.Source) + } + } + // Only the added subdirectory's AGENTS.md is present; the + // repo-root AGENTS.md is absent, proving no walk-up occurred. + require.Equal(t, []string{filepath.Join(site, "AGENTS.md")}, sources) +} From f062097284f14e9cf95358177171233a6ad25956 Mon Sep 17 00:00:00 2001 From: Kyle Carberry Date: Tue, 23 Jun 2026 03:30:52 +0000 Subject: [PATCH 4/4] refactor(agent/agentcontext): drop project-root walk-up, scan working dir directly --- agent/agentcontext/doc.go | 7 +-- agent/agentcontext/export_test.go | 4 -- agent/agentcontext/manager.go | 63 +++------------------ agent/agentcontext/manager_test.go | 90 ++++-------------------------- agent/agentcontext/resolve.go | 9 ++- 5 files changed, 25 insertions(+), 148 deletions(-) diff --git a/agent/agentcontext/doc.go b/agent/agentcontext/doc.go index 3d775854c39fb..63cbedab41a57 100644 --- a/agent/agentcontext/doc.go +++ b/agent/agentcontext/doc.go @@ -6,16 +6,15 @@ // "RFC: Workspace Context Sources for Coder Agents". It owns: // // - User-declared scan roots (Sources) layered on top of -// built-in defaults, plus the project ancestor chain from the -// git root down to the working directory. +// built-in defaults and the working directory. // - A resolver that classifies files at fixed locations under // each scan root into typed Resources (instruction files, -// skills, MCP configs, MCP servers). Discovery mirrors codex: +// skills, MCP configs, MCP servers). Discovery is shallow: // instruction files (AGENTS.md, CLAUDE.md, .cursorrules) and // .mcp.json are read only at a scan root's top level, skills // only from fixed container directories (skills, .agents/skills, // .claude/skills, .codex/skills), and the resolver never walks -// the working directory tree downward. +// the tree downward or up to a parent directory. // - A fixed-location fsnotify watcher that signals a re-resolve // when any recognized file changes. // - An HTTP API at /api/v0/context/sources for source CRUD diff --git a/agent/agentcontext/export_test.go b/agent/agentcontext/export_test.go index 1b969ecc4d77e..3d7da1e96cbba 100644 --- a/agent/agentcontext/export_test.go +++ b/agent/agentcontext/export_test.go @@ -5,7 +5,3 @@ package agentcontext // this signal; the agent calls Run synchronously after wiring // the Manager. Tests use it to coordinate without polling. func ManagerStarted(m *Manager) <-chan struct{} { return m.started() } - -// ProjectChainForTest exposes the unexported walk-up helper so -// external _test packages can assert the ancestor chain directly. -func ProjectChainForTest(workingDir string) []string { return projectChain(workingDir) } diff --git a/agent/agentcontext/manager.go b/agent/agentcontext/manager.go index cd9765570c63b..6dfeb5ee0cc79 100644 --- a/agent/agentcontext/manager.go +++ b/agent/agentcontext/manager.go @@ -2,9 +2,6 @@ package agentcontext import ( "context" - "os" - "path/filepath" - "slices" "strings" "sync" "time" @@ -556,50 +553,6 @@ func (m *Manager) Trigger() { m.signal() } -// projectMarker is the directory entry that bounds the -// instruction-file walk-up. Matching codex, discovery climbs from -// the working directory to the nearest ancestor that contains a -// .git entry and treats that as the project root. The entry may be -// a directory (a normal clone) or a file (a worktree or submodule -// gitdir pointer), so both are accepted. -const projectMarker = ".git" - -// projectChain returns the directories to scan for project-scoped -// context, ordered from the project root down to workingDir -// inclusive. The project root is the nearest ancestor of -// workingDir (inclusive) that contains a .git entry. When no such -// ancestor exists the chain is just workingDir, so discovery never -// climbs above the workspace into unrelated parent directories. -func projectChain(workingDir string) []string { - workingDir = filepath.Clean(workingDir) - root := workingDir - for dir := workingDir; ; { - if _, err := os.Lstat(filepath.Join(dir, projectMarker)); err == nil { - root = dir - break - } - parent := filepath.Dir(dir) - if parent == dir { - // Reached the filesystem root without a marker; - // scan only the working directory. - return []string{workingDir} - } - dir = parent - } - // Collect cwd -> root, then reverse to root -> cwd so callers - // see ancestors before descendants. - var chain []string - for dir := workingDir; ; { - chain = append(chain, dir) - if dir == root { - break - } - dir = filepath.Dir(dir) - } - slices.Reverse(chain) - return chain -} - // scanRootsLocked returns the list of ScanRoots to feed the // resolver and watcher. The Manager's mutex must be held. func (m *Manager) scanRootsLocked() []ScanRoot { @@ -607,14 +560,14 @@ func (m *Manager) scanRootsLocked() []ScanRoot { out := make([]ScanRoot, 0, 1+len(builtinRoots)+len(m.sources)) if m.workingDir != nil { if wd := strings.TrimSpace(m.workingDir()); wd != "" { - // Walk up to the project root and scan each - // directory from the root down to the working dir, - // mirroring codex. Each chain dir is its own scan - // root, so the resolver reads instruction files and - // .mcp.json at every ancestor without descending. - for _, dir := range projectChain(wd) { - out = append(out, ScanRoot{Path: dir}) - } + // The working directory is a single scan root. The + // resolver reads its top-level instruction files and + // .mcp.json plus the fixed skill containers under it; + // it neither descends into subdirectories nor climbs + // to parent directories. Additional directories are + // added explicitly as Sources or via the seeding env + // vars. + out = append(out, ScanRoot{Path: wd}) } } for _, r := range builtinRoots { diff --git a/agent/agentcontext/manager_test.go b/agent/agentcontext/manager_test.go index b16aca95bd3b6..7f4883f70eef0 100644 --- a/agent/agentcontext/manager_test.go +++ b/agent/agentcontext/manager_test.go @@ -438,44 +438,11 @@ func TestManager_MCPResourcesAppliesToSnapshot(t *testing.T) { require.True(t, found, "expected MCP server resource in snapshot") } -func TestProjectChain_WalksUpToGitRoot(t *testing.T) { - t.Parallel() - root := testutil.TempDirResolved(t) - require.NoError(t, os.MkdirAll(filepath.Join(root, ".git"), 0o755)) - cwd := filepath.Join(root, "a", "b") - require.NoError(t, os.MkdirAll(cwd, 0o755)) - - require.Equal(t, []string{ - root, - filepath.Join(root, "a"), - cwd, - }, agentcontext.ProjectChainForTest(cwd)) -} - -func TestProjectChain_GitFileBoundaryIsHonored(t *testing.T) { - t.Parallel() - // A worktree or submodule has .git as a file, not a directory. - root := testutil.TempDirResolved(t) - require.NoError(t, os.WriteFile(filepath.Join(root, ".git"), []byte("gitdir: elsewhere"), 0o600)) - cwd := filepath.Join(root, "sub") - require.NoError(t, os.MkdirAll(cwd, 0o755)) - - require.Equal(t, []string{root, cwd}, agentcontext.ProjectChainForTest(cwd)) -} - -func TestProjectChain_NoGitFallsBackToWorkingDir(t *testing.T) { - t.Parallel() - cwd := filepath.Join(testutil.TempDirResolved(t), "x", "y") - require.NoError(t, os.MkdirAll(cwd, 0o755)) - - require.Equal(t, []string{cwd}, agentcontext.ProjectChainForTest(cwd)) -} - -// TestManager_WalkUpReadsAncestorInstructionFiles confirms the -// Manager scans every directory from the git root down to the -// working directory, and ignores instruction files in sibling -// subtrees that are not on the chain. -func TestManager_WalkUpReadsAncestorInstructionFiles(t *testing.T) { +// TestManager_WorkingDirScannedShallow confirms the working +// directory is a single scan root: its top-level instruction files +// are read, but the resolver neither climbs to an ancestor (no +// walk-up to a .git project root) nor descends into subdirectories. +func TestManager_WorkingDirScannedShallow(t *testing.T) { t.Parallel() root := testutil.TempDirResolved(t) require.NoError(t, os.MkdirAll(filepath.Join(root, ".git"), 0o755)) @@ -483,8 +450,8 @@ func TestManager_WalkUpReadsAncestorInstructionFiles(t *testing.T) { cwd := filepath.Join(root, "service") require.NoError(t, os.MkdirAll(cwd, 0o755)) mustWriteFile(t, filepath.Join(cwd, "AGENTS.md"), "service rules") - // A sibling subtree is not on the root->cwd chain. - mustWriteFile(t, filepath.Join(root, "other", "AGENTS.md"), "other rules") + // A subdirectory below the working dir must not be descended. + mustWriteFile(t, filepath.Join(cwd, "nested", "AGENTS.md"), "nested rules") m := newTestManager(t, agentcontext.ManagerOptions{ WorkingDir: func() string { return cwd }, @@ -497,44 +464,7 @@ func TestManager_WalkUpReadsAncestorInstructionFiles(t *testing.T) { sources = append(sources, r.Source) } } - require.ElementsMatch(t, []string{ - filepath.Join(root, "AGENTS.md"), - filepath.Join(cwd, "AGENTS.md"), - }, sources) -} - -// TestManager_UserSourceDoesNotWalkUp confirms an explicitly added -// source is scanned at exactly that directory. Walk-up to the -// project (.git) root applies only to the working directory, so -// adding a subdirectory such as ./site inside a repo scans ./site -// itself and does not pull in the repo-root instruction files. -func TestManager_UserSourceDoesNotWalkUp(t *testing.T) { - t.Parallel() - // repo is a git project with a root AGENTS.md and a nested - // site/AGENTS.md. The user adds the site subdirectory directly. - repo := testutil.TempDirResolved(t) - require.NoError(t, os.MkdirAll(filepath.Join(repo, ".git"), 0o755)) - mustWriteFile(t, filepath.Join(repo, "AGENTS.md"), "root rules") - site := filepath.Join(repo, "site") - mustWriteFile(t, filepath.Join(site, "AGENTS.md"), "site rules") - - // The working directory is unrelated and contributes nothing, - // isolating the user-source behavior under test. - wd := testutil.TempDirResolved(t) - - m := newTestManager(t, agentcontext.ManagerOptions{ - WorkingDir: func() string { return wd }, - InitialSources: []agentcontext.Source{{Path: site}}, - }) - - snap := m.Snapshot() - var sources []string - for _, r := range snap.Resources { - if r.Kind == agentcontext.KindInstructionFile { - sources = append(sources, r.Source) - } - } - // Only the added subdirectory's AGENTS.md is present; the - // repo-root AGENTS.md is absent, proving no walk-up occurred. - require.Equal(t, []string{filepath.Join(site, "AGENTS.md")}, sources) + // Only the working directory's own AGENTS.md is present: the + // ancestor root and the nested subdirectory are both excluded. + require.Equal(t, []string{filepath.Join(cwd, "AGENTS.md")}, sources) } diff --git a/agent/agentcontext/resolve.go b/agent/agentcontext/resolve.go index 6bd26051b9050..78bf234b2f011 100644 --- a/agent/agentcontext/resolve.go +++ b/agent/agentcontext/resolve.go @@ -222,13 +222,12 @@ func (r *Resolver) normalize() *Resolver { // list. Aggregate caps are applied separately. The ctx is checked // between roots so callers can bail out promptly. // -// Discovery is deliberately shallow. codex never walks the working -// directory downward; it reads fixed locations and walks up to the -// project root. The Manager performs the walk-up by feeding the -// root->cwd chain as separate scan roots, and for each root the +// Discovery is deliberately shallow. For each scan root the // resolver inspects only that directory's top level (instruction // files and .mcp.json) plus a fixed set of skill-container -// locations. +// locations under it. It never descends into subdirectories and +// never climbs to a parent directory; additional directories must +// be added explicitly as scan roots. func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Resource, snapErrs []string) { // Dedup roots by canonical path. The first occurrence // wins so user-added roots that overlap with a built-in