diff --git a/agent/agentcontext/doc.go b/agent/agentcontext/doc.go index 5ed7ebf043af5..63cbedab41a57 100644 --- a/agent/agentcontext/doc.go +++ b/agent/agentcontext/doc.go @@ -6,12 +6,17 @@ // "RFC: Workspace Context Sources for Coder Agents". It owns: // // - User-declared scan roots (Sources) layered on top of -// built-in defaults. -// - A resolver that classifies files under each scan root into -// typed Resources (instruction files, skills, MCP configs, -// MCP servers). -// - A unified recursive fsnotify watcher that signals a -// re-resolve when any recognized file changes. +// built-in defaults and the working directory. +// - A resolver that classifies files at fixed locations under +// each scan root into typed Resources (instruction files, +// skills, MCP configs, MCP servers). Discovery is shallow: +// instruction files (AGENTS.md, CLAUDE.md, .cursorrules) and +// .mcp.json are read only at a scan root's top level, skills +// only from fixed container directories (skills, .agents/skills, +// .claude/skills, .codex/skills), and the resolver never walks +// the tree downward or up to a parent directory. +// - A fixed-location fsnotify watcher that signals a re-resolve +// when any recognized file changes. // - An HTTP API at /api/v0/context/sources for source CRUD // and /api/v0/context/resync for synchronous push barriers. // - A Pusher abstraction so the latest Snapshot can be shipped diff --git a/agent/agentcontext/manager.go b/agent/agentcontext/manager.go index e61367bdc7178..6dfeb5ee0cc79 100644 --- a/agent/agentcontext/manager.go +++ b/agent/agentcontext/manager.go @@ -219,7 +219,6 @@ func (m *Manager) Run(ctx context.Context) error { Logger: m.logger.Named("watcher"), Clock: m.clock, Debounce: m.debounce, - MaxDepth: m.resolver.MaxDepth, OnChange: m.signal, }) if err != nil { @@ -561,6 +560,13 @@ func (m *Manager) scanRootsLocked() []ScanRoot { out := make([]ScanRoot, 0, 1+len(builtinRoots)+len(m.sources)) if m.workingDir != nil { if wd := strings.TrimSpace(m.workingDir()); wd != "" { + // The working directory is a single scan root. The + // resolver reads its top-level instruction files and + // .mcp.json plus the fixed skill containers under it; + // it neither descends into subdirectories nor climbs + // to parent directories. Additional directories are + // added explicitly as Sources or via the seeding env + // vars. out = append(out, ScanRoot{Path: wd}) } } diff --git a/agent/agentcontext/manager_test.go b/agent/agentcontext/manager_test.go index 08ce803ead9e9..7f4883f70eef0 100644 --- a/agent/agentcontext/manager_test.go +++ b/agent/agentcontext/manager_test.go @@ -437,3 +437,34 @@ func TestManager_MCPResourcesAppliesToSnapshot(t *testing.T) { } require.True(t, found, "expected MCP server resource in snapshot") } + +// TestManager_WorkingDirScannedShallow confirms the working +// directory is a single scan root: its top-level instruction files +// are read, but the resolver neither climbs to an ancestor (no +// walk-up to a .git project root) nor descends into subdirectories. +func TestManager_WorkingDirScannedShallow(t *testing.T) { + t.Parallel() + root := testutil.TempDirResolved(t) + require.NoError(t, os.MkdirAll(filepath.Join(root, ".git"), 0o755)) + mustWriteFile(t, filepath.Join(root, "AGENTS.md"), "root rules") + cwd := filepath.Join(root, "service") + require.NoError(t, os.MkdirAll(cwd, 0o755)) + mustWriteFile(t, filepath.Join(cwd, "AGENTS.md"), "service rules") + // A subdirectory below the working dir must not be descended. + mustWriteFile(t, filepath.Join(cwd, "nested", "AGENTS.md"), "nested rules") + + m := newTestManager(t, agentcontext.ManagerOptions{ + WorkingDir: func() string { return cwd }, + }) + + snap := m.Snapshot() + var sources []string + for _, r := range snap.Resources { + if r.Kind == agentcontext.KindInstructionFile { + sources = append(sources, r.Source) + } + } + // Only the working directory's own AGENTS.md is present: the + // ancestor root and the nested subdirectory are both excluded. + require.Equal(t, []string{filepath.Join(cwd, "AGENTS.md")}, sources) +} diff --git a/agent/agentcontext/resolve.go b/agent/agentcontext/resolve.go index 86d3f4002dade..78bf234b2f011 100644 --- a/agent/agentcontext/resolve.go +++ b/agent/agentcontext/resolve.go @@ -5,7 +5,6 @@ import ( "context" "crypto/sha256" "encoding/json" - "errors" "fmt" "io" "io/fs" @@ -36,57 +35,74 @@ const ( // DefaultMaxResources is the resource count cap. Resources // past this cap are emitted with Status == StatusExcluded. DefaultMaxResources = 500 - // DefaultMaxScanDepth bounds how deep the recursive walk - // descends from each scan root. The default avoids runaway - // scans in node_modules / vendor / .git trees while still - // covering realistic monorepo layouts. - DefaultMaxScanDepth = 8 ) // File-name conventions recognized by the v1 resolver. var ( - // instructionFileNames are picked up from any scan root. - // Matching is case-insensitive on the basename. + // instructionFileNames are picked up from the top level of a + // scan root. Matching is case-sensitive on the basename, + // mirroring codex: it keys on the exact name "AGENTS.md" and + // never case-folds, so a lower-case agents.md (for example a + // generated API reference doc) is not mistaken for an + // instruction file. instructionFileNames = []string{ "AGENTS.md", "CLAUDE.md", ".cursorrules", } - // mcpConfigFileName is recognized at any depth under a - // scan root. + // mcpConfigFileName is recognized at the top level of a scan + // root only, not at arbitrary depth. mcpConfigFileName = ".mcp.json" // skillMetaFileName is the file inside a skill directory // that carries the skill front-matter. skillMetaFileName = "SKILL.md" ) -// skipDirNames are directory basenames that the recursive walk -// never descends into. The list mirrors what most language -// tool-chains treat as opaque. -var skipDirNames = map[string]struct{}{ - ".git": {}, - ".hg": {}, - ".svn": {}, - "node_modules": {}, - "vendor": {}, - "target": {}, - "dist": {}, - "build": {}, - ".venv": {}, - "__pycache__": {}, +// skillContainerRelPaths are the directories, relative to a scan +// root, under which skills are discovered. A skill is an immediate +// subdirectory of a container that holds a SKILL.md. The list +// covers the cross-tool conventions Coder supports; codex itself +// uses .agents/skills and .codex/skills. +var skillContainerRelPaths = []string{ + "skills", + filepath.Join(".agents", "skills"), + filepath.Join(".claude", "skills"), + filepath.Join(".codex", "skills"), } // recognizedInstructionFile reports whether name is one of the -// instruction-file conventions, case-insensitively. +// instruction-file conventions. Matching is case-sensitive: +// codex keys on the exact basename "AGENTS.md", so a lower-case +// agents.md is intentionally not recognized. func recognizedInstructionFile(name string) bool { for _, candidate := range instructionFileNames { - if strings.EqualFold(name, candidate) { + if name == candidate { return true } } return false } +// skillContainersFor returns the existing skill-container +// directories reachable from rootPath without recursing the tree: +// rootPath itself when it is already a "skills" directory, plus +// each skillContainerRelPaths entry that exists. Skills live in +// the immediate children of a container, so the resolver and the +// watcher both stop here. +func skillContainersFor(rootPath string) []string { + var out []string + if filepath.Base(rootPath) == "skills" { + out = append(out, rootPath) + } + for _, rel := range skillContainerRelPaths { + container := filepath.Join(rootPath, rel) + if info, err := os.Stat(container); err == nil && info.IsDir() { + out = append(out, container) + } + } + return out +} + // Resolver walks one or more scan roots and produces a snapshot // of every recognized resource it finds. The Resolver is // stateless; the Manager owns the scan-root list and orchestrates @@ -101,9 +117,6 @@ type Resolver struct { // MaxResources caps the resource count. Use // DefaultMaxResources if zero. MaxResources int - // MaxDepth caps the directory walk depth. Use - // DefaultMaxScanDepth if zero. - MaxDepth int // MCPResources, when non-nil, is consulted after the // filesystem pass and returns the KindMCPServer resources // for live MCP servers. It must not block: the resolver @@ -202,15 +215,19 @@ func (r *Resolver) normalize() *Resolver { if out.MaxResources == 0 { out.MaxResources = DefaultMaxResources } - if out.MaxDepth == 0 { - out.MaxDepth = DefaultMaxScanDepth - } return &out } -// walk traverses every scan root and produces an unordered -// resource list. Aggregate caps are applied separately. The ctx -// is checked between roots so callers can bail out promptly. +// walk visits every scan root and produces an unordered resource +// list. Aggregate caps are applied separately. The ctx is checked +// between roots so callers can bail out promptly. +// +// Discovery is deliberately shallow. For each scan root the +// resolver inspects only that directory's top level (instruction +// files and .mcp.json) plus a fixed set of skill-container +// locations under it. It never descends into subdirectories and +// never climbs to a parent directory; additional directories must +// be added explicitly as scan roots. func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Resource, snapErrs []string) { // Dedup roots by canonical path. The first occurrence // wins so user-added roots that overlap with a built-in @@ -228,131 +245,89 @@ func (r *Resolver) walk(ctx context.Context, roots []ScanRoot) (resources []Reso dedup = append(dedup, root) } - // Deduplicate resources across roots by ID. Without this, - // a built-in root and a user root that both cover the - // same project tree would double-count AGENTS.md. + // Deduplicate resources across roots by ID so two roots that + // resolve to the same file (e.g. overlapping ancestors, or a + // built-in root nested under a project root) do not + // double-count it. seenID := make(map[string]struct{}) for _, root := range dedup { if err := ctx.Err(); err != nil { return nil, []string{err.Error()} } - info, err := os.Stat(root.Path) - if err != nil { - // Missing roots silently fall through. The user - // either added a path that does not exist yet or - // removed it later. The watcher will surface - // re-creation as a change event. - continue - } - if !info.IsDir() { - // Single-file roots are classified directly. - if res, ok := r.classifyFile(root.Path, root.Path, info, root.UserSource); ok { - if _, dup := seenID[res.ID]; !dup { - seenID[res.ID] = struct{}{} - resources = append(resources, res) - } - } - continue - } - walkErr := r.walkDir(ctx, root, &resources, seenID) - if walkErr != nil { - snapErrs = append(snapErrs, fmt.Sprintf("walk %q: %s", root.Path, walkErr)) - } + r.discoverIn(root, &resources, seenID) } return resources, snapErrs } -// walkDir performs the recursive descent for a single scan -// directory. It honors r.MaxDepth and skipDirNames. The ctx is -// checked inside the WalkDir callback so cancellation -// terminates the walk even mid-root. -func (r *Resolver) walkDir(ctx context.Context, root ScanRoot, out *[]Resource, seenID map[string]struct{}) error { - rootDepth := strings.Count(filepath.Clean(root.Path), string(os.PathSeparator)) - maxDepth := rootDepth + r.MaxDepth - - return filepath.WalkDir(root.Path, func(path string, d fs.DirEntry, err error) error { - if ctxErr := ctx.Err(); ctxErr != nil { - return ctxErr - } - if err != nil { - // Surface the error as Unreadable when we can - // associate it with a single recognized file; - // otherwise let the walk continue. - if d != nil && !d.IsDir() { - kind, recognized := kindFromFilename(d.Name()) - if recognized { - res := Resource{ - ID: resourceID(kind, path), - Kind: kind, - Source: path, - SizeBytes: 0, - Status: StatusUnreadable, - Error: err.Error(), - SourcePath: root.UserSource, - } - if _, dup := seenID[res.ID]; !dup { - seenID[res.ID] = struct{}{} - *out = append(*out, res) - } - } - } - if errors.Is(err, fs.ErrPermission) { - // Permission errors on a directory: skip the - // subtree but continue walking siblings. - if d != nil && d.IsDir() { - return fs.SkipDir - } - } - return nil +// discoverIn inspects a single scan root. A root that points at a +// file is classified directly. A directory root contributes its +// top-level instruction files and .mcp.json plus skills from the +// fixed container locations under it. The walk goes no deeper. +func (r *Resolver) discoverIn(root ScanRoot, out *[]Resource, seenID map[string]struct{}) { + info, err := os.Stat(root.Path) + if err != nil { + // Missing roots silently fall through. The user either + // added a path that does not exist yet or removed it + // later; the watcher surfaces re-creation as a change. + return + } + if !info.IsDir() { + if res, ok := r.classifyFile(root.Path, root.Path, info, root.UserSource); ok { + appendResource(out, seenID, res) } + return + } + r.discoverTopLevelFiles(root, out, seenID) + for _, container := range skillContainersFor(root.Path) { + r.emitSkillsFromContainer(container, root, out, seenID) + } +} - if d.IsDir() { - if strings.Count(path, string(os.PathSeparator)) > maxDepth { - return fs.SkipDir - } - if _, skip := skipDirNames[d.Name()]; skip && path != root.Path { - return fs.SkipDir - } - // If we are entering a "skills container" - // directory (".agents/skills", "~/.coder/skills", - // "plugins//skills"), eagerly emit skill - // resources for its immediate subdirectories. - if isSkillsContainer(path) { - r.emitSkillsFromContainer(path, root, out, seenID) - } - return nil +// discoverTopLevelFiles classifies the instruction files and +// .mcp.json that sit directly in root.Path. Nested files are +// ignored: instruction files and MCP configs are recognized only +// at a scan root's top level. +func (r *Resolver) discoverTopLevelFiles(root ScanRoot, out *[]Resource, seenID map[string]struct{}) { + entries, err := os.ReadDir(root.Path) + if err != nil { + return + } + for _, e := range entries { + name := e.Name() + isInstruction := recognizedInstructionFile(name) + if !isInstruction && name != mcpConfigFileName { + continue } - - // Regular file. - info, statErr := d.Info() - if statErr != nil { - return nil + // A directory that happens to share a recognized basename + // is not a resource. resolveReadTarget separately rejects + // symlinks whose targets are not regular files. + if e.IsDir() { + continue } - if res, ok := r.classifyFile(root.Path, path, info, root.UserSource); ok { - if _, dup := seenID[res.ID]; dup { - return nil - } - seenID[res.ID] = struct{}{} - *out = append(*out, res) + info, err := e.Info() + if err != nil { + continue } - return nil - }) + path := filepath.Join(root.Path, name) + var res Resource + if isInstruction { + res = r.readInstructionFile(root.Path, path, info, root.UserSource) + } else { + res = r.readMCPConfig(root.Path, path, info, root.UserSource) + } + appendResource(out, seenID, res) + } } -// kindFromFilename maps a file basename to its ResourceKind. -// recognized=false when the name matches no convention. -func kindFromFilename(name string) (kind ResourceKind, recognized bool) { - switch { - case recognizedInstructionFile(name): - return KindInstructionFile, true - case name == mcpConfigFileName: - return KindMCPConfig, true - case name == skillMetaFileName: - return KindSkill, true - default: - return 0, false +// appendResource adds res to out unless an earlier resource +// already claimed its ID. +func appendResource(out *[]Resource, seenID map[string]struct{}, res Resource) { + if _, dup := seenID[res.ID]; dup { + return } + seenID[res.ID] = struct{}{} + *out = append(*out, res) } // resolveReadTarget produces the path and FileInfo that should @@ -401,8 +376,10 @@ func resolveReadTarget(path string, info fs.FileInfo, scanRoot string) (readPath return target, tgtInfo, true, StatusOK, "" } -// classifyFile inspects a single file path and produces a +// classifyFile inspects a single-file scan root and produces a // Resource when the basename matches a recognized convention. +// Directory roots are handled by discoverIn; this is reached only +// for sources that point directly at a file. func (r *Resolver) classifyFile(scanRoot, path string, info fs.FileInfo, userSource string) (Resource, bool) { name := info.Name() switch { @@ -411,13 +388,10 @@ func (r *Resolver) classifyFile(scanRoot, path string, info fs.FileInfo, userSou case name == mcpConfigFileName: return r.readMCPConfig(scanRoot, path, info, userSource), true case name == skillMetaFileName: - // SKILL.md outside a skills container is still a - // valid skill if its parent directory name matches - // the front-matter name. emitSkillsFromContainer - // already handles the common case; here we cover - // "user adds a single SKILL.md file as a source". - res, ok := r.readSkillMeta(scanRoot, path, info, userSource) - return res, ok + // SKILL.md as an explicit single-file source is still a + // valid skill when its parent directory name matches the + // front-matter name. + return r.readSkillMeta(scanRoot, path, info, userSource) default: return Resource{}, false } @@ -529,14 +503,26 @@ func validateMCPConfig(data []byte) error { // post-processing (e.g. firstLine for instruction files) by // inspecting Status==StatusOK. func (r *Resolver) readFileResource(kind ResourceKind, scanRoot, path string, info fs.FileInfo, userSource string) Resource { + readPath, readInfo, ok, status, errMsg := resolveReadTarget(path, info, scanRoot) + // Attribute the resource to the resolved target rather than + // the path we walked. When several names point at the same + // file (e.g. CLAUDE.md and .cursorrules symlinked to + // AGENTS.md), they share an ID and collapse to a single + // resource via the walk's ID-based dedup, instead of shipping + // identical content multiple times. On resolve failure the + // original path is kept so the error points at the offending + // link. + idPath := path + if ok { + idPath = readPath + } res := Resource{ - ID: resourceID(kind, path), + ID: resourceID(kind, idPath), Kind: kind, - Source: path, + Source: idPath, SizeBytes: safeUint64(info.Size()), SourcePath: userSource, } - readPath, readInfo, ok, status, errMsg := resolveReadTarget(path, info, scanRoot) if !ok { res.Status = status res.Error = errMsg @@ -649,11 +635,7 @@ func (r *Resolver) emitSkillsFromContainer(container string, root ScanRoot, out if !ok { continue } - if _, dup := seenID[res.ID]; dup { - continue - } - seenID[res.ID] = struct{}{} - *out = append(*out, res) + appendResource(out, seenID, res) } } @@ -745,15 +727,6 @@ func excluded(r Resource, reason string) Resource { return r } -// isSkillsContainer reports whether dir is a recognized skills -// container directory whose immediate children carry SKILL.md -// files. Both bare "skills" and nested "/skills" -// directories qualify (e.g. ".agents/skills", -// "plugins/foo/skills"). -func isSkillsContainer(dir string) bool { - return filepath.Base(dir) == "skills" -} - // resourceID builds a stable resource ID. Kind plus canonical // source path is enough; sources never collide across kinds for // v1 because each kind owns a distinct file-name pattern. diff --git a/agent/agentcontext/resolve_test.go b/agent/agentcontext/resolve_test.go index 9c6213d21bedc..0254a113c4d1c 100644 --- a/agent/agentcontext/resolve_test.go +++ b/agent/agentcontext/resolve_test.go @@ -54,7 +54,11 @@ func TestResolver_ProjectAGENTSFile(t *testing.T) { require.NotEqual(t, [32]byte{}, got.ContentHash) } -func TestResolver_CaseInsensitiveInstructionNames(t *testing.T) { +// TestResolver_InstructionNamesAreCaseSensitive verifies the +// resolver matches instruction filenames exactly, mirroring +// codex. A lower-case agents.md (for example a generated API +// reference doc) must not be mistaken for an instruction file. +func TestResolver_InstructionNamesAreCaseSensitive(t *testing.T) { t.Parallel() dir := t.TempDir() mustWriteFile(t, filepath.Join(dir, "agents.md"), "lower\n") @@ -63,7 +67,8 @@ func TestResolver_CaseInsensitiveInstructionNames(t *testing.T) { r := &agentcontext.Resolver{} snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) - require.Len(t, snap.Resources, 2) + require.Len(t, snap.Resources, 1) + require.Equal(t, filepath.Join(dir, "CLAUDE.md"), snap.Resources[0].Source) } func TestResolver_SkillsContainerEmitsEachSubdir(t *testing.T) { @@ -150,10 +155,11 @@ func TestResolver_MCPConfigEmitted(t *testing.T) { } // TestResolver_SymlinkInsideScanRootAllowed exercises the -// monorepo case where AGENTS.md is symlinked to shared content -// inside the same workspace tree. The target lives under the -// scan root, so the resolver follows the symlink and emits the -// target bytes as if the symlink were a regular file. +// monorepo case where a top-level AGENTS.md is symlinked to +// shared content elsewhere inside the same workspace tree. The +// target lives under the scan root, so the resolver follows the +// symlink, emits the target bytes, and attributes the resource +// to the resolved target path. func TestResolver_SymlinkInsideScanRootAllowed(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("symlinks require admin privileges on Windows runners") @@ -169,15 +175,59 @@ func TestResolver_SymlinkInsideScanRootAllowed(t *testing.T) { r := &agentcontext.Resolver{} snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) - require.Len(t, snap.Resources, 2) - var linked agentcontext.Resource - for _, res := range snap.Resources { - if res.Source == link { - linked = res - } + // The nested target is not independently recognized (only the + // top-level symlink is), so exactly one resource is emitted, + // carrying the target bytes and attributed to the target. + require.Len(t, snap.Resources, 1) + got := snap.Resources[0] + require.Equal(t, agentcontext.StatusOK, got.Status) + require.Equal(t, target, got.Source) + require.Equal(t, "shared monorepo guidance", string(got.Payload)) +} + +// TestResolver_SymlinkedInstructionFilesDeduplicated reproduces +// the common repo layout where CLAUDE.md and .cursorrules are +// symlinks to a single AGENTS.md. All three resolve to the same +// file, so the resolver must emit one instruction resource +// attributed to the real AGENTS.md rather than three copies of +// identical content. +func TestResolver_SymlinkedInstructionFilesDeduplicated(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("symlinks require admin privileges on Windows runners") } - require.Equal(t, agentcontext.StatusOK, linked.Status) - require.Equal(t, "shared monorepo guidance", string(linked.Payload)) + t.Parallel() + dir := t.TempDir() + agents := filepath.Join(dir, "AGENTS.md") + mustWriteFile(t, agents, "the one true guidance") + require.NoError(t, os.Symlink(agents, filepath.Join(dir, "CLAUDE.md"))) + require.NoError(t, os.Symlink(agents, filepath.Join(dir, ".cursorrules"))) + + r := &agentcontext.Resolver{} + snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + + require.Len(t, snap.Resources, 1) + got := snap.Resources[0] + require.Equal(t, agentcontext.KindInstructionFile, got.Kind) + require.Equal(t, agents, got.Source) + require.Equal(t, "the one true guidance", string(got.Payload)) +} + +// TestResolver_InstructionFilesOnlyAtScanRoot verifies the +// resolver does not descend into subdirectories to collect +// nested instruction files, mirroring codex. A nested +// site/AGENTS.md is ignored while the top-level one is kept. +func TestResolver_InstructionFilesOnlyAtScanRoot(t *testing.T) { + t.Parallel() + dir := t.TempDir() + mustWriteFile(t, filepath.Join(dir, "AGENTS.md"), "root") + mustWriteFile(t, filepath.Join(dir, "site", "AGENTS.md"), "nested") + + r := &agentcontext.Resolver{} + snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + + require.Len(t, snap.Resources, 1) + require.Equal(t, filepath.Join(dir, "AGENTS.md"), snap.Resources[0].Source) + require.Equal(t, "root", string(snap.Resources[0].Payload)) } // TestResolver_SymlinkOutsideScanRootRejected guards the @@ -250,18 +300,23 @@ func TestResolver_OversizeInstructionFile(t *testing.T) { func TestResolver_AggregateCapExcludes(t *testing.T) { t.Parallel() - dir := t.TempDir() - mustWriteFile(t, filepath.Join(dir, "AGENTS.md"), "small") - subA := filepath.Join(dir, "a") - subB := filepath.Join(dir, "b") - mustWriteFile(t, filepath.Join(subA, "AGENTS.md"), "AAAA") - mustWriteFile(t, filepath.Join(subB, "AGENTS.md"), "BBBB") - - // Aggregate cap of 9 bytes lets the first two through but - // excludes the third regardless of which order they - // appear. + // Instruction files are only read at a scan root's top level, + // so each contributing file lives at its own scan root. + dirRoot := t.TempDir() + dirA := t.TempDir() + dirB := t.TempDir() + mustWriteFile(t, filepath.Join(dirRoot, "AGENTS.md"), "small") + mustWriteFile(t, filepath.Join(dirA, "AGENTS.md"), "AAAA") + mustWriteFile(t, filepath.Join(dirB, "AGENTS.md"), "BBBB") + + // Aggregate cap of 9 bytes lets two of the three (5+4) bytes + // through but excludes the third regardless of order. r := &agentcontext.Resolver{MaxSnapshotBytes: 9} - snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + snap := r.Resolve([]agentcontext.ScanRoot{ + {Path: dirRoot}, + {Path: dirA}, + {Path: dirB}, + }) var excluded int for _, res := range snap.Resources { @@ -274,14 +329,17 @@ func TestResolver_AggregateCapExcludes(t *testing.T) { func TestResolver_CountCapExcludes(t *testing.T) { t.Parallel() - dir := t.TempDir() + // Instruction files are only read at a scan root's top level, + // so spread the five files across five scan roots. + roots := make([]agentcontext.ScanRoot, 0, 5) for i := 0; i < 5; i++ { - sub := filepath.Join(dir, "dir", string('a'+rune(i))) - mustWriteFile(t, filepath.Join(sub, "AGENTS.md"), "x") + d := t.TempDir() + mustWriteFile(t, filepath.Join(d, "AGENTS.md"), "x") + roots = append(roots, agentcontext.ScanRoot{Path: d}) } r := &agentcontext.Resolver{MaxResources: 3} - snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + snap := r.Resolve(roots) require.Len(t, snap.Resources, 5) var excluded int @@ -293,18 +351,48 @@ func TestResolver_CountCapExcludes(t *testing.T) { require.Equal(t, 2, excluded) } -func TestResolver_SkipsVendorAndNodeModules(t *testing.T) { +// TestResolver_MCPConfigOnlyAtScanRoot verifies that .mcp.json is +// recognized only at a scan root's top level. A nested config is +// ignored because the resolver no longer walks the tree. +func TestResolver_MCPConfigOnlyAtScanRoot(t *testing.T) { t.Parallel() dir := t.TempDir() - mustWriteFile(t, filepath.Join(dir, "AGENTS.md"), "root") - mustWriteFile(t, filepath.Join(dir, "node_modules", "deep", "AGENTS.md"), "should not appear") - mustWriteFile(t, filepath.Join(dir, "vendor", "AGENTS.md"), "should not appear either") + mustWriteFile(t, filepath.Join(dir, ".mcp.json"), `{"mcpServers": {}}`) + mustWriteFile(t, filepath.Join(dir, "sub", ".mcp.json"), `{"mcpServers": {}}`) r := &agentcontext.Resolver{} snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) require.Len(t, snap.Resources, 1) - require.Equal(t, filepath.Join(dir, "AGENTS.md"), snap.Resources[0].Source) + require.Equal(t, agentcontext.KindMCPConfig, snap.Resources[0].Kind) + require.Equal(t, filepath.Join(dir, ".mcp.json"), snap.Resources[0].Source) +} + +// TestResolver_SkillsOnlyFromFixedContainers verifies skills are +// discovered from the fixed container locations (skills, +// .agents/skills, .claude/skills, .codex/skills) and never from an +// arbitrary skills/ directory nested elsewhere in the tree. +func TestResolver_SkillsOnlyFromFixedContainers(t *testing.T) { + t.Parallel() + dir := t.TempDir() + mustWriteSkill(t, filepath.Join(dir, "skills"), "water-plants", "p") + mustWriteSkill(t, filepath.Join(dir, ".agents", "skills"), "make-coffee", "c") + mustWriteSkill(t, filepath.Join(dir, ".claude", "skills"), "fold-laundry", "l") + mustWriteSkill(t, filepath.Join(dir, ".codex", "skills"), "walk-dog", "d") + // A skills/ directory buried under an arbitrary path is not a + // fixed container location and must be ignored. + mustWriteSkill(t, filepath.Join(dir, "pkg", "skills"), "buried", "b") + + r := &agentcontext.Resolver{} + snap := r.Resolve([]agentcontext.ScanRoot{{Path: dir}}) + + var names []string + for _, res := range snap.Resources { + require.Equal(t, agentcontext.KindSkill, res.Kind) + names = append(names, filepath.Base(res.Source)) + } + require.ElementsMatch(t, + []string{"water-plants", "make-coffee", "fold-laundry", "walk-dog"}, names) } func TestResolver_UserSourceAttribution(t *testing.T) { diff --git a/agent/agentcontext/watcher.go b/agent/agentcontext/watcher.go index 4a4836e4adb73..ec1e4bb5a8c0b 100644 --- a/agent/agentcontext/watcher.go +++ b/agent/agentcontext/watcher.go @@ -3,10 +3,8 @@ package agentcontext import ( "context" "errors" - "io/fs" "os" "path/filepath" - "strings" "sync" "syscall" "time" @@ -24,33 +22,27 @@ import ( // uses so behavior is consistent across the agent. const DefaultWatchDebounce = 250 * time.Millisecond -// WatcherOptions parameterizes the recursive watcher. +// WatcherOptions parameterizes the watcher. type WatcherOptions struct { Logger slog.Logger Clock quartz.Clock Debounce time.Duration - // MaxDepth caps the recursion depth when discovering - // subdirectories to watch. Zero defaults to - // DefaultMaxScanDepth. Callers wiring the watcher to a - // Resolver should pass the resolver's MaxDepth so the - // watcher never misses edits below the scan horizon. - MaxDepth int // OnChange runs at most once per debounce window. The // caller must not block; the recommended pattern is a // non-blocking send on a re-resolve trigger channel. OnChange func() } -// Watcher is a recursive fsnotify wrapper. fsnotify does not -// support recursive watches natively on Linux, so we walk every -// scan root at sync time and register each subdirectory -// individually. Inotify ENOSPC degrades the watcher into a -// poll-only mode that still re-resolves on Sync calls. +// Watcher is a fixed-location fsnotify wrapper. It watches only +// the directories that can hold recognized resources (each scan +// root plus its skill containers and immediate skill dirs) rather +// than walking the tree, mirroring the resolver's fixed-location +// discovery. Inotify ENOSPC degrades the watcher into a poll-only +// mode that still re-resolves on Sync calls. type Watcher struct { logger slog.Logger clock quartz.Clock debounce time.Duration - maxDepth int onChange func() mu sync.Mutex @@ -77,10 +69,6 @@ func NewWatcher(opts WatcherOptions) (*Watcher, error) { if clock == nil { clock = quartz.NewReal() } - maxDepth := opts.MaxDepth - if maxDepth <= 0 { - maxDepth = DefaultMaxScanDepth - } w, err := fsnotify.NewWatcher() if err != nil { @@ -92,7 +80,6 @@ func NewWatcher(opts WatcherOptions) (*Watcher, error) { logger: opts.Logger, clock: clock, debounce: debounce, - maxDepth: maxDepth, onChange: opts.OnChange, watched: make(map[string]struct{}), degraded: "fsnotify init failed: " + err.Error(), @@ -106,7 +93,6 @@ func NewWatcher(opts WatcherOptions) (*Watcher, error) { logger: opts.Logger, clock: clock, debounce: debounce, - maxDepth: maxDepth, onChange: opts.OnChange, watcher: w, watched: make(map[string]struct{}), @@ -135,17 +121,18 @@ func (w *Watcher) Degraded() string { return w.degraded } -// Sync replaces the set of watched directories with a fresh -// recursive walk of every scan root. Files are not watched -// directly; watching the parent directory catches creates, -// renames, removes, and writes that touch any recognized -// basename. Files that are themselves scan roots are handled by -// watching their parent. +// Sync replaces the set of watched directories with the fixed +// locations that can hold recognized resources: each scan root, +// its skill containers, and the immediate skill subdirectories. +// Files are not watched directly; watching the parent directory +// catches creates, renames, removes, and writes that touch any +// recognized basename. Files that are themselves scan roots are +// handled by watching their parent. // // Sync is idempotent and safe to call repeatedly. The lock is -// released around the recursive directory walk so concurrent -// Close, schedule, and the run goroutine are not blocked by a -// slow filesystem. +// released around the directory scan so concurrent Close, +// schedule, and the run goroutine are not blocked by a slow +// filesystem. func (w *Watcher) Sync(ctx context.Context, roots []ScanRoot) { w.mu.Lock() if w.closed { @@ -168,9 +155,9 @@ func (w *Watcher) Sync(ctx context.Context, roots []ScanRoot) { } w.mu.Unlock() - // collectDirs touches the filesystem (filepath.WalkDir on - // every scan root). Compute the desired set outside the - // mutex so a slow walk does not block the run goroutine, + // collectDirs touches the filesystem (stat/ReadDir on every + // scan root and skill container). Compute the desired set + // outside the mutex so it does not block the run goroutine, // Close, or schedule. desired := w.collectDirs(roots) @@ -324,10 +311,13 @@ func (w *Watcher) schedule() { w.mu.Unlock() } -// collectDirs walks every scan root and returns the set of -// directories to watch. The maximum depth uses the watcher's -// configured maxDepth so it mirrors the resolver's horizon. -func (w *Watcher) collectDirs(roots []ScanRoot) map[string]struct{} { +// collectDirs returns the set of directories to watch. Discovery +// is fixed-location, mirroring the resolver: for each scan root we +// watch the root directory itself (catching top-level instruction +// and .mcp.json changes), plus every existing skill container and +// its immediate skill subdirectories (catching skill add/remove +// and SKILL.md writes). The watcher never recurses the tree. +func (*Watcher) collectDirs(roots []ScanRoot) map[string]struct{} { out := make(map[string]struct{}) for _, root := range roots { if root.Path == "" { @@ -346,25 +336,19 @@ func (w *Watcher) collectDirs(roots []ScanRoot) map[string]struct{} { out[filepath.Dir(root.Path)] = struct{}{} continue } - // Walk the directory and collect every descendant - // directory up to the depth cap. - rootDepth := strings.Count(filepath.Clean(root.Path), string(os.PathSeparator)) - _ = filepath.WalkDir(root.Path, func(path string, d fs.DirEntry, err error) error { + out[root.Path] = struct{}{} + for _, container := range skillContainersFor(root.Path) { + out[container] = struct{}{} + entries, err := os.ReadDir(container) if err != nil { - return nil - } - if !d.IsDir() { - return nil - } - if _, skip := skipDirNames[d.Name()]; skip && path != root.Path { - return fs.SkipDir + continue } - if strings.Count(path, string(os.PathSeparator))-rootDepth > w.maxDepth { - return fs.SkipDir + for _, e := range entries { + if e.IsDir() { + out[filepath.Join(container, e.Name())] = struct{}{} + } } - out[path] = struct{}{} - return nil - }) + } } return out }