diff --git a/plugins/codex/prompts/adversarial-review.md b/plugins/codex/prompts/adversarial-review.md index c8f8123f..78668af6 100644 --- a/plugins/codex/prompts/adversarial-review.md +++ b/plugins/codex/prompts/adversarial-review.md @@ -32,6 +32,7 @@ Actively try to disprove the change. Look for violated invariants, missing guards, unhandled failure paths, and assumptions that stop being true under stress. Trace how bad inputs, retries, concurrent actions, or partially completed operations move through the code. If the user supplied a focus area, weight it heavily, but still report any other material issue you can defend. +{{REVIEW_COLLECTION_GUIDANCE}} diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 201d1c7a..f005a8c3 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -241,6 +241,7 @@ function buildAdversarialReviewPrompt(context, focusText) { REVIEW_KIND: "Adversarial Review", TARGET_LABEL: context.target.label, USER_FOCUS: focusText || "No extra focus provided.", + REVIEW_COLLECTION_GUIDANCE: context.collectionGuidance, REVIEW_INPUT: context.content }); } diff --git a/plugins/codex/scripts/lib/git.mjs b/plugins/codex/scripts/lib/git.mjs index 6fca2139..1749cfc8 100644 --- a/plugins/codex/scripts/lib/git.mjs +++ b/plugins/codex/scripts/lib/git.mjs @@ -2,9 +2,11 @@ import fs from "node:fs"; import path from "node:path"; import { isProbablyText } from "./fs.mjs"; -import { runCommand, runCommandChecked } from "./process.mjs"; +import { formatCommandFailure, runCommand, runCommandChecked } from "./process.mjs"; const MAX_UNTRACKED_BYTES = 24 * 1024; +const DEFAULT_INLINE_DIFF_MAX_FILES = 2; +const DEFAULT_INLINE_DIFF_MAX_BYTES = 256 * 1024; function git(cwd, args, options = {}) { return runCommand("git", args, { cwd, ...options }); @@ -14,6 +16,64 @@ function gitChecked(cwd, args, options = {}) { return runCommandChecked("git", args, { cwd, ...options }); } +function listUniqueFiles(...groups) { + return [...new Set(groups.flat().filter(Boolean))].sort(); +} + +function normalizeMaxInlineFiles(value) { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed < 0) { + return DEFAULT_INLINE_DIFF_MAX_FILES; + } + return Math.floor(parsed); +} + +function normalizeMaxInlineDiffBytes(value) { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed < 0) { + return DEFAULT_INLINE_DIFF_MAX_BYTES; + } + return Math.floor(parsed); +} + +function measureGitOutputBytes(cwd, args, maxBytes) { + const result = git(cwd, args, { maxBuffer: maxBytes + 1 }); + if (result.error && /** @type {NodeJS.ErrnoException} */ (result.error).code === "ENOBUFS") { + return maxBytes + 1; + } + if (result.error) { + throw result.error; + } + if (result.status !== 0) { + throw new Error(formatCommandFailure(result)); + } + return Buffer.byteLength(result.stdout, "utf8"); +} + +function measureCombinedGitOutputBytes(cwd, argSets, maxBytes) { + let totalBytes = 0; + for (const args of argSets) { + const remainingBytes = maxBytes - totalBytes; + if (remainingBytes < 0) { + return maxBytes + 1; + } + totalBytes += measureGitOutputBytes(cwd, args, remainingBytes); + if (totalBytes > maxBytes) { + return totalBytes; + } + } + return totalBytes; +} + +function buildBranchComparison(cwd, baseRef) { + const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim(); + return { + mergeBase, + commitRange: `${mergeBase}..HEAD`, + reviewRange: `${baseRef}...HEAD` + }; +} + export function ensureGitRepository(cwd) { const result = git(cwd, ["rev-parse", "--show-toplevel"]); const errorCode = result.error && "code" in result.error ? result.error.code : null; @@ -161,55 +221,115 @@ function formatUntrackedFile(cwd, relativePath) { return [`### ${relativePath}`, "```", buffer.toString("utf8").trimEnd(), "```"].join("\n"); } -function collectWorkingTreeContext(cwd, state) { - const status = gitChecked(cwd, ["status", "--short"]).stdout.trim(); - const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; - const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; - const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n"); - - const parts = [ - formatSection("Git Status", status), - formatSection("Staged Diff", stagedDiff), - formatSection("Unstaged Diff", unstagedDiff), - formatSection("Untracked Files", untrackedBody) - ]; +function collectWorkingTreeContext(cwd, state, options = {}) { + const includeDiff = options.includeDiff !== false; + const status = gitChecked(cwd, ["status", "--short", "--untracked-files=all"]).stdout.trim(); + const changedFiles = listUniqueFiles(state.staged, state.unstaged, state.untracked); + + let parts; + if (includeDiff) { + const stagedDiff = gitChecked(cwd, ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; + const unstagedDiff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff"]).stdout; + const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n"); + parts = [ + formatSection("Git Status", status), + formatSection("Staged Diff", stagedDiff), + formatSection("Unstaged Diff", unstagedDiff), + formatSection("Untracked Files", untrackedBody) + ]; + } else { + const stagedStat = gitChecked(cwd, ["diff", "--shortstat", "--cached"]).stdout.trim(); + const unstagedStat = gitChecked(cwd, ["diff", "--shortstat"]).stdout.trim(); + const untrackedBody = state.untracked.map((file) => formatUntrackedFile(cwd, file)).join("\n\n"); + parts = [ + formatSection("Git Status", status), + formatSection("Staged Diff Stat", stagedStat), + formatSection("Unstaged Diff Stat", unstagedStat), + formatSection("Changed Files", changedFiles.join("\n")), + formatSection("Untracked Files", untrackedBody) + ]; + } return { mode: "working-tree", summary: `Reviewing ${state.staged.length} staged, ${state.unstaged.length} unstaged, and ${state.untracked.length} untracked file(s).`, - content: parts.join("\n") + content: parts.join("\n"), + changedFiles }; } -function collectBranchContext(cwd, baseRef) { - const mergeBase = gitChecked(cwd, ["merge-base", "HEAD", baseRef]).stdout.trim(); - const commitRange = `${mergeBase}..HEAD`; +function collectBranchContext(cwd, baseRef, options = {}) { + const includeDiff = options.includeDiff !== false; + const comparison = options.comparison ?? buildBranchComparison(cwd, baseRef); const currentBranch = getCurrentBranch(cwd); - const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", commitRange]).stdout.trim(); - const diffStat = gitChecked(cwd, ["diff", "--stat", commitRange]).stdout.trim(); - const diff = gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", commitRange]).stdout; + const changedFiles = gitChecked(cwd, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean); + const logOutput = gitChecked(cwd, ["log", "--oneline", "--decorate", comparison.commitRange]).stdout.trim(); + const diffStat = gitChecked(cwd, ["diff", "--stat", comparison.commitRange]).stdout.trim(); return { mode: "branch", - summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${mergeBase}.`, - content: [ - formatSection("Commit Log", logOutput), - formatSection("Diff Stat", diffStat), - formatSection("Branch Diff", diff) - ].join("\n") + summary: `Reviewing branch ${currentBranch} against ${baseRef} from merge-base ${comparison.mergeBase}.`, + content: includeDiff + ? [ + formatSection("Commit Log", logOutput), + formatSection("Diff Stat", diffStat), + formatSection( + "Branch Diff", + gitChecked(cwd, ["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange]).stdout + ) + ].join("\n") + : [ + formatSection("Commit Log", logOutput), + formatSection("Diff Stat", diffStat), + formatSection("Changed Files", changedFiles.join("\n")) + ].join("\n"), + changedFiles, + comparison }; } -export function collectReviewContext(cwd, target) { +function buildAdversarialCollectionGuidance(options = {}) { + if (options.includeDiff !== false) { + return "Use the repository context below as primary evidence."; + } + + return "The repository context below is a lightweight summary. Inspect the target diff yourself with read-only git commands before finalizing findings."; +} + +export function collectReviewContext(cwd, target, options = {}) { const repoRoot = getRepoRoot(cwd); - const state = getWorkingTreeState(cwd); - const currentBranch = getCurrentBranch(cwd); + const currentBranch = getCurrentBranch(repoRoot); + const maxInlineFiles = normalizeMaxInlineFiles(options.maxInlineFiles); + const maxInlineDiffBytes = normalizeMaxInlineDiffBytes(options.maxInlineDiffBytes); let details; + let includeDiff; + let diffBytes; if (target.mode === "working-tree") { - details = collectWorkingTreeContext(repoRoot, state); + const state = getWorkingTreeState(repoRoot); + diffBytes = measureCombinedGitOutputBytes( + repoRoot, + [ + ["diff", "--cached", "--binary", "--no-ext-diff", "--submodule=diff"], + ["diff", "--binary", "--no-ext-diff", "--submodule=diff"] + ], + maxInlineDiffBytes + ); + includeDiff = + options.includeDiff ?? + (listUniqueFiles(state.staged, state.unstaged, state.untracked).length <= maxInlineFiles && + diffBytes <= maxInlineDiffBytes); + details = collectWorkingTreeContext(repoRoot, state, { includeDiff }); } else { - details = collectBranchContext(repoRoot, target.baseRef); + const comparison = buildBranchComparison(repoRoot, target.baseRef); + const fileCount = gitChecked(repoRoot, ["diff", "--name-only", comparison.commitRange]).stdout.trim().split("\n").filter(Boolean).length; + diffBytes = measureGitOutputBytes( + repoRoot, + ["diff", "--binary", "--no-ext-diff", "--submodule=diff", comparison.commitRange], + maxInlineDiffBytes + ); + includeDiff = options.includeDiff ?? (fileCount <= maxInlineFiles && diffBytes <= maxInlineDiffBytes); + details = collectBranchContext(repoRoot, target.baseRef, { includeDiff, comparison }); } return { @@ -217,6 +337,10 @@ export function collectReviewContext(cwd, target) { repoRoot, branch: currentBranch, target, + fileCount: details.changedFiles.length, + diffBytes, + inputMode: includeDiff ? "inline-diff" : "self-collect", + collectionGuidance: buildAdversarialCollectionGuidance({ includeDiff }), ...details }; } diff --git a/plugins/codex/scripts/lib/process.mjs b/plugins/codex/scripts/lib/process.mjs index 58cedf67..af28d1cf 100644 --- a/plugins/codex/scripts/lib/process.mjs +++ b/plugins/codex/scripts/lib/process.mjs @@ -7,6 +7,7 @@ export function runCommand(command, args = [], options = {}) { env: options.env, encoding: "utf8", input: options.input, + maxBuffer: options.maxBuffer, stdio: options.stdio ?? "pipe", shell: process.platform === "win32" ? (process.env.SHELL || true) : false, windowsHide: true diff --git a/tests/git.test.mjs b/tests/git.test.mjs index dd6a7a4f..14ff2576 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -69,6 +69,23 @@ test("resolveReviewTarget requires an explicit base when no default branch can b ); }); +test("collectReviewContext keeps inline diffs for tiny adversarial reviews", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.writeFileSync(path.join(cwd, "app.js"), "console.log('v1');\n"); + run("git", ["add", "app.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "app.js"), "console.log('INLINE_MARKER');\n"); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target); + + assert.equal(context.inputMode, "inline-diff"); + assert.equal(context.fileCount, 1); + assert.match(context.collectionGuidance, /primary evidence/i); + assert.match(context.content, /INLINE_MARKER/); +}); + test("collectReviewContext skips untracked directories in working tree review", () => { const cwd = makeTempDir(); initGitRepo(cwd); @@ -101,3 +118,66 @@ test("collectReviewContext skips broken untracked symlinks instead of crashing", assert.match(context.content, /### broken-link/); assert.match(context.content, /skipped: broken symlink or unreadable file/i); }); + +test("collectReviewContext falls back to lightweight context for larger adversarial reviews", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + for (const name of ["a.js", "b.js", "c.js"]) { + fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`); + } + run("git", ["add", "a.js", "b.js", "c.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "SELF_COLLECT_MARKER_A";\n'); + fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "SELF_COLLECT_MARKER_B";\n'); + fs.writeFileSync(path.join(cwd, "c.js"), 'export const value = "SELF_COLLECT_MARKER_C";\n'); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target); + + assert.equal(context.inputMode, "self-collect"); + assert.equal(context.fileCount, 3); + assert.match(context.collectionGuidance, /lightweight summary/i); + assert.match(context.collectionGuidance, /read-only git commands/i); + assert.doesNotMatch(context.content, /SELF_COLLECT_MARKER_[ABC]/); + assert.match(context.content, /## Changed Files/); +}); + +test("collectReviewContext falls back to lightweight context for oversized single-file diffs", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.writeFileSync(path.join(cwd, "app.js"), "export const value = 'v1';\n"); + run("git", ["add", "app.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "app.js"), `export const value = '${"x".repeat(512)}';\n`); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target, { maxInlineDiffBytes: 128 }); + + assert.equal(context.fileCount, 1); + assert.equal(context.inputMode, "self-collect"); + assert.ok(context.diffBytes > 128); + assert.doesNotMatch(context.content, /xxx/); + assert.match(context.content, /## Changed Files/); +}); + +test("collectReviewContext keeps untracked file content in lightweight working tree context", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + for (const name of ["a.js", "b.js"]) { + fs.writeFileSync(path.join(cwd, name), `export const value = "${name}-v1";\n`); + } + run("git", ["add", "a.js", "b.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "a.js"), 'export const value = "TRACKED_MARKER_A";\n'); + fs.writeFileSync(path.join(cwd, "b.js"), 'export const value = "TRACKED_MARKER_B";\n'); + fs.writeFileSync(path.join(cwd, "new-risk.js"), 'export const value = "UNTRACKED_RISK_MARKER";\n'); + + const target = resolveReviewTarget(cwd, {}); + const context = collectReviewContext(cwd, target); + + assert.equal(context.inputMode, "self-collect"); + assert.equal(context.fileCount, 3); + assert.doesNotMatch(context.content, /TRACKED_MARKER_[AB]/); + assert.match(context.content, /## Untracked Files/); + assert.match(context.content, /UNTRACKED_RISK_MARKER/); +}); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 6000c892..9a8a1c88 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -148,6 +148,33 @@ test("adversarial review accepts the same base-branch targeting as review", () = assert.match(result.stdout, /Missing empty-state guard/); }); +test("adversarial review asks Codex to inspect larger diffs itself", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + for (const name of ["a.js", "b.js", "c.js"]) { + fs.writeFileSync(path.join(repo, "src", name), `export const value = "${name}-v1";\n`); + } + run("git", ["add", "src/a.js", "src/b.js", "src/c.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "a.js"), 'export const value = "PROMPT_SELF_COLLECT_A";\n'); + fs.writeFileSync(path.join(repo, "src", "b.js"), 'export const value = "PROMPT_SELF_COLLECT_B";\n'); + fs.writeFileSync(path.join(repo, "src", "c.js"), 'export const value = "PROMPT_SELF_COLLECT_C";\n'); + + const result = run("node", [SCRIPT, "adversarial-review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.match(state.lastTurnStart.prompt, /lightweight summary/i); + assert.match(state.lastTurnStart.prompt, /read-only git commands/i); + assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/); +}); + test("review includes reasoning output when the app server returns it", () => { const repo = makeTempDir(); const binDir = makeTempDir();