From 4e4b444c49adda4a86e334205f567e701bb29fc3 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 21 Mar 2026 21:58:46 +0000 Subject: [PATCH 1/2] Add autoresearch setup for ENF performance optimization Three-file autoresearch framework targeting ExecutableNormalizedOperationFactory throughput: program.md (strategy), run_benchmark.sh (metric), autoresearch.sh (loop). https://claude.ai/code/session_01GfoPorZWo99NczxzJTYh9Q --- autoresearch/autoresearch.sh | 141 ++++++++++++++++++++++++++++++++++ autoresearch/program.md | 75 ++++++++++++++++++ autoresearch/run_benchmark.sh | 41 ++++++++++ 3 files changed, 257 insertions(+) create mode 100755 autoresearch/autoresearch.sh create mode 100644 autoresearch/program.md create mode 100755 autoresearch/run_benchmark.sh diff --git a/autoresearch/autoresearch.sh b/autoresearch/autoresearch.sh new file mode 100755 index 000000000..701ee3d8b --- /dev/null +++ b/autoresearch/autoresearch.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# Autoresearch loop driver for graphql-java ENF optimization. +# +# This script runs an autonomous optimization loop using Claude Code (Sonnet) +# to iteratively improve ENF performance. +# +# Usage: +# ./autoresearch/autoresearch.sh [max_iterations] +# +# Prerequisites: +# - Claude Code CLI installed and authenticated +# - Java toolchain (JDK 25) available for builds +# +# The loop: +# 1. Get baseline benchmark score +# 2. Ask Claude to make ONE optimization +# 3. Run tests + benchmark +# 4. Keep if improved, revert if not +# 5. Repeat + +set -euo pipefail + +MAX_ITERATIONS="${1:-50}" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +LOG_FILE="$SCRIPT_DIR/results.tsv" +BEST_SCORE_FILE="$SCRIPT_DIR/.best_score" + +cd "$PROJECT_DIR" + +# Initialize log +if [ ! 
-f "$LOG_FILE" ]; then + echo -e "iteration\tcommit\tscore\tdelta\tstatus\tdescription" > "$LOG_FILE" +fi + +# Get baseline score +echo "=== Getting baseline score ===" +BASELINE=$(bash "$SCRIPT_DIR/run_benchmark.sh") +if [ "$BASELINE" = "FAILED" ]; then + echo "ERROR: Baseline benchmark failed. Fix issues before starting autoresearch." + exit 1 +fi +echo "Baseline: $BASELINE ops/s" +echo "$BASELINE" > "$BEST_SCORE_FILE" + +BEST_SCORE="$BASELINE" +COMMIT_BEFORE=$(git rev-parse HEAD) + +for i in $(seq 1 "$MAX_ITERATIONS"); do + echo "" + echo "========================================" + echo "=== Iteration $i / $MAX_ITERATIONS ===" + echo "=== Best score: $BEST_SCORE ops/s ===" + echo "========================================" + + # Save current state + COMMIT_BEFORE=$(git rev-parse HEAD) + + # Ask Claude (Sonnet) to make ONE optimization + # Using --print to run non-interactively + claude --model sonnet -p "$(cat </dev/null || echo "No previous iterations") + +YOUR TASK: Make exactly ONE focused optimization to the ENF code. +- Pick the most promising unused strategy from program.md +- Make a minimal, targeted change +- Do NOT run tests or benchmarks (the harness does that) +- Describe what you changed and why in a single line + +IMPORTANT: Only modify files under src/main/java/graphql/normalized/ or the utility +files mentioned in program.md. Make the change now. +EOF +)" + + # Check if anything changed + if git diff --quiet src/main/java/; then + echo "No changes made in iteration $i, skipping" + echo -e "$i\t-\t-\t-\tskipped\tno changes" >> "$LOG_FILE" + continue + fi + + # Run tests + echo "--- Running tests ---" + if ! 
./gradlew test -q 2>&1 | tail -5; then + echo "Tests FAILED — reverting" + git checkout -- src/ + echo -e "$i\t-\t-\t-\treverted\ttests failed" >> "$LOG_FILE" + continue + fi + + # Run benchmark + echo "--- Running benchmark ---" + SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh") + if [ "$SCORE" = "FAILED" ]; then + echo "Benchmark FAILED — reverting" + git checkout -- src/ + echo -e "$i\t-\t-\t-\treverted\tbenchmark failed" >> "$LOG_FILE" + continue + fi + + # Compare (using awk for floating point) + IMPROVED=$(echo "$SCORE $BEST_SCORE" | awk '{print ($1 > $2) ? "yes" : "no"}') + DELTA=$(echo "$SCORE $BEST_SCORE" | awk '{printf "%.3f", $1 - $2}') + + if [ "$IMPROVED" = "yes" ]; then + echo "IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA)" + BEST_SCORE="$SCORE" + echo "$BEST_SCORE" > "$BEST_SCORE_FILE" + + # Get a description of the change + DESCRIPTION=$(git diff --stat src/main/java/ | head -1) + + # Commit the improvement + git add src/main/java/ + git commit -m "autoresearch: iteration $i — $DESCRIPTION [+$DELTA ops/s]" + + COMMIT=$(git rev-parse --short HEAD) + echo -e "$i\t$COMMIT\t$SCORE\t+$DELTA\tkept\t$DESCRIPTION" >> "$LOG_FILE" + else + echo "No improvement: $SCORE vs $BEST_SCORE ops/s ($DELTA) — reverting" + git checkout -- src/ + echo -e "$i\t-\t$SCORE\t$DELTA\treverted\tno improvement" >> "$LOG_FILE" + fi +done + +echo "" +echo "========================================" +echo "=== Autoresearch complete ===" +echo "=== Baseline: $BASELINE ops/s ===" +echo "=== Final best: $BEST_SCORE ops/s ===" +echo "=== Total improvement: $(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.3f", $1 - $2}') ops/s ===" +echo "========================================" +echo "" +echo "Results log: $LOG_FILE" diff --git a/autoresearch/program.md b/autoresearch/program.md new file mode 100644 index 000000000..9093cee29 --- /dev/null +++ b/autoresearch/program.md @@ -0,0 +1,75 @@ +# Autoresearch: Optimize ExecutableNormalizedOperationFactory Performance + +## Goal + +Improve the 
throughput (ops/sec) of `ENF1Performance.benchMarkThroughput` by making +targeted optimizations to the ENF creation pipeline. Every improvement must pass the +full test suite. + +## Metric + +- **Primary**: `ENF1Performance.benchMarkThroughput` — higher is better (ops/sec) +- Run with: `./gradlew jmhRun -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" -PjmhFork=1 -PjmhIterations=3 -PjmhWarmupIterations=2` +- A run takes ~2-3 minutes. Parse the score from JMH's output line containing `benchMarkThroughput`. + +## Scope — Files You May Modify + +Only modify files under `src/main/java/graphql/normalized/`: + +- `ExecutableNormalizedOperationFactory.java` (959 lines) — the main target +- `ENFMerger.java` (197 lines) — post-processing merge step +- `ExecutableNormalizedField.java` (700 lines) — the field data class +- `ExecutableNormalizedOperation.java` (199 lines) — the result container +- Supporting: `ArgumentMaker.java`, `NormalizedInputValue.java`, etc. + +Also consider utility classes these depend on: +- `graphql/collect/ImmutableKit.java` +- `graphql/util/FpKit.java` + +**Do NOT modify**: test files, benchmark files, schema files, build files. + +## Constraints + +1. **All tests must pass**: Run `./gradlew test` before benchmarking. If tests fail, revert. +2. **No new dependencies**: This is a firm project policy. +3. **No wildcard imports, no inner classes, no Optional**: Project coding standards. +4. **Preserve public API**: All `@PublicApi` method signatures must remain unchanged. +5. **Thread safety**: The factory is called concurrently. Don't introduce shared mutable state. +6. **Use `graphql.Assert`** not `Objects.requireNonNull`. + +## Optimization Strategies to Explore (ordered by expected impact) + +### High Impact +1. **Reduce object allocation in hot loops**: `buildEnfsRecursively()` and `collectFromSelectionSet()` create many intermediate collections (ArrayList, LinkedHashSet, LinkedHashMap). Consider pre-sizing or reusing. +2. 
**Avoid unnecessary Set/Map copies**: `groupByCommonParents()` creates grouped collections that could be more efficient. +3. **Replace stream operations with loops**: In hot paths, `.stream().collect()` has overhead from lambda allocation and iterator creation. Simple for-loops are faster. +4. **ImmutableListMultimap.Builder overhead**: The builders accumulate entries one-by-one. Consider whether bulk operations are possible. + +### Medium Impact +5. **Cache type lookups**: `Introspection.getFieldDef()` and `schema.getImplementations()` are called repeatedly for the same types. A local cache per factory invocation could help. +6. **Optimize ENFMerger**: The merge step does O(n) scans. Consider whether merge candidates can be identified during collection rather than post-processing. +7. **Lazy QueryDirectives creation**: Only create `QueryDirectivesImpl` when directives are actually present on a field. +8. **Reduce LinkedHashSet usage**: Where insertion order doesn't matter, plain HashSet is faster. + +### Lower Impact (but easy wins) +9. **Pre-size collections**: When the approximate size is known (e.g., number of selections), pre-size ArrayList/HashMap. +10. **Avoid unnecessary wrapping**: e.g., `Collections.singleton()` vs direct iteration. +11. **StringBuilder for string concatenation** in any hot-path string building. + +## How to Iterate +When driven by `autoresearch.sh`, the harness performs steps 3–6 itself (tests, benchmark, commit, revert); the agent only does steps 1–2. +1. Pick ONE strategy from above (start with #1) +2. Make a focused, minimal change +3. Run `./gradlew test` — if it fails, revert immediately +4. Run the benchmark — compare to previous best +5. If improved: commit with message "autoresearch: [+X.XX ops/s]" +6. If not improved: revert with `git checkout -- src/` +7. Move to next strategy + +## Important Notes + +- The factory creates a **new instance per call** (no shared state between invocations), so per-invocation caching is safe. +- `ExecutableNormalizedField` is intentionally `@Mutable` — the factory builds it up incrementally. 
+- The `ImmutableListMultimap` and `ImmutableMap` builders are finalized only at the end in the factory's constructor. +- Guava is an existing dependency — you can use Guava utilities but nothing else new. +- The `CollectedField`, `CollectedFieldGroup`, and `PossibleMerger` inner records are allocation-heavy — they're created per-field during traversal. diff --git a/autoresearch/run_benchmark.sh b/autoresearch/run_benchmark.sh new file mode 100755 index 000000000..4bd85f5d8 --- /dev/null +++ b/autoresearch/run_benchmark.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Runs the ENF1 throughput benchmark and extracts the score. +# Usage: ./autoresearch/run_benchmark.sh +# Output: prints the benchmark score (ops/sec) to stdout, or "FAILED" on error. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +cd "$PROJECT_DIR" + +echo "=== Running tests first ===" >&2 +if ! ./gradlew test -q 2>&1 | tail -5 >&2; then + echo "FAILED: tests did not pass" >&2 + echo "FAILED" + exit 1 +fi + +echo "=== Running ENF1 throughput benchmark ===" >&2 +BENCHMARK_OUTPUT=$(./gradlew jmhRun \ + -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" \ + -PjmhFork=1 \ + -PjmhIterations=3 \ + -PjmhWarmupIterations=2 \ + 2>&1) || echo "WARNING: gradlew jmhRun exited with status $?" >&2 # do not let set -e abort here: the missing score line below reports FAILED + +# Extract the score from the last JMH result line (awk only, so no match leaves SCORE empty instead of tripping pipefail), e.g.: +# ENF1Performance.benchMarkThroughput thrpt 3 XX.XXX ± Y.YYY ops/s +SCORE=$(printf '%s\n' "$BENCHMARK_OUTPUT" | awk '/benchMarkThroughput[[:space:]]+thrpt/ { score = $(NF-3) } END { if (score != "") print score }') + +if [ -z "$SCORE" ]; then + echo "FAILED: could not extract benchmark score" >&2 + echo "Last 20 lines of output:" >&2 + echo "$BENCHMARK_OUTPUT" | tail -20 >&2 + echo "FAILED" + exit 1 +fi + +echo "Score: $SCORE ops/s" >&2 +echo "$SCORE" From 00f96f1c7bb17fc5218859cba9263b9c5fa1304b Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 21 Mar 2026 22:05:21 +0000 Subject: [PATCH 2/2] Fix autoresearch scripts for local Claude Code CLI usage - Use `claude --dangerously-skip-permissions --max-turns 20` 
for unattended operation - Separate test run from benchmark run (avoid running tests twice) - Add CLI availability check - Improve logging with printf instead of echo -e - Show percentage improvement in final summary https://claude.ai/code/session_01GfoPorZWo99NczxzJTYh9Q --- autoresearch/autoresearch.sh | 114 ++++++++++++++++++++++------------ autoresearch/run_benchmark.sh | 10 +-- 2 files changed, 79 insertions(+), 45 deletions(-) diff --git a/autoresearch/autoresearch.sh b/autoresearch/autoresearch.sh index 701ee3d8b..631084066 100755 --- a/autoresearch/autoresearch.sh +++ b/autoresearch/autoresearch.sh @@ -8,12 +8,20 @@ # ./autoresearch/autoresearch.sh [max_iterations] # # Prerequisites: -# - Claude Code CLI installed and authenticated +# - Claude Code CLI installed and authenticated (`claude` on PATH) # - Java toolchain (JDK 25) available for builds +# - Any working directory is fine (the script cd's to the project root itself); start from a clean git tree +# +# Permissions: +# The script uses `claude --dangerously-skip-permissions` so the agent can +# edit files without interactive approval prompts. Mitigations (not a sandbox): +# - The prompt asks the agent to edit only src/main/java/ (advisory, not enforced) +# - Tests gate every change (bad edits get reverted) +# - Reverts use `git checkout -- src/`, so only tracked files under src/ are restored # # The loop: # 1. Get baseline benchmark score -# 2. Ask Claude to make ONE optimization +# 2. Ask Claude (Sonnet) to make ONE optimization # 3. Run tests + benchmark # 4. Keep if improved, revert if not # 5. Repeat @@ -28,9 +36,15 @@ BEST_SCORE_FILE="$SCRIPT_DIR/.best_score" cd "$PROJECT_DIR" +# Verify claude CLI is available +if ! command -v claude &>/dev/null; then + echo "ERROR: 'claude' CLI not found on PATH. Install Claude Code first." + exit 1 +fi + # Initialize log if [ ! 
-f "$LOG_FILE" ]; then - echo -e "iteration\tcommit\tscore\tdelta\tstatus\tdescription" > "$LOG_FILE" + printf "iteration\tcommit\tscore\tdelta\tstatus\tdescription\n" > "$LOG_FILE" fi # Get baseline score @@ -44,7 +58,6 @@ echo "Baseline: $BASELINE ops/s" echo "$BASELINE" > "$BEST_SCORE_FILE" BEST_SCORE="$BASELINE" -COMMIT_BEFORE=$(git rev-parse HEAD) for i in $(seq 1 "$MAX_ITERATIONS"); do echo "" @@ -53,45 +66,63 @@ for i in $(seq 1 "$MAX_ITERATIONS"); do echo "=== Best score: $BEST_SCORE ops/s ===" echo "========================================" - # Save current state - COMMIT_BEFORE=$(git rev-parse HEAD) + # Build the prompt for this iteration + RECENT_LOG=$(tail -10 "$LOG_FILE" 2>/dev/null || echo "No previous iterations") - # Ask Claude (Sonnet) to make ONE optimization - # Using --print to run non-interactively - claude --model sonnet -p "$(cat </dev/null || echo "No previous iterations") +Previous optimization log (last 10 entries): +$RECENT_LOG YOUR TASK: Make exactly ONE focused optimization to the ENF code. -- Pick the most promising unused strategy from program.md -- Make a minimal, targeted change -- Do NOT run tests or benchmarks (the harness does that) -- Describe what you changed and why in a single line - -IMPORTANT: Only modify files under src/main/java/graphql/normalized/ or the utility -files mentioned in program.md. Make the change now. -EOF -)" +- Read the code files first, then pick the most promising strategy from program.md + that has NOT already been tried (check the log above) +- Make a minimal, targeted change to ONE or TWO files +- Do NOT run tests or benchmarks — the outer harness handles that +- Do NOT commit — the outer harness handles that +- After editing, output a single-line summary of what you changed and why + +SCOPE: Only modify files under src/main/java/graphql/normalized/ or the utility +files listed in program.md (ImmutableKit.java, FpKit.java). + +Make the change now." 
+ + # Run Claude in non-interactive mode with file editing capability + # --dangerously-skip-permissions: allows edits without prompts (tests gate what is kept; this is not a sandbox) + # --model sonnet: fast iterations + # --max-turns 20: enough to read files + make edits, but bounded + echo "--- Asking Claude to make an optimization ---" + CLAUDE_OUTPUT=$(claude \ + --model sonnet \ + --dangerously-skip-permissions \ + --max-turns 20 \ + --verbose \ + -p "$PROMPT" \ + 2>&1) || true # keep looping if the CLI exits non-zero (set -e would otherwise abort the run) + + echo "$CLAUDE_OUTPUT" | tail -5 # Check if anything changed if git diff --quiet src/main/java/; then - echo "No changes made in iteration $i, skipping" - echo -e "$i\t-\t-\t-\tskipped\tno changes" >> "$LOG_FILE" + echo "No source changes in iteration $i, skipping" + printf "%s\t-\t-\t-\tskipped\tno changes\n" "$i" >> "$LOG_FILE" continue fi - # Run tests + # Show what changed + echo "--- Changes made ---" + git diff --stat src/main/java/ + + # Run tests here, once per iteration (run_benchmark.sh no longer runs them itself) echo "--- Running tests ---" - if ! ./gradlew test -q 2>&1 | tail -5; then - echo "Tests FAILED — reverting" + if ! ./gradlew test -q 2>&1 | tail -10; then + echo "Tests FAILED — reverting changes" git checkout -- src/ - echo -e "$i\t-\t-\t-\treverted\ttests failed" >> "$LOG_FILE" + printf "%s\t-\t-\t-\treverted\ttests failed\n" "$i" >> "$LOG_FILE" continue fi @@ -99,9 +130,9 @@ EOF echo "--- Running benchmark ---" SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh") if [ "$SCORE" = "FAILED" ]; then - echo "Benchmark FAILED — reverting" + echo "Benchmark FAILED — reverting changes" git checkout -- src/ - echo -e "$i\t-\t-\t-\treverted\tbenchmark failed" >> "$LOG_FILE" + printf "%s\t-\t-\t-\treverted\tbenchmark failed\n" "$i" >> "$LOG_FILE" continue fi @@ -110,32 +141,39 @@ EOF DELTA=$(echo "$SCORE $BEST_SCORE" | awk '{printf "%.3f", $1 - $2}') if [ "$IMPROVED" = "yes" ]; then - echo "IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA)" + echo "" + echo "*** IMPROVED! 
$BEST_SCORE -> $SCORE ops/s (+$DELTA) ***" + echo "" BEST_SCORE="$SCORE" echo "$BEST_SCORE" > "$BEST_SCORE_FILE" - # Get a description of the change - DESCRIPTION=$(git diff --stat src/main/java/ | head -1) + # Get a description of the change from git diff + DESCRIPTION=$(git diff --stat src/main/java/ | tail -1 | xargs) # Commit the improvement git add src/main/java/ - git commit -m "autoresearch: iteration $i — $DESCRIPTION [+$DELTA ops/s]" + git commit -m "autoresearch: iteration $i [+$DELTA ops/s] + +$(git diff --cached --stat | head -5)" COMMIT=$(git rev-parse --short HEAD) - echo -e "$i\t$COMMIT\t$SCORE\t+$DELTA\tkept\t$DESCRIPTION" >> "$LOG_FILE" + printf "%s\t%s\t%s\t+%s\tkept\t%s\n" "$i" "$COMMIT" "$SCORE" "$DELTA" "$DESCRIPTION" >> "$LOG_FILE" else echo "No improvement: $SCORE vs $BEST_SCORE ops/s ($DELTA) — reverting" git checkout -- src/ - echo -e "$i\t-\t$SCORE\t$DELTA\treverted\tno improvement" >> "$LOG_FILE" + printf "%s\t-\t%s\t%s\treverted\tno improvement\n" "$i" "$SCORE" "$DELTA" >> "$LOG_FILE" fi done echo "" echo "========================================" echo "=== Autoresearch complete ===" -echo "=== Baseline: $BASELINE ops/s ===" -echo "=== Final best: $BEST_SCORE ops/s ===" -echo "=== Total improvement: $(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.3f", $1 - $2}') ops/s ===" +echo "=== Baseline: $BASELINE ops/s ===" +echo "=== Final best: $BEST_SCORE ops/s ===" +TOTAL_DELTA=$(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.3f", $1 - $2}') +TOTAL_PCT=$(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.1f", (($1 - $2) / $2) * 100}') +echo "=== Improvement: +$TOTAL_DELTA ops/s ($TOTAL_PCT%) ===" echo "========================================" echo "" echo "Results log: $LOG_FILE" +echo "Review kept commits: git log --oneline --grep='autoresearch'" diff --git a/autoresearch/run_benchmark.sh b/autoresearch/run_benchmark.sh index 4bd85f5d8..f188a1160 100755 --- a/autoresearch/run_benchmark.sh +++ b/autoresearch/run_benchmark.sh @@ -2,6 +2,9 @@ # 
Runs the ENF1 throughput benchmark and extracts the score. # Usage: ./autoresearch/run_benchmark.sh # Output: prints the benchmark score (ops/sec) to stdout, or "FAILED" on error. +# +# Note: This script only runs the benchmark, NOT the tests. +# The autoresearch.sh loop runs tests separately before calling this. set -euo pipefail @@ -10,13 +13,6 @@ PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" cd "$PROJECT_DIR" -echo "=== Running tests first ===" >&2 -if ! ./gradlew test -q 2>&1 | tail -5 >&2; then - echo "FAILED: tests did not pass" >&2 - echo "FAILED" - exit 1 -fi - echo "=== Running ENF1 throughput benchmark ===" >&2 BENCHMARK_OUTPUT=$(./gradlew jmhRun \ -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" \