graphql-java · dondonz · Mar 21, 2026 · Mar 21, 2026 · dondonz · Mar 22, 2026
diff --git a/autoresearch/autoresearch.sh b/autoresearch/autoresearch.sh
@@ -0,0 +1,179 @@
+#!/usr/bin/env bash
+# Autoresearch loop driver for graphql-java ENF optimization.
+#
+# This script runs an autonomous optimization loop using Claude Code (Sonnet)
+# to iteratively improve ENF performance.
+#
+# Usage:
+#   ./autoresearch/autoresearch.sh [max_iterations]
+#
+# Prerequisites:
+#   - Claude Code CLI installed and authenticated (`claude` on PATH)
+#   - Java toolchain (JDK 25) available for builds
+#   - Run from the graphql-java project root
+#
+# Permissions:
+#   The script uses `claude --dangerously-skip-permissions` so the agent can
+#   edit files without interactive approval prompts. This is safe here because:
+#   - The agent is scoped to src/main/java/ edits only (via prompt)
+#   - Tests gate every change (bad edits get reverted)
+#   - Git tracks everything
+#
+# The loop:
+#   1. Get baseline benchmark score
+#   2. Ask Claude (Sonnet) to make ONE optimization
+#   3. Run tests + benchmark
+#   4. Keep if improved, revert if not
+#   5. Repeat
+
+set -euo pipefail
+
+MAX_ITERATIONS="${1:-50}"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+LOG_FILE="$SCRIPT_DIR/results.tsv"
+BEST_SCORE_FILE="$SCRIPT_DIR/.best_score"
+
+cd "$PROJECT_DIR"
+
+# Verify claude CLI is available
+if ! command -v claude &>/dev/null; then
+    echo "ERROR: 'claude' CLI not found on PATH. Install Claude Code first."
+    exit 1
+fi
+
+# Initialize log
+if [ ! -f "$LOG_FILE" ]; then
+    printf "iteration\tcommit\tscore\tdelta\tstatus\tdescription\n" > "$LOG_FILE"
+fi
+
+# Measure the starting score. run_benchmark.sh prints "FAILED" AND exits non-zero on
+# error; `|| BASELINE="FAILED"` stops `set -e` from aborting before the message below.
+echo "=== Getting baseline score ==="
+BASELINE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || BASELINE="FAILED"
+if [ "$BASELINE" = "FAILED" ]; then
+    echo "ERROR: Baseline benchmark failed. Fix issues before starting autoresearch."
+    exit 1
+fi
+echo "Baseline: $BASELINE ops/s"
+echo "$BASELINE" > "$BEST_SCORE_FILE"
+BEST_SCORE="$BASELINE"  # running best; replaced whenever an iteration is kept
+
+for i in $(seq 1 "$MAX_ITERATIONS"); do
+    # Per-iteration banner: a blank line, then progress and current best between two rules.
+    printf '\n%s\n' "========================================"
+    printf '%s\n' "=== Iteration $i / $MAX_ITERATIONS ==="
+    printf '%s\n' "=== Best score: $BEST_SCORE ops/s ==="
+    printf '%s\n' "========================================"
+
+    # Feed the last 10 log lines back to the agent; the fallback text applies only if tail fails.
+    RECENT_LOG=$(tail -10 "$LOG_FILE" 2>/dev/null || echo "No previous iterations")
+    # Prompt for this iteration: embeds $i, $BEST_SCORE, $BASELINE and $RECENT_LOG.
+    PROMPT="You are running iteration $i of an autoresearch optimization loop for graphql-java.
+
+Read autoresearch/program.md for full context and strategy.
+
+Current best benchmark score: $BEST_SCORE ops/s (baseline was: $BASELINE ops/s)
+
+Previous optimization log (last 10 entries):
+$RECENT_LOG
+
+YOUR TASK: Make exactly ONE focused optimization to the ENF code.
+- Read the code files first, then pick the most promising strategy from program.md
+  that has NOT already been tried (check the log above)
+- Make a minimal, targeted change to ONE or TWO files
+- Do NOT run tests or benchmarks — the outer harness handles that
+- Do NOT commit — the outer harness handles that
+- After editing, output a single-line summary of what you changed and why
+
+SCOPE: Only modify files under src/main/java/graphql/normalized/ or the utility
+files listed in program.md (ImmutableKit.java, FpKit.java).
+
+Make the change now."
+
+    # Run Claude in non-interactive mode with file editing capability
+    # --dangerously-skip-permissions: allows edits without prompts (safe: tests gate everything)
+    # --model sonnet: fast iterations
+    # --max-turns 20: enough to read files + make edits, but bounded
+    echo "--- Asking Claude to make an optimization ---"
+    CLAUDE_OUTPUT=$(claude \
+        --model sonnet \
+        --dangerously-skip-permissions \
+        --max-turns 20 \
+        --verbose \
+        -p "$PROMPT" \
+        2>&1) || true
+
+    echo "$CLAUDE_OUTPUT" | tail -5
+
+    # Skip when src/main/java/ is untouched. `git status --porcelain` also reports
+    # untracked files, which `git diff --quiet` ignores (a newly added file was missed).
+    if [ -z "$(git status --porcelain -- src/main/java/)" ]; then
+        echo "No source changes in iteration $i, skipping"
+        printf "%s\t-\t-\t-\tskipped\tno changes\n" "$i" >> "$LOG_FILE"
+        continue
+    fi
+    echo "--- Changes made ---"
+    git diff --stat src/main/java/
+    git status --short -- src/main/java/   # also lists newly added (untracked) files
+
+    # Gate on tests first; `set -o pipefail` makes the pipeline report gradlew's status.
+    echo "--- Running tests ---"
+    if ! ./gradlew test -q 2>&1 | tail -10; then
+        echo "Tests FAILED — reverting changes"
+        git checkout -- src/; git clean -fdq -- src/  # clean: drop files the agent created
+        printf "%s\t-\t-\t-\treverted\ttests failed\n" "$i" >> "$LOG_FILE"
+        continue
+    fi
+
+    # Benchmark the change. run_benchmark.sh exits non-zero when it fails, so without the
+    # `|| SCORE="FAILED"` fallback `set -e` killed the loop before the revert below could run.
+    echo "--- Running benchmark ---"
+    SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || SCORE="FAILED"
+    if [ "$SCORE" = "FAILED" ]; then
+        echo "Benchmark FAILED — reverting changes"
+        git checkout -- src/; git clean -fdq -- src/  # clean: drop files the agent created
+        printf "%s\t-\t-\t-\treverted\tbenchmark failed\n" "$i" >> "$LOG_FILE"
+        continue
+    fi
+
+    # Compare with the best score so far (awk because bash has no floating-point math).
+    IMPROVED=$(echo "$SCORE $BEST_SCORE" | awk '{print ($1 > $2) ? "yes" : "no"}')
+    DELTA=$(echo "$SCORE $BEST_SCORE" | awk '{printf "%.3f", $1 - $2}')
+
+    if [ "$IMPROVED" = "yes" ]; then
+        echo ""
+        echo "*** IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA) ***"
+        echo ""
+        BEST_SCORE="$SCORE"
+        echo "$BEST_SCORE" > "$BEST_SCORE_FILE"
+
+        # Description for the log: last line of `git diff --stat`, trimmed by xargs.
+        DESCRIPTION=$(git diff --stat src/main/java/ | tail -1 | xargs)
+        # Commit the improvement; `git add` also stages files the agent newly created.
+        git add src/main/java/
+        git commit -m "autoresearch: iteration $i [+$DELTA ops/s]
+
+$(git diff --cached --stat | head -5)"
+        COMMIT=$(git rev-parse --short HEAD)
+        printf "%s\t%s\t%s\t+%s\tkept\t%s\n" "$i" "$COMMIT" "$SCORE" "$DELTA" "$DESCRIPTION" >> "$LOG_FILE"
+    else
+        echo "No improvement: $SCORE vs $BEST_SCORE ops/s ($DELTA) — reverting"
+        # Restore tracked files AND delete untracked ones: `git checkout` alone left new
+        # files behind, and a later kept iteration's `git add` would have committed them.
+        git checkout -- src/; git clean -fdq -- src/
+        printf "%s\t-\t%s\t%s\treverted\tno improvement\n" "$i" "$SCORE" "$DELTA" >> "$LOG_FILE"
+    fi
+done
+
+echo ""
+echo "========================================"
+echo "=== Autoresearch complete ==="
+echo "=== Baseline:    $BASELINE ops/s ==="
+echo "=== Final best:  $BEST_SCORE ops/s ==="
+TOTAL_DELTA=$(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.3f", $1 - $2}')
+TOTAL_PCT=$(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.1f", (($1 - $2) / $2) * 100}')
+echo "=== Improvement: +$TOTAL_DELTA ops/s ($TOTAL_PCT%) ==="
+echo "========================================"
+echo ""
+echo "Results log: $LOG_FILE"
+echo "Review kept commits: git log --oneline --grep='autoresearch'"
diff --git a/autoresearch/program.md b/autoresearch/program.md
@@ -0,0 +1,75 @@
+# Autoresearch: Optimize ExecutableNormalizedOperationFactory Performance
+
+## Goal
+
+Improve the throughput (ops/sec) of `ENF1Performance.benchMarkThroughput` by making
+targeted optimizations to the ENF creation pipeline. Every improvement must pass the
+full test suite.
+
+## Metric
+
+- **Primary**: `ENF1Performance.benchMarkThroughput` — higher is better (ops/sec)
+- Run with: `./gradlew jmhRun -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" -PjmhFork=1 -PjmhIterations=3 -PjmhWarmupIterations=2`
+- A run takes ~2-3 minutes. Parse the score from JMH's output line containing `benchMarkThroughput`.
+
+## Scope — Files You May Modify
+
+Only modify files under `src/main/java/graphql/normalized/`:
+
+- `ExecutableNormalizedOperationFactory.java` (959 lines) — the main target
+- `ENFMerger.java` (197 lines) — post-processing merge step
+- `ExecutableNormalizedField.java` (700 lines) — the field data class
+- `ExecutableNormalizedOperation.java` (199 lines) — the result container
+- Supporting: `ArgumentMaker.java`, `NormalizedInputValue.java`, etc.
+
+Also consider utility classes these depend on:
+- `graphql/collect/ImmutableKit.java`
+- `graphql/util/FpKit.java`
+
+**Do NOT modify**: test files, benchmark files, schema files, build files.
+
+## Constraints
+
+1. **All tests must pass**: Run `./gradlew test` before benchmarking. If tests fail, revert.
+2. **No new dependencies**: This is a firm project policy.
+3. **No wildcard imports, no inner classes, no Optional**: Project coding standards.
+4. **Preserve public API**: All `@PublicApi` method signatures must remain unchanged.
+5. **Thread safety**: The factory is called concurrently. Don't introduce shared mutable state.
+6. **Use `graphql.Assert`** not `Objects.requireNonNull`.
+
+## Optimization Strategies to Explore (ordered by expected impact)
+
+### High Impact
+1. **Reduce object allocation in hot loops**: `buildEnfsRecursively()` and `collectFromSelectionSet()` create many intermediate collections (ArrayList, LinkedHashSet, LinkedHashMap). Consider pre-sizing or reusing.
+2. **Avoid unnecessary Set/Map copies**: `groupByCommonParents()` creates grouped collections that could be more efficient.
+3. **Replace stream operations with loops**: In hot paths, `.stream().collect()` has overhead from lambda allocation and iterator creation. Simple for-loops are faster.
+4. **ImmutableListMultimap.Builder overhead**: The builders accumulate entries one-by-one. Consider whether bulk operations are possible.
+
+### Medium Impact
+5. **Cache type lookups**: `Introspection.getFieldDef()` and `schema.getImplementations()` are called repeatedly for the same types. A local cache per factory invocation could help.
+6. **Optimize ENFMerger**: The merge step does O(n) scans. Consider whether merge candidates can be identified during collection rather than post-processing.
+7. **Lazy QueryDirectives creation**: Only create `QueryDirectivesImpl` when directives are actually present on a field.
+8. **Reduce LinkedHashSet usage**: Where insertion order doesn't matter, plain HashSet is faster.
+
+### Lower Impact (but easy wins)
+9. **Pre-size collections**: When the approximate size is known (e.g., number of selections), pre-size ArrayList/HashMap.
+10. **Avoid unnecessary wrapping**: e.g., `Collections.singleton()` vs direct iteration.
+11. **StringBuilder for string concatenation** in any hot-path string building.
+
+## How to Iterate
+
+1. Pick ONE strategy from above (start with #1)
+2. Make a focused, minimal change
+3. Run `./gradlew test` — if it fails, revert immediately (under `autoresearch.sh`, the harness performs steps 3–6 itself; do not run them yourself)
+4. Run the benchmark — compare to previous best
+5. If improved: commit with message "autoresearch: <description> [+X.XX ops/s]"
+6. If not improved: revert with `git checkout -- src/`
+7. Move to next strategy
+
+## Important Notes
+
+- The factory creates a **new instance per call** (no shared state between invocations), so per-invocation caching is safe.
+- `ExecutableNormalizedField` is intentionally `@Mutable` — the factory builds it up incrementally.
+- The `ImmutableListMultimap` and `ImmutableMap` builders are finalized only at the end in the factory's constructor.
+- Guava is an existing dependency — you can use Guava utilities but nothing else new.
+- The `CollectedField`, `CollectedFieldGroup`, and `PossibleMerger` inner records are allocation-heavy — they're created per-field during traversal.
diff --git a/autoresearch/run_benchmark.sh b/autoresearch/run_benchmark.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Runs the ENF1 throughput benchmark and extracts the score.
+# Usage: ./autoresearch/run_benchmark.sh
+# Output: prints the benchmark score (ops/sec) to stdout, or "FAILED" on error.
+#
+# Note: This script only runs the benchmark, NOT the tests.
+# The autoresearch.sh loop runs tests separately before calling this.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+cd "$PROJECT_DIR"
+
+echo "=== Running ENF1 throughput benchmark ===" >&2
+BENCHMARK_OUTPUT=$(./gradlew jmhRun \
+    -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" \
+    -PjmhFork=1 \
+    -PjmhIterations=3 \
+    -PjmhWarmupIterations=2 \
+    2>&1)
+
+# Extract score from JMH output line like:
+# ENF1Performance.benchMarkThroughput  thrpt    3  XX.XXX ± Y.YYY  ops/s
+SCORE=$(echo "$BENCHMARK_OUTPUT" | grep -E "benchMarkThroughput\s+thrpt" | awk '{print $(NF-3)}')
+
+if [ -z "$SCORE" ]; then
+    echo "FAILED: could not extract benchmark score" >&2
+    echo "Last 20 lines of output:" >&2
+    echo "$BENCHMARK_OUTPUT" | tail -20 >&2
+    echo "FAILED"
+    exit 1
+fi
+
+echo "Score: $SCORE ops/s" >&2
+echo "$SCORE"