Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions autoresearch/autoresearch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/usr/bin/env bash
# Autoresearch loop driver for graphql-java ENF optimization.
#
# This script runs an autonomous optimization loop using Claude Code (Sonnet)
# to iteratively improve ENF performance.
#
# Usage:
# ./autoresearch/autoresearch.sh [max_iterations]
#
# Prerequisites:
# - Claude Code CLI installed and authenticated (`claude` on PATH)
# - Java toolchain (JDK 25) available for builds
# - Run from the graphql-java project root
#
# Permissions:
# The script uses `claude --dangerously-skip-permissions` so the agent can
# edit files without interactive approval prompts. This is safe here because:
# - The agent is scoped to src/main/java/ edits only (via prompt)
# - Tests gate every change (bad edits get reverted)
# - Git tracks everything
#
# The loop:
# 1. Get baseline benchmark score
# 2. Ask Claude (Sonnet) to make ONE optimization
# 3. Run tests + benchmark
# 4. Keep if improved, revert if not
# 5. Repeat

set -euo pipefail

# Positional argument 1: maximum number of optimization iterations (default 50).
MAX_ITERATIONS="${1:-50}"
if ! [[ "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then
  echo "ERROR: max_iterations must be a non-negative integer, got '$MAX_ITERATIONS'." >&2
  exit 1
fi

# Resolve all paths relative to this script's own location so the results log
# and best-score file always live next to the script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
LOG_FILE="$SCRIPT_DIR/results.tsv"
BEST_SCORE_FILE="$SCRIPT_DIR/.best_score"

cd "$PROJECT_DIR"

# Verify claude CLI is available
if ! command -v claude &>/dev/null; then
  echo "ERROR: 'claude' CLI not found on PATH. Install Claude Code first." >&2
  exit 1
fi

# Initialize log: write the TSV header only when the log does not exist yet,
# so results from earlier runs are preserved and appended to.
if [[ ! -f "$LOG_FILE" ]]; then
  printf 'iteration\tcommit\tscore\tdelta\tstatus\tdescription\n' > "$LOG_FILE"
fi

# Get baseline score
echo "=== Getting baseline score ==="
# run_benchmark.sh exits non-zero when it fails. Under `set -e` a bare
# assignment from a failing command substitution would abort the script right
# here, before the friendly error below is printed, so catch the status.
BASELINE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || BASELINE="FAILED"
if [[ -z "$BASELINE" || "$BASELINE" == "FAILED" ]]; then
  echo "ERROR: Baseline benchmark failed. Fix issues before starting autoresearch." >&2
  exit 1
fi
echo "Baseline: $BASELINE ops/s"
echo "$BASELINE" > "$BEST_SCORE_FILE"

# The best score seen so far starts out as the baseline.
BEST_SCORE="$BASELINE"

# Discard the agent's uncommitted edits to tracked files under src/.
revert_changes() {
  git checkout -- src/
}

# Main optimization loop. Each iteration:
#   1. asks Claude for ONE change,
#   2. runs the test suite,
#   3. runs the benchmark,
#   4. commits the change if the score beat BEST_SCORE, otherwise reverts it.
# Every outcome is appended as one row to LOG_FILE.
for ((i = 1; i <= MAX_ITERATIONS; i++)); do
  echo ""
  echo "========================================"
  echo "=== Iteration $i / $MAX_ITERATIONS ==="
  echo "=== Best score: $BEST_SCORE ops/s ==="
  echo "========================================"

  # Build the prompt for this iteration; the tail of the results log is
  # included so the agent can see what earlier iterations recorded.
  RECENT_LOG=$(tail -10 "$LOG_FILE" 2>/dev/null || echo "No previous iterations")

  PROMPT="You are running iteration $i of an autoresearch optimization loop for graphql-java.

Read autoresearch/program.md for full context and strategy.

Current best benchmark score: $BEST_SCORE ops/s (baseline was: $BASELINE ops/s)

Previous optimization log (last 10 entries):
$RECENT_LOG

YOUR TASK: Make exactly ONE focused optimization to the ENF code.
- Read the code files first, then pick the most promising strategy from program.md
that has NOT already been tried (check the log above)
- Make a minimal, targeted change to ONE or TWO files
- Do NOT run tests or benchmarks — the outer harness handles that
- Do NOT commit — the outer harness handles that
- After editing, output a single-line summary of what you changed and why

SCOPE: Only modify files under src/main/java/graphql/normalized/ or the utility
files listed in program.md (ImmutableKit.java, FpKit.java).

Make the change now."

  # Run Claude in non-interactive mode with file editing capability
  # --dangerously-skip-permissions: allows edits without prompts (tests gate everything)
  # --model sonnet: fast iterations
  # --max-turns 20: enough to read files + make edits, but bounded
  # `|| true` is deliberate: a failed or truncated agent run simply shows up
  # below as "no source changes" (or as whatever partial edits it left).
  echo "--- Asking Claude to make an optimization ---"
  CLAUDE_OUTPUT=$(claude \
    --model sonnet \
    --dangerously-skip-permissions \
    --max-turns 20 \
    --verbose \
    -p "$PROMPT" \
    2>&1) || true

  printf '%s\n' "$CLAUDE_OUTPUT" | tail -5

  # Check if anything changed.
  # NOTE(review): `git diff` only sees tracked files, so a brand-new file
  # created by the agent is neither detected here nor removed by
  # revert_changes — confirm whether untracked files need handling.
  if git diff --quiet src/main/java/; then
    echo "No source changes in iteration $i, skipping"
    printf "%s\t-\t-\t-\tskipped\tno changes\n" "$i" >> "$LOG_FILE"
    continue
  fi

  # Show what changed
  echo "--- Changes made ---"
  git diff --stat src/main/java/

  # Run the test suite first; run_benchmark.sh deliberately does not run tests.
  # `pipefail` makes the pipeline report gradle's failure even though `tail`
  # itself succeeds.
  echo "--- Running tests ---"
  if ! ./gradlew test -q 2>&1 | tail -10; then
    echo "Tests FAILED — reverting changes"
    revert_changes
    printf "%s\t-\t-\t-\treverted\ttests failed\n" "$i" >> "$LOG_FILE"
    continue
  fi

  # Run benchmark. The benchmark script exits non-zero on failure; without the
  # `||` guard, `set -e` would terminate the whole loop here and leave the
  # agent's edits unreverted and unlogged.
  echo "--- Running benchmark ---"
  SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || SCORE="FAILED"
  if [[ -z "$SCORE" || "$SCORE" == "FAILED" ]]; then
    echo "Benchmark FAILED — reverting changes"
    revert_changes
    printf "%s\t-\t-\t-\treverted\tbenchmark failed\n" "$i" >> "$LOG_FILE"
    continue
  fi

  # Compare (using awk for floating point; shell arithmetic is integer-only)
  IMPROVED=$(awk -v cur="$SCORE" -v best="$BEST_SCORE" \
    'BEGIN { print ((cur > best) ? "yes" : "no") }')
  DELTA=$(awk -v cur="$SCORE" -v best="$BEST_SCORE" \
    'BEGIN { printf "%.3f", cur - best }')

  if [[ "$IMPROVED" == "yes" ]]; then
    echo ""
    echo "*** IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA) ***"
    echo ""
    BEST_SCORE="$SCORE"
    echo "$BEST_SCORE" > "$BEST_SCORE_FILE"

    # Use the summary line of `git diff --stat` as the log description
    # (captured before staging, while the diff is still unstaged).
    DESCRIPTION=$(git diff --stat src/main/java/ | tail -1 | xargs)

    # Commit the improvement
    git add src/main/java/
    git commit -m "autoresearch: iteration $i [+$DELTA ops/s]

$(git diff --cached --stat | head -5)"

    COMMIT=$(git rev-parse --short HEAD)
    printf "%s\t%s\t%s\t+%s\tkept\t%s\n" "$i" "$COMMIT" "$SCORE" "$DELTA" "$DESCRIPTION" >> "$LOG_FILE"
  else
    echo "No improvement: $SCORE vs $BEST_SCORE ops/s ($DELTA) — reverting"
    revert_changes
    printf "%s\t-\t%s\t%s\treverted\tno improvement\n" "$i" "$SCORE" "$DELTA" >> "$LOG_FILE"
  fi
done

# Final report: baseline vs. best score, absolute and relative gain, and
# pointers to the results log and the commits that were kept.
banner="========================================"
printf '\n%s\n' "$banner"
printf '%s\n' "=== Autoresearch complete ==="
printf '=== Baseline: %s ops/s ===\n' "$BASELINE"
printf '=== Final best: %s ops/s ===\n' "$BEST_SCORE"
# Floating-point arithmetic is delegated to awk (shell arithmetic is integer-only).
TOTAL_DELTA=$(awk '{printf "%.3f", $1 - $2}' <<<"$BEST_SCORE $BASELINE")
TOTAL_PCT=$(awk '{printf "%.1f", (($1 - $2) / $2) * 100}' <<<"$BEST_SCORE $BASELINE")
printf '=== Improvement: +%s ops/s (%s%%) ===\n' "$TOTAL_DELTA" "$TOTAL_PCT"
printf '%s\n\n' "$banner"
printf 'Results log: %s\n' "$LOG_FILE"
printf '%s\n' "Review kept commits: git log --oneline --grep='autoresearch'"
75 changes: 75 additions & 0 deletions autoresearch/program.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Autoresearch: Optimize ExecutableNormalizedOperationFactory Performance

## Goal

Improve the throughput (ops/sec) of `ENF1Performance.benchMarkThroughput` by making
targeted optimizations to the ENF creation pipeline. Every improvement must pass the
full test suite.

## Metric

- **Primary**: `ENF1Performance.benchMarkThroughput` — higher is better (ops/sec)
- Run with: `./gradlew jmhRun -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" -PjmhFork=1 -PjmhIterations=3 -PjmhWarmupIterations=2`
- A run takes ~2-3 minutes. Parse the score from JMH's output line containing `benchMarkThroughput`.

## Scope — Files You May Modify

Only modify files under `src/main/java/graphql/normalized/`:

- `ExecutableNormalizedOperationFactory.java` (959 lines) — the main target
- `ENFMerger.java` (197 lines) — post-processing merge step
- `ExecutableNormalizedField.java` (700 lines) — the field data class
- `ExecutableNormalizedOperation.java` (199 lines) — the result container
- Supporting: `ArgumentMaker.java`, `NormalizedInputValue.java`, etc.

You may also modify these utility classes, which the files above depend on:
- `graphql/collect/ImmutableKit.java`
- `graphql/util/FpKit.java`

**Do NOT modify**: test files, benchmark files, schema files, build files.

## Constraints

1. **All tests must pass**: Run `./gradlew test` before benchmarking. If tests fail, revert.
2. **No new dependencies**: This is a firm project policy.
3. **No wildcard imports, no inner classes, no Optional**: Project coding standards.
4. **Preserve public API**: All `@PublicApi` method signatures must remain unchanged.
5. **Thread safety**: The factory is called concurrently. Don't introduce shared mutable state.
6. **Use `graphql.Assert`** not `Objects.requireNonNull`.

## Optimization Strategies to Explore (ordered by expected impact)

### High Impact
1. **Reduce object allocation in hot loops**: `buildEnfsRecursively()` and `collectFromSelectionSet()` create many intermediate collections (ArrayList, LinkedHashSet, LinkedHashMap). Consider pre-sizing or reusing.
2. **Avoid unnecessary Set/Map copies**: `groupByCommonParents()` creates grouped collections that could be more efficient.
3. **Replace stream operations with loops**: In hot paths, `.stream().collect()` has overhead from lambda allocation and iterator creation. Simple for-loops are faster.
4. **ImmutableListMultimap.Builder overhead**: The builders accumulate entries one-by-one. Consider whether bulk operations are possible.

### Medium Impact
5. **Cache type lookups**: `Introspection.getFieldDef()` and `schema.getImplementations()` are called repeatedly for the same types. A local cache per factory invocation could help.
6. **Optimize ENFMerger**: The merge step does O(n) scans. Consider whether merge candidates can be identified during collection rather than post-processing.
7. **Lazy QueryDirectives creation**: Only create `QueryDirectivesImpl` when directives are actually present on a field.
8. **Reduce LinkedHashSet usage**: Where insertion order doesn't matter, plain HashSet is faster.

### Lower Impact (but easy wins)
9. **Pre-size collections**: When the approximate size is known (e.g., number of selections), pre-size ArrayList/HashMap.
10. **Avoid unnecessary wrapping**: e.g., `Collections.singleton()` vs direct iteration.
11. **StringBuilder for string concatenation** in any hot-path string building.

## How to Iterate

1. Pick ONE strategy from above (start with #1)
2. Make a focused, minimal change
3. Run `./gradlew test` — if it fails, revert immediately
4. Run the benchmark — compare to previous best
5. If improved: commit with message "autoresearch: <description> [+X.XX ops/s]"
6. If not improved: revert with `git checkout -- src/`
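7. Move to next strategy

**Note:** When this loop is driven by `autoresearch/autoresearch.sh`, the outer harness performs steps 3–6 itself (tests, benchmark, commit or revert). In that mode, do only steps 1–2: make the change, and do not run tests or benchmarks and do not commit.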

## Important Notes

- The factory creates a **new instance per call** (no shared state between invocations), so per-invocation caching is safe.
- `ExecutableNormalizedField` is intentionally `@Mutable` — the factory builds it up incrementally.
- The `ImmutableListMultimap` and `ImmutableMap` builders are finalized only at the end in the factory's constructor.
- Guava is an existing dependency — you can use Guava utilities but nothing else new.
- The `CollectedField`, `CollectedFieldGroup`, and `PossibleMerger` inner records are allocation-heavy — they're created per-field during traversal.
37 changes: 37 additions & 0 deletions autoresearch/run_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Runs the ENF1 throughput benchmark and extracts the score.
# Usage: ./autoresearch/run_benchmark.sh
# Output: prints the benchmark score (ops/sec) to stdout, or "FAILED" on error.
#
# Note: This script only runs the benchmark, NOT the tests.
# The autoresearch.sh loop runs tests separately before calling this.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

cd "$PROJECT_DIR"

# Report a failure in the form callers rely on: diagnostics (including the
# tail of the captured build output) on stderr, the single word "FAILED" on
# stdout, and exit status 1.
# Arguments: $1 - short reason shown after "FAILED: "
fail() {
  echo "FAILED: $1" >&2
  echo "Last 20 lines of output:" >&2
  printf '%s\n' "$BENCHMARK_OUTPUT" | tail -20 >&2
  echo "FAILED"
  exit 1
}

echo "=== Running ENF1 throughput benchmark ===" >&2
# Capture gradle's exit status instead of letting `set -e` abort the script:
# an abort here would skip the "FAILED" line that callers are told to expect.
GRADLE_STATUS=0
BENCHMARK_OUTPUT=$(./gradlew jmhRun \
  -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" \
  -PjmhFork=1 \
  -PjmhIterations=3 \
  -PjmhWarmupIterations=2 \
  2>&1) || GRADLE_STATUS=$?

if (( GRADLE_STATUS != 0 )); then
  fail "gradle exited with status $GRADLE_STATUS"
fi

# Extract score from JMH output line like:
# ENF1Performance.benchMarkThroughput thrpt 3 XX.XXX ± Y.YYY ops/s
# The score is the 4th field from the end. A single awk replaces grep|awk so
# that "no matching line" yields an empty SCORE rather than a non-zero
# pipeline status (which, with pipefail + errexit, would kill the script
# before the check below). If several lines match, the last one wins.
SCORE=$(printf '%s\n' "$BENCHMARK_OUTPUT" \
  | awk '/benchMarkThroughput[[:space:]]+thrpt/ && NF >= 4 { score = $(NF-3) } END { print score }')

if [[ -z "$SCORE" ]]; then
  fail "could not extract benchmark score"
fi

echo "Score: $SCORE ops/s" >&2
echo "$SCORE"