From 4e4b444c49adda4a86e334205f567e701bb29fc3 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 21 Mar 2026 21:58:46 +0000
Subject: [PATCH 1/2] Add autoresearch setup for ENF performance optimization

Three-file autoresearch framework targeting ExecutableNormalizedOperationFactory
throughput: program.md (strategy), run_benchmark.sh (metric), autoresearch.sh (loop).

https://claude.ai/code/session_01GfoPorZWo99NczxzJTYh9Q
---
 autoresearch/autoresearch.sh  | 141 ++++++++++++++++++++++++++++++++++
 autoresearch/program.md       |  75 ++++++++++++++++++
 autoresearch/run_benchmark.sh |  41 ++++++++++
 3 files changed, 257 insertions(+)
 create mode 100755 autoresearch/autoresearch.sh
 create mode 100644 autoresearch/program.md
 create mode 100755 autoresearch/run_benchmark.sh

diff --git a/autoresearch/autoresearch.sh b/autoresearch/autoresearch.sh
new file mode 100755
index 000000000..701ee3d8b
--- /dev/null
+++ b/autoresearch/autoresearch.sh
@@ -0,0 +1,141 @@
+#!/usr/bin/env bash
+# Autoresearch loop driver for graphql-java ENF optimization.
+#
+# This script runs an autonomous optimization loop using Claude Code (Sonnet)
+# to iteratively improve ENF performance.
+#
+# Usage:
+#   ./autoresearch/autoresearch.sh [max_iterations]
+#
+# Prerequisites:
+#   - Claude Code CLI installed and authenticated
+#   - Java toolchain (JDK 25) available for builds
+#
+# The loop:
+#   1. Get baseline benchmark score
+#   2. Ask Claude to make ONE optimization
+#   3. Run tests + benchmark
+#   4. Keep if improved, revert if not
+#   5. Repeat
+
+set -euo pipefail
+
+MAX_ITERATIONS="${1:-50}"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+LOG_FILE="$SCRIPT_DIR/results.tsv"
+BEST_SCORE_FILE="$SCRIPT_DIR/.best_score"
+
+cd "$PROJECT_DIR"
+
+# Initialize log
+if [ ! -f "$LOG_FILE" ]; then
+    echo -e "iteration\tcommit\tscore\tdelta\tstatus\tdescription" > "$LOG_FILE"
+fi
+
+# Get baseline score
+echo "=== Getting baseline score ==="
+BASELINE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || BASELINE="FAILED"  # non-zero exit would trip `set -e` before the check below
+if [ "$BASELINE" = "FAILED" ]; then
+    echo "ERROR: Baseline benchmark failed. Fix issues before starting autoresearch."
+    exit 1
+fi
+echo "Baseline: $BASELINE ops/s"
+echo "$BASELINE" > "$BEST_SCORE_FILE"
+
+BEST_SCORE="$BASELINE"
+COMMIT_BEFORE=$(git rev-parse HEAD)
+
+for i in $(seq 1 "$MAX_ITERATIONS"); do
+    echo ""
+    echo "========================================"
+    echo "=== Iteration $i / $MAX_ITERATIONS ==="
+    echo "=== Best score: $BEST_SCORE ops/s ==="
+    echo "========================================"
+
+    # Save current state
+    COMMIT_BEFORE=$(git rev-parse HEAD)
+
+    # Ask Claude (Sonnet) to make ONE optimization
+    # Using --print to run non-interactively
+    claude --model sonnet -p "$(cat <<EOF
+You are running iteration $i of an autoresearch optimization loop for graphql-java.
+
+Read autoresearch/program.md for full context and strategy.
+
+Current best benchmark score: $BEST_SCORE ops/s (baseline was: $BASELINE ops/s)
+
+Previous optimization log:
+$(tail -10 "$LOG_FILE" 2>/dev/null || echo "No previous iterations")
+
+YOUR TASK: Make exactly ONE focused optimization to the ENF code.
+- Pick the most promising unused strategy from program.md
+- Make a minimal, targeted change
+- Do NOT run tests or benchmarks (the harness does that)
+- Describe what you changed and why in a single line
+
+IMPORTANT: Only modify files under src/main/java/graphql/normalized/ or the utility
+files mentioned in program.md. Make the change now.
+EOF
+)"
+
+    # Check if anything changed
+    if git diff --quiet src/main/java/; then
+        echo "No changes made in iteration $i, skipping"
+        echo -e "$i\t-\t-\t-\tskipped\tno changes" >> "$LOG_FILE"
+        continue
+    fi
+
+    # Run tests
+    echo "--- Running tests ---"
+    if ! ./gradlew test -q 2>&1 | tail -5; then
+        echo "Tests FAILED — reverting"
+        git checkout -- src/
+        echo -e "$i\t-\t-\t-\treverted\ttests failed" >> "$LOG_FILE"
+        continue
+    fi
+
+    # Run benchmark
+    echo "--- Running benchmark ---"
+    SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh")
+    if [ "$SCORE" = "FAILED" ]; then
+        echo "Benchmark FAILED — reverting"
+        git checkout -- src/
+        echo -e "$i\t-\t-\t-\treverted\tbenchmark failed" >> "$LOG_FILE"
+        continue
+    fi
+
+    # Compare (using awk for floating point)
+    IMPROVED=$(echo "$SCORE $BEST_SCORE" | awk '{print ($1 > $2) ? "yes" : "no"}')
+    DELTA=$(echo "$SCORE $BEST_SCORE" | awk '{printf "%.3f", $1 - $2}')
+
+    if [ "$IMPROVED" = "yes" ]; then
+        echo "IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA)"
+        BEST_SCORE="$SCORE"
+        echo "$BEST_SCORE" > "$BEST_SCORE_FILE"
+
+        # Get a description of the change
+        DESCRIPTION=$(git diff --stat src/main/java/ | head -1)
+
+        # Commit the improvement
+        git add src/main/java/
+        git commit -m "autoresearch: iteration $i — $DESCRIPTION [+$DELTA ops/s]"
+
+        COMMIT=$(git rev-parse --short HEAD)
+        echo -e "$i\t$COMMIT\t$SCORE\t+$DELTA\tkept\t$DESCRIPTION" >> "$LOG_FILE"
+    else
+        echo "No improvement: $SCORE vs $BEST_SCORE ops/s ($DELTA) — reverting"
+        git checkout -- src/
+        echo -e "$i\t-\t$SCORE\t$DELTA\treverted\tno improvement" >> "$LOG_FILE"
+    fi
+done
+
+echo ""
+echo "========================================"
+echo "=== Autoresearch complete ==="
+echo "=== Baseline: $BASELINE ops/s ==="
+echo "=== Final best: $BEST_SCORE ops/s ==="
+echo "=== Total improvement: $(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.3f", $1 - $2}') ops/s ==="
+echo "========================================"
+echo ""
+echo "Results log: $LOG_FILE"
diff --git a/autoresearch/program.md b/autoresearch/program.md
new file mode 100644
index 000000000..9093cee29
--- /dev/null
+++ b/autoresearch/program.md
@@ -0,0 +1,75 @@
+# Autoresearch: Optimize ExecutableNormalizedOperationFactory Performance
+
+## Goal
+
+Improve the throughput (ops/sec) of `ENF1Performance.benchMarkThroughput` by making
+targeted optimizations to the ENF creation pipeline. Every improvement must pass the
+full test suite.
+
+## Metric
+
+- **Primary**: `ENF1Performance.benchMarkThroughput` — higher is better (ops/sec)
+- Run with: `./gradlew jmhRun -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" -PjmhFork=1 -PjmhIterations=3 -PjmhWarmupIterations=2`
+- A run takes ~2-3 minutes. Parse the score from JMH's output line containing `benchMarkThroughput`.
+
+## Scope — Files You May Modify
+
+Only modify files under `src/main/java/graphql/normalized/`:
+
+- `ExecutableNormalizedOperationFactory.java` (959 lines) — the main target
+- `ENFMerger.java` (197 lines) — post-processing merge step
+- `ExecutableNormalizedField.java` (700 lines) — the field data class
+- `ExecutableNormalizedOperation.java` (199 lines) — the result container
+- Supporting: `ArgumentMaker.java`, `NormalizedInputValue.java`, etc.
+
+Also consider utility classes these depend on:
+- `graphql/collect/ImmutableKit.java`
+- `graphql/util/FpKit.java`
+
+**Do NOT modify**: test files, benchmark files, schema files, build files.
+
+## Constraints
+
+1. **All tests must pass**: Run `./gradlew test` before benchmarking. If tests fail, revert.
+2. **No new dependencies**: This is a firm project policy.
+3. **No wildcard imports, no inner classes, no Optional**: Project coding standards.
+4. **Preserve public API**: All `@PublicApi` method signatures must remain unchanged.
+5. **Thread safety**: The factory is called concurrently. Don't introduce shared mutable state.
+6. **Use `graphql.Assert`** not `Objects.requireNonNull`.
+
+## Optimization Strategies to Explore (ordered by expected impact)
+
+### High Impact
+1. **Reduce object allocation in hot loops**: `buildEnfsRecursively()` and `collectFromSelectionSet()` create many intermediate collections (ArrayList, LinkedHashSet, LinkedHashMap). Consider pre-sizing or reusing.
+2. **Avoid unnecessary Set/Map copies**: `groupByCommonParents()` creates grouped collections that could be more efficient.
+3. **Replace stream operations with loops**: In hot paths, `.stream().collect()` has overhead from lambda allocation and iterator creation. Simple for-loops are faster.
+4. **ImmutableListMultimap.Builder overhead**: The builders accumulate entries one-by-one. Consider whether bulk operations are possible.
+
+### Medium Impact
+5. **Cache type lookups**: `Introspection.getFieldDef()` and `schema.getImplementations()` are called repeatedly for the same types. A local cache per factory invocation could help.
+6. **Optimize ENFMerger**: The merge step does O(n) scans. Consider whether merge candidates can be identified during collection rather than post-processing.
+7. **Lazy QueryDirectives creation**: Only create `QueryDirectivesImpl` when directives are actually present on a field.
+8. **Reduce LinkedHashSet usage**: Where insertion order doesn't matter, plain HashSet is faster.
+
+### Lower Impact (but easy wins)
+9. **Pre-size collections**: When the approximate size is known (e.g., number of selections), pre-size ArrayList/HashMap.
+10. **Avoid unnecessary wrapping**: e.g., `Collections.singleton()` vs direct iteration.
+11. **StringBuilder for string concatenation** in any hot-path string building.
+
+## How to Iterate
+
+1. Pick ONE strategy from above (start with #1)
+2. Make a focused, minimal change
+3. Run `./gradlew test` — if it fails, revert immediately
+4. Run the benchmark — compare to previous best
+5. If improved: commit with message "autoresearch: <description> [+X.XX ops/s]"
+6. If not improved: revert with `git checkout -- src/`
+7. Move to next strategy (when driven by `autoresearch.sh`, the harness performs steps 3–6 itself — only do steps 1–2)
+
+## Important Notes
+
+- The factory creates a **new instance per call** (no shared state between invocations), so per-invocation caching is safe.
+- `ExecutableNormalizedField` is intentionally `@Mutable` — the factory builds it up incrementally.
+- The `ImmutableListMultimap` and `ImmutableMap` builders are finalized only at the end in the factory's constructor.
+- Guava is an existing dependency — you can use Guava utilities but nothing else new.
+- The `CollectedField`, `CollectedFieldGroup`, and `PossibleMerger` inner records are allocation-heavy — they're created per-field during traversal.
diff --git a/autoresearch/run_benchmark.sh b/autoresearch/run_benchmark.sh
new file mode 100755
index 000000000..4bd85f5d8
--- /dev/null
+++ b/autoresearch/run_benchmark.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Runs the ENF1 throughput benchmark and extracts the score.
+# Usage: ./autoresearch/run_benchmark.sh
+# Output: prints the benchmark score (ops/sec) to stdout, or "FAILED" on error.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+cd "$PROJECT_DIR"
+
+echo "=== Running tests first ===" >&2
+if ! ./gradlew test -q 2>&1 | tail -5 >&2; then
+    echo "FAILED: tests did not pass" >&2
+    echo "FAILED"
+    exit 1
+fi
+
+echo "=== Running ENF1 throughput benchmark ===" >&2
+BENCHMARK_OUTPUT=$(./gradlew jmhRun \
+    -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" \
+    -PjmhFork=1 \
+    -PjmhIterations=3 \
+    -PjmhWarmupIterations=2 \
+    2>&1) || true  # under `set -e` a failed run must still reach the FAILED report below
+
+# Extract score from JMH output line like:
+# ENF1Performance.benchMarkThroughput  thrpt    3  XX.XXX ± Y.YYY  ops/s
+# Done in one awk call: a grep miss under `set -e` + pipefail would exit before the check.
+SCORE=$(echo "$BENCHMARK_OUTPUT" | awk '/benchMarkThroughput[ \t]+thrpt/ {print $(NF-3)}')
+if [ -z "$SCORE" ]; then
+    echo "FAILED: could not extract benchmark score" >&2
+    echo "Last 20 lines of output:" >&2
+    echo "$BENCHMARK_OUTPUT" | tail -20 >&2
+    echo "FAILED"
+    exit 1
+fi
+
+echo "Score: $SCORE ops/s" >&2
+echo "$SCORE"

From 00f96f1c7bb17fc5218859cba9263b9c5fa1304b Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 21 Mar 2026 22:05:21 +0000
Subject: [PATCH 2/2] Fix autoresearch scripts for local Claude Code CLI usage

- Use `claude --dangerously-skip-permissions --max-turns 20` for unattended operation
- Separate test run from benchmark run (avoid running tests twice)
- Add CLI availability check
- Improve logging with printf instead of echo -e
- Show percentage improvement in final summary

https://claude.ai/code/session_01GfoPorZWo99NczxzJTYh9Q
---
 autoresearch/autoresearch.sh  | 114 ++++++++++++++++++++++------------
 autoresearch/run_benchmark.sh |  10 +--
 2 files changed, 79 insertions(+), 45 deletions(-)

diff --git a/autoresearch/autoresearch.sh b/autoresearch/autoresearch.sh
index 701ee3d8b..631084066 100755
--- a/autoresearch/autoresearch.sh
+++ b/autoresearch/autoresearch.sh
@@ -8,12 +8,20 @@
 #   ./autoresearch/autoresearch.sh [max_iterations]
 #
 # Prerequisites:
-#   - Claude Code CLI installed and authenticated
+#   - Claude Code CLI installed and authenticated (`claude` on PATH)
 #   - Java toolchain (JDK 25) available for builds
+#   - Run from the graphql-java project root
+#
+# Permissions:
+#   The script uses `claude --dangerously-skip-permissions` so the agent can
+#   edit files without interactive approval prompts. Mitigations (not a sandbox):
+#   - The agent is asked to edit only src/main/java/ (prompt-level, not enforced)
+#   - Tests gate every change (failing edits under src/ are reverted)
+#   - Git tracks changes to existing files, so edits can be reviewed or undone
 #
 # The loop:
 #   1. Get baseline benchmark score
-#   2. Ask Claude to make ONE optimization
+#   2. Ask Claude (Sonnet) to make ONE optimization
 #   3. Run tests + benchmark
 #   4. Keep if improved, revert if not
 #   5. Repeat
@@ -28,9 +36,15 @@ BEST_SCORE_FILE="$SCRIPT_DIR/.best_score"
 
 cd "$PROJECT_DIR"
 
+# Verify claude CLI is available
+if ! command -v claude &>/dev/null; then
+    echo "ERROR: 'claude' CLI not found on PATH. Install Claude Code first."
+    exit 1
+fi
+
 # Initialize log
 if [ ! -f "$LOG_FILE" ]; then
-    echo -e "iteration\tcommit\tscore\tdelta\tstatus\tdescription" > "$LOG_FILE"
+    printf "iteration\tcommit\tscore\tdelta\tstatus\tdescription\n" > "$LOG_FILE"
 fi
 
 # Get baseline score
@@ -44,7 +58,6 @@ echo "Baseline: $BASELINE ops/s"
 echo "$BASELINE" > "$BEST_SCORE_FILE"
 
 BEST_SCORE="$BASELINE"
-COMMIT_BEFORE=$(git rev-parse HEAD)
 
 for i in $(seq 1 "$MAX_ITERATIONS"); do
     echo ""
@@ -53,45 +66,63 @@ for i in $(seq 1 "$MAX_ITERATIONS"); do
     echo "=== Best score: $BEST_SCORE ops/s ==="
     echo "========================================"
 
-    # Save current state
-    COMMIT_BEFORE=$(git rev-parse HEAD)
+    # Build the prompt for this iteration
+    RECENT_LOG=$(tail -10 "$LOG_FILE" 2>/dev/null || echo "No previous iterations")
 
-    # Ask Claude (Sonnet) to make ONE optimization
-    # Using --print to run non-interactively
-    claude --model sonnet -p "$(cat <<EOF
-You are running iteration $i of an autoresearch optimization loop for graphql-java.
+    PROMPT="You are running iteration $i of an autoresearch optimization loop for graphql-java.
 
 Read autoresearch/program.md for full context and strategy.
 
 Current best benchmark score: $BEST_SCORE ops/s (baseline was: $BASELINE ops/s)
 
-Previous optimization log:
-$(tail -10 "$LOG_FILE" 2>/dev/null || echo "No previous iterations")
+Previous optimization log (last 10 entries):
+$RECENT_LOG
 
 YOUR TASK: Make exactly ONE focused optimization to the ENF code.
-- Pick the most promising unused strategy from program.md
-- Make a minimal, targeted change
-- Do NOT run tests or benchmarks (the harness does that)
-- Describe what you changed and why in a single line
-
-IMPORTANT: Only modify files under src/main/java/graphql/normalized/ or the utility
-files mentioned in program.md. Make the change now.
-EOF
-)"
+- Read the code files first, then pick the most promising strategy from program.md
+  that has NOT already been tried (check the log above)
+- Make a minimal, targeted change to ONE or TWO files
+- Do NOT run tests or benchmarks — the outer harness handles that
+- Do NOT commit — the outer harness handles that
+- After editing, output a single-line summary of what you changed and why
+
+SCOPE: Only modify files under src/main/java/graphql/normalized/ or the utility
+files listed in program.md (ImmutableKit.java, FpKit.java).
+
+Make the change now."
+
+    # Run Claude in non-interactive mode with file editing capability
+    # --dangerously-skip-permissions: allows edits without prompts (safe: tests gate everything)
+    # --model sonnet: fast iterations
+    # --max-turns 20: enough to read files + make edits, but bounded
+    echo "--- Asking Claude to make an optimization ---"
+    CLAUDE_OUTPUT=$(claude \
+        --model sonnet \
+        --dangerously-skip-permissions \
+        --max-turns 20 \
+        --verbose \
+        -p "$PROMPT" \
+        2>&1) || true
+
+    echo "$CLAUDE_OUTPUT" | tail -5
 
     # Check if anything changed
     if git diff --quiet src/main/java/; then
-        echo "No changes made in iteration $i, skipping"
-        echo -e "$i\t-\t-\t-\tskipped\tno changes" >> "$LOG_FILE"
+        echo "No source changes in iteration $i, skipping"
+        printf "%s\t-\t-\t-\tskipped\tno changes\n" "$i" >> "$LOG_FILE"
         continue
     fi
 
-    # Run tests
+    # Show what changed
+    echo "--- Changes made ---"
+    git diff --stat src/main/java/
+
+    # Run tests here: run_benchmark.sh only runs the benchmark, so tests execute once per iteration
     echo "--- Running tests ---"
-    if ! ./gradlew test -q 2>&1 | tail -5; then
-        echo "Tests FAILED — reverting"
+    if ! ./gradlew test -q 2>&1 | tail -10; then
+        echo "Tests FAILED — reverting changes"
         git checkout -- src/
-        echo -e "$i\t-\t-\t-\treverted\ttests failed" >> "$LOG_FILE"
+        printf "%s\t-\t-\t-\treverted\ttests failed\n" "$i" >> "$LOG_FILE"
         continue
     fi
 
@@ -99,9 +130,9 @@ EOF
     echo "--- Running benchmark ---"
-    SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh")
+    SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || SCORE="FAILED"  # keep `set -e` from aborting the loop so the revert below runs
     if [ "$SCORE" = "FAILED" ]; then
-        echo "Benchmark FAILED — reverting"
+        echo "Benchmark FAILED — reverting changes"
         git checkout -- src/
-        echo -e "$i\t-\t-\t-\treverted\tbenchmark failed" >> "$LOG_FILE"
+        printf "%s\t-\t-\t-\treverted\tbenchmark failed\n" "$i" >> "$LOG_FILE"
         continue
     fi
 
@@ -110,32 +141,39 @@ EOF
     DELTA=$(echo "$SCORE $BEST_SCORE" | awk '{printf "%.3f", $1 - $2}')
 
     if [ "$IMPROVED" = "yes" ]; then
-        echo "IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA)"
+        echo ""
+        echo "*** IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA) ***"
+        echo ""
         BEST_SCORE="$SCORE"
         echo "$BEST_SCORE" > "$BEST_SCORE_FILE"
 
-        # Get a description of the change
-        DESCRIPTION=$(git diff --stat src/main/java/ | head -1)
+        # Get a description of the change from git diff
+        DESCRIPTION=$(git diff --stat src/main/java/ | tail -1 | xargs)
 
         # Commit the improvement
         git add src/main/java/
-        git commit -m "autoresearch: iteration $i — $DESCRIPTION [+$DELTA ops/s]"
+        git commit -m "autoresearch: iteration $i [+$DELTA ops/s]
+
+$(git diff --cached --stat | head -5)"
 
         COMMIT=$(git rev-parse --short HEAD)
-        echo -e "$i\t$COMMIT\t$SCORE\t+$DELTA\tkept\t$DESCRIPTION" >> "$LOG_FILE"
+        printf "%s\t%s\t%s\t+%s\tkept\t%s\n" "$i" "$COMMIT" "$SCORE" "$DELTA" "$DESCRIPTION" >> "$LOG_FILE"
     else
         echo "No improvement: $SCORE vs $BEST_SCORE ops/s ($DELTA) — reverting"
         git checkout -- src/
-        echo -e "$i\t-\t$SCORE\t$DELTA\treverted\tno improvement" >> "$LOG_FILE"
+        printf "%s\t-\t%s\t%s\treverted\tno improvement\n" "$i" "$SCORE" "$DELTA" >> "$LOG_FILE"
     fi
 done
 
 echo ""
 echo "========================================"
 echo "=== Autoresearch complete ==="
-echo "=== Baseline: $BASELINE ops/s ==="
-echo "=== Final best: $BEST_SCORE ops/s ==="
-echo "=== Total improvement: $(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.3f", $1 - $2}') ops/s ==="
+echo "=== Baseline:    $BASELINE ops/s ==="
+echo "=== Final best:  $BEST_SCORE ops/s ==="
+TOTAL_DELTA=$(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.3f", $1 - $2}')
+TOTAL_PCT=$(echo "$BEST_SCORE $BASELINE" | awk '{printf "%.1f", (($1 - $2) / $2) * 100}')
+echo "=== Improvement: +$TOTAL_DELTA ops/s ($TOTAL_PCT%) ==="
 echo "========================================"
 echo ""
 echo "Results log: $LOG_FILE"
+echo "Review kept commits: git log --oneline --grep='autoresearch'"
diff --git a/autoresearch/run_benchmark.sh b/autoresearch/run_benchmark.sh
index 4bd85f5d8..f188a1160 100755
--- a/autoresearch/run_benchmark.sh
+++ b/autoresearch/run_benchmark.sh
@@ -2,6 +2,9 @@
 # Runs the ENF1 throughput benchmark and extracts the score.
 # Usage: ./autoresearch/run_benchmark.sh
 # Output: prints the benchmark score (ops/sec) to stdout, or "FAILED" on error.
+#
+# Note: This script only runs the benchmark, NOT the tests.
+# The autoresearch.sh loop runs tests separately before calling this.
 
 set -euo pipefail
 
@@ -10,13 +13,6 @@ PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 
 cd "$PROJECT_DIR"
 
-echo "=== Running tests first ===" >&2
-if ! ./gradlew test -q 2>&1 | tail -5 >&2; then
-    echo "FAILED: tests did not pass" >&2
-    echo "FAILED"
-    exit 1
-fi
-
 echo "=== Running ENF1 throughput benchmark ===" >&2
 BENCHMARK_OUTPUT=$(./gradlew jmhRun \
     -PjmhInclude="performance.ENF1Performance.benchMarkThroughput" \