Skip to content

Commit 4ecb626

Browse files
misrasaurabh1 and claude committed
fix: Java comparator key collision and add A/A baseline test
The Java Comparator used only iteration_id as the map key when comparing test results. Since every test method had iteration_id="1", all rows collapsed to a single entry and only the last row survived. Because JUnit test execution order is non-deterministic, the surviving row differed between baseline and candidate runs, causing false correctness failures. Fix the key to include test_module_path:test_class_name:test_function_name for unique identification. Also add an A/A test that runs the original code through the candidate pipeline to detect verification bugs like this. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 00a5dc7 commit 4ecb626

3 files changed

Lines changed: 30 additions & 3 deletions

File tree

codeflash-java-runtime/src/main/java/com/codeflash/Comparator.java

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -144,8 +144,11 @@ private static Map<String, byte[]> readTestResults(String dbPath) throws Excepti
144144
try (Connection conn = DriverManager.getConnection(url);
145145
Statement stmt = conn.createStatement();
146146
ResultSet rs = stmt.executeQuery(
147-
"SELECT iteration_id, return_value FROM test_results WHERE loop_index = 1")) {
147+
"SELECT test_module_path, test_class_name, test_function_name, iteration_id, return_value FROM test_results WHERE loop_index = 1")) {
148148
while (rs.next()) {
149+
String testModulePath = rs.getString("test_module_path");
150+
String testClassName = rs.getString("test_class_name");
151+
String testFunctionName = rs.getString("test_function_name");
149152
String iterationId = rs.getString("iteration_id");
150153
byte[] returnValue = rs.getBytes("return_value");
151154
// Strip the CODEFLASH_TEST_ITERATION suffix (e.g. "7_0" -> "7")
@@ -155,7 +158,10 @@ private static Map<String, byte[]> readTestResults(String dbPath) throws Excepti
155158
if (lastUnderscore > 0) {
156159
iterationId = iterationId.substring(0, lastUnderscore);
157160
}
158-
results.put(iterationId, returnValue);
161+
// Use module:class:function::iteration as key to uniquely identify
162+
// each invocation across different test files, classes, and methods
163+
String key = testModulePath + ":" + testClassName + ":" + testFunctionName + "::" + iterationId;
164+
results.put(key, returnValue);
159165
}
160166
}
161167
return results;
179 Bytes
Binary file not shown.

codeflash/optimization/function_optimizer.py

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1335,7 +1335,7 @@ def process_single_candidate(
13351335
optimized_code=candidate.source_code,
13361336
original_helper_code=original_helper_code,
13371337
)
1338-
if not did_update:
1338+
if not did_update and candidate.optimization_id != "original-code-aa-test":
13391339
logger.info("No functions were replaced in the optimized code. Skipping optimization candidate.")
13401340
console.rule()
13411341
return None
@@ -1371,6 +1371,18 @@ def process_single_candidate(
13711371
)
13721372
console.rule()
13731373

1374+
# Handle A/A test: original code run through the candidate pipeline
1375+
if candidate.optimization_id == "original-code-aa-test":
1376+
if not is_successful(run_results):
1377+
logger.warning(
1378+
"A/A test FAILED! Original code did not pass its own correctness check. "
1379+
"This indicates a bug in the verification pipeline."
1380+
)
1381+
eval_ctx.record_failed_candidate(candidate.optimization_id)
1382+
return None
1383+
logger.info("A/A test passed: original code passed correctness verification as expected.")
1384+
return None
1385+
13741386
if not is_successful(run_results):
13751387
eval_ctx.record_failed_candidate(candidate.optimization_id)
13761388
return None
@@ -1514,6 +1526,15 @@ def determine_best_candidate(
15141526
language=self.function_to_optimize.language,
15151527
)
15161528

1529+
# Prepend original code as an A/A test candidate (must pass correctness by definition)
1530+
aa_test_candidate = OptimizedCandidate(
1531+
source_code=code_context.read_writable_code,
1532+
explanation="Original code (A/A baseline test)",
1533+
optimization_id="original-code-aa-test",
1534+
source=OptimizedCandidateSource.OPTIMIZE,
1535+
)
1536+
candidates = [aa_test_candidate, *candidates]
1537+
15171538
processor = CandidateProcessor(
15181539
candidates,
15191540
future_line_profile_results,

0 commit comments

Comments
 (0)