Skip to content

Commit 0c1ab25

Browse files
fix[fuzz]: correct stack frame compare (#6424)
## Does this PR close an open issue or discussion? <!-- This helps us keep track of fixed issues and changes. --> - Closes #. ## What changes are included in this PR? <!-- What changes are included here, if an issue or discussion is attached, there's no need to duplicate the details. --> ## What is the rationale for this change? <!-- Why do you propose this change, and why did you choose this approach. This helps reviewers and other readers understand changes, creates a shared understanding of the issue and codebase, and improves their ability to work with this change and offer better suggestions. --> ## How is this change tested? <!-- Changes should be tested, we expect changes to fit in one of the following categories: 1. Verifying existing behavior is maintained. 2. For serialization related changes - Compatibility should be maintained or explicitly broken. 3. For new behavior and functionality, this helps us maintain that desired behavior in the future. --> ## Are there any user-facing changes? <!-- Does the change affect users in any of the following ways: 1. Breaks public APIs in some way. 2. Changes the underlying behavior of one of the integrations. 3. Should some documentation be changed to reflect this change? In the case some public API is changed in a breaking way, make sure to add the appropriate label. --> --------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 007ef8c commit 0c1ab25

6 files changed

Lines changed: 984 additions & 48 deletions

File tree

.github/scripts/fuzz_report/cli.py

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import argparse
55
import json
66
import os
7+
import re
78
import subprocess
89
import sys
910
from pathlib import Path
@@ -14,6 +15,9 @@
1415

1516
TEMPLATES_DIR = Path(__file__).parent / "templates"
1617

18+
# Marker used to find/update the single recurrence-tracking comment.
19+
_RECURRENCE_MARKER = "<!-- fuzzer-recurrence-tracker -->"
20+
_RECURRENCE_COUNT_RE = r"<!-- fuzzer-recurrence-tracker count:(\d+) -->"
1721
# Variables that must be set (non-empty) before creating or commenting on an issue.
1822
REQUIRED_REPORT_VARIABLES = ["FUZZ_TARGET", "CRASH_FILE", "ARTIFACT_URL"]
1923

@@ -83,7 +87,14 @@ def _build_template_variables(
8387
def _determine_action(
8488
dedup_path: str | Path | None,
8589
) -> tuple[str, dict | None]:
86-
"""Determine action from dedup result. Returns (action, dedup_dict)."""
90+
"""Determine action from dedup result. Returns (action, dedup_dict).
91+
92+
Actions:
93+
create – new issue
94+
skip – exact duplicate, do nothing
95+
update_count – high-confidence duplicate, bump recurrence counter
96+
comment – medium-confidence duplicate, post full comment
97+
"""
8798
if not dedup_path or not Path(dedup_path).exists():
8899
return "create", None
89100

@@ -94,9 +105,90 @@ def _determine_action(
94105
if dedup.get("confidence") == "exact":
95106
return "skip", dedup
96107

108+
if dedup.get("confidence") == "high":
109+
return "update_count", dedup
110+
97111
return "comment", dedup
98112

99113

114+
def _render_recurrence_body(count: int) -> str:
115+
"""Render the minimal recurrence-tracking comment body."""
116+
return (
117+
f"Seen **{count}** time{'s' if count != 1 else ''}\n\n"
118+
f"<!-- fuzzer-recurrence-tracker count:{count} -->"
119+
)
120+
121+
122+
def _update_recurrence_count(repo: str, issue_number: int | str) -> int:
    """Find-or-create the recurrence comment, incrementing its count.

    Uses a compare-and-swap pattern: reads the current count from the
    existing tracker comment (if any), increments it, and writes back.

    Args:
        repo: "owner/name" GitHub repository slug.
        issue_number: issue to attach the recurrence comment to.

    Returns:
        The new (incremented) count.

    Raises:
        subprocess.CalledProcessError: if any `gh api` call fails.
    """
    # BUGFIX: match on the marker *prefix*. The rendered body only ever
    # contains the count-bearing form "<!-- fuzzer-recurrence-tracker count:N -->"
    # (see _render_recurrence_body), so filtering on the bare
    # "<!-- fuzzer-recurrence-tracker -->" string never finds a previously
    # created comment — a new comment was posted every run and the count
    # stayed at 1. The prefix matches both forms.
    marker_prefix = "<!-- fuzzer-recurrence-tracker"

    # List the issue's comments, keeping only recurrence-tracker comments.
    result = subprocess.run(
        [
            "gh",
            "api",
            f"repos/{repo}/issues/{issue_number}/comments",
            "--paginate",
            "--jq",
            f'.[] | select(.body | contains("{marker_prefix}")) | {{id: .id, body: .body}}',
        ],
        capture_output=True,
        text=True,
        check=True,
    )

    existing_id = None
    current_count = 0

    # One JSON object per line (jq stream); stop at the first comment whose
    # count is parseable.
    for line in result.stdout.strip().splitlines():
        if not line:
            continue
        comment = json.loads(line)
        existing_id = comment["id"]
        m = re.search(_RECURRENCE_COUNT_RE, comment["body"])
        if m:
            current_count = int(m.group(1))
            break

    new_count = current_count + 1
    body = _render_recurrence_body(new_count)

    if existing_id:
        # Update existing comment (not atomic — race is acceptable since
        # fuzz CI jobs are serialized)
        subprocess.run(
            [
                "gh",
                "api",
                f"repos/{repo}/issues/comments/{existing_id}",
                "-X",
                "PATCH",
                "-f",
                f"body={body}",
            ],
            check=True,
        )
    else:
        # Create new recurrence comment
        subprocess.run(
            [
                "gh",
                "api",
                f"repos/{repo}/issues/{issue_number}/comments",
                "-f",
                f"body={body}",
            ],
            check=True,
        )

    return new_count
190+
191+
100192
def cmd_extract(args: argparse.Namespace) -> int:
101193
"""Extract crash info from log file."""
102194
if not Path(args.log_file).exists():
@@ -193,6 +285,15 @@ def cmd_report(args: argparse.Namespace) -> int:
193285
_write_github_output("issue_number", str(existing_issue))
194286
return 0
195287

288+
if action == "update_count":
289+
new_count = _update_recurrence_count(args.repo, existing_issue)
290+
print(
291+
f"Updated recurrence count on #{existing_issue} to {new_count}",
292+
file=sys.stderr,
293+
)
294+
_write_github_output("issue_number", str(existing_issue))
295+
return 0
296+
196297
if action == "comment":
197298
variables.setdefault("DEDUP_REASON", dedup.get("reason", ""))
198299
variables.setdefault("DEDUP_CONFIDENCE", dedup.get("confidence", ""))
@@ -270,6 +371,7 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
270371
print(f" panic_message: {crash_info.panic_message}", file=sys.stderr)
271372
print(f" crash_type: {crash_info.crash_type}", file=sys.stderr)
272373
print(f" seed_hash: {crash_info.seed_hash}", file=sys.stderr)
374+
print(f" stack_frames: {crash_info.stack_frames[:5]}", file=sys.stderr)
273375
print(file=sys.stderr)
274376

275377
# Step 2: Dedup (if issues file provided)
@@ -286,6 +388,8 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
286388
print(f" confidence: {dedup_result.confidence}", file=sys.stderr)
287389
print(f" issue: #{dedup_result.issue_number}", file=sys.stderr)
288390
print(f" reason: {dedup_result.reason}", file=sys.stderr)
391+
if dedup_result.debug:
392+
print(f" debug: {json.dumps(dedup_result.debug, indent=4)}", file=sys.stderr)
289393
print(file=sys.stderr)
290394

291395
# Write dedup to temp file so _determine_action can read it
@@ -322,6 +426,15 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
322426
)
323427
return 0
324428

429+
if action == "update_count":
430+
print(
431+
f"(would update recurrence count on #{existing_issue})",
432+
file=sys.stderr,
433+
)
434+
print(file=sys.stderr)
435+
print(_render_recurrence_body(1))
436+
return 0
437+
325438
if action == "comment":
326439
template_path = TEMPLATES_DIR / "related_comment.md"
327440
variables.setdefault("DEDUP_REASON", dedup.get("reason", ""))

.github/scripts/fuzz_report/dedup.py

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ class DedupResult:
2121
issue_title: str | None = None
2222
reason: str = ""
2323
check_order: int | None = None
24+
# Debug details: what values were compared to produce this result
25+
debug: dict | None = None
2426

2527
def to_dict(self) -> dict:
    """Serialize this result to a dict, dropping fields whose value is None."""
    data = asdict(self)
    return {name: value for name, value in data.items() if value is not None}
@@ -56,9 +58,15 @@ def check_seed_hash(seed_hash: str, issues: list[dict]) -> DedupResult:
5658
issue_url=issue.get("url"),
5759
issue_title=issue.get("title"),
5860
reason="Exact seed hash match - same crash input",
61+
debug={"seed_hash": seed_hash},
5962
)
6063

61-
return DedupResult(duplicate=False, check="seed_hash", reason="No matching seed hash found")
64+
return DedupResult(
65+
duplicate=False,
66+
check="seed_hash",
67+
reason="No matching seed hash found",
68+
debug={"seed_hash": seed_hash},
69+
)
6270

6371

6472
def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult:
@@ -68,6 +76,7 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult
6876
duplicate=False,
6977
check="panic_location",
7078
reason="No panic location provided",
79+
debug={"panic_location": panic_location or ""},
7180
)
7281

7382
# Extract file:line pattern
@@ -85,19 +94,33 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult
8594
issue_url=issue.get("url"),
8695
issue_title=issue.get("title"),
8796
reason=f"Same panic location (file:line): {file_pattern}",
97+
debug={
98+
"panic_location": panic_location,
99+
"file_pattern": file_pattern,
100+
"matched_issue": issue["number"],
101+
},
88102
)
89103

90104
return DedupResult(
91105
duplicate=False,
92106
check="panic_location",
93107
reason="No matching panic location found",
108+
debug={
109+
"panic_location": panic_location,
110+
"file_pattern": file_pattern,
111+
},
94112
)
95113

96114

97115
def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult:
98116
"""Check if stack trace hash exists in any issue body."""
99117
if not stack_hash or stack_hash == "unknown":
100-
return DedupResult(duplicate=False, check="stack_trace", reason="No stack hash provided")
118+
return DedupResult(
119+
duplicate=False,
120+
check="stack_trace",
121+
reason="No stack hash provided",
122+
debug={"stack_hash": stack_hash or ""},
123+
)
101124

102125
for issue in issues:
103126
body = issue.get("body", "")
@@ -110,20 +133,28 @@ def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult:
110133
issue_url=issue.get("url"),
111134
issue_title=issue.get("title"),
112135
reason="Same stack trace (top 5 frames match)",
136+
debug={
137+
"stack_hash": stack_hash,
138+
"matched_issue": issue["number"],
139+
},
113140
)
114141

115142
return DedupResult(
116143
duplicate=False,
117144
check="stack_trace",
118145
reason="No matching stack trace hash found",
146+
debug={"stack_hash": stack_hash},
119147
)
120148

121149

122150
def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict]) -> DedupResult:
123151
"""Check if error pattern exists in any issue body."""
124152
if not message_hash:
125153
return DedupResult(
126-
duplicate=False, check="error_pattern", reason="No message hash provided"
154+
duplicate=False,
155+
check="error_pattern",
156+
reason="No message hash provided",
157+
debug={"error_variant": error_variant or ""},
127158
)
128159

129160
# First try: exact message hash match
@@ -138,6 +169,11 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict
138169
issue_url=issue.get("url"),
139170
issue_title=issue.get("title"),
140171
reason="Same error pattern (normalized message match)",
172+
debug={
173+
"message_hash": message_hash,
174+
"error_variant": error_variant,
175+
"matched_issue": issue["number"],
176+
},
141177
)
142178

143179
# Second try: same error variant (lower confidence)
@@ -153,43 +189,68 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict
153189
issue_url=issue.get("url"),
154190
issue_title=issue.get("title"),
155191
reason=f"Same error variant type: {error_variant}",
192+
debug={
193+
"message_hash": message_hash,
194+
"error_variant": error_variant,
195+
"matched_issue": issue["number"],
196+
},
156197
)
157198

158199
return DedupResult(
159-
duplicate=False, check="error_pattern", reason="No matching error pattern found"
200+
duplicate=False,
201+
check="error_pattern",
202+
reason="No matching error pattern found",
203+
debug={
204+
"message_hash": message_hash,
205+
"error_variant": error_variant,
206+
},
160207
)
161208

162209

163210
def check_duplicate(crash_info: CrashInfo, issues_path: str | Path) -> DedupResult:
    """Run all deduplication checks in order. First match wins."""
    issues = load_issues(issues_path)

    # Summary of extracted values for debugging (attached to every result)
    extraction_summary = {
        "panic_location": crash_info.panic_location,
        "crash_location": crash_info.crash_location,
        "error_variant": crash_info.error_variant,
        "stack_frames_top5": crash_info.stack_frames[:5],
        "normalized_message": crash_info.normalized_message,
    }

    # Checks in priority order: strongest signal first.
    # 1: seed hash (exact duplicate)  2: panic location (same crash site)
    # 3: stack trace hash (same call path)  4: error pattern (normalized message)
    ordered_checks = (
        lambda: check_seed_hash(crash_info.seed_hash, issues),
        lambda: check_panic_location(crash_info.panic_location, issues),
        lambda: check_stack_trace(crash_info.stack_trace_hash, issues),
        lambda: check_error_pattern(crash_info.message_hash, crash_info.error_variant, issues),
    )

    for order, run_check in enumerate(ordered_checks, start=1):
        result = run_check()
        if result.duplicate:
            result.check_order = order
            result.debug = {**(result.debug or {}), "extraction": extraction_summary}
            return result

    # No matches found
    return DedupResult(
        duplicate=False,
        reason="No duplicate detected by any check",
        debug={"extraction": extraction_summary},
    )

0 commit comments

Comments
 (0)