Skip to content

Commit 0c1ab25

Browse files
fix[fuzz]: correct stack frame compare (#6424)
## Does this PR close an open issue or discussion? <!-- This helps us keep track of fixed issues and changes. --> - Closes #. ## What changes are included in this PR? <!-- What changes are included here, if an issue or discussion is attached, there's no need to duplicate the details. --> ## What is the rationale for this change? <!-- Why do you propose this change, and why did you choose this approach. This helps reviewers and other readers understand changes, creates a shared understanding of the issue and codebase, and improves their ability to work with this change and offer better suggestions. --> ## How is this change tested? <!-- Changes should be tested, we expect changes to fit in one of the following categories: 1. Verifying existing behavior is maintained. 2. For serialization related changes - Compatibility should be maintained or explicitly broken. 3. For new behavior and functionality, this helps us maintain that desired behavior in the future. --> ## Are there any user-facing changes? <!-- Does the change affect users in any of the following ways: 1. Breaks public APIs in some way. 2. Changes the underlying behavior of one of the integrations. 3. Should some documentation be changed to reflect this change? In the case some public API is changed in a breaking way, make sure to add the appropriate label. --> --------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 007ef8c commit 0c1ab25

6 files changed

Lines changed: 984 additions & 48 deletions

File tree

.github/scripts/fuzz_report/cli.py

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import argparse
55
import json
66
import os
7+
import re
78
import subprocess
89
import sys
910
from pathlib import Path
@@ -14,6 +15,9 @@
1415

1516
TEMPLATES_DIR = Path(__file__).parent / "templates"
1617

18+
# Marker used to find/update the single recurrence-tracking comment.
19+
_RECURRENCE_MARKER = "<!-- fuzzer-recurrence-tracker -->"
20+
_RECURRENCE_COUNT_RE = r"<!-- fuzzer-recurrence-tracker count:(\d+) -->"
1721
# Variables that must be set (non-empty) before creating or commenting on an issue.
1822
REQUIRED_REPORT_VARIABLES = ["FUZZ_TARGET", "CRASH_FILE", "ARTIFACT_URL"]
1923

@@ -83,7 +87,14 @@ def _build_template_variables(
8387
def _determine_action(
8488
dedup_path: str | Path | None,
8589
) -> tuple[str, dict | None]:
86-
"""Determine action from dedup result. Returns (action, dedup_dict)."""
90+
"""Determine action from dedup result. Returns (action, dedup_dict).
91+
92+
Actions:
93+
create – new issue
94+
skip – exact duplicate, do nothing
95+
update_count – high-confidence duplicate, bump recurrence counter
96+
comment – medium-confidence duplicate, post full comment
97+
"""
8798
if not dedup_path or not Path(dedup_path).exists():
8899
return "create", None
89100

@@ -94,9 +105,90 @@ def _determine_action(
94105
if dedup.get("confidence") == "exact":
95106
return "skip", dedup
96107

108+
if dedup.get("confidence") == "high":
109+
return "update_count", dedup
110+
97111
return "comment", dedup
98112

99113

114+
def _render_recurrence_body(count: int) -> str:
115+
"""Render the minimal recurrence-tracking comment body."""
116+
return (
117+
f"Seen **{count}** time{'s' if count != 1 else ''}\n\n"
118+
f"<!-- fuzzer-recurrence-tracker count:{count} -->"
119+
)
120+
121+
122+
def _update_recurrence_count(repo: str, issue_number: int | str) -> int:
    """Find-or-create the recurrence comment, incrementing its count.

    Uses a compare-and-swap pattern: reads the current count from the
    existing tracker comment (if any), increments it, and writes back.

    Args:
        repo: "owner/name" GitHub repository slug.
        issue_number: issue to attach the recurrence comment to.

    Returns:
        The new (incremented) count.

    Raises:
        subprocess.CalledProcessError: if any `gh api` call fails.
    """
    # BUGFIX: match on the marker *prefix*. The rendered body only ever
    # contains the count-bearing form "<!-- fuzzer-recurrence-tracker count:N -->"
    # (see _render_recurrence_body), so filtering on the bare
    # "<!-- fuzzer-recurrence-tracker -->" string never finds a previously
    # created comment — a new comment was posted every run and the count
    # stayed at 1. The prefix matches both forms.
    marker_prefix = "<!-- fuzzer-recurrence-tracker"

    # List the issue's comments, keeping only recurrence-tracker comments.
    result = subprocess.run(
        [
            "gh",
            "api",
            f"repos/{repo}/issues/{issue_number}/comments",
            "--paginate",
            "--jq",
            f'.[] | select(.body | contains("{marker_prefix}")) | {{id: .id, body: .body}}',
        ],
        capture_output=True,
        text=True,
        check=True,
    )

    existing_id = None
    current_count = 0

    # One JSON object per line (jq stream); stop at the first comment whose
    # count is parseable.
    for line in result.stdout.strip().splitlines():
        if not line:
            continue
        comment = json.loads(line)
        existing_id = comment["id"]
        m = re.search(_RECURRENCE_COUNT_RE, comment["body"])
        if m:
            current_count = int(m.group(1))
            break

    new_count = current_count + 1
    body = _render_recurrence_body(new_count)

    if existing_id:
        # Update existing comment (not atomic — race is acceptable since
        # fuzz CI jobs are serialized)
        subprocess.run(
            [
                "gh",
                "api",
                f"repos/{repo}/issues/comments/{existing_id}",
                "-X",
                "PATCH",
                "-f",
                f"body={body}",
            ],
            check=True,
        )
    else:
        # Create new recurrence comment
        subprocess.run(
            [
                "gh",
                "api",
                f"repos/{repo}/issues/{issue_number}/comments",
                "-f",
                f"body={body}",
            ],
            check=True,
        )

    return new_count
190+
191+
100192
def cmd_extract(args: argparse.Namespace) -> int:
101193
"""Extract crash info from log file."""
102194
if not Path(args.log_file).exists():
@@ -193,6 +285,15 @@ def cmd_report(args: argparse.Namespace) -> int:
193285
_write_github_output("issue_number", str(existing_issue))
194286
return 0
195287

288+
if action == "update_count":
289+
new_count = _update_recurrence_count(args.repo, existing_issue)
290+
print(
291+
f"Updated recurrence count on #{existing_issue} to {new_count}",
292+
file=sys.stderr,
293+
)
294+
_write_github_output("issue_number", str(existing_issue))
295+
return 0
296+
196297
if action == "comment":
197298
variables.setdefault("DEDUP_REASON", dedup.get("reason", ""))
198299
variables.setdefault("DEDUP_CONFIDENCE", dedup.get("confidence", ""))
@@ -270,6 +371,7 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
270371
print(f" panic_message: {crash_info.panic_message}", file=sys.stderr)
271372
print(f" crash_type: {crash_info.crash_type}", file=sys.stderr)
272373
print(f" seed_hash: {crash_info.seed_hash}", file=sys.stderr)
374+
print(f" stack_frames: {crash_info.stack_frames[:5]}", file=sys.stderr)
273375
print(file=sys.stderr)
274376

275377
# Step 2: Dedup (if issues file provided)
@@ -286,6 +388,8 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
286388
print(f" confidence: {dedup_result.confidence}", file=sys.stderr)
287389
print(f" issue: #{dedup_result.issue_number}", file=sys.stderr)
288390
print(f" reason: {dedup_result.reason}", file=sys.stderr)
391+
if dedup_result.debug:
392+
print(f" debug: {json.dumps(dedup_result.debug, indent=4)}", file=sys.stderr)
289393
print(file=sys.stderr)
290394

291395
# Write dedup to temp file so _determine_action can read it
@@ -322,6 +426,15 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
322426
)
323427
return 0
324428

429+
if action == "update_count":
430+
print(
431+
f"(would update recurrence count on #{existing_issue})",
432+
file=sys.stderr,
433+
)
434+
print(file=sys.stderr)
435+
print(_render_recurrence_body(1))
436+
return 0
437+
325438
if action == "comment":
326439
template_path = TEMPLATES_DIR / "related_comment.md"
327440
variables.setdefault("DEDUP_REASON", dedup.get("reason", ""))

.github/scripts/fuzz_report/dedup.py

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ class DedupResult:
2121
issue_title: str | None = None
2222
reason: str = ""
2323
check_order: int | None = None
24+
# Debug details: what values were compared to produce this result
25+
debug: dict | None = None
2426

2527
def to_dict(self) -> dict:
    """Serialize this result to a dict, dropping fields whose value is None."""
    data = asdict(self)
    return {name: value for name, value in data.items() if value is not None}
@@ -56,9 +58,15 @@ def check_seed_hash(seed_hash: str, issues: list[dict]) -> DedupResult:
5658
issue_url=issue.get("url"),
5759
issue_title=issue.get("title"),
5860
reason="Exact seed hash match - same crash input",
61+
debug={"seed_hash": seed_hash},
5962
)
6063

61-
return DedupResult(duplicate=False, check="seed_hash", reason="No matching seed hash found")
64+
return DedupResult(
65+
duplicate=False,
66+
check="seed_hash",
67+
reason="No matching seed hash found",
68+
debug={"seed_hash": seed_hash},
69+
)
6270

6371

6472
def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult:
@@ -68,6 +76,7 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult
6876
duplicate=False,
6977
check="panic_location",
7078
reason="No panic location provided",
79+
debug={"panic_location": panic_location or ""},
7180
)
7281

7382
# Extract file:line pattern
@@ -85,19 +94,33 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult
8594
issue_url=issue.get("url"),
8695
issue_title=issue.get("title"),
8796
reason=f"Same panic location (file:line): {file_pattern}",
97+
debug={
98+
"panic_location": panic_location,
99+
"file_pattern": file_pattern,
100+
"matched_issue": issue["number"],
101+
},
88102
)
89103

90104
return DedupResult(
91105
duplicate=False,
92106
check="panic_location",
93107
reason="No matching panic location found",
108+
debug={
109+
"panic_location": panic_location,
110+
"file_pattern": file_pattern,
111+
},
94112
)
95113

96114

97115
def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult:
98116
"""Check if stack trace hash exists in any issue body."""
99117
if not stack_hash or stack_hash == "unknown":
100-
return DedupResult(duplicate=False, check="stack_trace", reason="No stack hash provided")
118+
return DedupResult(
119+
duplicate=False,
120+
check="stack_trace",
121+
reason="No stack hash provided",
122+
debug={"stack_hash": stack_hash or ""},
123+
)
101124

102125
for issue in issues:
103126
body = issue.get("body", "")
@@ -110,20 +133,28 @@ def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult:
110133
issue_url=issue.get("url"),
111134
issue_title=issue.get("title"),
112135
reason="Same stack trace (top 5 frames match)",
136+
debug={
137+
"stack_hash": stack_hash,
138+
"matched_issue": issue["number"],
139+
},
113140
)
114141

115142
return DedupResult(
116143
duplicate=False,
117144
check="stack_trace",
118145
reason="No matching stack trace hash found",
146+
debug={"stack_hash": stack_hash},
119147
)
120148

121149

122150
def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict]) -> DedupResult:
123151
"""Check if error pattern exists in any issue body."""
124152
if not message_hash:
125153
return DedupResult(
126-
duplicate=False, check="error_pattern", reason="No message hash provided"
154+
duplicate=False,
155+
check="error_pattern",
156+
reason="No message hash provided",
157+
debug={"error_variant": error_variant or ""},
127158
)
128159

129160
# First try: exact message hash match
@@ -138,6 +169,11 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict
138169
issue_url=issue.get("url"),
139170
issue_title=issue.get("title"),
140171
reason="Same error pattern (normalized message match)",
172+
debug={
173+
"message_hash": message_hash,
174+
"error_variant": error_variant,
175+
"matched_issue": issue["number"],
176+
},
141177
)
142178

143179
# Second try: same error variant (lower confidence)
@@ -153,43 +189,68 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict
153189
issue_url=issue.get("url"),
154190
issue_title=issue.get("title"),
155191
reason=f"Same error variant type: {error_variant}",
192+
debug={
193+
"message_hash": message_hash,
194+
"error_variant": error_variant,
195+
"matched_issue": issue["number"],
196+
},
156197
)
157198

158199
return DedupResult(
159-
duplicate=False, check="error_pattern", reason="No matching error pattern found"
200+
duplicate=False,
201+
check="error_pattern",
202+
reason="No matching error pattern found",
203+
debug={
204+
"message_hash": message_hash,
205+
"error_variant": error_variant,
206+
},
160207
)
161208

162209

163210
def check_duplicate(crash_info: CrashInfo, issues_path: str | Path) -> DedupResult:
    """Run all deduplication checks in order. First match wins."""
    issues = load_issues(issues_path)

    # Summary of extracted values for debugging (attached to every result)
    extraction_summary = {
        "panic_location": crash_info.panic_location,
        "crash_location": crash_info.crash_location,
        "error_variant": crash_info.error_variant,
        "stack_frames_top5": crash_info.stack_frames[:5],
        "normalized_message": crash_info.normalized_message,
    }

    # Checks in priority order: strongest signal first.
    # 1: seed hash (exact duplicate)  2: panic location (same crash site)
    # 3: stack trace hash (same call path)  4: error pattern (normalized message)
    ordered_checks = (
        lambda: check_seed_hash(crash_info.seed_hash, issues),
        lambda: check_panic_location(crash_info.panic_location, issues),
        lambda: check_stack_trace(crash_info.stack_trace_hash, issues),
        lambda: check_error_pattern(crash_info.message_hash, crash_info.error_variant, issues),
    )

    for order, run_check in enumerate(ordered_checks, start=1):
        result = run_check()
        if result.duplicate:
            result.check_order = order
            result.debug = {**(result.debug or {}), "extraction": extraction_summary}
            return result

    # No matches found
    return DedupResult(
        duplicate=False,
        reason="No duplicate detected by any check",
        debug={"extraction": extraction_summary},
    )

0 commit comments

Comments
 (0)