-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_pin_freshness.py
More file actions
253 lines (215 loc) · 9.74 KB
/
check_pin_freshness.py
File metadata and controls
253 lines (215 loc) · 9.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/usr/bin/env python3
"""Audit GitHub Actions pin freshness against the upstream registries.
`check_action_pins.py` validates pin **shape** — does the @ref match the
policy bucket. This script validates **freshness** — does the @ref still
resolve to something upstream, and does the trailing `# vN.M.P` comment
on a SHA pin still match the tag's current SHA?
Filed as #136 after PR #121 surfaced `astral-sh/setup-uv@v5` going
silently dead — the tag stopped resolving to anything in March 2026,
producing 0-jobs / 0-seconds CI failures. The shape gate doesn't catch
that class; this freshness gate does.
Behaviour:
- Walks every workflow + composite-action file via the same
`parse_workflow` machinery as `check_action_pins.py`.
- For each tag pin (`@v8`, `@v8.0.0`): GET
`https://api.github.com/repos/<action>/git/refs/tags/<ref>`. A 404 means
the tag no longer exists upstream — emit `::warning::` (or `::error::`
under strict mode).
- For each SHA pin (`@<40-hex>` + trailing `# vN.M.P` comment): GET
`/repos/<action>/git/refs/tags/<comment-version>` to fetch the tag's
current SHA. If the tag exists and resolves to a different SHA than
the pin, the upstream re-tagged — warn (potential supply-chain shift).
If the tag's SHA is a tag object (annotated tag), dereference one
level via `git/tags/<sha>` to get the commit SHA before comparing.
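- API failures (network, 4xx other than 404, 5xx) never abort the run:
`_fetch_json` collapses them to "no result", so they surface as an
ordinary finding (`::warning::` by default) — the gate's job is to
surface drift, not be a transient-network tripwire. Note that at this
layer an API failure is indistinguishable from a 404, so under strict
mode it is escalated to `::error::` like any other finding.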
Default: warn-not-fail (`exit 0` even on findings, with annotations).
With `PIN_FRESHNESS_STRICT=1`, findings escalate to errors (`exit 1`),
matching the `ASPIRATIONAL_STRICT=1` toggle pattern from #153.
Exit codes:
0 — every pin resolves cleanly OR strict mode is off and findings
are surfaced as warnings only
1 — strict mode is on and one or more pins failed freshness checks
2 — script-level error (workflows dir missing, parse failure, no
`GITHUB_TOKEN` set so we can't query the API)
Usage (from repo root, in CI with token):
GITHUB_TOKEN=... python .github/scripts/check_pin_freshness.py
"""
from __future__ import annotations

import http.client
import importlib.util
import json
import os
import sys
import urllib.error
import urllib.request
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from types import ModuleType
# Reuse `parse_workflow`, `_collect_yaml_files`, `_VERSION_COMMENT_RE`,
# `_SHA_RE`, etc. from check_action_pins.py rather than duplicate them.
# Importlib-based load mirrors the test pattern used elsewhere in the
# repo so this script stays standalone (no setup.py wiring needed).
#
# Directory that contains this script; `check_action_pins.py` is looked
# up in this same directory when it is loaded by file path.
_SCRIPT_DIR = Path(__file__).parent
def _load_pin_module() -> ModuleType:
    """Import the sibling `check_action_pins.py` by file path.

    The file is loaded under the module name `check_action_pins` from the
    directory held in `_SCRIPT_DIR`, executed, and returned.

    Raises:
        RuntimeError: when importlib cannot produce a spec (or the spec
            has no loader) for the file.
    """
    pin_script = _SCRIPT_DIR / "check_action_pins.py"
    spec = importlib.util.spec_from_file_location("check_action_pins", pin_script)
    loader = None if spec is None else spec.loader
    if spec is None or loader is None:
        failure = "could not load check_action_pins.py"
        raise RuntimeError(failure)

    pin_module = importlib.util.module_from_spec(spec)
    # The module must be visible in sys.modules *before* its body runs:
    # `@dataclass` looks up `sys.modules[cls.__module__]` while it is
    # processing a class, so the ActionRef dataclass would raise
    # AttributeError if this registration came after exec_module.
    sys.modules[spec.name] = pin_module
    loader.exec_module(pin_module)
    return pin_module
# Loaded once, at import time of this script. The checks below reach the
# shared parsing helpers and regexes of check_action_pins.py through
# this module handle (`_pins.parse_workflow`, `_pins._SHA_RE`, ...).
_pins = _load_pin_module()
# Root of the GitHub REST API; every endpoint URL is built on this prefix.
_API_BASE = "https://api.github.com"
# Exceptions that mean "the API call failed" and that `_fetch_json`
# collapses to None. `URLError` only wraps errors raised while *sending*
# the request; failures while waiting for or reading the response arrive
# as raw `OSError` subclasses (e.g. `ConnectionResetError`,
# `ssl.SSLError`) or as `http.client.HTTPException` (e.g.
# `IncompleteRead`, `InvalidURL`), and a body that is not valid UTF-8
# raises `UnicodeDecodeError`. All of them are listed so that a flaky
# connection yields a finding instead of an uncaught traceback.
GITHUB_API_ERRORS = (
    urllib.error.URLError,
    TimeoutError,
    json.JSONDecodeError,
    OSError,
    http.client.HTTPException,
    UnicodeDecodeError,
)
def _fetch_json(url: str, token: str) -> dict[str, object] | None:
    """Fetch `url` with GitHub API headers and return the decoded JSON object.

    The request carries the token as a Bearer credential, asks for the
    `application/vnd.github+json` media type, pins the API version to
    `2022-11-28`, and times out after 10 seconds.

    Returns:
        The parsed body when it is a JSON object (a `dict`). `None` when
        the call raises one of `GITHUB_API_ERRORS` or when the body
        decodes to anything other than a dict. Callers decide how to
        report a `None`; nothing is raised for those failures, so this
        gate does not become a transient-CI tripwire.
    """
    api_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    request = urllib.request.Request(url, headers=api_headers)  # noqa: S310 — fixed api.github.com host
    try:
        with urllib.request.urlopen(request, timeout=10) as reply:  # noqa: S310
            body_text = reply.read().decode("utf-8")
            decoded = json.loads(body_text)
    except GITHUB_API_ERRORS:
        return None
    if isinstance(decoded, dict):
        return decoded
    return None
def _action_repo(action: str) -> str:
    """Strip any sub-path from an action reference, leaving `owner/repo`.

    An action can be written as `owner/repo` or with a path inside the
    repository appended, e.g. `github/codeql-action/init`. The REST
    endpoints used by this script are rooted at `/repos/<owner>/<repo>/`,
    so only the first two slash-separated segments may be sent; the full
    string would 404 for every sub-path action and be misreported as
    "tag no longer resolves".

    A string without any `/` is returned unchanged.
    """
    owner, slash, remainder = action.partition("/")
    if not slash:
        # No separator at all: nothing to trim.
        return action
    repo_name = remainder.split("/", 1)[0]
    return f"{owner}/{repo_name}"
def _resolve_tag_sha(action: str, tag: str, token: str) -> str | None:
    """Return the SHA that `tag` ultimately points at, or None.

    The lookup starts at `/repos/<owner>/<repo>/git/refs/tags/<tag>`:

    - A lightweight tag's `object` has type `commit`; its SHA is returned
      directly (one GET).
    - An annotated tag's `object` has type `tag`; the tag object is
      fetched from `/git/tags/<sha>` and its own `object` is followed.
      A tag object may itself point at another tag object, so the peel
      is repeated until a non-tag object is reached. Stopping after one
      level would hand back a tag-object SHA, which can never equal a
      pinned commit SHA and would be misreported as a re-tag.

    Args:
        action: action reference, optionally with a sub-path; only its
            `owner/repo` part is used.
        tag: tag name to resolve.
        token: GitHub token forwarded to `_fetch_json`.

    Returns:
        The resolved SHA, or None when the ref is missing, any request
        fails, a payload lacks the expected `object`/`sha` fields, the
        ref points at something other than a commit or tag, or the tag
        chain is deeper than the peel limit.
    """
    # Upper bound on tag -> tag hops, purely defensive; real-world chains
    # are one or two levels deep.
    max_peel_depth = 5

    repo = _action_repo(action)
    ref = _fetch_json(f"{_API_BASE}/repos/{repo}/git/refs/tags/{tag}", token)
    if ref is None:
        return None
    target = ref.get("object")
    if not isinstance(target, dict):
        return None
    target_type = target.get("type")
    target_sha = target.get("sha")
    if not isinstance(target_sha, str):
        return None
    if target_type == "commit":
        return target_sha
    if target_type != "tag":
        return None

    # Annotated tag — dereference until the chain leaves tag objects.
    for _ in range(max_peel_depth):
        annotated = _fetch_json(
            f"{_API_BASE}/repos/{repo}/git/tags/{target_sha}", token
        )
        if annotated is None:
            return None
        inner = annotated.get("object")
        if not isinstance(inner, dict):
            return None
        inner_sha = inner.get("sha")
        if not isinstance(inner_sha, str):
            return None
        if inner.get("type") != "tag":
            return inner_sha
        target_sha = inner_sha
    return None
def _check_tag_pin(ref: object, token: str) -> str | None:
    """Check that the tag named by a tag pin still resolves upstream.

    `ref` is read for its `pin` (the tag) and `action` attributes.
    Returns the warning text when `_resolve_tag_sha` yields nothing —
    which covers both a deleted tag and a failed API call — and None
    when the tag resolves.
    """
    pinned_tag = ref.pin  # type: ignore[attr-defined]
    action = ref.action  # type: ignore[attr-defined]
    if _resolve_tag_sha(action, pinned_tag, token) is not None:
        return None
    return (
        f"{action}@{pinned_tag} — upstream tag no longer resolves "
        "(404 or API failure). If 404, the tag was deleted/renamed; "
        "bump to a current tag or SHA pin."
    )
def _check_sha_pin(ref: object, token: str) -> str | None:
    """Check a SHA pin against the tag named in its trailing comment.

    `ref` is read for its `comment`, `action` and `pin` attributes.

    Returns:
        None when there is no comment (the shape audit owns that case),
        when the comment contains no match for `_VERSION_COMMENT_RE`, or
        when the documented tag resolves to the pinned SHA
        (case-insensitive comparison). Otherwise the warning text: either
        the documented tag no longer resolves, or it now resolves to a
        different SHA than the pin.
    """
    comment = ref.comment  # type: ignore[attr-defined]
    if not comment:
        return None  # shape audit owns the missing-comment case

    version = _pins._VERSION_COMMENT_RE.search(comment)
    if not version:
        return None
    documented_tag = version.group(0)

    action = ref.action  # type: ignore[attr-defined]
    upstream_sha = _resolve_tag_sha(action, documented_tag, token)

    pin = ref.pin  # type: ignore[attr-defined]
    # Shared prefix of both messages: the abbreviated pin plus the tag
    # the comment claims it corresponds to.
    label = f"{action}@{pin[:8]}… (commented `{documented_tag}`) "
    if upstream_sha is None:
        return label + "— upstream tag no longer resolves; comment may be stale."
    if upstream_sha.lower() == pin.lower():
        return None
    return (
        label
        + "— upstream tag has been re-tagged to "
        + f"{upstream_sha[:8]}…; pin no longer matches the documented tag."
    )
def main() -> int:
    """Run the pin-freshness audit and return the process exit code.

    Steps:

    1. Require `GITHUB_TOKEN`; without it nothing can be queried.
    2. Collect the workflow / composite-action files and parse every
       action reference out of them via check_action_pins.py.
    3. Check each reference that has a pin: SHA pins (per `_SHA_RE`) via
       `_check_sha_pin`, everything else via `_check_tag_pin`. The
       verdict depends only on the reference's action, pin and comment,
       so it is computed once per distinct combination and reused — an
       action pinned identically in many workflow files costs one set of
       API calls instead of one per occurrence.
    4. Print one workflow annotation per finding (`::warning` by default,
       `::error` when `PIN_FRESHNESS_STRICT=1`), append
       `findings_count=<n>` to the file named by `GITHUB_OUTPUT` when
       that variable is set, and print a one-line summary.

    Returns:
        2 when `GITHUB_TOKEN` is unset/empty or no YAML files are found;
        1 when strict mode is on and there is at least one finding;
        0 otherwise.
    """
    token = os.environ.get("GITHUB_TOKEN", "")
    if not token:
        print(
            "::error::GITHUB_TOKEN required for pin-freshness audit "
            "(API rate limit + private-repo access)."
        )
        return 2

    yml_files = _pins._collect_yaml_files()
    if not yml_files:
        print("::error::no workflow / composite-action files found")
        return 2

    refs = []
    for path in yml_files:
        refs.extend(_pins.parse_workflow(path))

    strict = os.environ.get("PIN_FRESHNESS_STRICT", "") == "1"

    # Verdict per distinct (action, pin, comment): None means "fresh",
    # a string is the finding text. Every occurrence is still reported
    # with its own file/line; only the upstream lookups are shared.
    verdicts: dict[tuple[object, object, object], str | None] = {}
    findings: list[tuple[object, str]] = []
    for ref in refs:
        if not ref.pin:
            continue  # shape audit catches missing-@
        key = (ref.action, ref.pin, ref.comment)
        if key not in verdicts:
            if _pins._SHA_RE.match(ref.pin):
                verdicts[key] = _check_sha_pin(ref, token)
            else:
                verdicts[key] = _check_tag_pin(ref, token)
        problem = verdicts[key]
        if problem is not None:
            findings.append((ref, problem))

    severity = "error" if strict else "warning"
    for ref, problem in findings:
        print(f"::{severity} file={ref.file},line={ref.line}::{problem}")  # type: ignore[attr-defined]

    summary = (
        f"Pin-freshness audit: {len(refs)} pins checked across "
        f"{len(yml_files)} files; {len(findings)} finding(s)"
    )

    # Surface the finding count as a workflow output so the calling
    # workflow can decide whether to open a tracking issue. Skipped when
    # GITHUB_OUTPUT isn't set (local runs / tests).
    output_path = os.environ.get("GITHUB_OUTPUT", "")
    if output_path:
        with Path(output_path).open("a", encoding="utf-8") as fh:
            fh.write(f"findings_count={len(findings)}\n")

    if findings:
        suffix = " (strict — failing)" if strict else " (warn-only)"
        print(summary + suffix + ".")
        return 1 if strict else 0
    print(summary + ".")
    return 0
# Script entry point: run the audit and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())