-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_commit_types.py
More file actions
264 lines (226 loc) · 10 KB
/
check_commit_types.py
File metadata and controls
264 lines (226 loc) · 10 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
#!/usr/bin/env python3
"""Verify the commit-type allowlist + subject-case rule stay in sync.
Two configs hand-encode the same conventional-commit policy:
1. ``[tool.commitizen].customize.schema_pattern`` in ``pyproject.toml`` —
the commitizen regex (commit-msg hook, local).
2. ``.github/workflows/pr-title.yml`` ``types:`` input to the
``amannn/action-semantic-pull-request`` step plus its ``subjectPattern``
— the PR-title CI check.
Both are hand-maintained. Add a type in one, forget the other, and the
two layers drift: commits fail locally but PR titles pass (or vice
versa). ``docs/DEVELOPMENT.md`` explicitly warns these must stay in
sync, but prose warnings drift too.
This script enforces sync on **two axes**:
- **Type allowlist** — the seven prefixes (feat, fix, docs, test,
refactor, chore, release). Mirrors the ``check_required_contexts.py``
pattern from #72.
- **Subject-case rule** — the negative-lookahead constraint that rejects
Title-Case subjects (``feat: Add thing`` → reject; ``feat: add thing``
/ ``feat: CI failure`` → accept). Added in #128 so commitizen rejects
Title Case at commit-msg time, not just at the CI gate.
Fails CI when either axis disagrees in either direction.
Usage (from repo root):
uv run python .github/scripts/check_commit_types.py
"""
from __future__ import annotations
import re
import sys
import tomllib
from pathlib import Path
import yaml
# Inputs to the sync check. Both paths are relative, so they resolve against
# the current working directory — run the script from the repository root.
PYPROJECT = Path("pyproject.toml")
PR_TITLE_YML = Path(".github/workflows/pr-title.yml")
# Matches the first alternation group in the commitizen schema_pattern,
# i.e. a literal `^(` followed by the `|`-separated type names and `)`.
#   Schema example: ^(feat|fix|rc2|hot-fix|...)(\([\w\-]+\))?!?:\s.+
#   Group 1:        feat|fix|rc2|hot-fix|...
#
# The character class [a-z0-9\-|]+ allows:
#   - lowercase letters (standard types: feat, fix, docs, ...)
#   - digits (release-candidate patterns: rc2, v2, ...)
#   - hyphens (compound types: hot-fix, post-release, ...)
#   - the `|` separator
# Widened from [a-z|]+ (#91): the tighter class would silently truncate
# extraction when a future type contained digits or hyphens.
_SCHEMA_ALTERNATION_RE = re.compile(r"\^\(([a-z0-9\-|]+)\)")
# Matches the subject-case constraint that sits between the literal `:\s`
# and the trailing `.+` at the end of the commitizen schema_pattern. The
# `\+{0,2}` part tolerates the three shapes seen across revisions:
#   :\s    — original #128 shape (exactly one whitespace character, so an
#            extra space before a Title-Case subject slips past the
#            lookahead).
#   :\s+   — naive widening (still backtracks on Title-Case input).
#   :\s++  — possessive quantifier (#154); the schema we want long-term
#            because it forbids the lookahead-bypass via backtracking.
# All three encode the same "after `:` then whitespace, then this lookahead"
# semantics; group 1 captures the lookahead chunk regardless of shape.
# When the schema carries no subject constraint (commitizen pre-#128 shape,
# `:\s.+`), the pattern still matches and group 1 is the empty string.
_SCHEMA_SUBJECT_RE = re.compile(r":\\s\+{0,2}(.*?)\.\+$")
def commitizen_types() -> set[str]:
    """Return the commit types permitted by commitizen's ``schema_pattern``.

    Reads ``[tool.commitizen].customize.schema_pattern`` from the file at
    ``PYPROJECT`` and splits its leading ``^(a|b|...)`` alternation into a
    set of type names. Raises ``ValueError`` when the setting is missing,
    when the alternation cannot be located, or when it yields no names.
    """
    # Walk the nested tables; a missing table degrades to {} so the final
    # lookup falls back to the empty string.
    node = tomllib.loads(PYPROJECT.read_text(encoding="utf-8"))
    for key in ("tool", "commitizen", "customize"):
        node = node.get(key, {})
    schema: str = node.get("schema_pattern", "")
    if not schema:
        msg = "[tool.commitizen].customize.schema_pattern not found in pyproject.toml"
        raise ValueError(msg)

    found = _SCHEMA_ALTERNATION_RE.search(schema)
    if found is None:
        msg = (
            "Could not extract the type alternation group from "
            f"schema_pattern: {schema!r}. Expected it to start with "
            "'^(<type>|<type>|...)'."
        )
        raise ValueError(msg)

    # Drop empty fragments, e.g. from a malformed "^(|feat)..." pattern.
    names = set(filter(None, found.group(1).split("|")))
    if names:
        return names

    # Nothing survived the filter: raise rather than hand back an empty set
    # that would trivially equal an empty set from the other extractor (#92).
    msg = (
        "Empty type alternation extracted from schema_pattern. "
        f"Check pyproject.toml: {schema!r}"
    )
    raise ValueError(msg)
def commitizen_subject_pattern() -> str:
    """Return the subject-case constraint embedded in commitizen's schema.

    The schema_pattern shape (post-#128):

        ^(feat|fix|...)(\\([\\w\\-]+\\))?!?:\\s(?![A-Z][a-z]).+

    The result is the text between ``:\\s`` and the trailing ``.+`` — the
    negative lookahead applied to the subject. An empty string is returned
    when the setting is missing, when the pattern does not have the
    expected shape, or when no constraint is present (pre-#128 shape).
    """
    node = tomllib.loads(PYPROJECT.read_text(encoding="utf-8"))
    for key in ("tool", "commitizen", "customize"):
        node = node.get(key, {})
    schema: str = node.get("schema_pattern", "")

    # A missing schema is not reported here; commitizen_types() raises for
    # that case, so the caller has already enforced it.
    found = _SCHEMA_SUBJECT_RE.search(schema) if schema else None
    return found.group(1) if found else ""
def pr_title_types() -> set[str]:
    """Return the commit types listed under ``types`` in the pr-title workflow."""
    declared = _pr_title_field("types", _parse_types)
    return declared  # type: ignore[return-value]
def pr_title_subject_pattern() -> str:
    """Return the pr-title workflow's subject-case constraint, normalised.

    Reads the optional ``subjectPattern`` input and removes one leading
    ``^`` anchor and one trailing ``.+$`` so the remainder can be compared
    directly with the chunk extracted from commitizen's schema. Returns ""
    when the field is absent or empty.
    """
    raw: str = _pr_title_field(  # type: ignore[assignment]
        "subjectPattern",
        lambda value: value or "",
        required=False,
    )
    if raw:
        unanchored = re.sub(r"^\^", "", raw)
        return re.sub(r"\.\+\$$", "", unanchored)
    return ""
def _parse_types(value: str) -> set[str]:
"""Parse the YAML ``types`` field (newline-separated string) into a set."""
types = {line.strip() for line in value.splitlines() if line.strip()}
if not types:
msg = (
f"`types:` block in {PR_TITLE_YML} is empty or "
"whitespace-only. Expected at least one commit type per line."
)
raise ValueError(msg)
return types
def _pr_title_field(
    name: str,
    parse: object,
    *,
    required: bool = True,
) -> object:
    """Extract a single field from the action-semantic-pull-request step.

    Loads the workflow at ``PR_TITLE_YML`` and scans every job's steps for
    the first one whose ``uses`` value contains
    ``action-semantic-pull-request``. ``name`` is the key to read from that
    step's ``with`` mapping and ``parse`` is a callable applied to the raw
    value when the key is present.

    Returns ``parse(value)`` when the key is present. When it is absent (or
    explicitly null), returns "" if ``required`` is false and raises
    ``ValueError`` otherwise. Also raises ``ValueError`` when no matching
    step exists anywhere in the workflow.
    """
    # A YAML key written without a value (`jobs:`, `steps:`, `with:`,
    # `uses:`) loads as None rather than being absent, so `.get(key, default)`
    # would return None and the next attribute access would fail with an
    # unhelpful AttributeError/TypeError. The `or` fallbacks route those
    # shapes (and an empty file) to the descriptive ValueErrors below.
    data = yaml.safe_load(PR_TITLE_YML.read_text(encoding="utf-8")) or {}
    for job in (data.get("jobs") or {}).values():
        for step in (job or {}).get("steps") or []:
            if "action-semantic-pull-request" not in (step.get("uses") or ""):
                continue
            value = (step.get("with") or {}).get(name)
            if value is not None:
                return parse(value)  # type: ignore[operator]
            if required:
                msg = (
                    f"`with.{name}` not found in the "
                    "action-semantic-pull-request step. Update this "
                    "script if the action's input names changed."
                )
                raise ValueError(msg)
            return ""
    msg = (
        "Could not find an `amannn/action-semantic-pull-request` step in "
        f"{PR_TITLE_YML}. If the action was renamed or the file moved, "
        "update this script."
    )
    raise ValueError(msg)
def main() -> int:
    """Compare both policy axes and report the outcome on stdout.

    Checks the type allowlist and the subject-case constraint extracted
    from commitizen's schema against the pr-title workflow. Mismatches are
    printed as ``::error::`` lines; the "both constraints empty" case is
    printed as a ``::warning::`` and does not fail. Returns 0 when both
    axes agree and 1 otherwise.
    """
    cz_types, pr_types = commitizen_types(), pr_title_types()
    cz_subject, pr_subject = commitizen_subject_pattern(), pr_title_subject_pattern()

    # Belt-and-braces safety net: both extractors raise on empty, but guard
    # against a future refactor that drops the raise (#92).
    if not (cz_types and pr_types):
        print(
            "::error::One or both extractors returned empty; sync check cannot "
            f"proceed. commitizen_types() empty: {not cz_types}; "
            f"pr_title_types() empty: {not pr_types}."
        )
        return 1

    # Axis 1: type allowlist.
    types_in_sync = cz_types == pr_types
    if types_in_sync:
        print(f"Commit types in sync ({len(cz_types)} types): {sorted(cz_types)}")
    else:
        print(
            "::error::[tool.commitizen].customize.schema_pattern and "
            ".github/workflows/pr-title.yml types are out of sync"
        )
        only_in_commitizen = sorted(cz_types - pr_types)
        only_in_pr_title = sorted(pr_types - cz_types)
        for name in only_in_commitizen:
            print(f"::error:: + in commitizen only: {name!r}")
        for name in only_in_pr_title:
            print(f"::error:: - in pr-title.yml only: {name!r}")
        print(
            "\nFix: update both the schema_pattern in pyproject.toml AND "
            "the `types` list in .github/workflows/pr-title.yml so they "
            "contain the same type names. See docs/DEVELOPMENT.md#commit-messages."
        )

    # Axis 2: subject-case constraint.
    subject_in_sync = cz_subject == pr_subject
    if not subject_in_sync:
        print(
            "::error::commitizen schema_pattern subject-case constraint "
            "and pr-title.yml `subjectPattern` are out of sync"
        )
        print(f"::error:: commitizen extracted: {cz_subject!r}")
        print(f"::error:: pr-title.yml extracted: {pr_subject!r}")
        print(
            "\nFix: keep both regexes equivalent after stripping anchors. "
            "Commitizen's chunk lives between `:\\s` and `.+` in "
            "schema_pattern; pr-title.yml's lives in the `subjectPattern` "
            "field stripped of `^` and `.+$`."
        )
    elif cz_subject:
        print(f"Subject-case constraint in sync: {cz_subject!r}")
    else:
        # Both empty — older shape, before #128's subject-case landed in
        # commitizen. Don't fail here; the `Lint PR title` workflow remains
        # the single layer if commitizen drops back. Surface as a warning.
        print(
            "::warning::Both commitizen and pr-title.yml have empty "
            "subject-case constraints. Per docs/DEVELOPMENT.md the rule "
            "should be enforced at both layers — re-add `(?![A-Z][a-z])` "
            "to commitizen's schema_pattern after `:\\s`."
        )

    return 0 if types_in_sync and subject_in_sync else 1
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())