-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_file_length.py
More file actions
114 lines (90 loc) · 3.71 KB
/
check_file_length.py
File metadata and controls
114 lines (90 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
"""Enforce the per-file line-count cap from `CLAUDE.md`.
`CLAUDE.md` *Code standards*: *"No file over 300 lines, no function
over ~50 lines."* The function-length half is enforced by ruff's
`PLR0915` / `PLR0912` rules (`pyproject.toml [tool.ruff.lint].select`).
This script enforces the file-length half.
Behaviour:
- Walks `src/`, `tests/`, `eval/`, `.github/scripts/` for `*.py` files.
- For each file, counts lines (newline-terminated and final-line-without-
newline both count as one line).
- Fails when any file exceeds `THRESHOLD = 300`.
There is **no exemption mechanism**. Per `feedback_no_noqa`, an
allowlist that records "current offenders with a tracker ticket" is a
non-blocking deferral by another name — the team gets used to seeing
the offenders listed, the tracker ticket sits open, and the rule never
fully bites. Pre-existing offenders were refactored in #144 before this
gate landed (six files / two functions split into helpers).
If a file legitimately should not be capped (generated code, vendored
sources), put it in a directory this script does not walk — and document
the exemption as a structural decision in `docs/DEVELOPMENT.md`, not as
an inline allowlist entry.
Exit codes:
0 — every walked file is at or under `THRESHOLD`
1 — at least one file exceeds the cap
2 — script-level error (no `*.py` files found under any walk root)
Usage (from repo root):
python .github/scripts/check_file_length.py
"""
from __future__ import annotations
import sys
from pathlib import Path
# Maximum number of lines a single file may have (the `CLAUDE.md` cap).
THRESHOLD = 300

# Walk roots, relative to the directory the script is run from. Every
# entry holds project-owned Python code that the `CLAUDE.md` cap applies
# to. Extending the list means editing this tuple (with a comment giving
# the rationale) — by design there is no environment-variable or
# CLI-flag override.
ROOTS: tuple[str, ...] = ("src", "tests", "eval", ".github/scripts")
def count_lines(path: Path) -> int:
    """Count newline-terminated lines plus a final un-terminated line, if any.

    Only the newline character delimits lines. The file is read in text
    mode, so CRLF and lone CR line endings are translated to a newline
    before counting.

    Args:
        path: File to measure; decoded as UTF-8.

    Returns:
        The number of lines in the file, or 0 for an empty file.

    Raises:
        UnicodeDecodeError: If the file content is not valid UTF-8.
        OSError: If the file cannot be read.
    """
    text = path.read_text(encoding="utf-8")
    if not text:
        return 0
    # Deliberately not `str.splitlines()`: it also breaks on form feed,
    # vertical tab, U+001C-U+001E, U+0085, U+2028 and U+2029, so a file
    # that merely contains one of those characters (e.g. a form-feed
    # section separator or a string literal) would be over-counted.
    newline_count = text.count("\n")
    # A final line without a trailing newline still counts (`wc -l` + 1).
    return newline_count if text.endswith("\n") else newline_count + 1
def _normalised(path: Path) -> str:
    """Render `path` as a string that uses `/` separators on every platform."""
    posix_form = path.as_posix()
    return posix_form
def main() -> int:
    """Audit every `*.py` file under `ROOTS` and report files over `THRESHOLD`.

    Prints one `::error file=...` annotation line per offending file
    followed by a summary, or a single OK line when every file passes.

    Returns:
        0 if every checked file is at or under `THRESHOLD`, 1 if at least
        one file exceeds it, 2 if no files were found to check.
    """
    walked: list[Path] = []
    failures: list[str] = []
    # Count only the roots that exist, so the summary line reports what
    # was really walked rather than what is configured.
    roots_walked = 0
    for root_name in ROOTS:
        root = Path(root_name)
        if not root.is_dir():
            # Directory may not exist yet (e.g. eval/ in a new fork). Skip cleanly.
            continue
        roots_walked += 1
        for path in sorted(root.rglob("*.py")):
            if not path.is_file():
                # `rglob` matches on the name alone; a directory or dangling
                # symlink called `*.py` has no lines to count.
                continue
            walked.append(path)
            lines = count_lines(path)
            if lines > THRESHOLD:
                failures.append(
                    f"::error file={_normalised(path)}::{lines} lines > "
                    f"{THRESHOLD} (per `CLAUDE.md`). Split the file or "
                    "refactor — there is no exemption mechanism, see the "
                    "module docstring."
                )
    if not walked:
        print(f"::error::no walk targets exist; checked {ROOTS!r}")
        return 2
    if failures:
        for line in failures:
            print(line)
        print(
            f"\n{len(failures)} file(s) exceed the cap. Refactor in this PR — "
            "splitting into single-responsibility modules or extracting helpers "
            "is the same shape #144 used for the original offenders."
        )
        return 1
    print(
        f"File-length audit OK — {len(walked)} file(s) checked across "
        f"{roots_walked} root(s), threshold {THRESHOLD}."
    )
    return 0
if __name__ == "__main__":
    # Run the audit and hand its status code back to the calling shell.
    exit_status = main()
    sys.exit(exit_status)