-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_src_readmes.py
More file actions
142 lines (117 loc) · 5.16 KB
/
check_src_readmes.py
File metadata and controls
142 lines (117 loc) · 5.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
"""Enforce the per-package README rule from `CLAUDE.md`.
`CLAUDE.md` *Code standards*: *"Each `src/` directory has a README
explaining its purpose and key interfaces."* As `src/` grows, READMEs
go missing silently — and even when one exists, it can degrade into
unstructured prose that no contributor reads. This script audits both
shape and substance.
Behaviour:
- Walks every subdirectory under `src/` (recursive, skipping
`__pycache__`).
- A subdirectory must have a `README.md` when it contains at least one
`.py` file other than `__init__.py`. Empty directories and
`__init__.py`-only namespace packages are exempt — they have no
surface to document.
- The README must be non-trivial: at least `MIN_BYTES = 200` bytes
after stripping leading/trailing whitespace. Catches the empty-stub
failure mode (#126).
- The README must contain a `## Key interfaces` heading (or its
documented synonym `## Public surface`). #152 promoted the gate
from presence + size to presence + structure: a 200-byte unrelated
paragraph passed pre-#152, but doesn't actually document the
package's public surface. The Purpose statement is encoded
positionally as the H1 heading + immediately-following paragraph
rather than as an explicit `## Purpose` heading — that's the
existing convention across all seven packages and the natural
markdown shape; a separate `## Purpose` heading would duplicate
what the H1 already establishes.
There is **no exemption mechanism**, mirroring `check_file_length.py`
(see `feedback_no_noqa`). If a future package legitimately needs a
shorter README (e.g. a single-file helper with a self-explanatory
filename), restructure rather than carve out an allowlist entry.
Exit codes:
0 — every package with code has a non-trivial, structured README
1 — at least one package is missing a README, has a stub one, or
lacks the required heading
2 — `src/` does not exist (run from the wrong directory?)
Usage (from repo root):
python .github/scripts/check_src_readmes.py
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
# Root of the tree to audit, relative to the current working directory
# (the script is meant to be run from the repository root).
SRC_ROOT = Path("src")
# Minimum README size in UTF-8 bytes, measured after stripping leading and
# trailing whitespace; anything smaller is reported as a stub.
MIN_BYTES = 200
# Required heading shape. The alternation is "any-of": a README satisfies
# the rule if it contains at least one of the listed titles as a heading.
# The match is anchored to line start, is case-insensitive, and allows `#`
# levels 2-4 so a nested subsection still counts. The separator after the
# `#` run is limited to spaces/tabs rather than `\s`: `\s` also matches a
# newline, which would let an empty `##` line followed by a paragraph that
# merely starts with "Key interfaces" pass as if it were the heading.
KEY_INTERFACES_HEADING = re.compile(
    r"^#{2,4}[ \t]+(Key interfaces|Public surface)\b",
    re.IGNORECASE | re.MULTILINE,
)
def _normalised(path: Path) -> str:
    """Render `path` as a string that uses forward slashes as separators."""
    posix_text = path.as_posix()
    return posix_text
def _has_documentable_code(directory: Path) -> bool:
    """True if `directory` directly holds a `.py` file other than `__init__.py`.

    Only immediate children are inspected (no recursion), and an entry
    counts only when `is_file()` is true for it, so a sub-directory whose
    name happens to end in `.py` does not qualify.
    """
    return any(
        child.is_file() and child.suffix == ".py" and child.name != "__init__.py"
        for child in directory.iterdir()
    )
def _readme_failure(directory: Path) -> str | None:
    """Audit `directory / "README.md"` and describe the first rule it breaks.

    Checks run in order and stop at the first failure:

    1. the README exists as a file;
    2. it can be read and decoded as UTF-8;
    3. its whitespace-stripped body is at least `MIN_BYTES` UTF-8 bytes;
    4. it contains a heading matching `KEY_INTERFACES_HEADING`.

    Args:
        directory: The package directory whose README is being audited.

    Returns:
        A message formatted as `::error file=<path>::<explanation>` for the
        first failed check, or None when the README passes all of them.
    """
    readme = directory / "README.md"
    if not readme.is_file():
        return (
            f"::error file={_normalised(directory)}::missing README.md. "
            "`CLAUDE.md` requires every `src/` package to document its "
            "purpose and key interfaces."
        )
    try:
        body = readme.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError) as exc:
        # Report an unreadable or mis-encoded README as a failure for this
        # one package rather than letting the exception abort the audit of
        # every remaining directory with a traceback.
        return (
            f"::error file={_normalised(readme)}::README.md could not be "
            f"read as UTF-8 text ({type(exc).__name__}: {exc}). Fix the "
            "file's encoding or permissions so the audit can inspect it."
        )
    # Size is measured in encoded bytes, not characters, to match MIN_BYTES.
    if len(body.encode("utf-8")) < MIN_BYTES:
        return (
            f"::error file={_normalised(readme)}::README.md is shorter than "
            f"{MIN_BYTES} bytes after stripping whitespace. Add purpose + "
            "key-interfaces text — a single heading does not satisfy the rule."
        )
    if not KEY_INTERFACES_HEADING.search(body):
        return (
            f"::error file={_normalised(readme)}::README.md missing a "
            "`## Key interfaces` heading (or the synonym `## Public surface`). "
            "Per CLAUDE.md the README must document the package's public "
            "surface; the heading anchors that section so contributors can "
            "find it. Add the heading and list the package's exported names."
        )
    return None
def main() -> int:
    """Audit every code-bearing directory under `SRC_ROOT` and print the result.

    Directories named `__pycache__` and directories without documentable
    code are skipped; each remaining one is passed to `_readme_failure`.

    Returns:
        0 when every audited directory passes, 1 when at least one README
        check fails, 2 when `SRC_ROOT` is not a directory.
    """
    if not SRC_ROOT.is_dir():
        print(f"::error::{SRC_ROOT.as_posix()} not found; run from repo root")
        return 2

    # Sorting the directories keeps the report order stable between runs.
    candidates = sorted(entry for entry in SRC_ROOT.rglob("*") if entry.is_dir())

    audited_count = 0
    problems: list[str] = []
    for package_dir in candidates:
        if (
            package_dir.name == "__pycache__"
            or not _has_documentable_code(package_dir)
        ):
            continue
        audited_count += 1
        verdict = _readme_failure(package_dir)
        if verdict is not None:
            problems.append(verdict)

    if not problems:
        print(
            f"src/ README audit OK — {audited_count} package(s) documented "
            f"(min {MIN_BYTES} bytes, `## Key interfaces` heading required)."
        )
        return 0

    # One failure message per line, followed by a blank-line-separated summary.
    print(*problems, sep="\n")
    print(
        f"\n{len(problems)} package(s) failed the README audit. "
        "Fix in this PR — there is no exemption mechanism, see the "
        "module docstring."
    )
    return 1
# Script entry point: hand main()'s status to the interpreter as the exit code.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)