-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathgenerate_examples_md.py
More file actions
263 lines (210 loc) · 8.82 KB
/
generate_examples_md.py
File metadata and controls
263 lines (210 loc) · 8.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#!/usr/bin/env python
"""Generate EXAMPLES.md and examples/registry.py from example file frontmatter.
Usage:
uv run python scripts/generate_examples_md.py # Generate files
uv run python scripts/generate_examples_md.py --check # Check if files are up to date
"""
from __future__ import annotations
import re
import sys
import argparse
from typing import Any
from pathlib import Path
import frontmatter # type: ignore[import-untyped]
# Two levels up from this file: the directory that contains the directory
# this script lives in.
ROOT = Path(__file__).parent.parent

# Directory scanned for example modules, and the generated artifacts.
EXAMPLES_DIR = ROOT / "examples"
OUTPUT_FILE = ROOT / "EXAMPLES.md"
OUTPUT_REGISTRY_FILE = EXAMPLES_DIR / "registry.py"

# File that must reference at least one example (see ensure_llms_references).
LLMS_FILE = ROOT / "llms.txt"

# Frontmatter keys every example must define; order is the order in which
# missing keys are reported.
REQUIRED_FIELDS = [
    "title",
    "slug",
    "use_case",
    "workflow",
    "tags",
    "prerequisites",
    "run",
    "test",
]

# Files inside EXAMPLES_DIR that are not examples and are skipped by main().
EXCLUDED_FILES = {
    "_harness.py",
    "example_types.py",
    "registry.py",
    "__init__.py",
}
def parse_example(path: Path) -> dict[str, Any]:
    """Parse the frontmatter metadata embedded in a Python file's docstring.

    The file must begin (optionally after a shebang line and blank lines)
    with a triple-double-quoted docstring whose stripped text starts with
    ``---``.

    Args:
        path: Python source file to read.

    Returns:
        The frontmatter metadata as a plain ``dict``.

    Raises:
        ValueError: If the file is not valid UTF-8, the docstring is missing,
            the docstring does not start with ``---``, or the frontmatter
            cannot be parsed.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    try:
        content = path.read_text(encoding="utf-8")
    except UnicodeDecodeError as e:
        raise ValueError(f"{path}: file is not valid UTF-8: {e}") from e

    # No re.MULTILINE: "^" anchors at the very start of the file, so only a
    # leading docstring is accepted.
    match = re.search(r'^(?:#!.*\n)?(?:\s*\n)*"""([\s\S]*?)"""', content)
    if not match:
        raise ValueError(f"{path}: missing docstring")

    docstring = match.group(1).strip()
    if not docstring.startswith("---"):
        raise ValueError(f"{path}: docstring must start with frontmatter (---)")

    try:
        post = frontmatter.loads(docstring)
        return dict(post.metadata)
    except Exception as e:
        # Re-raise any parser failure as ValueError, tagged with the file path.
        raise ValueError(f"{path}: invalid frontmatter: {e}") from e
def validate_example(metadata: dict[str, Any], file_name: str, seen_slugs: set[str]) -> None:
    """Validate one example's frontmatter metadata.

    Checks, in order: all REQUIRED_FIELDS are present; ``workflow``, ``tags``
    and ``prerequisites`` are non-empty lists; ``title``, ``slug``, ``run``
    and ``test`` are strings; the slug equals the file name with the ``.py``
    suffix removed and underscores turned into hyphens; the slug has not been
    seen before; and the run command mentions ``examples.<module>``.

    Args:
        metadata: Parsed frontmatter of the example.
        file_name: Base name of the example file (e.g. ``my_example.py``).
        seen_slugs: Slugs accepted so far; the validated slug is added to it.

    Raises:
        ValueError: On the first failed check, prefixed with the example path.
    """
    path = f"examples/{file_name}"

    missing = [f for f in REQUIRED_FIELDS if f not in metadata]
    if missing:
        raise ValueError(f"{path}: missing fields: {', '.join(missing)}")

    for field in ("workflow", "tags", "prerequisites"):
        if not isinstance(metadata[field], list) or not metadata[field]:
            raise ValueError(f"{path}: '{field}' must be a non-empty list")

    # Frontmatter values may be parsed as non-strings (numbers, lists, ...).
    # These four are later used as set members, sort keys or str.join()
    # items, where a non-string would surface as an uncaught TypeError
    # instead of a clean validation error.
    for field in ("title", "slug", "run", "test"):
        if not isinstance(metadata[field], str):
            raise ValueError(f"{path}: '{field}' must be a string")

    # Strip only the trailing ".py"; a blanket replace would also remove
    # ".py" occurring in the middle of the name.
    module_name = file_name[: -len(".py")] if file_name.endswith(".py") else file_name

    slug = metadata["slug"]
    expected_slug = module_name.replace("_", "-")
    if slug != expected_slug:
        raise ValueError(f"{path}: slug '{slug}' must match '{expected_slug}'")
    if slug in seen_slugs:
        raise ValueError(f"{path}: duplicate slug")
    seen_slugs.add(slug)

    if f"examples.{module_name}" not in metadata["run"]:
        raise ValueError(f"{path}: run command must reference 'examples.{module_name}'")
def ensure_llms_references(examples: list[dict[str, Any]]) -> None:
    """Check that llms.txt references example files, and only known ones.

    Args:
        examples: Example records; each must carry a ``file_name`` key.

    Raises:
        ValueError: If LLMS_FILE does not exist, contains no
            ``examples/<name>.py`` reference, or references a file that is
            not among ``examples``.
    """
    llms_label = LLMS_FILE.relative_to(ROOT)
    if not LLMS_FILE.exists():
        raise ValueError(f"Missing llms file: {llms_label}")

    # Read as UTF-8 explicitly rather than with the locale's default encoding.
    llms_text = LLMS_FILE.read_text(encoding="utf-8")
    referenced = set(re.findall(r"examples/([a-z0-9_]+\.py)", llms_text))
    if not referenced:
        raise ValueError(f"{llms_label}: expected at least one reference to examples/*.py")

    generated = {e["file_name"] for e in examples}
    # Sorted so that the same unknown reference is reported on every run
    # (set iteration order is not stable across processes).
    for file_name in sorted(referenced):
        if file_name not in generated:
            raise ValueError(f"{llms_label}: references unknown example file 'examples/{file_name}'")
def normalize_env_var(prerequisite: str) -> str | None:
    """Extract a leading environment-variable name from a prerequisite string.

    The name must start the string, consist of upper-case ASCII letters,
    digits and underscores, not begin with a digit, and end at a word
    boundary. The boundary requirement keeps ordinary prose such as
    ``"Python 3.11+"`` from being reported as the variable ``"P"``.

    Args:
        prerequisite: Free-form prerequisite text, e.g.
            ``"OPENAI_API_KEY (from the dashboard)"``.

    Returns:
        The variable name, or ``None`` if the string does not start with one.
    """
    match = re.match(r"[A-Z_][A-Z0-9_]*\b", prerequisite)
    return match.group(0) if match else None
def markdown_for_example(example: dict[str, Any]) -> str:
    """Render one example record as its EXAMPLES.md section.

    The section consists of an HTML anchor named after the slug, the title
    heading, use case, tags, workflow and prerequisite bullet lists, fenced
    run and test commands, and a link to the source file. The returned text
    ends with a newline.
    """
    section: list[str] = [
        f'<a id="{example["slug"]}"></a>',
        f"## {example['title']}",
        "",
        f"**Use case:** {example['use_case']}",
        "",
    ]

    rendered_tags = ", ".join(f"`{tag}`" for tag in example["tags"])
    section += [f"**Tags:** {rendered_tags}", ""]

    section.append("### Workflow")
    section.extend(f"- {step}" for step in example["workflow"])
    section.append("")

    section.append("### Prerequisites")
    section.extend(f"- `{item}`" for item in example["prerequisites"])
    section.append("")

    section += ["### Run", "```sh", example["run"], "```", ""]
    section += ["### Test", "```sh", example["test"], "```", ""]

    source_file = example["file_name"]
    section.append(f"**Source:** [`examples/{source_file}`](./examples/{source_file})")
    # Trailing empty item so the joined text ends with a newline.
    section.append("")

    return "\n".join(section)
def generate_markdown(examples: list[dict[str, Any]]) -> str:
    """Build the complete EXAMPLES.md text.

    The document is a fixed header, a table of contents with one link per
    example (targeting its slug anchor), and then each example's section in
    the order given. The result ends with a single newline.
    """
    toc_entries = [f"- [{e['title']}](#{e['slug']})" for e in examples]
    rendered_sections = [markdown_for_example(e) for e in examples]

    document = [
        "# Examples",
        "",
        "> This file is auto-generated from metadata in `examples/*.py`.",
        "> Do not edit this file manually. Run `uv run python scripts/generate_examples_md.py` instead.",
        "",
        "Runnable examples live in [`examples/`](./examples).",
        "",
        "## Table of Contents",
        "",
    ]
    document.append("\n".join(toc_entries))
    document.append("")
    # Trailing whitespace is dropped from the last section; the final empty
    # item below puts exactly one newline back.
    document.append("\n".join(rendered_sections).rstrip())
    document.append("")

    return "\n".join(document)
def generate_registry(examples: list[dict[str, Any]]) -> str:
    """Build the source text of the generated ``registry.py`` module.

    The generated module imports ``ExampleResult`` plus one
    ``run_<module>_example`` function per example, defines
    ``example_registry`` (one dict per example with ``slug``, ``title``,
    ``file_name``, ``required_env`` and ``run``) and a
    ``get_example_runner`` lookup helper.

    Args:
        examples: Example records carrying ``file_name``, ``slug``,
            ``title`` and ``prerequisites``.

    Returns:
        The Python source of the registry module.
    """
    # Function-scope import: nothing else in this module needs json.
    import json

    def literal(text: str) -> str:
        # Emit a double-quoted string literal. json.dumps escapes quotes,
        # backslashes and control characters with escapes that Python string
        # literals also accept, so unusual titles cannot corrupt the output.
        return json.dumps(text, ensure_ascii=False)

    imports: list[tuple[str, str]] = [("example_types", "ExampleResult")]
    entries: list[str] = []
    for example in examples:
        file_name = example["file_name"]
        # Strip only the trailing ".py" to obtain the module name.
        module = file_name[: -len(".py")] if file_name.endswith(".py") else file_name
        runner = f"run_{module}_example"
        imports.append((module, runner))

        # Prerequisites that do not start with a variable name are dropped.
        env_vars = [normalize_env_var(p) for p in example["prerequisites"]]
        env_list = ", ".join(f'"{e}"' for e in env_vars if e)

        entries.append(
            "    {\n"
            f'        "slug": {literal(example["slug"])},\n'
            f'        "title": {literal(example["title"])},\n'
            f'        "file_name": {literal(file_name)},\n'
            f'        "required_env": [{env_list}],\n'
            f'        "run": {runner},\n'
            "    },"
        )

    # Imports are ordered by module-name length, then alphabetically.
    imports.sort(key=lambda x: (len(x[0]), x[0]))
    import_block = "\n".join(f"from .{mod} import {name}" for mod, name in imports)
    entry_block = "\n".join(entries)

    return f'''"""
This file is auto-generated by scripts/generate_examples_md.py.
Do not edit manually.
"""

from __future__ import annotations

from typing import Any, Callable, cast

{import_block}

ExampleRegistryEntry = dict[str, Any]

example_registry: list[ExampleRegistryEntry] = [
{entry_block}
]


def get_example_runner(slug: str) -> Callable[[], ExampleResult] | None:
    """Get the runner function for an example by slug."""
    for entry in example_registry:
        if entry["slug"] == slug:
            return cast(Callable[[], ExampleResult], entry["run"])
    return None
'''
def _stale_reason(path: Path, expected: str) -> str | None:
    """Return the ``--check`` failure message for *path*, or ``None`` if it matches *expected*."""
    label = path.relative_to(ROOT)
    if not path.exists():
        return f"{label} does not exist"
    # Compare as UTF-8, the encoding main() writes the generated files with.
    if path.read_text(encoding="utf-8") != expected:
        return f"{label} is out of date"
    return None


def main() -> int:
    """Regenerate EXAMPLES.md and examples/registry.py, or verify them with ``--check``.

    Every ``*.py`` file in EXAMPLES_DIR that is not listed in EXCLUDED_FILES
    and does not start with an underscore is parsed and validated. Examples
    are ordered by title before rendering.

    Returns:
        ``0`` on success. ``1`` if an example fails parsing or validation,
        the llms.txt check fails, or (with ``--check``) a generated file is
        missing or differs from the freshly generated content.
    """
    parser = argparse.ArgumentParser(description="Generate EXAMPLES.md and registry.py")
    parser.add_argument("--check", action="store_true", help="Check if files are up to date")
    args = parser.parse_args()

    seen_slugs: set[str] = set()
    examples: list[dict[str, Any]] = []
    for path in sorted(EXAMPLES_DIR.glob("*.py")):
        if path.name in EXCLUDED_FILES or path.name.startswith("_"):
            continue
        try:
            metadata = parse_example(path)
            validate_example(metadata, path.name, seen_slugs)
        except ValueError as e:
            # Stop at the first invalid example.
            print(f"Error: {e}", file=sys.stderr)
            return 1
        examples.append({**metadata, "file_name": path.name})

    examples.sort(key=lambda e: e["title"])

    try:
        ensure_llms_references(examples)
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    markdown = generate_markdown(examples)
    registry_source = generate_registry(examples)

    if args.check:
        # Check mode never writes; it only reports every missing/stale file.
        reasons = (
            _stale_reason(OUTPUT_FILE, markdown),
            _stale_reason(OUTPUT_REGISTRY_FILE, registry_source),
        )
        errors = [reason for reason in reasons if reason is not None]
        if errors:
            for err in errors:
                print(f"Error: {err}", file=sys.stderr)
            print("\nRun `uv run python scripts/generate_examples_md.py` to regenerate.", file=sys.stderr)
            return 1
        print("All generated files are up to date.")
        return 0

    # Write as UTF-8 explicitly so non-ASCII metadata produces the same
    # files regardless of the platform's locale encoding.
    OUTPUT_FILE.write_text(markdown, encoding="utf-8")
    OUTPUT_REGISTRY_FILE.write_text(registry_source, encoding="utf-8")
    print(f"Wrote {OUTPUT_FILE.relative_to(ROOT)} from {len(examples)} example(s)")
    print(f"Wrote {OUTPUT_REGISTRY_FILE.relative_to(ROOT)} from {len(examples)} example(s)")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())