Skip to content

Commit 520e285

Browse files
vinta and claude committed
test: add entry validation and broken-link detection tests
Add three tests against the real README: verify all entries have non-empty names, valid http(s) URLs, and no broken markdown link syntax (e.g. '[name(url)' missing the closing ']('). Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 54864ab commit 520e285

1 file changed

Lines changed: 70 additions & 0 deletions

File tree

website/tests/test_readme_parser.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pytest
77

88
from readme_parser import (
9+
_find_inline,
910
_parse_section_entries,
1011
parse_readme,
1112
render_inline_html,
@@ -486,3 +487,72 @@ def test_miscellaneous_in_own_group(self):
486487
misc_group = next((g for g in self.groups if g["name"] == "Miscellaneous"), None)
487488
assert misc_group is not None
488489
assert any(c["name"] == "Miscellaneous" for c in misc_group["categories"])
490+
491+
def test_all_entries_have_nonempty_names(self):
492+
bad = []
493+
for cat in self.cats:
494+
for entry in cat["entries"]:
495+
if not entry["name"].strip():
496+
bad.append(f"{cat['name']}: empty entry name (url={entry['url']})")
497+
assert bad == [], "Entries with empty names:\n" + "\n".join(bad)
498+
499+
def test_all_entries_have_valid_urls(self):
500+
bad = []
501+
for cat in self.cats:
502+
for entry in cat["entries"]:
503+
if not entry["url"].startswith(("https://", "http://")):
504+
bad.append(f"{cat['name']}: [{entry['name']}] has invalid url: {entry['url']!r}")
505+
for see in entry["also_see"]:
506+
if not see["url"].startswith(("https://", "http://")):
507+
bad.append(f"{cat['name']}: [{see['name']}] (also_see) has invalid url: {see['url']!r}")
508+
assert bad == [], "Entries with invalid URLs:\n" + "\n".join(bad)
509+
510+
def test_no_malformed_entry_lines(self):
    """Detect list items that look like entries but have broken link syntax.

    Walks the markdown-it AST for list items whose inline text starts
    with '[' but contain no link node. This catches broken markdown
    like '- [name(url)' where the closing '](' is missing.

    Only top-level bullet lists between the first horizontal rule and the
    first following h1 titled "Resources" or "Contributing" are checked;
    if no such heading follows, everything after the rule is checked.
    The test is skipped (not silently passed) when the README contains no
    horizontal rule, because the category section cannot be located.
    """
    md = MarkdownIt("commonmark")
    root = SyntaxTreeNode(md.parse(self.readme_text))
    nodes = root.children

    # Start of the category section: the first '---' in the document.
    hr_idx = next((i for i, node in enumerate(nodes) if node.type == "hr"), None)
    if hr_idx is None:
        # A bare return would report a pass without having checked anything.
        pytest.skip("README has no horizontal rule; category section not found")

    # End of the category section: the first matching h1 AFTER the rule.
    # Headings before the rule are ignored; otherwise one of them could set
    # an end index below the start and yield an empty slice.
    end_idx = len(nodes)
    for i, node in enumerate(nodes[hr_idx + 1 :], start=hr_idx + 1):
        if node.type != "heading" or node.tag != "h1":
            continue
        text = render_inline_text(node.children[0].children) if node.children else ""
        if text in ("Resources", "Contributing"):
            end_idx = i
            break

    bad = []
    for node in nodes[hr_idx + 1 : end_idx]:
        if node.type == "bullet_list":
            self._check_list_for_broken_links(node, bad)

    assert bad == [], "List items with broken link syntax:\n" + "\n".join(bad)
541+
542+
def _check_list_for_broken_links(self, bullet_list, bad):
    """Append a report line to ``bad`` for each list item with broken link syntax.

    An item is reported when its rendered inline text starts with '[' and
    none of the inline node's direct children is a ``link`` node. Nested
    bullet lists are checked recursively. Items for which ``_find_inline``
    returns ``None`` are skipped entirely, nested lists included.

    ``bad`` is mutated in place; nothing is returned.
    """
    for item in bullet_list.children:
        if item.type != "list_item":
            continue

        inline_node = _find_inline(item)
        if inline_node is None:
            continue

        # Looks like the start of a link, yet the parser produced no link.
        child_types = {child.type for child in inline_node.children}
        rendered = render_inline_text(inline_node.children)
        if rendered.startswith("[") and "link" not in child_types:
            # ``map`` holds 0-based source lines; report 1-based, or '?' if absent.
            if item.map:
                line = item.map[0] + 1
            else:
                line = "?"
            bad.append(f"  line {line}: {rendered}")

        # Descend into any sub-lists of this item.
        for nested in item.children:
            if nested.type == "bullet_list":
                self._check_list_for_broken_links(nested, bad)

0 commit comments

Comments
 (0)