Skip to content

Commit 1ae889b

Browse files
vintaclaude
andcommitted
fix: stricter GitHub owner/repo regexes and injection tests
Split _GITHUB_NAME_RE into separate owner and repo patterns. Owner regex now rejects leading/trailing hyphens and dots (matching GitHub's actual username rules). Repo regex requires alphanumeric start but allows dots and underscores anywhere after. New tests cover GraphQL injection attempts, invalid leading chars, and valid hyphenated/underscore/dot combinations. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 87c5f3b commit 1ae889b

2 files changed

Lines changed: 37 additions & 4 deletions

File tree

website/fetch_github_stars.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@
1919
GRAPHQL_URL = "https://api.github.com/graphql"
2020
BATCH_SIZE = 50
2121

22-
# Allowlist for valid GitHub owner/repo name characters.
23-
# GitHub usernames and repo names only allow letters, digits, hyphens, underscores, and dots.
24-
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9._-]+$")
22+
# GitHub usernames: alphanumeric and hyphens, must start/end with alphanumeric.
23+
_GITHUB_OWNER_RE = re.compile(r"^[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?$")
24+
# GitHub repo names: alphanumeric, hyphens, underscores, dots, must start with alphanumeric.
25+
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$")
2526

2627

2728
def extract_github_repos(text: str) -> set[str]:
@@ -50,7 +51,7 @@ def build_graphql_query(repos: list[str]) -> str:
5051
parts = []
5152
for i, repo in enumerate(repos):
5253
owner, name = repo.split("/", 1)
53-
if not _GITHUB_NAME_RE.match(owner) or not _GITHUB_NAME_RE.match(name):
54+
if not _GITHUB_OWNER_RE.match(owner) or not _GITHUB_NAME_RE.match(name):
5455
continue
5556
parts.append(
5657
f'repo_{i}: repository(owner: "{owner}", name: "{name}") '

website/tests/test_fetch_github_stars.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,38 @@ def test_skips_only_bad_repos(self):
9898
assert "good" in query
9999
assert "bad" not in query
100100

101+
def test_skips_graphql_injection_in_owner(self):
102+
query = build_graphql_query(['org"){evil}/repo'])
103+
assert query == ""
104+
105+
def test_skips_graphql_injection_in_name(self):
106+
query = build_graphql_query(['org/repo"){evil}'])
107+
assert query == ""
108+
109+
def test_skips_owner_starting_with_hyphen(self):
110+
query = build_graphql_query(["-bad/repo"])
111+
assert query == ""
112+
113+
def test_skips_owner_starting_with_dot(self):
114+
query = build_graphql_query([".bad/repo"])
115+
assert query == ""
116+
117+
def test_skips_repo_starting_with_dot(self):
118+
query = build_graphql_query(["org/.hidden"])
119+
assert query == ""
120+
121+
def test_allows_repo_with_dots_and_underscores(self):
122+
query = build_graphql_query(["org/my_repo.py"])
123+
assert 'name: "my_repo.py"' in query
124+
125+
def test_allows_hyphenated_owner(self):
126+
query = build_graphql_query(["my-org/repo"])
127+
assert 'owner: "my-org"' in query
128+
129+
def test_skips_owner_with_underscore(self):
130+
query = build_graphql_query(["bad_owner/repo"])
131+
assert query == ""
132+
101133

102134
class TestParseGraphqlResponse:
103135
def test_parses_star_count_and_owner(self):

0 commit comments

Comments
 (0)