Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 43 additions & 1 deletion .githooks/pre-commit
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#!/bin/bash

# Pre-commit hook to enforce Windows compatibility and file size limits
# Pre-commit hook to enforce Windows compatibility, file size limits,
# and dangerous Unicode character detection.
#
# 1. Windows filenames: prevents characters that are reserved on Windows (< > : " | ? * \)
# so the repo can be cloned on Windows systems.
# 2. File size: rejects files larger than 10 MB. Many enterprise users mirror graphql-java
# into internal repositories that enforce file size limits.
# 3. Dangerous Unicode: detects invisible/control characters that can be used for
# "Trojan Source" (BiDi override), homoglyph, or glassworm-style attacks.

# ANSI color codes for better output readability
RED='\033[0;31m'
Expand Down Expand Up @@ -75,6 +78,45 @@ if [ -n "$LARGE_FILES" ]; then
ERRORS_FOUND=1
fi

# Check 3: Dangerous Unicode characters (Trojan Source / glassworm attacks)
# Detects: C0/C1 control chars (except TAB, LF, CR), zero-width characters,
# BiDi override/embedding/isolate chars.
# Uses perl for macOS compatibility (grep -P is not available on macOS).
# NOTE(review): this scans the working-tree copy of each path, which can differ
# from the staged blob when a file is only partially staged.
echo " Checking for dangerous Unicode characters..."

# Succeeds (returns 0) only when file(1) reports the content of $1 as binary.
# The decision is based on the detected *encoding*, not the MIME type, so
# textual formats whose MIME type is not text/* (application/json,
# image/svg+xml, ...) are still scanned. -b keeps the filename out of the
# output so a path containing "binary" or "text/" cannot influence the result.
# If file(1) is unavailable the file is treated as text and scanned.
is_binary_file() {
  local encoding
  encoding=$(file -b --mime-encoding -- "$1" 2>/dev/null) || return 1
  [ "$encoding" = "binary" ]
}

# Prints every line of $1 that contains a dangerous character, prefixed with
# its line number. Each offending character is rendered as a visible <U+XXXX>
# marker: the user can see what was found, and no raw control or BiDi
# character reaches the terminal. The file is fed on stdin so that unusual
# filenames (leading '-', '|', '>', leading spaces) are never interpreted by
# perl's two-argument open. Always returns 0; perl diagnostics (for example
# malformed UTF-8 warnings) are intentionally discarded.
list_dangerous_unicode_lines() {
  perl -CSD -ne '
    if (s/([\x{0000}-\x{0008}\x{000B}\x{000C}\x{000E}-\x{001F}\x{007F}-\x{009F}\x{200B}-\x{200D}\x{FEFF}\x{202A}-\x{202E}\x{2066}-\x{2069}])/sprintf("<U+%04X>", ord($1))/ge) {
      print "    line $.: $_";
    }
  ' 2>/dev/null < "$1" || true
}

# Scans the newline-separated list of paths in $1 and prints a report with one
# " - <path>" header per offending file followed by its offending lines.
# Paths that are not regular files, and binary files, are skipped.
collect_unicode_violations() {
  local file matches report=""
  while IFS= read -r file; do
    [ -f "$file" ] || continue
    if is_binary_file "$file"; then
      continue
    fi
    matches=$(list_dangerous_unicode_lines "$file")
    if [ -n "$matches" ]; then
      report="${report} - ${file}"$'\n'"${matches}"$'\n'
    fi
  done <<< "$1"
  printf '%s' "$report"
}

UNICODE_FILES=""
if [ -n "$STAGED_FILES" ]; then
  if command -v perl >/dev/null 2>&1; then
    UNICODE_FILES=$(collect_unicode_violations "$STAGED_FILES")
  else
    # Do not pass silently when the scanner cannot run.
    echo -e "${YELLOW}Warning: perl not found; skipping dangerous Unicode check.${NC}"
  fi
fi

if [ -n "$UNICODE_FILES" ]; then
  echo -e "${RED}Error: The following files contain dangerous Unicode characters:${NC}"
  # printf '%s' keeps filenames and file content literal; echo -e would expand
  # backslash sequences that come from the scanned files.
  printf '%s\n' "$UNICODE_FILES"
  echo -e "${YELLOW}These characters are invisible or alter text rendering and can be used for${NC}"
  echo -e "${YELLOW}Trojan Source or glassworm-style attacks. Detected character categories:${NC}"
  echo -e "${YELLOW} - C0/C1 control characters (U+0000-001F, U+007F-009F, except TAB/LF/CR)${NC}"
  echo -e "${YELLOW} - Zero-width characters (U+200B-200D, U+FEFF)${NC}"
  echo -e "${YELLOW} - BiDi override/isolate (U+202A-202E, U+2066-2069)${NC}"
  echo -e "${YELLOW}Please remove these characters from the affected files.${NC}"
  ERRORS_FOUND=1
fi

# Exit with error if any checks failed
if [ "$ERRORS_FOUND" -eq 1 ]; then
echo -e "${RED}Pre-commit checks failed. Please fix the issues above and try again.${NC}"
Expand Down
54 changes: 53 additions & 1 deletion .github/workflows/validate-files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ name: Validate Files
# so the repo can be cloned on Windows systems.
# 2. File size limits — no files larger than 10 MB. Many enterprise users mirror
# graphql-java into internal repositories that enforce file size limits.
# 3. No dangerous Unicode characters — prevents Trojan Source (BiDi override),
# glassworm, and similar attacks using invisible or control characters.

on:
push:
Expand All @@ -24,7 +26,7 @@ permissions:
jobs:
validate-filenames-and-size:
runs-on: ubuntu-latest
name: Validate Windows Compatibility and File Sizes
name: Validate Files (Windows names, size, Unicode safety)
steps:
- name: Checkout code
uses: actions/checkout@v6
Expand Down Expand Up @@ -96,3 +98,53 @@ jobs:
else
echo "✓ All files are within the 10MB size limit"
fi

- name: Check for dangerous Unicode characters
run: |
echo "Checking for dangerous Unicode characters (Trojan Source / glassworm)..."

# Dangerous character ranges:
# U+0000-0008, U+000B-000C, U+000E-001F C0 control chars (except TAB, LF, CR)
# U+007F-009F DELETE + C1 control chars
# U+200B-200D Zero-width space/non-joiner/joiner
# U+FEFF Zero-width no-break space (BOM)
# U+202A-202E BiDi embedding/override (Trojan Source)
# U+2066-2069 BiDi isolate chars (Trojan Source)

# Succeeds (returns 0) only when file(1) reports the content of $1 as binary.
# The detected encoding is used rather than the MIME type so that textual
# formats with a non-text/* MIME type (application/json, image/svg+xml, ...)
# are still scanned; -b keeps the filename out of the compared output.
# If file(1) is unavailable the file is treated as text and scanned.
is_binary_file() {
  local encoding
  encoding=$(file -b --mime-encoding -- "$1" 2>/dev/null) || return 1
  [ "$encoding" = "binary" ]
}

# Prints every line of $1 that contains a dangerous character, prefixed with
# its line number, with each offending character replaced by a visible
# <U+XXXX> marker so that no raw control/BiDi character is written to the job
# log. The file is read from stdin so that repository-controlled filenames are
# never interpreted by perl's two-argument open. Always returns 0; perl
# diagnostics (for example malformed UTF-8 warnings) are intentionally dropped.
list_dangerous_unicode_lines() {
  perl -CSD -ne '
    if (s/([\x{0000}-\x{0008}\x{000B}\x{000C}\x{000E}-\x{001F}\x{007F}-\x{009F}\x{200B}-\x{200D}\x{FEFF}\x{202A}-\x{202E}\x{2066}-\x{2069}])/sprintf("<U+%04X>", ord($1))/ge) {
      print "    line $.: $_";
    }
  ' 2>/dev/null < "$1" || true
}

FOUND_FILES=""

# -z yields raw NUL-terminated paths. Without it git C-quotes paths containing
# non-ASCII or special characters, the -f test fails on the quoted form, and
# such files would be skipped silently.
while IFS= read -r -d '' file; do
  [ -f "$file" ] || continue
  if is_binary_file "$file"; then
    continue
  fi
  MATCHES=$(list_dangerous_unicode_lines "$file")
  if [ -n "$MATCHES" ]; then
    echo "::error file=${file}::File contains dangerous Unicode characters"
    FOUND_FILES="${FOUND_FILES}${file}:"$'\n'"${MATCHES}"$'\n'
  fi
done < <(git ls-files -z)

if [ -n "$FOUND_FILES" ]; then
  echo ""
  echo "The following files contain dangerous Unicode characters:"
  # printf '%s' prints file content literally; echo -e would expand backslash
  # sequences taken from the scanned files (e.g. a literal "\n" starting a new
  # log line).
  printf '%s\n' "$FOUND_FILES"
  echo ""
  echo "These invisible or rendering-altering characters can be used for"
  echo "Trojan Source or glassworm-style attacks. Detected categories:"
  echo " - C0/C1 control characters (U+0000-001F, U+007F-009F, except TAB/LF/CR)"
  echo " - Zero-width characters (U+200B-200D, U+FEFF)"
  echo " - BiDi override/isolate (U+202A-202E, U+2066-2069)"
  echo ""
  echo "Please remove these characters from the affected files."
  exit 1
else
  echo "✓ No dangerous Unicode characters found"
fi
2 changes: 2 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ The pre-commit hook will automatically check for:
- Splitting them into smaller parts (`.part1`, `.part2`, etc.)
- Reducing the file size

- **Dangerous Unicode characters**: Files containing invisible or rendering-altering Unicode characters will be rejected. This protects against [Trojan Source](https://trojansource.codes/) (BiDi override) and glassworm-style attacks. Blocked character categories are C0/C1 control characters (except TAB, LF, and CR), zero-width characters (including the U+FEFF byte order mark), and BiDi embedding, override, and isolate characters.

To bypass the hooks temporarily (not recommended), use `git commit --no-verify`.

### CI Validation
Expand Down