Merge pull request #4359 from graphql-java/dependabot/github_actions/… #754
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Validate Files

# This workflow validates that all files in the repository comply with:
#   1. Windows filename compatibility — no reserved characters (< > : " | ? * \)
#      so the repo can be cloned on Windows systems.
#   2. File size limits — no files larger than 10 MB. Many enterprise users mirror
#      graphql-java into internal repositories that enforce file size limits.
#   3. No dangerous Unicode characters — prevents Trojan Source (BiDi override),
#      glassworm, and similar attacks using invisible or control characters.
#
# Implementation notes shared by all three checks:
#   - Tracked paths are listed with `git ls-files -z` (NUL-delimited). Without -z,
#     git wraps any path containing non-ASCII bytes, quotes or backslashes in
#     double quotes with backslash escapes; such a quoted name is not a real path,
#     so it would be misreported by check 1 and silently skipped by checks 2 and 3.
#   - Each check fails if git lists no files at all, so a failing `git ls-files`
#     can never be reported as a clean run.
#   - Reports are collected in bash arrays and printed with printf, never with
#     `echo -e`, so backslashes in file names or file content are shown verbatim.

on:
  push:
    branches:
      - master
      - '**'
  pull_request:
    branches:
      - master
      - 23.x
      - 22.x
      - 21.x
      - 20.x
      - 19.x

permissions:
  contents: read

jobs:
  validate-filenames-and-size:
    runs-on: ubuntu-latest
    name: Validate Files (Windows names, size, Unicode safety)
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          fetch-depth: 0 # Fetch all history to check all files

      - name: Check for Windows-incompatible filenames
        run: |
          echo "Checking for Windows-incompatible filenames..."

          # Windows reserved characters: < > : " | ? * \
          # Control characters are rejected as well: Windows does not allow them
          # in file names either.
          INVALID_CHARS='[<>:"|?*\\[:cntrl:]]'

          # Get all tracked files, NUL-delimited so names are never quoted by git.
          mapfile -d '' -t TRACKED_FILES < <(git ls-files -z)
          if [ "${#TRACKED_FILES[@]}" -eq 0 ]; then
            echo "::error::git ls-files returned no files; refusing to report success"
            exit 1
          fi

          # Collect every path containing at least one invalid character.
          INVALID_FILES=()
          for file in "${TRACKED_FILES[@]}"; do
            if [[ "$file" =~ $INVALID_CHARS ]]; then
              INVALID_FILES+=("$file")
            fi
          done

          if [ "${#INVALID_FILES[@]}" -gt 0 ]; then
            echo "::error::The following files have Windows-incompatible characters in their names:"
            for file in "${INVALID_FILES[@]}"; do
              echo "::error file=${file}::File contains Windows-incompatible characters"
              echo " - $file"
            done
            echo ""
            echo "Please rename these files to remove characters: < > : \" | ? * \\ (and control characters)"
            echo "For ISO timestamps, replace colons with hyphens (e.g., 08:40:24 -> 08-40-24)"
            exit 1
          else
            echo "✓ All filenames are Windows-compatible"
          fi

      - name: Check for files larger than 10MB
        run: |
          echo "Checking for files larger than 10MB..."
          MAX_SIZE=$((10 * 1024 * 1024)) # 10 MB in bytes
          LARGE_FILES=()

          # Get all tracked files, NUL-delimited so names are never quoted by git.
          mapfile -d '' -t TRACKED_FILES < <(git ls-files -z)
          if [ "${#TRACKED_FILES[@]}" -eq 0 ]; then
            echo "::error::git ls-files returned no files; refusing to report success"
            exit 1
          fi

          # Check each file's size
          for file in "${TRACKED_FILES[@]}"; do
            # Only regular files on disk have a meaningful size here.
            if [ ! -f "$file" ]; then
              continue
            fi

            # GNU stat first, BSD stat as fallback. The trailing `|| size=""`
            # keeps a double failure from aborting the step under `bash -e`,
            # so the warning below is actually reachable.
            size=$(stat -c%s -- "$file" 2>/dev/null || stat -f%z -- "$file" 2>/dev/null) || size=""
            if [ -z "$size" ]; then
              echo "::warning file=${file}::Could not determine size of file"
              continue
            fi

            if [ "$size" -gt "$MAX_SIZE" ]; then
              # Pass the value with -v instead of splicing it into the awk program.
              size_mb=$(awk -v bytes="$size" 'BEGIN { printf "%.2f", bytes / 1024 / 1024 }')
              echo "::error file=${file}::File size (${size_mb} MB) exceeds 10MB limit"
              LARGE_FILES+=("${file} (${size_mb} MB)")
            fi
          done

          if [ "${#LARGE_FILES[@]}" -gt 0 ]; then
            echo ""
            echo "The following files exceed 10MB:"
            printf '%s\n' "${LARGE_FILES[@]}"
            echo ""
            echo "Please consider one of these options:"
            echo " 1. Split the file into smaller parts with suffixes .part1, .part2, etc."
            echo " 2. Remove unnecessary content from the file"
            exit 1
          else
            echo "✓ All files are within the 10MB size limit"
          fi

      - name: Check for dangerous Unicode characters
        run: |
          echo "Checking for dangerous Unicode characters (Trojan Source / glassworm)..."
          # Dangerous character ranges:
          #   U+0000-0008, U+000B-000C, U+000E-001F  C0 control chars (except TAB, LF, CR)
          #   U+007F-009F                            DELETE + C1 control chars
          #   U+200B-200D                            Zero-width space/non-joiner/joiner
          #   U+FEFF                                 Zero-width no-break space (BOM)
          #   U+202A-202E                            BiDi embedding/override (Trojan Source)
          #   U+2066-2069                            BiDi isolate chars (Trojan Source)
          FOUND_FILES=()

          # Get all tracked files, NUL-delimited so names are never quoted by git.
          mapfile -d '' -t TRACKED_FILES < <(git ls-files -z)
          if [ "${#TRACKED_FILES[@]}" -eq 0 ]; then
            echo "::error::git ls-files returned no files; refusing to report success"
            exit 1
          fi

          for file in "${TRACKED_FILES[@]}"; do
            if [ ! -f "$file" ]; then
              continue
            fi

            # Skip binary files. -b prints only the MIME type, without the
            # "name:" prefix, so a path that itself contains "text/" cannot make
            # a binary file look like text. An empty result (file(1) failed) is
            # scanned rather than skipped.
            # NOTE(review): types outside text/* (e.g. application/json) are
            # still skipped, as before — consider keying on --mime-encoding.
            mime_type=$(file -b --mime-type -- "$file" 2>/dev/null) || mime_type=""
            case "$mime_type" in
              "" | text/*) ;;
              *) continue ;;
            esac

            # The file is fed on stdin rather than passed as an argument, so perl
            # never interprets the file name. stderr is discarded and failures
            # are ignored on purpose: malformed UTF-8 only produces warnings.
            MATCHES=$(perl -CSD -ne '
              if (/[\x{0000}-\x{0008}\x{000B}\x{000C}\x{000E}-\x{001F}\x{007F}-\x{009F}\x{200B}-\x{200D}\x{FEFF}\x{202A}-\x{202E}\x{2066}-\x{2069}]/) {
                print " line $.: $_";
              }
            ' < "$file" 2>/dev/null || true)

            if [ -n "$MATCHES" ]; then
              echo "::error file=${file}::File contains dangerous Unicode characters"
              FOUND_FILES+=("${file}:" "$MATCHES")
            fi
          done

          if [ "${#FOUND_FILES[@]}" -gt 0 ]; then
            echo ""
            echo "The following files contain dangerous Unicode characters:"
            printf '%s\n' "${FOUND_FILES[@]}"
            echo ""
            echo "These invisible or rendering-altering characters can be used for"
            echo "Trojan Source or glassworm-style attacks. Detected categories:"
            echo " - C0/C1 control characters (U+0000-001F, U+007F-009F, except TAB/LF/CR)"
            echo " - Zero-width characters (U+200B-200D, U+FEFF)"
            echo " - BiDi override/isolate (U+202A-202E, U+2066-2069)"
            echo ""
            echo "Please remove these characters from the affected files."
            exit 1
          else
            echo "✓ No dangerous Unicode characters found"
          fi