Skip to content

Merge pull request #4359 from graphql-java/dependabot/github_actions/… #754

Merge pull request #4359 from graphql-java/dependabot/github_actions/…

Merge pull request #4359 from graphql-java/dependabot/github_actions/… #754

name: Validate Files

# Repository-wide file hygiene checks. Every tracked file must satisfy:
#   1. Windows filename compatibility: no reserved characters
#      (< > : " | ? * \), so the repository can be cloned on Windows.
#   2. File size limit: nothing larger than 10 MB. Many enterprise users
#      mirror graphql-java into internal repositories that enforce file
#      size limits.
#   3. No dangerous Unicode characters: guards against Trojan Source
#      (BiDi override), glassworm, and similar attacks that rely on
#      invisible or control characters.

on:
  # Pushes: master plus the catch-all '**' pattern, which matches every
  # branch name (including names that contain slashes).
  push:
    branches:
      - 'master'
      - '**'
  # Pull requests: only those targeting master or a maintained release line.
  pull_request:
    branches:
      - 'master'
      - '23.x'
      - '22.x'
      - '21.x'
      - '20.x'
      - '19.x'

# The checks only read the repository; no write scopes are granted.
permissions:
  contents: read
jobs:
  # One job, three independent checks over every file tracked by git at the
  # checked-out commit. Each check lists the tracked paths NUL-delimited
  # (git ls-files -z): without -z, git C-quotes any path that contains
  # non-ASCII bytes (core.quotePath), and the quoted spelling neither names
  # a real file nor is safe to pattern-match.
  validate-filenames-and-size:
    runs-on: ubuntu-latest
    name: Validate Files (Windows names, size, Unicode safety)
    steps:
      # The default shallow checkout is enough: every check below inspects
      # only the tree of the checked-out commit, never the history.
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Check for Windows-incompatible filenames
        run: |
          echo "Checking for Windows-incompatible filenames..."
          # Windows reserved characters: < > : " | ? * \
          INVALID_CHARS='[<>:"|?*\\]'
          # Snapshot the tracked paths, NUL-delimited, into a temp file so a
          # failing git command still aborts the step (errexit).
          TRACKED=$(mktemp)
          trap 'rm -f "$TRACKED"' EXIT
          git ls-files -z > "$TRACKED"
          # -z: records are NUL-terminated; -a: never treat the list as binary.
          # "|| true": finding no offending path is the success case.
          INVALID_FILES=$(grep -zaE "$INVALID_CHARS" "$TRACKED" | tr '\0' '\n' || true)
          if [ -n "$INVALID_FILES" ]; then
            echo "::error::The following files have Windows-incompatible characters in their names:"
            # IFS= keeps leading/trailing whitespace in a name intact.
            while IFS= read -r file; do
              echo "::error file=${file}::File contains Windows-incompatible characters"
              echo " - $file"
            done <<< "$INVALID_FILES"
            echo ""
            echo "Please rename these files to remove characters: < > : \" | ? * \\"
            echo "For ISO timestamps, replace colons with hyphens (e.g., 08:40:24 -> 08-40-24)"
            exit 1
          else
            echo "✓ All filenames are Windows-compatible"
          fi

      - name: Check for files larger than 10MB
        run: |
          echo "Checking for files larger than 10MB..."
          MAX_SIZE=$((10 * 1024 * 1024)) # 10 MB in bytes
          LARGE_FILES=""
          # Snapshot the tracked paths, NUL-delimited, into a temp file so a
          # failing git command still aborts the step (errexit).
          TRACKED=$(mktemp)
          trap 'rm -f "$TRACKED"' EXIT
          git ls-files -z > "$TRACKED"
          while IFS= read -r -d '' file; do
            # Only regular files present in the working tree are measured.
            [ -f "$file" ] || continue
            # GNU stat first, BSD stat as fallback. "|| true" stops errexit
            # from killing the step so the warning below can be reported.
            size=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null || true)
            if [ -z "$size" ]; then
              echo "::warning file=${file}::Could not determine size of file"
              continue
            fi
            if [ "$size" -gt "$MAX_SIZE" ]; then
              size_mb=$(awk -v bytes="$size" 'BEGIN { printf "%.2f", bytes / 1024 / 1024 }')
              echo "::error file=${file}::File size (${size_mb} MB) exceeds 10MB limit"
              # Real newlines (not "\n" + echo -e) so backslashes in a file
              # name are never re-interpreted when the summary is printed.
              LARGE_FILES+="${file} (${size_mb} MB)"$'\n'
            fi
          done < "$TRACKED"
          if [ -n "$LARGE_FILES" ]; then
            echo ""
            echo "The following files exceed 10MB:"
            printf '%s\n' "$LARGE_FILES"
            echo ""
            echo "Please consider one of these options:"
            echo " 1. Split the file into smaller parts with suffixes .part1, .part2, etc."
            echo " 2. Remove unnecessary content from the file"
            exit 1
          else
            echo "✓ All files are within the 10MB size limit"
          fi

      - name: Check for dangerous Unicode characters
        run: |
          echo "Checking for dangerous Unicode characters (Trojan Source / glassworm)..."
          # Dangerous character ranges:
          #   U+0000-0008, U+000B-000C, U+000E-001F  C0 control chars (except TAB, LF, CR)
          #   U+007F-009F                            DELETE + C1 control chars
          #   U+200B-200D                            Zero-width space/non-joiner/joiner
          #   U+FEFF                                 Zero-width no-break space (BOM)
          #   U+202A-202E                            BiDi embedding/override (Trojan Source)
          #   U+2066-2069                            BiDi isolate chars (Trojan Source)
          FOUND_FILES=""
          # Snapshot the tracked paths, NUL-delimited, into a temp file so a
          # failing git command still aborts the step (errexit).
          TRACKED=$(mktemp)
          trap 'rm -f "$TRACKED"' EXIT
          git ls-files -z > "$TRACKED"
          while IFS= read -r -d '' file; do
            [ -f "$file" ] || continue
            # Skip binary files. -b prints the MIME type alone, so a path that
            # happens to contain "text/" cannot pass a binary off as text.
            # An empty result (file(1) produced no output) falls through to
            # the scan rather than silently skipping the file.
            # NOTE(review): only text/* is scanned; file(1) reports some
            # textual formats (e.g. JSON) as application/*, so those are not
            # covered here - consider widening the accepted types.
            mime=$(file -b --mime-type "$file" 2>/dev/null || true)
            case "$mime" in
              text/* | '') ;;
              *) continue ;;
            esac
            # Print every line (with its line number) that contains at least
            # one character from the ranges listed above.
            MATCHES=$(perl -CSD -ne '
              if (/[\x{0000}-\x{0008}\x{000B}\x{000C}\x{000E}-\x{001F}\x{007F}-\x{009F}\x{200B}-\x{200D}\x{FEFF}\x{202A}-\x{202E}\x{2066}-\x{2069}]/) {
                print " line $.: $_";
              }
            ' "$file" 2>/dev/null || true)
            if [ -n "$MATCHES" ]; then
              echo "::error file=${file}::File contains dangerous Unicode characters"
              # Real newlines (not "\n" + echo -e): the matched lines are file
              # content and must be printed verbatim, never as escape sequences.
              FOUND_FILES+="${file}:"$'\n'"${MATCHES}"$'\n'
            fi
          done < "$TRACKED"
          if [ -n "$FOUND_FILES" ]; then
            echo ""
            echo "The following files contain dangerous Unicode characters:"
            printf '%s\n' "$FOUND_FILES"
            echo ""
            echo "These invisible or rendering-altering characters can be used for"
            echo "Trojan Source or glassworm-style attacks. Detected categories:"
            echo " - C0/C1 control characters (U+0000-001F, U+007F-009F, except TAB/LF/CR)"
            echo " - Zero-width characters (U+200B-200D, U+FEFF)"
            echo " - BiDi override/isolate (U+202A-202E, U+2066-2069)"
            echo ""
            echo "Please remove these characters from the affected files."
            exit 1
          else
            echo "✓ No dangerous Unicode characters found"
          fi