|
| 1 | +#!/usr/bin/env bash |
| 2 | +set -euo pipefail |
| 3 | +# shellcheck source=scripts/lib.sh |
| 4 | +source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" |
| 5 | +cdroot |
| 6 | + |
echo "--- check for emdash/endash characters"

# Scan only changed lines unless --all appears anywhere on the command
# line. Any other argument is ignored.
mode="changed"
for arg in "$@"; do
  case "$arg" in
    --all) mode="all" ;;
  esac
done

# Both characters are spelled as UTF-8 byte escapes so that this script
# never contains the literal characters it is looking for; otherwise the
# check would flag itself whenever the script shows up in a diff.
emdash=$'\xE2\x80\x94'
endash=$'\xE2\x80\x93'
# Extended-regex alternation of the two characters.
printf -v pattern '%s|%s' "$emdash" "$endash"
| 22 | + |
#######################################
# Scan every tracked file for emdash/endash characters.
# Globals:
#   pattern (read)  - extended regex matching the forbidden characters
#   found (written) - set to 1 if any match was printed, otherwise 0
# Arguments: none
# Outputs:   one "file:line:content" record per matching line on stdout
# Exits:     1 if `git ls-files` itself fails, so a broken listing can
#            never be mistaken for a clean scan
#######################################
scan_all_files() {
  local output
  # The subshell reports the exit status of `git ls-files` alone. The
  # status of xargs/grep is deliberately ignored: "no match" and
  # "a tracked path could not be read" are both expected there.
  if ! output=$(
    set +e
    # -H forces the file name prefix even when xargs passes a single
    #    path to grep (otherwise grep prints only "line:content").
    # -I skips binary files.
    # grep's stderr is dropped on purpose: tracked paths that are
    # missing from the working tree or are directories are noise here.
    git ls-files -z | xargs -0 grep -IHEn -e "$pattern" 2>/dev/null
    exit "${PIPESTATUS[0]}"
  ); then
    echo "ERROR: could not list tracked files with git ls-files."
    exit 1
  fi

  if [[ -n "$output" ]]; then
    printf '%s\n' "$output"
    found=1
  else
    found=0
  fi
}
| 33 | + |
#######################################
# Scan a unified diff for emdash/endash characters on added lines only.
# The diff text is read from stdin.
#
# Hunk headers carry the number of old and new lines that follow, so the
# parser counts lines down instead of guessing from their first bytes.
# This keeps added content that merely looks like a header (for example
# a line whose text starts with "++ b/") from being skipped or from
# overwriting the current file name.
#
# Globals:
#   emdash, endash (read) - the forbidden characters
#   found (written)       - set to 1 if any offending line was printed,
#                           otherwise 0
# Arguments: none
# Outputs:   one "file:line:content" record per offending line on stdout
#######################################
scan_added_lines() {
  local diff_line current_file="" current_line=0
  local old_left=0 new_left=0
  # Kept in variables so the regexes need no shell escaping inside [[ ]].
  local hunk_re='^@@ -[0-9]+(,([0-9]+))? \+([0-9]+)(,([0-9]+))? @@'
  local file_re='^\+\+\+ b/(.*)$'

  found=0
  while IFS= read -r diff_line; do
    if ((old_left > 0 || new_left > 0)); then
      # Inside a hunk: every line is content, never a header.
      case "$diff_line" in
        +*)
          # Plain substring match: no subprocess per line, and no
          # pipeline whose exit status could be spoiled by SIGPIPE.
          if [[ "$diff_line" == *"$emdash"* || "$diff_line" == *"$endash"* ]]; then
            printf '%s:%s:%s\n' "$current_file" "$current_line" "${diff_line:1}"
            found=1
          fi
          current_line=$((current_line + 1))
          new_left=$((new_left - 1))
          ;;
        -*)
          old_left=$((old_left - 1))
          ;;
        \\*)
          # "\ No newline at end of file" marker: belongs to neither side.
          ;;
        *)
          # Context line (not expected with -U0): present on both sides.
          current_line=$((current_line + 1))
          old_left=$((old_left - 1))
          new_left=$((new_left - 1))
          ;;
      esac
      continue
    fi

    if [[ "$diff_line" =~ $hunk_re ]]; then
      # An omitted count in "@@ -a[,b] +c[,d] @@" means exactly one line.
      old_left=${BASH_REMATCH[2]:-1}
      current_line=${BASH_REMATCH[3]}
      new_left=${BASH_REMATCH[5]:-1}
    elif [[ "$diff_line" =~ $file_re ]]; then
      current_file=${BASH_REMATCH[1]}
      # git appends a tab to header paths that contain a space.
      current_file=${current_file%$'\t'}
    fi
  done
}

if [[ "$mode" == "all" ]]; then
  scan_all_files
else
  # Pick the ref to diff against: the pull request base branch when CI
  # provides one, otherwise the merge base with origin/main.
  base=""
  if [[ -n "${GITHUB_BASE_REF:-}" ]]; then
    base="origin/${GITHUB_BASE_REF}"
  elif git rev-parse --verify origin/main >/dev/null 2>&1; then
    base=$(git merge-base HEAD origin/main 2>/dev/null || echo "origin/main")
  fi

  if [[ -z "$base" ]]; then
    echo "WARNING: no base ref found, scanning all tracked files."
    scan_all_files
  else
    # Ensure the base ref is fetchable. CI shallow clones
    # (fetch-depth: 1) may not have the base branch available.
    if ! git rev-parse --verify "$base" >/dev/null 2>&1; then
      ref="${base#origin/}"
      echo "Base ref $base not found locally, fetching $ref..."
      # Best effort: the rev-parse below decides whether it worked.
      git fetch origin "$ref" --depth=1 2>/dev/null || true
      if ! git rev-parse --verify "$base" >/dev/null 2>&1; then
        echo "ERROR: could not fetch base ref $base."
        exit 1
      fi
    fi

    found=0
    # Pin the output format so user or CI git configuration (forced
    # colour, external diff drivers, custom or missing prefixes, quoted
    # non-ASCII paths) cannot change what the parser sees. stderr is left
    # alone so git warnings are shown instead of being parsed as diff text.
    if ! diff_output=$(git -c core.quotePath=false diff --no-color \
      --no-ext-diff --src-prefix=a/ --dst-prefix=b/ -U0 "$base" -- .); then
      echo "ERROR: git diff against $base failed:"
      exit 1
    fi

    if [[ -z "$diff_output" ]]; then
      echo "OK: no changes to check."
      exit 0
    fi

    # Check only added lines for emdash/endash.
    scan_added_lines <<<"$diff_output"
  fi
fi
| 95 | + |
# Final verdict. $found is 0 when the scan was clean; any other value
# means at least one offending line was printed above, so explain the
# rule and fail the check.
if ! [[ "$found" -ne 0 ]]; then
  echo "OK: no emdash or endash characters found."
else
  printf '%s\n' \
    "" \
    "ERROR: Found emdash (U+2014) or endash (U+2013) characters." \
    "" \
    " Do not use emdash or endash in code, comments, string literals," \
    " or documentation. Use commas, semicolons, or periods instead." \
    " Restructure the sentence if needed. Do not replace them with" \
    " ' -- ' either." \
    "" \
    " Example:" \
    " Bad: This is slow [emdash] we should cache it." \
    " Good: This is slow. We should cache it." \
    " Good: This is slow, so we should cache it." \
    ""
  exit 1
fi
0 commit comments