|
3 | 3 | # Merge all the .xml rulesets into a single "default.rulesets" file -- this |
4 | 4 | # prevents inodes from wasting disk space, but more importantly, works around |
5 | 5 | # the fact that zip does not perform well on a pile of small files. |
6 | | -cd src |
| 6 | + |
7 | 7 | RULESETS=chrome/content/rules/default.rulesets |
8 | | -echo "Creating ruleset library..." |
9 | 8 |
|
10 | | -# Under git bash, sed -i issues errors and sets the file "read only". Thanks. |
11 | | -[ -f "$RULESETS" ] && chmod u+w $RULESETS |
| 9 | +INDENT_CHAR=' ' |
| 10 | +# Whitespace emitted once per level of indentation. |
| 11 | + |
| 12 | +TAG_DEFINITIONS=' |
| 13 | +# tag | level of indentation | + prepended linebreaks | + appended linebreaks |
| 14 | +rulesetlibrary 0 -1 0 |
| 15 | +ruleset 0 1 0 |
| 16 | +rule 1 0 0 |
| 17 | +target 1 0 0 |
| 18 | +exclusion 2 0 0 |
| 19 | +securecookie 2 0 0' |
| 20 | +# Extra prepended linebreaks are added before opening <tags>, and appended |
| 21 | +# linebreaks after closing </tags> and self-closing <tags/>. Not perfect, but it works. |
| 22 | +# One linebreak is always prepended implicitly; supply -1 to opt out. |
| 23 | +# Note that this does not handle nested tags at all. |
| 24 | + |
| 25 | +SED_TRIM_CMD=' |
| 26 | + :a |
| 27 | + s/<!--.*-->//g |
| 28 | + /<!--/N |
| 29 | + //ba |
| 30 | +

| 31 | + s/\([^ ]\)\(to\|from\|name\)=/\1 \2=/g |
| 32 | + s: />:/>:g |
| 33 | + s/\([^ ]\) \{2,\}/\1/g |
| 34 | + s/ \+$//g |
| 35 | + s:\(http[s?]\{,2\}\)[^:]//:\1\://:g |
| 36 | + s:\([^:]\)/\{2,\}:\1/:g' |
| 37 | +# sed script that scrubs comments and fixes assorted whitespace irregularities: |
| 38 | +# missing whitespace between tag attributes, e.g. <x y="z"trapped=":("/>, |
| 39 | +# repeated and trailing whitespace, unwanted whitespace before '/>', |
| 40 | +# a wrong character where the protocol colon belongs (e.g. http;//), and mid-URI double slashes. |
| | +# NOTE(review): the last two expressions use ':' as the s delimiter and also |
| | +# contain ':' inside bracket expressions -- confirm the sed in use parses them as intended. |
| 41 | + |
| 42 | + |
| 43 | +# Functions |
| 44 | + |
| | +# Print the string $1 to stdout $2 times. Prints nothing and returns non-zero |
| | +# when $2 is not greater than 0. $1 is used as the printf format string, so |
| | +# backslash escapes in it are interpreted: passing '\\n' emits a literal |
| | +# backslash followed by "n", not a newline. |
| 45 | +repeat_char() { |
| 46 | + [ $2 -gt 0 ] || return |
| 47 | + local i |
| 48 | + for i in $(seq 1 $2); do printf "$1"; done |
| 49 | +} |
| 50 | + |
| | +# Re-insert linebreaks and indentation into $RULESETS, one sed pass per tag |
| | +# listed in $TAG_DEFINITIONS, and print a table of the settings applied. |
| | +# Reads: $TAG_DEFINITIONS, $INDENT_CHAR, $RULESETS. Edits $RULESETS in place. |
| 51 | +format_rulesets() { |
| 52 | + local IFS tag idepth prebreaks postbreaks |
| 53 | + local _indent _pre _post _sed_pre _sed_post _sed_oneshot |
| 54 | + |
| 55 | + # Print pretty banner, very hardcoded |
| 56 | + printf '\n' |
| 57 | + printf '%15s | %s | %s | %s\n' 'tag name' 'indent' 'prebreak' 'postbreak' |
| 58 | + printf '%15s-+-%s-+-%s-+-%s\n' \ |
| 59 | + '-------------' '------' '--------' '----------' |
| 60 | + |
| 61 | + # Iterate through tags and add appropriate indentation and linebreaks |
| 62 | + while read tag idepth prebreaks postbreaks; do |
| 63 | + ( [ "$tag" = '#' ] || [ ! $postbreaks ] ) && continue # Invalid; skip |
| 64 | + unset _indent _pre _post _sed_pre _sed_post _sed_oneshot |
| 65 | + |
| 66 | + printf "%15s | %6d | %8d | %9s\n" "$tag" $idepth $prebreaks $postbreaks |
| 67 | + |
| 68 | + # Special characters (\n) need double escaping when saved to a variable |
| 69 | + # since they are dereferenced and break every time they're passed around. |
| 70 | + # bash printf has a %q format character for this, but we're /bin/sh |
12 | 71 |

13 | | -echo "<rulesetlibrary gitcommitid=\"${GIT_COMMIT_ID}\">" > $RULESETS |
14 | | -# Include the filename.xml as the "f" attribute |
15 | | -for file in chrome/content/rules/*.xml; do |
16 | | - xmlInsertString="<ruleset" |
17 | | - fileName=$(basename "$file") |
18 | | - fileContent=$(sed "s/${xmlInsertString}/${xmlInsertString} f=\"${fileName}\"/" "chrome/content/rules/${fileName}") |
19 | | - echo "$fileContent" >> $RULESETS |
20 | | -done |
21 | | -echo "</rulesetlibrary>" >> $RULESETS |
| 72 | + # Should always be a prepended linebreak unless we opt out with -1 |
| 73 | + _pre="$(repeat_char '\\n' $((prebreaks+1)))" |
| 74 | + _post="$(repeat_char '\\n' $postbreaks)" |
| 75 | + _indent="$(repeat_char "$INDENT_CHAR" $idepth)" |
22 | 76 |

23 | | -echo "Removing whitespaces and comments..." |
| 77 | + # breaks before opening <tags> and <tags/> |
| 78 | + _sed_pre="s:<${tag}[ />]:${_pre}${_indent}\0:g;" |
| 79 | + # breaks after closing </tags> |
| 80 | + _sed_post="s:</${tag}>:\n${_indent}\0${_post}:g;" |
| 81 | + # breaks after oneshot <tags/> |
| 82 | + _sed_oneshot="s:<${tag}\(/>\| [^>]\+/>\):\0${_post}:g;" |
| 83 | + |
| | + # Plain -i on purpose: "-ir" is parsed as -i with backup suffix "r", which |
| | + # leaves a stray "${RULESETS}r" file behind; the expressions are basic regexes. |
| 84 | + sed -i "$_sed_pre $_sed_post $_sed_oneshot" $RULESETS |
| 85 | + done <<- EOF |
| 86 | + $TAG_DEFINITIONS |
| 87 | + EOF |
| 88 | + |
| 89 | + echo #padding for some distance after the tag table |
| 90 | +} |
24 | 91 |
|
| | +# Print the current size of $RULESETS in bytes (as reported by wc -c). |
25 | 92 | rulesize() { |
26 | | - echo `wc -c $RULESETS | cut -d \ -f 1` |
| 93 | + wc -c < $RULESETS |
27 | 94 | } |
28 | | -CRUSH=`rulesize` |
29 | | -sed -i -e :a -re 's/<!--.*?-->//g;/<!--/N;//ba' $RULESETS |
30 | | -sed -i ':a;N;$!ba;s/\n//g;s/>[ ]*</></g;s/[ ]*to=/ to=/g;s/[ ]*from=/ from=/g;s/ \/>/\/>/g' $RULESETS |
31 | | -echo "Crushed $CRUSH bytes of rulesets into `rulesize`" |
32 | | - |
33 | | -if [ -x $(which xmllint) ] |
34 | | -then |
35 | | - if xmllint --noout $RULESETS |
36 | | - then |
37 | | - echo "$RULESETS passed XML validity test." |
38 | | - else |
39 | | - echo "ERROR: $RULESETS failed XML validity test." |
40 | | - exit 2 |
41 | | - fi |
42 | | -else |
43 | | - echo "WARNING: xmllint not present; validation of $RULESETS skipped." |
44 | | -fi |
45 | | - |
46 | | -# We make default.rulesets at build time, but it shouldn't have a variable |
47 | | -# timestamp |
| 95 | + |
| | +# (Re)create $RULESETS: an opening <rulesetlibrary> tag carrying |
| | +# $GIT_COMMIT_ID (or "unset" when that is empty or undefined), followed by |
| | +# every chrome/content/rules/*.xml file, followed by the closing tag. |
| 96 | +populate_rulesets() { |
| 97 | + local xmlfile |
| 98 | + # Under git bash, sed -i issues errors and leaves the file read-only, |
| | + # so make sure an existing file is writable before overwriting it. |
| 99 | + [ -f "$RULESETS" ] && chmod u+w $RULESETS |
| 100 | + |
| 101 | + printf '<rulesetlibrary gitcommitid="%s">' \ |
| 102 | + "${GIT_COMMIT_ID:-unset}" > $RULESETS |
| 103 | + |
| 104 | + # Add the source file name (directory stripped) as an "f" attribute to each "<ruleset" match |
| 105 | + for xmlfile in chrome/content/rules/*.xml; do |
| 106 | + sed "s/<ruleset/\0 f=\"${xmlfile##*/}\"/g" "$xmlfile" >> $RULESETS |
| 107 | + done |
| 108 | + |
| 109 | + echo "</rulesetlibrary>" >> $RULESETS |
| 110 | +} |
| 111 | + |
| | +# Rewrite $RULESETS with all control characters deleted and runs of |
| | +# repeated whitespace characters squeezed to a single one. |
| 112 | +flatten_file() { |
| 113 | + # Strip *all* control chars; we'll re-add them soon as per tag definitions. |
| 114 | + # tr cannot edit in place, so the output is buffered in a command |
| | + # substitution before it is written back over the input file. |
| 115 | + echo "$(tr -d '[:cntrl:]' < $RULESETS | tr -s '[:space:]')" > $RULESETS |
| 116 | + # Beware: this *assumes* the shell in use can hold more than 2 MB in a single expansion. |
| 117 | +} |
| 118 | + |
| 119 | + |
| 120 | +# Execution start: build, flatten, re-format and tidy $RULESETS from inside src/ |
| 121 | + |
| 122 | +cd src |
| 123 | + |
| 124 | +echo "Creating ruleset library..." |
| 125 | +populate_rulesets |
| 126 | + |
| 127 | +echo "Removing control characters, whitespace and comments..." |
| 128 | +PRECRUSH=$(rulesize) |
| 129 | +flatten_file |
| 130 | + |
| 131 | +echo "Formatting..." |
| 132 | +format_rulesets |
| 133 | + |
| 134 | +echo "Final touches..." |
| 135 | +# sed -i is not portable (GNU extension), but maybe we don't care. |
| | +# Plain -i on purpose: "-ir" is parsed as -i with backup suffix "r", which |
| | +# leaves a stray "${RULESETS}r" file behind; the script uses basic regexes. |
| 136 | +sed -i "$SED_TRIM_CMD" $RULESETS |
| 137 | +POSTCRUSH=$(rulesize) |
| 138 | + |
| 139 | +# All done, print summary |
| 140 | +printf "Crushed %d bytes of rulesets into %d (delta %d)\n" \ |
| 141 | + $PRECRUSH $POSTCRUSH $((POSTCRUSH-PRECRUSH)) |
| 142 | + |
| 143 | +# Copy the rules directory's timestamp onto $RULESETS so the generated file |
| | +# does not carry a variable build-time timestamp. |
48 | 144 | touch -r chrome/content/rules $RULESETS |
49 | 145 |

| 146 | +# We need to keep $RULESETS for makecrx.sh but the rest is of no further use |
| 147 | +unset INDENT_CHAR TAG_DEFINITIONS SED_TRIM_CMD PRECRUSH POSTCRUSH |
| | +# -f is required: without it, unset is only guaranteed to remove variables. |
| 148 | +unset -f repeat_char format_rulesets rulesize populate_rulesets flatten_file |
| 149 | + |
50 | 150 | cd .. |
| 151 | + |
| 152 | + |
| 153 | +# grep tests to ensure the sed magic worked (should find no matches): |
| 154 | +# |
| 155 | +# non-indenting double whitespace: '[^ ] \{2,\}' |
| 156 | +# missing space after field: '="[^"]\+"[^ />]' # not perfect |
| 157 | +# trailing whitespace: ' $' # pipe to | cat -A - |
| 158 | +# malformed http(s) protocol text 'http[s?]\{,2\}[^:]//' |
| 159 | +# random double+ slashes: '[^:;]//' |
| 160 | +# |
0 commit comments