|
4 | 4 | # prevents inodes from wasting disk space, but more importantly, works around |
5 | 5 | # the fact that zip does not perform well on a pile of small files. |
6 | 6 |
|
7 | | -RULESETS=chrome/content/rules/default.rulesets |
8 | | - |
9 | | -INDENT_CHAR=' ' |
10 | | -# Any whitespace that marks one level of indentation. |
11 | | - |
12 | | -TAG_DEFINITIONS=' |
13 | | -# tag | level of indentation | + prepended linebreaks | + appended linebreaks |
14 | | -rulesetlibrary 0 -1 0 |
15 | | -ruleset 0 1 0 |
16 | | -rule 1 0 0 |
17 | | -target 1 0 0 |
18 | | -exclusion 2 0 0 |
19 | | -securecookie 2 0 0' |
20 | | -# Extra prepended linebreaks are added before the opening <tags>, |
21 | | -# and appended after closing </tags> and <tags/>. It's not perfect but it works. |
22 | | -# One linebreak is implicitly prepended; opt out by supplying -1 |
23 | | -# This does not work whatsoever with nested tags, mind. |
24 | | - |
25 | | -SED_TRIM_CMD=' |
26 | | - :a |
27 | | - s/<!--.*-->//g |
28 | | - /<!--/N |
29 | | - //ba |
30 | | -
|
31 | | - s/\([^ ]\)\(to\|from\|name\)=/\1 \2=/g |
32 | | - s: />:/>:g |
33 | | - s/\([^ ]\) \{2,\}/\1/g |
34 | | - s/ \+$//g |
35 | | - s:\(http[s?]\{,2\}\)[^:]//:\1\://:g |
36 | | - s:\([^:]\)/\{2,\}:\1/:g' |
37 | | -# sed command to scrub comments and fix various whitespace irregularities; |
38 | | -# missing whitespace in between tag fields: <x y="z"trapped=":("/>,
39 | | -# random double and trailing whitespace, unwanted whitespace before '/>', |
40 | | -# semicolons after protocols;// (rather than colons), and mid-URI double slashes |
41 | | - |
42 | | - |
43 | | -# Functions |
44 | | - |
45 | | -repeat_char() { |
46 | | - [ $2 -gt 0 ] || return |
47 | | - local i |
48 | | - for i in $(seq 1 $2); do printf "$1"; done |
49 | | -} |
50 | | - |
51 | | -format_rulesets() { |
52 | | - local IFS tag idepth prebreaks postbreaks |
53 | | - local _indent _pre _post _sed_pre _sed_post _sed_oneshot |
54 | | - |
55 | | - # Print pretty banner, very hardcoded |
56 | | - printf '\n' |
57 | | - printf '%15s | %s | %s | %s\n' 'tag name' 'indent' 'prebreak' 'postbreak' |
58 | | - printf '%15s-+-%s-+-%s-+-%s\n' \ |
59 | | - '-------------' '------' '--------' '----------' |
60 | | - |
61 | | - # Iterate through tags and add appropriate indentation and linebreaks |
62 | | - while read tag idepth prebreaks postbreaks; do |
63 | | - ( [ "$tag" = '#' ] || [ ! $postbreaks ] ) && continue # Invalid; skip |
64 | | - unset _indent _pre _post _sed_pre _sed_post _sed_oneshot |
65 | | - |
66 | | - printf "%15s | %6d | %8d | %9s\n" "$tag" $idepth $prebreaks $postbreaks |
67 | | - |
68 | | - # Special characters (\n) need double escaping when saved to a variable |
69 | | - # since they are dereferenced and break every time they're passed around.
70 | | - # bash printf has a %q format character for this, but we're /bin/sh |
71 | | - |
72 | | - # Should always be a prepended linebreak unless we opt out with -1 |
73 | | - _pre="$(repeat_char '\\n' $((prebreaks+1)))" |
74 | | - _post="$(repeat_char '\\n' $postbreaks)" |
75 | | - _indent="$(repeat_char "$INDENT_CHAR" $idepth)" |
76 | | - |
77 | | - # breaks before opening <tags> and <tags/> |
78 | | - _sed_pre="s:<${tag}[ />]:${_pre}${_indent}\0:g;" |
79 | | - # breaks after closing </tags> |
80 | | - _sed_post="s:</${tag}>:\n${_indent}\0${_post}:g;" |
81 | | - # breaks after oneshot <tags/> |
82 | | - _sed_oneshot="s:<${tag}\(/>\| [^>]\+/>\):\0${_post}:g;" |
83 | | - |
84 | | - sed -ir "$_sed_pre $_sed_post $_sed_oneshot" $RULESETS |
85 | | - done <<- EOF |
86 | | - $TAG_DEFINITIONS |
87 | | - EOF |
88 | | - |
89 | | - echo #padding for some distance after the tag table |
90 | | -} |
91 | | - |
92 | | -rulesize() { |
93 | | - wc -c < $RULESETS |
94 | | -} |
| 7 | +python ./utils/merge-rulesets.py |
95 | 8 |
|
96 | | -populate_rulesets() { |
97 | | - local xmlfile |
98 | | - # Under git bash, sed -i issues errors and sets the file "read only" |
99 | | - [ -f "$RULESETS" ] && chmod u+w $RULESETS |
100 | | - |
101 | | - printf '<rulesetlibrary gitcommitid="%s">' \ |
102 | | - "${GIT_COMMIT_ID:-unset}" > $RULESETS |
103 | | - |
104 | | - # Include the filename.xml as the "f" attribute |
105 | | - for xmlfile in chrome/content/rules/*.xml; do |
106 | | - sed "s/<ruleset/\0 f=\"${xmlfile##*/}\"/g" "$xmlfile" >> $RULESETS |
107 | | - done |
108 | | - |
109 | | - echo "</rulesetlibrary>" >> $RULESETS |
110 | | -} |
111 | | - |
112 | | -flatten_file() { |
113 | | - # Strip *all* control chars; we'll re-add them soon as per tag definitions. |
114 | | - # tr cannot edit in-place so we need a temporary copy, either in a file or a variable
115 | | - echo "$(tr -d '[:cntrl:]' < $RULESETS | tr -s '[:space:]')" > $RULESETS |
116 | | - # Beware that this *assumes* the shell in use accepts variables larger than 2 MB.
117 | | -} |
118 | | - |
119 | | - |
120 | | -# Execution start |
121 | | - |
122 | | -cd src |
123 | | - |
124 | | -echo "Creating ruleset library..." |
125 | | -populate_rulesets |
126 | | - |
127 | | -echo "Removing control characters, whitespace and comments..." |
128 | | -PRECRUSH=$(rulesize) |
129 | | -flatten_file |
130 | | - |
131 | | -echo "Formatting..." |
132 | | -format_rulesets |
133 | | - |
134 | | -echo "Final touches..." |
135 | | -# sed -i is not portable (GNU extension; GNU sed also parses -ir as -i with backup suffix "r", not -i -r), but maybe we don't care.
136 | | -sed -ir "$SED_TRIM_CMD" $RULESETS |
137 | | -POSTCRUSH=$(rulesize) |
138 | | - |
139 | | -# All done, print summary |
140 | | -printf "Crushed %d bytes of rulesets into %d (delta %d)\n" \ |
141 | | - $PRECRUSH $POSTCRUSH $((POSTCRUSH-PRECRUSH)) |
142 | | - |
143 | | -# Timestamp |
144 | | -touch -r chrome/content/rules $RULESETS |
145 | | - |
146 | | -# We need to keep $RULESETS for makecrx.sh but the rest is of no further use |
147 | | -unset INDENT_CHAR TAG_DEFINITIONS SED_TRIM_CMD PRECRUSH POSTCRUSH |
148 | | -unset repeat_char format_rulesets rulesize populate_rulesets flatten_file |
149 | | - |
150 | | -cd .. |
151 | | - |
152 | | - |
153 | | -# grep tests to ensure the sed magic worked (should find no matches): |
154 | | -# |
155 | | -# non-indenting double whitespace: '[^ ] \{2,\}' |
156 | | -# missing space after field: '="[^"]\+"[^ />]' # not perfect |
157 | | -# trailing whitespace: ' $' # pipe to | cat -A - |
158 | | -# malformed http(s) protocol text 'http[s?]\{,2\}[^:]//' |
159 | | -# random double+ slashes: '[^:;]//' |
160 | | -# |
| 9 | +RULESETS=chrome/content/rules/default.rulesets |
0 commit comments