Skip to content

Commit cdb362d

Browse files
committed
Translate merge-rulesets into python: 5x speedup
1 parent a722f92 commit cdb362d

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

makecrx.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ do_not_ship="*.py *.xml icon.jpg"
7878
rm -f $do_not_ship
7979
cd ../..
8080

81-
. ./utils/merge-rulesets.sh
81+
python ./utils/merge-rulesets.py
8282

8383
cp src/$RULESETS pkg/crx/rules/default.rulesets
8484

makexpi.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,13 @@ fi
103103

104104
# Used for figuring out which branch to pull from when viewing source for rules
105105
GIT_OBJECT_FILE=".git/refs/heads/master"
106-
GIT_COMMIT_ID="HEAD"
106+
export GIT_COMMIT_ID="HEAD"
107107
if [ -e "$GIT_OBJECT_FILE" ]; then
108-
GIT_COMMIT_ID=$(cat "$GIT_OBJECT_FILE")
108+
export GIT_COMMIT_ID=$(cat "$GIT_OBJECT_FILE")
109109
fi
110110

111111

112-
sh ./utils/merge-rulesets.sh
112+
python ./utils/merge-rulesets.py
113113
cd src
114114

115115
# Build the XPI!

utils/merge-rulesets.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python
2+
3+
# Merge all the .xml rulesets into a single "default.rulesets" file -- this
4+
# prevents inodes from wasting disk space, but more importantly, works around
5+
# the fact that zip does not perform well on a pile of small files.
6+
7+
# currently a very literal translation of merge-rulesets.sh, but about five
8+
# times faster
9+
10+
import os
11+
from glob import glob
12+
from subprocess import call
13+
import sys
14+
import traceback
15+
import re
16+
17+
# Build step 1: concatenate every ruleset XML file into one library file.
# All paths below are relative to the "src" directory.
os.chdir("src")
rulesets_fn = "chrome/content/rules/default.rulesets"
print("Creating ruleset library...")

# Under git bash, sed -i issues errors and sets the file "read only". Thanks.
# Restore the owner-write bit natively instead of shelling out to chmod, so
# no shell command line has to be assembled from a path.
if os.path.isfile(rulesets_fn):
    os.chmod(rulesets_fn, os.stat(rulesets_fn).st_mode | 0o200)

# XXX TODO replace all sed commands with native Python
#strip_oneline_comment = re.compile(r"<!--.*?-->")

# makexpi.sh exports GIT_COMMIT_ID (defaulting to "HEAD"). Fall back to the
# same default so the script does not die with a KeyError when a caller has
# not exported the variable.
commit_id = os.environ.get("GIT_COMMIT_ID", "HEAD")

# "with" guarantees the library and every ruleset file are closed, even if
# reading or writing one of them fails part-way through.
with open(rulesets_fn, "w") as library:
    library.write('<rulesetlibrary gitcommitid="%s">' % commit_id)
    # Include the filename.xml as the "f" attribute
    for rfile in sorted(glob("chrome/content/rules/*.xml")):
        with open(rfile) as ruleset_file:
            ruleset = ruleset_file.read()
        fn = os.path.basename(rfile)
        # Only the first "<ruleset" occurrence (the root element) is tagged.
        ruleset = ruleset.replace("<ruleset", '<ruleset f="%s"' % fn, 1)
        library.write(ruleset)
    library.write("</rulesetlibrary>\n")

print("Removing whitespaces and comments...")
42+
43+
def rulesize():
    """Return the length of the merged ruleset library's contents.

    Reads the file named by the module-level ``rulesets_fn`` in text mode
    and returns ``len()`` of what was read. The file is closed before
    returning instead of being left for the garbage collector.
    """
    with open(rulesets_fn) as library_file:
        return len(library_file.read())
45+
46+
# Local import: only this section needs errno (to recognise a missing xmllint).
import errno

# Size before crushing, reported below for comparison.
crush = rulesize()

# Pass 1: delete XML comments. When a line still contains an unterminated
# "<!--", the next line is appended and the substitution is retried.
sedcmd = ["sed", "-i", "-e", ":a", "-re"]
call(sedcmd + [r"s/<!--.*?-->//g;/<!--/N;//ba", rulesets_fn])

# Pass 2: slurp the whole file, drop newlines, remove spaces between tags,
# normalise the spacing before to=/from= and strip the space before "/>".
call(["sed", "-i", r":a;N;$!ba;s/\n//g;s/>[ ]*</></g;s/[ ]*to=/ to=/g;s/[ ]*from=/ from=/g;s/ \/>/\/>/g", rulesets_fn])
print("Crushed %s bytes of rulesets into %s" % (crush, rulesize()))

# Validate the result with xmllint when it is installed. Only the call itself
# is guarded; a missing executable is identified by errno rather than by
# searching the traceback for an English error message, which varies by
# platform and locale.
try:
    xmllint_status = call(["xmllint", "--noout", rulesets_fn])
except OSError as e:
    if e.errno != errno.ENOENT:
        raise
    xmllint_status = None

if xmllint_status is None:
    print("WARNING: xmllint not present; validation of %s  skipped." % rulesets_fn)
elif xmllint_status == 0:
    print("%s passed XML validity test." % rulesets_fn)
else:
    print("ERROR: %s failed XML validity test!" % rulesets_fn)
    sys.exit(1)

# We make default.rulesets at build time, but it shouldn't have a variable
# timestamp
call(["touch", "-r", "chrome/content/rules", rulesets_fn])
66+

0 commit comments

Comments
 (0)