Skip to content

Commit b9b5631

Browse files
committed
Try replacing sed with python, compare results
1 parent e5bd36f commit b9b5631

File tree

1 file changed

+23
-15
lines changed

1 file changed

+23
-15
lines changed

utils/merge-rulesets.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,20 @@
3838
if os.path.isfile(rulesets_fn):
3939
os.system("chmod u+w " + rulesets_fn)
4040

41+
def rulesize():
    """Return the on-disk size of the merged ruleset library, in bytes.

    The size is taken from the filesystem metadata of ``rulesets_fn``
    rather than by reading the file: that avoids loading the whole library
    into memory, avoids leaving an open file handle behind, and yields a
    true byte count (the caller reports the figure as "bytes"), whereas
    the length of decoded text can differ from the byte count.

    Data still sitting in an unflushed write buffer is not counted, so
    call this only after the library file has been flushed or closed.
    """
    return os.path.getsize(rulesets_fn)
43+
44+
# Compiled once at import time: clean_up() is called once per ruleset file.
_COMMENT_OR_NEWLINE = re.compile(r"<!--.*?-->|\n|\r", flags=re.DOTALL)
# Matches either a complete quoted attribute value (group 1) or a to=/from=
# attribute name together with any whitespace in front of it (group 2).
# Quoted values are matched only so that they can be skipped over.
_QUOTED_OR_TO_FROM = re.compile(r'''("[^"]*"|'[^']*')|\s*(to=|from=)''')
_BETWEEN_TAGS = re.compile(r">\s*<")


def _respace_to_from(match):
    """Return quoted values unchanged; put one space before to=/from=."""
    if match.group(1) is not None:
        return match.group(1)
    return " " + match.group(2)


def clean_up(rulefile):
    """Remove extra whitespace and comments from a ruleset's XML text.

    Steps, in order:
      1. Drop XML comments and every CR/LF character.
      2. Ensure exactly one space precedes each ``to=`` / ``from=``
         attribute. A space is inserted even when none is present, because
         step 1 can glue an attribute onto the preceding token when it was
         on its own unindented line.
      3. Remove whitespace between adjacent tags.
      4. Turn `` />`` into ``/>``.
      5. Put a newline after every ``</ruleset>``.

    Text inside quoted attribute values is skipped in step 2, so a URL
    such as ``?goto=$1`` is not rewritten to ``?go to=$1``.

    Args:
        rulefile: the ruleset XML as a string.

    Returns:
        The compacted XML string.
    """
    rulefile = _COMMENT_OR_NEWLINE.sub("", rulefile)
    rulefile = _QUOTED_OR_TO_FROM.sub(_respace_to_from, rulefile)
    rulefile = _BETWEEN_TAGS.sub("><", rulefile)
    rulefile = rulefile.replace(" />", "/>")
    rulefile = rulefile.replace("</ruleset>", "</ruleset>\n")
    return rulefile
54+
4155
library = open(rulesets_fn,"w")
4256

4357
# XXX TODO replace all sed commands with native Python
@@ -51,29 +65,23 @@
5165
library.write('<rulesetlibrary>')
5266

5367
# Include the filename.xml as the "f" attribute
68+
print("Removing whitespaces and comments...")
69+
70+
crush = rulesize()
71+
5472
for rfile in sorted(xml_ruleset_files):
5573
ruleset = open(rfile).read()
5674
fn=os.path.basename(rfile)
5775
ruleset = ruleset.replace("<ruleset", '<ruleset f="%s"' % fn, 1)
58-
library.write(ruleset)
76+
library.write(clean_up(ruleset))
5977
library.write("</rulesetlibrary>\n")
6078
library.close()
6179

62-
print("Removing whitespaces and comments...")
63-
64-
def rulesize():
65-
return len(open(rulesets_fn).read())
80+
#sedcmd = ["sed", "-i", "-e", ":a", "-re"]
81+
#call(sedcmd + [r"s/<!--.*?-->//g;/<!--/N;//ba", rulesets_fn])
82+
#call(["sed", "-i", r":a;N;$!ba;s/\n//g;s/>[ ]*</></g;s/[ ]*to=/ to=/g;s/[ ]*from=/ from=/g;s/ \/>/\/>/g", rulesets_fn])
83+
#call(["sed", "-i", r"s/<\/ruleset>/<\/ruleset>\n/g", rulesets_fn])
6684

67-
def clean_up(rulefile):
68-
"""Remove extra whitespace and comments from ruleset library"""
69-
comment_and_newline_pattern = re.compile(r"<!--.*?-->|\n|\r", flags=re.DOTALL)
70-
rulefile = comment_and_newline_pattern.sub('', rulefile)
71-
72-
crush = rulesize()
73-
sedcmd = ["sed", "-i", "-e", ":a", "-re"]
74-
call(sedcmd + [r"s/<!--.*?-->//g;/<!--/N;//ba", rulesets_fn])
75-
call(["sed", "-i", r":a;N;$!ba;s/\n//g;s/>[ ]*</></g;s/[ ]*to=/ to=/g;s/[ ]*from=/ from=/g;s/ \/>/\/>/g", rulesets_fn])
76-
call(["sed", "-i", r"s/<\/ruleset>/<\/ruleset>\n/g", rulesets_fn])
7785
print("Crushed", crush, "bytes of rulesets into", rulesize())
7886

7987
try:

0 commit comments

Comments
 (0)