Skip to content

Commit 67c5727

Browse files
Yegor Ievlev authored and
Hainish committed
Relaxng whitelist
1 parent 820a3f4 commit 67c5727

File tree

4 files changed

+152
-14
lines changed

4 files changed

+152
-14
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"AJC.com.xml": "8fc6770176eb8ce3a0e7718c23eda87b5e42b5728c8ca11e6294b2bf1314cd6a",
3+
"AdultShopping.com.xml": "9b0c1717949ea0261768e12231136f05b8ee38a05accbf5ef44aaa0448243763",
4+
"Aeriagames.xml": "2bba736076144c6e074447935696b487b2457699ad13c4c0da5036f27408e7a5",
5+
"BME.hu.xml": "27b2ce590d3f055f2b2e302adaebea4076b35a6ae88ae5571f8b99b90596f02b",
6+
"BME.hu_incomplete.xml": "55f4d6dc511bed9ebc7b1e7fcf0e71393410b289f56b24104e5c4868d23709f4",
7+
"Blip.xml": "66331a40fd9d91e4330901622ab1bbdc82ba217788292b1f807a30e2624d560a",
8+
"CA-State-Board-of-Equalization.xml": "40eda2604a5f9e813b730b16c02462cc669f5023b3e05ea9c58b95698ff66b2a",
9+
"CafePress.xml": "b49b4768e3a293b9d10b79ed43be486cfa115d080549cfc1740e19bfb8b22f4b",
10+
"Caltech.xml": "042dc612e2c72f4a301a85723831437bf10b989aa19fb641a17dba73ffbe58d6",
11+
"Care-Net.xml": "607016d5e3a347555805ecf03412c853154a14bdee9f4df6ec5c768e9e658b1a",
12+
"Cashback.co.uk.xml": "217ff6ddb616b6c5d083457725f10f3a4be6dadea364cc6d7ce991b3ca54bdaf",
13+
"Comic-Con-Intl.xml": "8c916f22f2e2eb2341cbe6ab9deac7e4f30b8417b069ce627f28a4160c00febc",
14+
"Cru.org.xml": "f8652acfddf3457e9e0e85b2bd132a38ff68b381f932ddd3b5a21969b2dcee6c",
15+
"DailyDot.xml": "d6e3dc30934e0371fd209a4b31f31543f2fbaee16a4d96a81176215e2a3dc7ef",
16+
"Epic-Systems.xml": "7b8ae8c613a3a8af91a71295c4ab70980384910c901ad3418114f71d56e70db3",
17+
"Fasthosts.xml": "be9a331d03034ba8a740ba7b54b20d6ccb3b852bef352cad65509db3c1e49ed6",
18+
"First_Class_Magazine.se.xml": "74625aa6750d0e100f2f58097b3d8f4ef42234f6e3c8aaf09b0c0d650c214ac3",
19+
"InkFrog.com.xml": "b0674aba431f6f84a1712c31e7d36a881470fe0a99682fd24229399252192f13",
20+
"Intelliworks_Chat.com.xml": "a2935104eeec79044ee7c456a5a86422d95f7df2c84328b097f07605e5b49683",
21+
"KKH.de.xml": "d7e1e0d8a6e8cabc8f3f66b5c0f31373fe1c5b6226d2e5d5a4ff869aab80e23b",
22+
"Khalsa-Credit-Union.xml": "012ae8fce4391969449ac24dac608aa1243430a64290db9a8986160423c2c9ce",
23+
"Knappschaft-Bahn-See.xml": "1d11fb0e62d795c0e1f849247bb0fbd820621587ee63afd9712a7a448294b53e",
24+
"MYEDDEBT.com.xml": "944ee1959b21eb47077b1bad494a3be3985c59f18b472b2ea0c9204c34ce5a2c",
25+
"MoveOn.xml": "bbe420e68b9c3cfef2fb973e6e91bd385ea169958941c990015f14683d99db37",
26+
"MyEdAccount.Com.xml": "9ef73c2c7642cb622145154e9b86859f1b55598fd5a97f29a9bde51764528fe6",
27+
"Natl-American-Arab-Nursing-Assn.xml": "436ca84d68ab3d9bd61a8b53d936954fcfa84bb3eec4938578f2e4cf4d86ad42",
28+
"Necuhb.xml": "aae1dd28949eb63e1518412b3063cd8db7fb5d1e2fcd31af51b4d9a7e28582ec",
29+
"Pair-Networks.xml": "2942c293649a231016bdaf6d94a045f8053ecb1754c56e9360c0db8a69d22a35",
30+
"Perfect_Audience.xml": "2805cdd9fb991f19b1b3f2c19bb5fe58094930ec0f802c3c4f24004e0c03d519",
31+
"Plymouth-University.xml": "c0dafeb264c87fb39e69321f0d9b5b6c940c8f1049de25f57e3e70ca702523e2",
32+
"SPajIT.xml": "4f5afd28fc7b6c403dda9c120786dcc43c37fab22b1c9c9c36edeba5b15b5704",
33+
"SSSup.it.xml": "c9ad023f61c9cd1d3706e52cc669f633d12053fa22c25c9701e36c06d5fc5cdf",
34+
"Sberbank_of_Russia.xml": "d30a764588f988909b8455f18519f05a8420301505916efe6ffb20e61aae2bac",
35+
"Telus.xml": "4640813b9d6571afc2b0492e6cc87441f160f4d457ed1be96f14c5c74d607ab2",
36+
"US-Dept-of-Veterans-Affairs.xml": "b27a17d365037a96b22427b5ace8e28661fd70e0362822c2cd791cb5f4bfb83d",
37+
"VEVO.xml": "499d9ae9dab918b21217c5f710550576bfc4415653422bbca7f7eaeb18665b7b",
38+
"Yimg.com.xml": "e0aa84f8ba58a339cabda74594b507c6427a9856a57b1ee3232c7f38ab80751a",
39+
"YouVersion.xml": "ccf17e77a962a90d0134a342bbf02a2c48fceaa22672205030959e5eabbb54d1",
40+
"Your-Mailing-List-Provider.xml": "56e89e377a8cc26456aeaf380a689621afd18dd2e445399865874768f48943e1",
41+
"basekit.com.xml": "1be99dfdfb4634cdccca0ed844c0319740db173777c3ed8f3701ee0c955419e7",
42+
"spectator.co.uk.xml": "43193aad792e4d54329e2adaa183bfe551251f43c168a68cda7c28345b173e94"
43+
}

test/validations/relaxng/run.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,59 @@
33

44
import argparse
55
import glob
6+
import hashlib
7+
import json
68
import os
79

810
from lxml import etree
911

10-
# commandline arguments parsing (nobody use it, though)
1112
parser = argparse.ArgumentParser(description="Validate rulesets against relaxng schema.xml")
1213
parser.add_argument("--source_dir", default="src/chrome/content/rules")
14+
parser.add_argument("--https2https_whitelist", default="test/validations/relaxng/https2https_whitelist.json")
1315

1416
args = parser.parse_args()
1517

16-
# XML ruleset files
1718
files = glob.glob(os.path.join(args.source_dir, "*.xml"))
1819

19-
# read the schema file
20-
relaxng_doc = etree.parse('test/validations/relaxng/schema.xml')
20+
relaxng_doc = etree.parse("test/validations/relaxng/schema.xml")
2121
relaxng = etree.RelaxNG(relaxng_doc)
2222

23+
relaxng_doc_https2https = etree.parse("test/validations/relaxng/schema_https2https.xml")
24+
relaxng_https2https = etree.RelaxNG(relaxng_doc_https2https)
25+
26+
https2https_whitelist = None
27+
28+
with open(args.https2https_whitelist, "r") as file:
29+
https2https_whitelist = json.load(file)
30+
2331
exit_code = 0
2432

2533
print("Validation of rulesets against relaxng schema.xml begins...")
2634

2735
for filename in sorted(files):
2836
tree = etree.parse(filename)
2937

30-
if not relaxng.validate(tree):
38+
basename = os.path.basename(filename)
39+
40+
ruleset_relaxng = relaxng
41+
42+
if basename in https2https_whitelist:
43+
with open(filename, "rb") as file:
44+
if hashlib.sha256(file.read()).hexdigest() == https2https_whitelist[basename]:
45+
ruleset_relaxng = relaxng_https2https
46+
47+
if not ruleset_relaxng.validate(tree):
3148
exit_code = 1
32-
e = relaxng.error_log.last_error
49+
e = ruleset_relaxng.error_log.last_error
3350
print("{} {}:{}:{}: {}".format(e.level_name, e.filename, e.line, e.column, e.message))
3451

3552
if exit_code == 0:
36-
message = "Validation of rulesets against relaxng schema.xml succeeded."
53+
print("Validation of rulesets against relaxng schema.xml succeeded.")
3754
else:
38-
message = "\nTwo very common reasons for this are the following:\n" \
39-
" - missing caret (^) in 'from' attribute: it should be \"^http:\" and not \"http:\"\n" \
40-
" - missing trailing slashes in 'from' or 'to' when specifying full hostnames: \n" \
41-
" it should be \"https://eff.org/\" and not \"https://eff.org\"\n\n" \
42-
"Validation of rulesets against relaxng schema.xml failed."
55+
print("Validation of rulesets against relaxng schema.xml failed.\n\n" \
56+
"Two very common reasons for this are the following:\n" \
57+
" - missing caret (^) in \"from\" attribute: it should be \"^http:\" and not \"http:\"\n" \
58+
" - missing trailing slashes in \"from\" or \"to\" when specifying full hostnames: \n" \
59+
" it should be \"https://eff.org/\" and not \"https://eff.org\"\n")
4360

44-
print(message)
4561
exit(exit_code)

test/validations/relaxng/schema.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
the hostname), and at least one other slash (terminating the
6161
hostname, and possibly beginning a path). Alternatively it can be
6262
the literal string "^http:". -->
63-
<param name="pattern">(\^.*//.*/.*|\^http:)</param>
63+
<param name="pattern">(\^http://.*/.*|\^http:)</param>
6464
</data>
6565
</attribute>
6666
<attribute name="to">
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
<element xmlns="http://relaxng.org/ns/structure/1.0" name="ruleset" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
2+
<attribute name="name" />
3+
4+
<optional>
5+
<attribute name="default_off" />
6+
</optional>
7+
8+
<optional>
9+
<attribute name="platform">
10+
<data type="string">
11+
<param name="pattern">mixedcontent</param>
12+
</data>
13+
</attribute>
14+
</optional>
15+
16+
<interleave>
17+
18+
<zeroOrMore>
19+
<element name="test">
20+
<attribute name="url">
21+
<data type="string">
22+
<param name="pattern">http[^\\]+</param>
23+
</data>
24+
</attribute>
25+
</element>
26+
</zeroOrMore>
27+
28+
<oneOrMore>
29+
<element name="target">
30+
<attribute name="host">
31+
<data type="string">
32+
<param name="pattern">(([a-z0-9äö_-]+|\*)\.)*([a-z0-9äö-]+|\*)</param>
33+
</data>
34+
</attribute>
35+
</element>
36+
</oneOrMore>
37+
38+
<zeroOrMore>
39+
<element name="exclusion">
40+
<attribute name="pattern" />
41+
</element>
42+
</zeroOrMore>
43+
44+
<zeroOrMore>
45+
<element name="securecookie">
46+
<attribute name="host"/>
47+
<data type="string">
48+
<param name="pattern">[^/]*</param>
49+
</data>
50+
<attribute name="name"/>
51+
</element>
52+
</zeroOrMore>
53+
54+
<oneOrMore>
55+
<element name="rule">
56+
<attribute name="from">
57+
<data type="string">
58+
<!-- The from attribute of a rule must start with a caret.
59+
It also must contain two slashes (separating the protocol from
60+
the hostname), and at least one other slash (terminating the
61+
hostname, and possibly beginning a path). Alternatively it can be
62+
the literal string "^http:". -->
63+
<param name="pattern">(\^.*//.*/.*|\^http:)</param>
64+
</data>
65+
</attribute>
66+
<attribute name="to">
67+
<data type="string">
68+
<!-- The to attribute of a rule must not contain spaces or
69+
backslashes. It also must contain at least one slash after the
70+
hostname. Alternatively it can be the literal string "https:"-->
71+
<param name="pattern">(https://[^ \\]*/[^ \\]*|https:)</param>
72+
</data>
73+
</attribute>
74+
</element>
75+
</oneOrMore>
76+
77+
</interleave>
78+
79+
</element>

0 commit comments

Comments
 (0)