Skip to content

Commit 4964511

Browse files
Chan Chak ShingHainish
authored andcommitted
Update https_everywhere_checker/check_rules.py, Fix EFForg#10877 (EFForg#11187)
1 parent 454fb4a commit 4964511

File tree

4 files changed

+63
-1
lines changed

4 files changed

+63
-1
lines changed

test/rules/coverage.checker.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
[rulesets]
55
rulesdir = src/chrome/content/rules
66
check_coverage = true
7+
check_target_validity = true
78
check_nonmatch_groups = true
89
check_test_formatting = true
910
include_default_off = false

test/rules/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ pycurl
22
regex
33
bsdiff4
44
python-Levenshtein
5+
tldextract

test/rules/src/https_everywhere_checker/check_rules.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,18 @@ def fetchUrl(self, plainUrl, transformedUrl, fetcherPlain, fetcherRewriting, rul
130130
logging.debug("Fetching plain page %s", plainUrl)
131131
# If we get an exception (e.g. connection refused,
132132
# connection timeout) on the plain page, don't treat
133-
# that as a failure.
133+
# that as a failure (except DNS resolution errors)
134134
plainRcode, plainPage = None, None
135135
try:
136136
plainRcode, plainPage = fetcherPlain.fetchHtml(plainUrl)
137137
except Exception, e:
138+
errno, message = e
139+
if errno == 6:
140+
message = "Fetch error: %s => %s: %s" % (
141+
plainUrl, transformedUrl, e)
142+
self.queue_result("error", "fetch-error %s"% e, ruleFname, plainUrl, https_url=transformedUrl)
143+
return message
144+
138145
logging.debug("Non-fatal fetch error for plain page %s: %s" % (plainUrl, e))
139146

140147
# Compare HTTP return codes - if original page returned 2xx,
@@ -290,6 +297,9 @@ def cli():
290297
checkCoverage = False
291298
if config.has_option("rulesets", "check_coverage"):
292299
checkCoverage = config.getboolean("rulesets", "check_coverage")
300+
checkTargetValidity = False
301+
if config.has_option("rulesets", "check_target_validity"):
302+
checkTargetValidity = config.getboolean("rulesets", "check_target_validity")
293303
checkNonmatchGroups = False
294304
if config.has_option("rulesets", "check_nonmatch_groups"):
295305
checkNonmatchGroups = config.getboolean("rulesets", "check_nonmatch_groups")
@@ -341,6 +351,7 @@ def cli():
341351

342352
rulesets = []
343353
coverageProblemsExist = False
354+
targetValidityProblemExist = False
344355
nonmatchGroupProblemsExist = False
345356
testFormattingProblemsExist = False
346357
for xmlFname in xmlFnames:
@@ -363,6 +374,12 @@ def cli():
363374
for problem in problems:
364375
coverageProblemsExist = True
365376
logging.error(problem)
377+
if checkTargetValidity:
378+
logging.debug("Checking target validity for '%s'." % ruleset.name)
379+
problems = ruleset.getTargetValidityProblems()
380+
for problem in problems:
381+
targetValidityProblemExist = True
382+
logging.error(problem)
366383
if checkNonmatchGroups:
367384
logging.debug("Checking non-match groups for '%s'." % ruleset.name)
368385
problems = ruleset.getNonmatchGroupProblems()
@@ -445,6 +462,9 @@ def cli():
445462
if checkCoverage:
446463
if coverageProblemsExist:
447464
return 1 # exit with error code
465+
if checkTargetValidity:
466+
if targetValidityProblemExist:
467+
return 1 # exit with error code
448468
if checkNonmatchGroups:
449469
if nonmatchGroupProblemsExist:
450470
return 1 # exit with error code

test/rules/src/https_everywhere_checker/rules.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
from tldextract import tldextract
12
from urlparse import urlparse
23

34
import regex
5+
import socket
46

57
class Rule(object):
68
"""Represents one from->to rule element."""
@@ -179,6 +181,44 @@ def _determineTestApplication(self):
179181
self.determine_test_application_run = True
180182
return self.test_application_problems
181183

184+
def getTargetValidityProblems(self):
185+
"""Verify that each target has a valid TLD in order to prevent problematic rewrite
186+
as stated in EFForg/https-everywhere/issues/10877. In particular,
187+
right-wildcard target are ignored from this test.
188+
189+
Returns an array of strings reporting any coverage problems if they exist,
190+
or empty list if coverage is sufficient.
191+
"""
192+
problems = self._determineTestApplication()
193+
# Next, make sure each target has a valid TLD
194+
for target in self.targets:
195+
# Ignore right-wildcard targets
196+
if target.endswith(".*"):
197+
continue
198+
199+
# Ignore if target is an ipv4 address
200+
try:
201+
socket.inet_aton(target)
202+
continue
203+
except:
204+
pass
205+
206+
# Ignore if target is an ipv6 address
207+
try:
208+
socket.inet_pton(socket.AF_INET6, target)
209+
continue
210+
except:
211+
pass
212+
213+
# Extract TLD from target if possible
214+
res = tldextract.extract(target)
215+
if res.suffix == "":
216+
problems.append("%s: Target '%s' missing eTLD" % (self.filename, target))
217+
elif res.domain == "":
218+
problems.append("%s: Target '%s' containing entire eTLD" % (self.filename, target))
219+
220+
return problems
221+
182222
def getCoverageProblems(self):
183223
"""Verify that each rule and each exclusion has the right number of tests
184224
that applies to it. TODO: Also check that each target has the right

0 commit comments

Comments
 (0)