Skip to content

Commit 651b093

Browse files
author
tim_one
committed
Replaced the ELLIPSIS implementation with a worst-case linear-time one.
git-svn-id: http://svn.python.org/projects/python/trunk@37051 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 4d5825e commit 651b093

2 files changed

Lines changed: 66 additions & 23 deletions

File tree

Lib/doctest.py

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,50 @@ def truncate(self, size=None):
362362
if hasattr(self, "softspace"):
363363
del self.softspace
364364

365+
# Worst-case linear-time ellipsis matching.
366+
def ellipsis_match(want, got):
367+
if ELLIPSIS_MARKER not in want:
368+
return want == got
369+
# Remove \n from ...\n, else the newline will be required,
370+
# and (for example) ... on a line by itself can't match
371+
# nothing gracefully.
372+
want = want.replace(ELLIPSIS_MARKER + '\n', ELLIPSIS_MARKER)
373+
# Find "the real" strings.
374+
ws = want.split(ELLIPSIS_MARKER)
375+
assert len(ws) >= 2
376+
# Match. In general, we only need to find the leftmost non-overlapping
377+
# match for each piece. "Real strings" at the start or end of `want`
378+
# are special cases.
379+
w = ws[0]
380+
if w:
381+
# An ellipsis didn't start `want`. We need to match exactly
382+
# at the start.
383+
if not got.startswith(w):
384+
return False
385+
pos = len(w)
386+
del ws[0]
387+
else:
388+
pos = 0
389+
390+
for w in ws:
391+
# w may be '' at times, if there are consecutive ellipses, or
392+
# due to an ellipsis at the start or end of `want`. That's OK.
393+
# Search for an empty string succeeds, and doesn't change pos.
394+
pos = got.find(w, pos)
395+
if pos < 0:
396+
return False
397+
pos += len(w)
398+
399+
# If `want` ended with an ellipsis, the tail matches anything.
400+
if ws[-1] == '':
401+
return True
402+
# Else `want` ended with a real string. If the last real match
403+
# exhausted `got`, we win.
404+
if pos == len(got):
405+
return True
406+
# Else maybe we matched the last real string too early.
407+
return got.endswith(ws[-1])
408+
365409
######################################################################
366410
## 2. Example & DocTest
367411
######################################################################
@@ -1475,22 +1519,9 @@ def check_output(self, want, got, optionflags):
14751519
return True
14761520

14771521
# The ELLIPSIS flag says to let the sequence "..." in `want`
1478-
# match any substring in `got`. We implement this by
1479-
# transforming `want` into a regular expression.
1522+
# match any substring in `got`.
14801523
if optionflags & ELLIPSIS:
1481-
# Remove \n from ...\n, else the newline will be required,
1482-
# and (for example) ... on a line by itself can't match
1483-
# nothing gracefully.
1484-
want_re = want.replace(ELLIPSIS_MARKER + '\n', ELLIPSIS_MARKER)
1485-
# Escape any special regexp characters
1486-
want_re = re.escape(want_re)
1487-
# Replace escaped ellipsis markers ('\.\.\.') with .*
1488-
want_re = want_re.replace(re.escape(ELLIPSIS_MARKER), '.*')
1489-
# Require that it matches the entire string; and set the
1490-
# re.DOTALL flag (with '(?s)').
1491-
want_re = '(?s)^%s$' % want_re
1492-
# Check if the `want_re` regexp matches got.
1493-
if re.match(want_re, got):
1524+
if ellipsis_match(want, got):
14941525
return True
14951526

14961527
# We didn't find any match; return false.

Lib/test/test_doctest.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -780,29 +780,41 @@ def optionflags(): r"""
780780
>>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test)
781781
(0, 1)
782782
783-
... should also match nothing gracefully:
784-
XXX This can be provoked into requiring exponential time by adding more
785-
XXX ellipses; the implementation should change. It's much easier to
786-
XXX provoke exponential time with expected output that doesn't match,
787-
XXX BTW (then multiple regexp .* thingies each try all possiblities,
788-
XXX multiplicatively, without hope of success). That's the real danger,
789-
XXX that a failing test will appear to be hung.
783+
... should also match nothing gracefully (note that a regular-expression
784+
implementation of ELLIPSIS would take a loooong time to match this one!):
790785
791786
>>> for i in range(100):
792787
... print i**2 #doctest: +ELLIPSIS
793788
0
794789
...
795790
1
796791
...
792+
......
793+
...
797794
36
798795
...
799796
...
797+
...
800798
49
801799
64
802-
......
800+
.........
803801
9801
804802
...
805803
804+
... can be surprising; e.g., this test passes:
805+
806+
>>> for i in range(21): #doctest: +ELLIPSIS
807+
... print i
808+
0
809+
1
810+
2
811+
...
812+
1
813+
...
814+
2
815+
...
816+
0
817+
806818
The UNIFIED_DIFF flag causes failures that involve multi-line expected
807819
and actual outputs to be displayed using a unified diff:
808820

0 commit comments

Comments
 (0)