Skip to content

Commit 7588d9d

Browse files
committed
fix parse.py to work with multiline input; do the same for test_parser.py; remove the #data at the end of the two test files, test files require two new lines at the end for now; add a testcase which has a new line; make some editorial changes in parser.py
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40328
1 parent 9831cc6 commit 7588d9d

3 files changed

Lines changed: 38 additions & 25 deletions

File tree

parse.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,10 @@ def convertTreeDump(treedump):
1717
treedump = treedump.split("\n")[1:]
1818
rv = []
1919
for line in treedump:
20-
rv.append(line[3:])
20+
if line.startswith("|"):
21+
rv.append(line[3:])
22+
else:
23+
rv.append(line)
2124
return "\n".join(rv)
2225

2326
if __name__ == "__main__":

src/parser.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,12 @@ def __str__(self):
2424
return self.name
2525

2626
def __repr__(self):
27-
return "<%s %s>"%(self.__class__, self.name)
27+
return "<%s %s>" % (self.__class__, self.name)
2828

2929
def printTree(self, indent=0):
30-
tree = '\n|%s%s' % (' '*indent, str(self))
30+
tree = '\n|%s%s' % (' '* indent, str(self))
3131
for child in self.childNodes:
32-
tree += child.printTree(indent+2)
32+
tree += child.printTree(indent + 2)
3333
return tree
3434

3535
def appendChild(self, node, index=None):
@@ -69,7 +69,7 @@ def __init__(self):
6969
Node.__init__(self, None)
7070

7171
def __str__(self):
72-
return '#document'
72+
return "#document"
7373

7474
def printTree(self):
7575
tree = str(self)
@@ -82,29 +82,29 @@ def __init__(self, name):
8282
Node.__init__(self, name)
8383

8484
def __str__(self):
85-
return '<!DOCTYPE %s>' % self.name
85+
return "<!DOCTYPE %s>" % self.name
8686

8787
class TextNode(Node):
8888
def __init__(self, value):
8989
Node.__init__(self, None)
9090
self.value = value
9191

9292
def __str__(self):
93-
return '"%s"' % self.value
93+
return "\"%s\"" % self.value
9494

9595
class Element(Node):
9696
def __init__(self, name):
9797
Node.__init__(self, name)
9898

9999
def __str__(self):
100-
return '<%s>' % self.name
100+
return "<%s>" % self.name
101101

102102
def printTree(self, indent):
103103
tree = '\n|%s%s' % (' '*indent, str(self))
104104
indent += 2
105105
if self.attributes:
106106
for name, value in self.attributes.iteritems():
107-
tree += '\n|%s%s="%s"' % (' '*indent, name, value)
107+
tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
108108
for child in self.childNodes:
109109
tree += child.printTree(indent)
110110
return tree
@@ -115,7 +115,7 @@ def __init__(self, data):
115115
self.data = data
116116

117117
def __str__(self):
118-
return '<!-- %s -->' % self.data
118+
return "<!-- %s -->" % self.data
119119

120120
class HTMLParser(object):
121121
"""Main parser class"""

tests/test_parser.py

Lines changed: 25 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88
def parseTestcase(testString):
99
testString = testString.split("\n")
1010
try:
11+
if testString[0] != "#data":
12+
print testString
1113
assert testString[0] == "#data"
1214
except:
1315
raise
@@ -16,12 +18,13 @@ def parseTestcase(testString):
1618
errors = []
1719
currentList = input
1820
for line in testString:
19-
if line and line[0] != "#":
21+
if line and not (line.startswith("#errors") or
22+
line.startswith("#document") or line.startswith("#data")):
2023
if currentList is output:
21-
assert line[0] == "|"
22-
currentList.append(line[2:])
23-
# XXX the line might not start with a "|" if it's a
24-
# continuation line, e.g. if a text node contained a linefeed
24+
if line.startswith("|"):
25+
currentList.append(line[2:])
26+
else:
27+
currentList.append(line)
2528
else:
2629
currentList.append(line)
2730
elif line == "#errors":
@@ -35,7 +38,10 @@ def convertTreeDump(treedump):
3538
treedump = treedump.split("\n")[1:]
3639
rv = []
3740
for line in treedump:
38-
rv.append(line[3:])
41+
if line.startswith("|"):
42+
rv.append(line[3:])
43+
else:
44+
rv.append(line)
3945
return "\n".join(rv)
4046

4147
class TestCase(unittest.TestCase):
@@ -45,26 +51,30 @@ def runParserTest(self, input, output, errors):
4551
#concatenate all consecutive character tokens into a single token
4652
p = parser.HTMLParser()
4753
document = p.parse(StringIO.StringIO(input))
48-
errorMsg = "\n".join(["\n\nExpected:", output, "\nRecieved:",
49-
convertTreeDump(document.printTree())])
50-
self.assertEquals(output, convertTreeDump(document.printTree()),
51-
errorMsg)
54+
errorMsg = "\n".join(["\n\nExpected:", output, "\nRecieved:",
55+
convertTreeDump(document.printTree())])
56+
self.assertEquals(output, convertTreeDump(document.printTree()),
57+
errorMsg)
5258

5359
def test_parser():
5460
for filename in glob.glob('tree-construction/*.dat'):
5561
f = open(filename)
5662
test = []
57-
lastLine = ""
63+
documentSeen = False
5864
for line in f:
59-
#Assume tests are separated by a blank line
60-
if not (line == "\n" and lastLine[0] == "|"):
61-
#Strip out newline characters from the end of the string
65+
# XXX This algorithm would need to be changed if we want to get rid
66+
# of the double newline requirement at the end of test files.
67+
if line.startswith("#document"):
68+
documentSeen = True
69+
if not line == "\n":
70+
test.append(line[:-1])
71+
elif line == "\n" and not documentSeen:
6272
test.append(line[:-1])
6373
else:
6474
input, output, errors = parseTestcase("\n".join(test))
6575
yield TestCase.runParserTest, input, output, errors
6676
test = []
67-
lastLine = line
77+
documentSeen = False
6878

6979
def buildTestSuite():
7080
tests = 0

0 commit comments

Comments
 (0)