Fix parse.py to work with multiline input and do the same for test_parser.py; remove the trailing #data at the end of the two test files (test files require two newlines at the end for now); add a test case which contains a newline; make some editorial changes in parser.py

annevk · annevk · commit 7588d9d6168c · 2006-12-24T20:20:08.000Z
--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40328
diff --git a/parse.py b/parse.py
@@ -17,7 +17,10 @@ def convertTreeDump(treedump):
     treedump = treedump.split("\n")[1:]
     rv = []
     for line in treedump:
-        rv.append(line[3:])
+        if line.startswith("|"):
+            rv.append(line[3:])
+        else:
+            rv.append(line)
     return "\n".join(rv)
 
 if __name__ == "__main__":
diff --git a/src/parser.py b/src/parser.py
@@ -24,12 +24,12 @@ def __str__(self):
         return self.name
 
     def __repr__(self):
-        return "<%s %s>"%(self.__class__, self.name)
+        return "<%s %s>" % (self.__class__, self.name)
 
     def printTree(self, indent=0):
-        tree = '\n|%s%s' % (' '*indent, str(self))
+        tree = '\n|%s%s' % (' '* indent, str(self))
         for child in self.childNodes:
-            tree += child.printTree(indent+2)
+            tree += child.printTree(indent + 2)
         return tree
 
     def appendChild(self, node, index=None):
@@ -69,7 +69,7 @@ def __init__(self):
         Node.__init__(self, None)
 
     def __str__(self):
-        return '#document'
+        return "#document"
 
     def printTree(self):
         tree = str(self)
@@ -82,29 +82,29 @@ def __init__(self, name):
         Node.__init__(self, name)
 
     def __str__(self):
-        return '<!DOCTYPE %s>' % self.name
+        return "<!DOCTYPE %s>" % self.name
 
 class TextNode(Node):
     def __init__(self, value):
         Node.__init__(self, None)
         self.value = value
 
     def __str__(self):
-        return '"%s"' % self.value
+        return "\"%s\"" % self.value
 
 class Element(Node):
     def __init__(self, name):
         Node.__init__(self, name)
 
     def __str__(self):
-        return '<%s>' % self.name
+        return "<%s>" % self.name
 
     def printTree(self, indent):
         tree = '\n|%s%s' % (' '*indent, str(self))
         indent += 2
         if self.attributes:
             for name, value in self.attributes.iteritems():
-                tree += '\n|%s%s="%s"' % (' '*indent, name, value)
+                tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
         for child in self.childNodes:
             tree += child.printTree(indent)
         return tree
@@ -115,7 +115,7 @@ def __init__(self, data):
         self.data = data
 
     def __str__(self):
-        return '<!-- %s -->' % self.data
+        return "<!-- %s -->" % self.data
 
 class HTMLParser(object):
     """Main parser class"""
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -8,6 +8,8 @@
 def parseTestcase(testString):
     testString = testString.split("\n")
     try:
+        if testString[0] != "#data":
+            print testString
         assert testString[0] == "#data"
     except:
         raise
@@ -16,12 +18,13 @@ def parseTestcase(testString):
     errors = []
     currentList = input
     for line in testString:
-        if line and line[0] != "#":
+        if line and not (line.startswith("#errors") or
+          line.startswith("#document") or line.startswith("#data")):
             if currentList is output:
-                assert line[0] == "|"
-                currentList.append(line[2:])
-                # XXX the line might not start with a "|" if it's a
-                # continuation line, e.g. if a text node contained a linefeed
+                if line.startswith("|"):
+                    currentList.append(line[2:])
+                else:
+                    currentList.append(line)
             else:
                 currentList.append(line)
         elif line == "#errors":
@@ -35,7 +38,10 @@ def convertTreeDump(treedump):
     treedump = treedump.split("\n")[1:]
     rv = []
     for line in treedump:
-        rv.append(line[3:])
+        if line.startswith("|"):
+            rv.append(line[3:])
+        else:
+            rv.append(line)
     return "\n".join(rv)
 
 class TestCase(unittest.TestCase):
@@ -45,26 +51,30 @@ def runParserTest(self, input, output, errors):
         #concatenate all consecutive character tokens into a single token
         p = parser.HTMLParser()
         document = p.parse(StringIO.StringIO(input))
-        errorMsg = "\n".join(["\n\nExpected:", output, "\nRecieved:", 
-                              convertTreeDump(document.printTree())])
-        self.assertEquals(output, convertTreeDump(document.printTree()), 
-                          errorMsg)
+        errorMsg = "\n".join(["\n\nExpected:", output, "\nRecieved:",
+          convertTreeDump(document.printTree())])
+        self.assertEquals(output, convertTreeDump(document.printTree()),
+          errorMsg)
 
 def test_parser():
     for filename in glob.glob('tree-construction/*.dat'):
         f = open(filename)
         test = []
-        lastLine = ""
+        documentSeen = False
         for line in f:
-            #Assume tests are separated by a blank line
-            if not (line == "\n" and lastLine[0] == "|"):
-                #Strip out newline characters from the end of the string
+            # XXX This algorithm would need to be changed if we want to get rid
+            # of the double newline requirement at the end of test files.
+            if line.startswith("#document"):
+                documentSeen = True
+            if not line == "\n":
+                test.append(line[:-1])
+            elif line == "\n" and not documentSeen:
                 test.append(line[:-1])
             else:
                 input, output, errors = parseTestcase("\n".join(test))
                 yield TestCase.runParserTest, input, output, errors
                 test = []
-            lastLine = line
+                documentSeen = False
 
 def buildTestSuite():
     tests = 0