Empty tags (<x/>) are only a parse error for HTML: the tokenizer now emits an EmptyTag token and the HTML parser decides whether to report it

rubys · rubys · commit fc3910bba3c3 · 2007-01-09T16:08:34.000Z
--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40442
diff --git a/src/html5parser.py b/src/html5parser.py
@@ -29,7 +29,7 @@
 import utils
 from constants import contentModelFlags, spaceCharacters, asciiUpper2Lower
 from constants import scopingElements, formattingElements, specialElements
-from constants import headingElements, tableInsertModeElements
+from constants import headingElements, tableInsertModeElements, voidElements
 
 class HTMLParser(object):
     """HTML parser. Generates a tree structure from a stream of (possibly
@@ -105,10 +105,8 @@ def parse(self, stream, innerHTML=False):
                 method(token["name"], token["data"])
             elif type == "EndTag":
                 method(token["name"])
-            elif type == "ParseError":
-                self.parseError(token["data"])
             else:
-                self.atheistParseError()
+                self.parseError(token["data"])
 
         # When the loop finishes it's EOF
         self.phase.processEOF()
@@ -129,6 +127,16 @@ def normalizeToken(self, token):
         """ HTML5 specific normalizations to the token stream """
        
         if token["type"] == "EmptyTag":
+            # When a solidus (/) is encountered within a tag name, what
+            # happens depends on whether the current tag name matches that of
+            # a void element.  If it matches a void element, only atheists did
+            # the wrong thing; if it doesn't, it's wrong for everyone.
+
+            if token["name"] in voidElements:
+                self.atheistParseError()
+            else:
+                self.parseError(_("Solidus (/) incorrectly placed in tag."))
+
             token["type"] = "StartTag"
 
         if token["type"] == "StartTag":
diff --git a/src/tokenizer.py b/src/tokenizer.py
@@ -8,7 +8,7 @@
 _ = gettext.gettext
 
 from constants import contentModelFlags, spaceCharacters
-from constants import entitiesWindows1252, entities, voidElements
+from constants import entitiesWindows1252, entities
 from constants import asciiLowercase, asciiLetters
 from constants import digits, hexDigits, EOF
 
@@ -87,27 +87,19 @@ def __iter__(self):
 
     # Below are various helper functions the tokenizer states use worked out.
     def processSolidusInTag(self):
-        """When a solidus (/) is encountered within a tag name what happens
-        depends on whether the current tag name matches that of a void element.
-        If it matches a void element atheists did the wrong thing and if it
-        doesn't it's wrong for everyone.
+        """If the next character is a '>' and the currentToken is a StartTag,
+        convert it into an EmptyTag; otherwise queue a ParseError.
         """
 
-        # We need to consume another character to make sure it's a ">" before
-        # throwing an atheist parse error.
+        # We need to consume another character to make sure it's a ">"
         data = self.stream.char()
 
-        if self.currentToken["name"] in voidElements and data == u">":
-            self.tokenQueue.append({"type": "AtheistParseError", "data":
-              _("Solidus (/) incorrectly placed in tag (atheists only).")})
+        if self.currentToken["type"] == "StartTag" and data == u">":
+            self.currentToken["type"] = "EmptyTag"
         else:
             self.tokenQueue.append({"type": "ParseError", "data":
               _("Solidus (/) incorrectly placed in tag.")})
 
-        # XML/XHTML enablement hook
-        if self.currentToken["type"] == "StartTag" and data == u">":
-            self.currentToken["type"] = "EmptyTag"
-
         # The character we just consumed need to be put back on the stack so it
         # doesn't get lost...
         self.stream.queue.append(data)
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
@@ -41,7 +41,8 @@ def processStartTag(self, token):
         self.outputTokens.append([u"StartTag", token["name"], token["data"]])
 
     def processEmptyTag(self, token):
-        # TODO: convert tests to reflect EmptyTags
+        if token["name"] not in constants.voidElements:
+            self.outputTokens.append(u"ParseError")
         self.outputTokens.append([u"StartTag", token["name"], token["data"]])
 
     def processEndTag(self, token):
@@ -63,10 +64,6 @@ def processEOF(self, token):
     def processParseError(self, token):
         self.outputTokens.append(u"ParseError")
 
-    def processAtheistParseError(self, token):
-        """This error is not an error"""
-        pass
-
 def concatenateCharacterTokens(tokens):
     outputTokens = []
     for token in tokens: