Skip to content

Commit fc3910b

Browse files
committed
Empty tags are only a parse error for HTML
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40442
1 parent 77d539d commit fc3910b

3 files changed

Lines changed: 20 additions & 23 deletions

File tree

src/html5parser.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import utils
3030
from constants import contentModelFlags, spaceCharacters, asciiUpper2Lower
3131
from constants import scopingElements, formattingElements, specialElements
32-
from constants import headingElements, tableInsertModeElements
32+
from constants import headingElements, tableInsertModeElements, voidElements
3333

3434
class HTMLParser(object):
3535
"""HTML parser. Generates a tree structure from a stream of (possibly
@@ -105,10 +105,8 @@ def parse(self, stream, innerHTML=False):
105105
method(token["name"], token["data"])
106106
elif type == "EndTag":
107107
method(token["name"])
108-
elif type == "ParseError":
109-
self.parseError(token["data"])
110108
else:
111-
self.atheistParseError()
109+
self.parseError(token["data"])
112110

113111
# When the loop finishes it's EOF
114112
self.phase.processEOF()
@@ -129,6 +127,16 @@ def normalizeToken(self, token):
129127
""" HTML5 specific normalizations to the token stream """
130128

131129
if token["type"] == "EmptyTag":
130+
# When a solidus (/) is encountered within a tag name what happens
131+
# depends on whether the current tag name matches that of a void
132+
# element. If it matches a void element atheists did the wrong
133+
# thing and if it doesn't it's wrong for everyone.
134+
135+
if token["name"] in voidElements:
136+
self.atheistParseError()
137+
else:
138+
self.parseError(_("Solidus (/) incorrectly placed in tag."))
139+
132140
token["type"] = "StartTag"
133141

134142
if token["type"] == "StartTag":

src/tokenizer.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
_ = gettext.gettext
99

1010
from constants import contentModelFlags, spaceCharacters
11-
from constants import entitiesWindows1252, entities, voidElements
11+
from constants import entitiesWindows1252, entities
1212
from constants import asciiLowercase, asciiLetters
1313
from constants import digits, hexDigits, EOF
1414

@@ -87,27 +87,19 @@ def __iter__(self):
8787

8888
# Below are various helper functions, factored out, that the tokenizer states use.
8989
def processSolidusInTag(self):
90-
"""When a solidus (/) is encountered within a tag name what happens
91-
depends on whether the current tag name matches that of a void element.
92-
If it matches a void element atheists did the wrong thing and if it
93-
doesn't it's wrong for everyone.
90+
"""If the next character is a '>', convert the currentToken into
91+
an EmptyTag
9492
"""
9593

96-
# We need to consume another character to make sure it's a ">" before
97-
# throwing an atheist parse error.
94+
# We need to consume another character to make sure it's a ">"
9895
data = self.stream.char()
9996

100-
if self.currentToken["name"] in voidElements and data == u">":
101-
self.tokenQueue.append({"type": "AtheistParseError", "data":
102-
_("Solidus (/) incorrectly placed in tag (atheists only).")})
97+
if self.currentToken["type"] == "StartTag" and data == u">":
98+
self.currentToken["type"] = "EmptyTag"
10399
else:
104100
self.tokenQueue.append({"type": "ParseError", "data":
105101
_("Solidus (/) incorrectly placed in tag.")})
106102

107-
# XML/XHTML enablement hook
108-
if self.currentToken["type"] == "StartTag" and data == u">":
109-
self.currentToken["type"] = "EmptyTag"
110-
111103
# The character we just consumed needs to be put back on the stack so it
112104
# doesn't get lost...
113105
self.stream.queue.append(data)

tests/test_tokenizer.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ def processStartTag(self, token):
4141
self.outputTokens.append([u"StartTag", token["name"], token["data"]])
4242

4343
def processEmptyTag(self, token):
44-
# TODO: convert tests to reflect EmptyTags
44+
if token["name"] not in constants.voidElements:
45+
self.outputTokens.append(u"ParseError")
4546
self.outputTokens.append([u"StartTag", token["name"], token["data"]])
4647

4748
def processEndTag(self, token):
@@ -63,10 +64,6 @@ def processEOF(self, token):
6364
def processParseError(self, token):
6465
self.outputTokens.append(u"ParseError")
6566

66-
def processAtheistParseError(self, token):
67-
"""This error is not an error"""
68-
pass
69-
7067
def concatenateCharacterTokens(tokens):
7168
outputTokens = []
7269
for token in tokens:

0 commit comments

Comments
 (0)