import sys

from . import inputstream
from . import tokenizer
from . import treebuilders
from .treebuilders._base import Marker
from .treebuilders import simpletree
from . import utils
from .constants import contentModelFlags, spaceCharacters, asciiUpper2Lower
from .constants import scopingElements, formattingElements, specialElements
from .constants import headingElements, tableInsertModeElements
from .constants import cdataElements, rcdataElements, voidElements
from .constants import tokenTypes


def parse(doc, treebuilderName="simpletree", encoding=None,
          implementation=None):
    """Parse ``doc`` with a freshly built HTMLParser and return the document.

    doc - passed unchanged to HTMLParser.parse as the stream
    treebuilderName - name handed to treebuilders.getTreeBuilder
    encoding - forwarded to HTMLParser.parse
    implementation - forwarded to treebuilders.getTreeBuilder
    """
    tb = treebuilders.getTreeBuilder(treebuilderName,
                                     implementation=implementation)
    p = HTMLParser(tb)
    return p.parse(doc, encoding=encoding)


class HTMLParser(object):
    """HTML parser. Generates a tree structure from a stream of (possibly
    malformed) HTML"""

    def __init__(self, tree=simpletree.TreeBuilder,
                 tokenizer=tokenizer.HTMLTokenizer, strict=False,
                 namespaceHTMLElements=False):
        """
        strict - raise an exception when a parse error is encountered

        tree - a treebuilder class controlling the type of tree that will be
        returned. Built in treebuilders can be accessed through
        html5lib.treebuilders.getTreeBuilder(treeType)

        tokenizer - a class that provides a stream of tokens to the
        treebuilder. This may be replaced for e.g. a sanitizer which converts
        some tags to text

        namespaceHTMLElements - passed as the only argument to the tree
        constructor
        """
        # Raise an exception on the first error encountered
        self.strict = strict

        self.tree = tree(namespaceHTMLElements)
        self.tokenizer_class = tokenizer
        self.errors = []

        self.phases = {
            "initial": InitialPhase(self, self.tree),
            "beforeHtml": BeforeHtmlPhase(self, self.tree),
            "beforeHead": BeforeHeadPhase(self, self.tree),
            "inHead": InHeadPhase(self, self.tree),
            # XXX "inHeadNoscript": InHeadNoScriptPhase(self, self.tree),
            "afterHead": AfterHeadPhase(self, self.tree),
            "inBody": InBodyPhase(self, self.tree),
            "inCDataRCData": InCDataRCDataPhase(self, self.tree),
            "inTable": InTablePhase(self, self.tree),
            "inCaption": InCaptionPhase(self, self.tree),
            "inColumnGroup": InColumnGroupPhase(self, self.tree),
            "inTableBody": InTableBodyPhase(self, self.tree),
            "inRow": InRowPhase(self, self.tree),
            "inCell": InCellPhase(self, self.tree),
            "inSelect": InSelectPhase(self, self.tree),
            "inSelectInTable": InSelectInTablePhase(self, self.tree),
            "afterBody": AfterBodyPhase(self, self.tree),
            "inFrameset": InFramesetPhase(self, self.tree),
            "afterFrameset": AfterFramesetPhase(self, self.tree),
            "afterAfterBody": AfterAfterBodyPhase(self, self.tree),
            "afterAfterFrameset": AfterAfterFramesetPhase(self, self.tree),
            # XXX after after frameset
        }

    def _parse(self, stream, innerHTML=False, container="div",
               encoding=None, parseMeta=True, useChardet=True, **kwargs):
        """Reset parser state, tokenize ``stream`` and feed every token to the
        current phase, finishing with that phase's EOF handler.

        innerHTML - when true, parse as a fragment inside ``container``
        container - name of the fragment's context element (None means "div")
        encoding, parseMeta, useChardet, kwargs - forwarded to the tokenizer
        class
        """
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
                                              parseMeta=parseMeta,
                                              useChardet=useChardet, **kwargs)

        if innerHTML:
            # parseFragment documents None as meaning "div".
            if container is None:
                container = "div"
            self.innerHTML = container.lower()

            # NOTE(review): the set names look swapped relative to the flags
            # they select; the mapping is kept exactly as it was -- confirm
            # against the definitions in constants before changing it.
            if self.innerHTML in cdataElements:
                self.tokenizer.contentModelFlag = contentModelFlags["RCDATA"]
            elif self.innerHTML in rcdataElements:
                self.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
            elif self.innerHTML == 'plaintext':
                self.tokenizer.contentModelFlag = contentModelFlags["PLAINTEXT"]
            # Otherwise the tokenizer's flag is left untouched (already
            # PCDATA).

            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False
            self.phase = self.phases["initial"]

        # We only seem to have InBodyPhase testcases where the following is
        # relevant ... need others too
        self.lastPhase = None

        self.beforeRCDataPhase = None

        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]

        for token in self.normalizedTokens():
            tokenType = token["type"]
            if tokenType == CharactersToken:
                self.phase.processCharacters(token)
            elif tokenType == SpaceCharactersToken:
                self.phase.processSpaceCharacters(token)
            elif tokenType == StartTagToken:
                self.selfClosingAcknowledged = False
                self.phase.processStartTag(token)
                # Handlers acknowledge a self-closing flag either on the
                # parser or on the token itself; honour both so that void
                # elements written as <meta ... /> are not reported.
                acknowledged = (self.selfClosingAcknowledged or
                                token.get("selfClosingAcknowledged", False))
                if token["selfClosing"] and not acknowledged:
                    # NOTE(review): "soldius" looks like a typo for "solidus";
                    # left unchanged because the error-message table keyed by
                    # this code is not visible here.
                    self.parseError("non-void-element-with-trailing-soldius",
                                    {"name": token["name"]})
            elif tokenType == EndTagToken:
                self.phase.processEndTag(token)
            elif tokenType == CommentToken:
                self.phase.processComment(token)
            elif tokenType == DoctypeToken:
                self.phase.processDoctype(token)
            else:
                # Any other token type is recorded as a parse error using the
                # token's own data as the error code.
                self.parseError(token["data"], token.get("datavars", {}))

        # When the loop finishes it's EOF
        self.phase.processEOF()

    def normalizedTokens(self):
        """Yield every tokenizer token after passing it through
        normalizeToken."""
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
        """Parse a HTML document into a well-formed tree

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta and useChardet are forwarded to the tokenizer class.
        """
        self._parse(stream, innerHTML=False, encoding=encoding,
                    parseMeta=parseMeta, useChardet=useChardet)
        return self.tree.getDocument()

    def parseFragment(self, stream, container="div", encoding=None,
                      parseMeta=False, useChardet=True):
        """Parse a HTML fragment into a well-formed tree fragment

        container - name of the element we're setting the innerHTML property
        if set to None, default to 'div'

        stream - a filelike object or string containing the HTML to be parsed

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta and useChardet are forwarded to the tokenizer class.
        """
        self._parse(stream, True, container=container, encoding=encoding,
                    parseMeta=parseMeta, useChardet=useChardet)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error at the current stream position.

        Raises ParseError when the parser was created with strict=True.
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            # A fresh dict per call; a shared default would be stored in
            # self.errors and could be mutated by one caller for all.
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode,
                            datavars))
        if self.strict:
            raise ParseError

    def normalizeToken(self, token):
        """ HTML5 specific normalizations to the token stream """
        if token["type"] == tokenTypes["StartTag"]:
            # Building the dict from the reversed pair list makes the first
            # occurrence of a repeated attribute name the one that is kept.
            token["data"] = dict(token["data"][::-1])
        return token

    def resetInsertionMode(self):
        """Choose self.phase from the stack of open elements."""
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            if node == self.tree.openElements[0]:
                last = True
                if nodeName not in ['td', 'th']:
                    # XXX
                    assert self.innerHTML
                    nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "frameset"):
                # XXX
                assert self.innerHTML

            if nodeName in newModes:
                self.phase = self.phases[newModes[nodeName]]
                break
            elif nodeName == "html":
                if self.tree.headPointer is None:
                    self.phase = self.phases["beforeHead"]
                else:
                    self.phase = self.phases["afterHead"]
                break
            elif last:
                self.phase = self.phases["inBody"]
                break

    def parseRCDataCData(self, token, contentType):
        """Generic (R)CDATA Parsing algorithm
        contentType - RCDATA or CDATA
        """
        assert contentType in ("CDATA", "RCDATA")

        self.tree.insertElement(token)
        self.tokenizer.contentModelFlag = contentModelFlags[contentType]

        # Remember where we came from before switching phase.
        self.originalPhase = self.phase

        self.phase = self.phases["inCDataRCData"]


class Phase(object):
    """Base class for helper object that implements each phase of processing
    """
    # Order should be (they can be omitted):
    # * EOF
    # * Comment
    # * Doctype
    # * SpaceCharacters
    # * Characters
    # * StartTag
    #   - startTag* methods
    # * EndTag
    #   - endTag* methods

    def __init__(self, parser, tree):
        self.parser = parser
        self.tree = tree

    def processEOF(self):
        raise NotImplementedError

    def processComment(self, token):
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        self.tree.insertComment(token, self.tree.openElements[-1])

    def processDoctype(self, token):
        self.parser.parseError("unexpected-doctype")

    def processSpaceCharacters(self, token):
        self.tree.insertText(token["data"])

    def processStartTag(self, token):
        # Dispatch on the tag name via the subclass's handler table.
        self.startTagHandler[token["name"]](token)

    def startTagHtml(self, token):
        """Merge the attributes of a repeated <html> start tag into the root
        element, keeping any attribute value the root already has."""
        if not self.parser.firstStartTag and token["name"] == "html":
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        rootAttributes = self.tree.openElements[0].attributes
        for attr, value in token["data"].items():
            if attr not in rootAttributes:
                rootAttributes[attr] = value
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        # Dispatch on the tag name via the subclass's handler table.
        self.endTagHandler[token["name"]](token)


class InitialPhase(Phase):
    # This phase deals with error handling as well which is currently not
    # covered in the specification. The error handling is typically known as
    # "quirks mode". It is expected that a future version of HTML5 will define
    # this.

    # Public identifiers (lower-cased) that select quirks mode.
    _quirksPublicIds = (
        "+//silmaril//dtd html pro v0r11 19970101//en",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
        "-//as//dtd html 3.0 aswedit + extensions//en",
        "-//ietf//dtd html 2.0 level 1//en",
        "-//ietf//dtd html 2.0 level 2//en",
        "-//ietf//dtd html 2.0 strict level 1//en",
        "-//ietf//dtd html 2.0 strict level 2//en",
        "-//ietf//dtd html 2.0 strict//en",
        "-//ietf//dtd html 2.0//en",
        "-//ietf//dtd html 2.1e//en",
        "-//ietf//dtd html 3.0//en",
        "-//ietf//dtd html 3.0//en//",
        "-//ietf//dtd html 3.2 final//en",
        "-//ietf//dtd html 3.2//en",
        "-//ietf//dtd html 3//en",
        "-//ietf//dtd html level 0//en",
        "-//ietf//dtd html level 0//en//2.0",
        "-//ietf//dtd html level 1//en",
        "-//ietf//dtd html level 1//en//2.0",
        "-//ietf//dtd html level 2//en",
        "-//ietf//dtd html level 2//en//2.0",
        "-//ietf//dtd html level 3//en",
        "-//ietf//dtd html level 3//en//3.0",
        "-//ietf//dtd html strict level 0//en",
        "-//ietf//dtd html strict level 0//en//2.0",
        "-//ietf//dtd html strict level 1//en",
        "-//ietf//dtd html strict level 1//en//2.0",
        "-//ietf//dtd html strict level 2//en",
        "-//ietf//dtd html strict level 2//en//2.0",
        "-//ietf//dtd html strict level 3//en",
        "-//ietf//dtd html strict level 3//en//3.0",
        "-//ietf//dtd html strict//en",
        "-//ietf//dtd html strict//en//2.0",
        "-//ietf//dtd html strict//en//3.0",
        "-//ietf//dtd html//en",
        "-//ietf//dtd html//en//2.0",
        "-//ietf//dtd html//en//3.0",
        "-//metrius//dtd metrius presentational//en",
        "-//microsoft//dtd internet explorer 2.0 html strict//en",
        "-//microsoft//dtd internet explorer 2.0 html//en",
        "-//microsoft//dtd internet explorer 2.0 tables//en",
        "-//microsoft//dtd internet explorer 3.0 html strict//en",
        "-//microsoft//dtd internet explorer 3.0 html//en",
        "-//microsoft//dtd internet explorer 3.0 tables//en",
        "-//netscape comm. corp.//dtd html//en",
        "-//netscape comm. corp.//dtd strict html//en",
        "-//o'reilly and associates//dtd html 2.0//en",
        "-//o'reilly and associates//dtd html extended 1.0//en",
        "-//o'reilly and associates//dtd html extended relaxed 1.0//en",
        "-//spyglass//dtd html 2.0 extended//en",
        "-//sq//dtd html 2.0 hotmetal + extensions//en",
        "-//sun microsystems corp.//dtd hotjava html//en",
        "-//sun microsystems corp.//dtd hotjava strict html//en",
        "-//w3c//dtd html 3 1995-03-24//en",
        "-//w3c//dtd html 3.2 draft//en",
        "-//w3c//dtd html 3.2 final//en",
        "-//w3c//dtd html 3.2//en",
        "-//w3c//dtd html 3.2s draft//en",
        "-//w3c//dtd html 4.0 frameset//en",
        "-//w3c//dtd html 4.0 transitional//en",
        "-//w3c//dtd html experimental 19960712//en",
        "-//w3c//dtd html experimental 970421//en",
        "-//w3c//dtd w3 html//en",
        "-//w3o//dtd w3 html 3.0//en",
        "-//w3o//dtd w3 html 3.0//en//",
        "-//w3o//dtd w3 html strict 3.0//en//",
        "-//webtechs//dtd mozilla html 2.0//en",
        "-//webtechs//dtd mozilla html//en",
        "-/w3c/dtd html 4.0 transitional/en",
        "html",
    )

    # HTML 4.01 identifiers: quirks without a system identifier, limited
    # quirks with one. Lower-case because publicId is lower-cased before the
    # comparison.
    _html401PublicIds = (
        "-//w3c//dtd html 4.01 frameset//en",
        "-//w3c//dtd html 4.01 transitional//en",
    )

    # Public identifiers (lower-cased) that always select limited quirks.
    _limitedQuirksPublicIds = (
        "-//w3c//dtd xhtml 1.0 frameset//en",
        "-//w3c//dtd xhtml 1.0 transitional//en",
    )

    _ibmSystemId = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

    def processEOF(self):
        self.parser.parseError("expected-doctype-but-got-eof")
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]
        self.parser.phase.processEOF()

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype, derive the compatibility mode from it and move
        on to the "beforeHtml" phase."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        if name != "html" or publicId is not None or systemId is not None:
            self.parser.parseError("unknown-doctype")

        # Remember whether a system identifier was supplied at all; the
        # HTML 4.01 rules below depend on it being missing vs. present.
        systemIdMissing = systemId is None
        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        isHtml401 = publicId in self._html401PublicIds

        if (not correct or name != "html"
            or publicId in self._quirksPublicIds
            or (isHtml401 and systemIdMissing)
            or systemId == self._ibmSystemId):
            self.parser.compatMode = "quirks"
        elif (publicId in self._limitedQuirksPublicIds
              or (isHtml401 and not systemIdMissing)):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def processSpaceCharacters(self, token):
        pass

    def processCharacters(self, token):
        self.parser.parseError("expected-doctype-but-got-chars")
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]
        self.parser.phase.processCharacters(token)

    def processStartTag(self, token):
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               {"name": token["name"]})
        # The mode lives on the parser, as in processEOF/processCharacters.
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]
        self.parser.phase.processStartTag(token)

    def processEndTag(self, token):
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               {"name": token["name"]})
        # The mode lives on the parser, as in processEOF/processCharacters.
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]
        self.parser.phase.processEndTag(token)


class BeforeHtmlPhase(Phase):
    # helper methods
    def insertHtmlElement(self):
        """Insert an implied <html> root and switch to "beforeHead"."""
        self.tree.insertRoot(impliedTagToken("html", "StartTag"))
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        self.insertHtmlElement()
        self.parser.phase.processEOF()

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        pass

    def processCharacters(self, token):
        self.insertHtmlElement()
        self.parser.phase.processCharacters(token)

    def processStartTag(self, token):
        if token["name"] == "html":
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        self.parser.phase.processStartTag(token)

    def processEndTag(self, token):
        self.insertHtmlElement()
        self.parser.phase.processEndTag(token)


class BeforeHeadPhase(Phase):
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("head", self.startTagHead)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            (("head", "br"), self.endTagImplyHead)
        ])
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        self.startTagHead(impliedTagToken("head", "StartTag"))
        self.parser.phase.processEOF()

    def processSpaceCharacters(self, token):
        pass

    def processCharacters(self, token):
        self.startTagHead(impliedTagToken("head", "StartTag"))
        self.parser.phase.processCharacters(token)

    def startTagHead(self, token):
        self.tree.insertElement(token)
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        self.startTagHead(impliedTagToken("head", "StartTag"))
        self.parser.phase.processStartTag(token)

    def endTagImplyHead(self, token):
        self.startTagHead(impliedTagToken("head", "StartTag"))
        self.parser.phase.processEndTag(token)

    def endTagOther(self, token):
        self.parser.parseError("end-tag-after-implied-root",
                               {"name": token["name"]})


class InHeadPhase(Phase):
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("title", self.startTagTitle),
            (("noscript", "noframes", "style"),
             self.startTagNoScriptNoFramesStyle),
            ("script", self.startTagScript),
            (("base", "link", "command", "eventsource"),
             self.startTagBaseLinkCommandEventsource),
            ("meta", self.startTagMeta),
            ("head", self.startTagHead)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("head", self.endTagHead),
            ("br", self.endTagBr)
        ])
        self.endTagHandler.default = self.endTagOther

    # helper
    def appendToHead(self, element):
        """Append ``element`` to the head element, or to the current open
        element when there is no head (innerHTML case only)."""
        if self.tree.headPointer is not None:
            self.tree.headPointer.appendChild(element)
        else:
            assert self.parser.innerHTML
            self.tree.openElements[-1].appendChild(element)

    # the real thing
    def processEOF(self):
        self.anythingElse()
        self.parser.phase.processEOF()

    def processCharacters(self, token):
        self.anythingElse()
        self.parser.phase.processCharacters(token)

    def startTagHtml(self, token):
        self.parser.phases["inBody"].processStartTag(token)

    def startTagHead(self, token):
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommandEventsource(self, token):
        # Insert and immediately pop: these elements take no children here.
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert a <meta> element and, while the stream's encoding is still
        tentative, switch to the encoding the element declares."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] == "tentative":
            if "charset" in attributes:
                codec = inputstream.codecName(attributes["charset"])
                stream.changeEncoding(codec)
            elif "content" in attributes:
                data = inputstream.EncodingBytes(
                    attributes["content"].encode(stream.charEncoding[0]))
                contentParser = inputstream.ContentAttrParser(data)
                codec = contentParser.parse()
                stream.changeEncoding(codec)

    def startTagTitle(self, token):
        self.parser.parseRCDataCData(token, "RCDATA")

    def startTagNoScriptNoFramesStyle(self, token):
        #Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataCData(token, "CDATA")

    def startTagScript(self, token):
        #I think this is equivalent to the CDATA stuff since we don't execute
        #script
        self.parser.parseRCDataCData(token, "CDATA")

    def startTagOther(self, token):
        self.anythingElse()
        self.parser.phase.processStartTag(token)

    def endTagHead(self, token):
        node = self.parser.tree.openElements.pop()
        assert node.name == "head", "Expected head got %s" % node.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagBr(self, token):
        self.anythingElse()
        self.parser.phase.processEndTag(token)

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        # Act as if a </head> end tag had been seen.
        self.endTagHead(impliedTagToken("head"))


# XXX If we implement a parser for which scripting is disabled we need to
# implement this phase.
#
# class InHeadNoScriptPhase(Phase):

class AfterHeadPhase(Phase):
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("body", self.startTagBody),
            ("frameset", self.startTagFrameset),
            (("base", "link", "meta", "noframes", "script", "style", "title"),
             self.startTagFromHead),
            ("head", self.startTagHead)
        ])
        self.startTagHandler.default = self.startTagOther
        self.endTagHandler = utils.MethodDispatcher([("br", self.endTagBr)])
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        self.anythingElse()
        self.parser.phase.processEOF()

    def processCharacters(self, token):
        self.anythingElse()
        self.parser.phase.processCharacters(token)

    def startTagBody(self, token):
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inBody"]

    def startTagFrameset(self, token):
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Handle a head-only element seen after </head>: report it, then let
        the "inHead" phase process it with the head temporarily re-opened."""
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        self.tree.openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        # Take the head back off the stack (searching from the top).
        for node in self.tree.openElements[::-1]:
            if node.name == "head":
                self.tree.openElements.remove(node)
                break

    def startTagHead(self, token):
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        self.anythingElse()
        self.parser.phase.processStartTag(token)

    def endTagBr(self, token):
        #This is not currently in the spec
        self.anythingElse()
        self.parser.phase.processEndTag(token)

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        # Insert an implied <body> and continue in the "inBody" phase.
        self.tree.insertElement(impliedTagToken("body", "StartTag"))
        self.parser.phase = self.parser.phases["inBody"]


class InBodyPhase(Phase):
    # http://www.whatwg.org/specs/web-apps/current-work/#in-body
    # the crazy mode
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        #Keep a ref to this for special handling of whitespace (the "non-pre"
        #variant of processSpaceCharacters)
        self.processSpaceCharactersNonPre = self.processSpaceCharacters

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("base", "link", "meta", "script", "style", "title"),
             self.startTagProcessInHead),
            ("body", self.startTagBody),
            (("address", "article", "aside", "blockquote", "center", "datagrid",
              "details", "dialog", "dir", "div", "dl", "fieldset", "figure",
              "footer", "h1", "h2", "h3", "h4", "h5", "h6", "header", "listing",
              "menu", "nav", "ol", "p", "pre", "section", "ul"),
             self.startTagCloseP),
            ("form", self.startTagForm),
            (("li", "dd", "dt"), self.startTagListItem),
            ("plaintext", self.startTagPlaintext),
            (headingElements, self.startTagHeading),
            ("a", self.startTagA),
            (("b", "big", "em", "font", "i", "s", "small", "strike", "strong",
              "tt", "u"), self.startTagFormatting),
            ("nobr", self.startTagNobr),
            ("button", self.startTagButton),
            (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
            ("xmp", self.startTagXmp),
            ("table", self.startTagTable),
            (("area", "basefont", "bgsound", "br", "embed", "img", "param",
              "spacer", "wbr"), self.startTagVoidFormatting),
            ("hr", self.startTagHr),
            ("image", self.startTagImage),
            ("input", self.startTagInput),
            ("isindex", self.startTagIsIndex),
            ("textarea", self.startTagTextarea),
            (("iframe", "noembed", "noframes", "noscript"), self.startTagCdata),
            ("select", self.startTagSelect),
            (("rp", "rt"), self.startTagRpRt),
            (("option", "optgroup"), self.startTagOpt),
            (("caption", "col", "colgroup", "frame", "frameset", "head",
              "tbody", "td", "tfoot", "th", "thead",
              "tr"), self.startTagMisplaced),
            (("event-source", "command"), self.startTagNew)
        ])
        self.startTagHandler.default = self.startTagOther

        self.endTagHandler = utils.MethodDispatcher([
            ("body", self.endTagBody),
            ("html", self.endTagHtml),
            (("address", "article", "aside", "blockquote", "center", "datagrid",
              "details", "dialog", "dir", "div", "dl", "fieldset", "figure",
              "footer", "header", "listing", "menu", "nav", "ol", "pre", "section",
              "ul"), self.endTagBlock),
            ("form", self.endTagForm),
            ("p", self.endTagP),
            (("dd", "dt", "li"), self.endTagListItem),
            (headingElements, self.endTagHeading),
            (("a", "b", "big", "em", "font", "i", "nobr", "s", "small",
              "strike", "strong", "tt", "u"), self.endTagFormatting),
            (("applet", "button", "marquee", "object"), self.endTagAppletButtonMarqueeObject),
            ("br", self.endTagBr),
        ])
        self.endTagHandler.default = self.endTagOther
# helper
def addFormattingElement(self, token):
    """Insert an element for ``token`` and record the new top of the open
    element stack in the tree's list of active formatting elements."""
    tree = self.tree
    tree.insertElement(token)
    # After the insert, the last open element is the one to track.
    newest = tree.openElements[-1]
    tree.activeFormattingElements.append(newest)
# the real deal
def processEOF(self):
    """Report a single parse error at EOF when any open element is one that
    may not be left unclosed."""
    may_stay_open = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                               "tfoot", "th", "thead", "tr", "body",
                               "html"))
    # Walk the stack from the most recently opened element; stop at the
    # first offender so at most one error is reported.
    offender_found = any(element.name not in may_stay_open
                         for element in self.tree.openElements[::-1])
    if offender_found:
        self.parser.parseError("expected-closing-tag-but-got-eof")
    #Stop parsing
def processSpaceCharactersDropNewline(self, token):
# Sometimes (start of , , and