Skip to content

Commit 1ac258a

Browse files
committed
Make non-ASCII characters in meta elements work, and set the compatMode attribute on the right object (the parser rather than the phase)
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401302
1 parent a64695b commit 1ac258a

1 file changed

Lines changed: 90 additions & 88 deletions

File tree

src/html5lib/html5parser.py

Lines changed: 90 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,7 @@ class InitialPhase(Phase):
333333
# this.
334334
def processEOF(self):
335335
self.parser.parseError("expected-doctype-but-got-eof")
336-
self.compatMode = "quirks"
336+
self.parser.compatMode = "quirks"
337337
self.parser.phase = self.parser.phases["beforeHtml"]
338338
self.parser.phase.processEOF()
339339

@@ -357,90 +357,91 @@ def processDoctype(self, name, publicId, systemId, correct):
357357
publicId = publicId.translate(asciiUpper2Lower)
358358

359359

360-
if (not correct) or nameLower != "html"\
361-
or publicId in\
362-
("+//silmaril//dtd html pro v0r11 19970101//en",
363-
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
364-
"-//as//dtd html 3.0 aswedit + extensions//en",
365-
"-//ietf//dtd html 2.0 level 1//en",
366-
"-//ietf//dtd html 2.0 level 2//en",
367-
"-//ietf//dtd html 2.0 strict level 1//en",
368-
"-//ietf//dtd html 2.0 strict level 2//en",
369-
"-//ietf//dtd html 2.0 strict//en",
370-
"-//ietf//dtd html 2.0//en",
371-
"-//ietf//dtd html 2.1e//en",
372-
"-//ietf//dtd html 3.0//en",
373-
"-//ietf//dtd html 3.0//en//",
374-
"-//ietf//dtd html 3.2 final//en",
375-
"-//ietf//dtd html 3.2//en",
376-
"-//ietf//dtd html 3//en",
377-
"-//ietf//dtd html level 0//en",
378-
"-//ietf//dtd html level 0//en//2.0",
379-
"-//ietf//dtd html level 1//en",
380-
"-//ietf//dtd html level 1//en//2.0",
381-
"-//ietf//dtd html level 2//en",
382-
"-//ietf//dtd html level 2//en//2.0",
383-
"-//ietf//dtd html level 3//en",
384-
"-//ietf//dtd html level 3//en//3.0",
385-
"-//ietf//dtd html strict level 0//en",
386-
"-//ietf//dtd html strict level 0//en//2.0",
387-
"-//ietf//dtd html strict level 1//en",
388-
"-//ietf//dtd html strict level 1//en//2.0",
389-
"-//ietf//dtd html strict level 2//en",
390-
"-//ietf//dtd html strict level 2//en//2.0",
391-
"-//ietf//dtd html strict level 3//en",
392-
"-//ietf//dtd html strict level 3//en//3.0",
393-
"-//ietf//dtd html strict//en",
394-
"-//ietf//dtd html strict//en//2.0",
395-
"-//ietf//dtd html strict//en//3.0",
396-
"-//ietf//dtd html//en",
397-
"-//ietf//dtd html//en//2.0",
398-
"-//ietf//dtd html//en//3.0",
399-
"-//metrius//dtd metrius presentational//en",
400-
"-//microsoft//dtd internet explorer 2.0 html strict//en",
401-
"-//microsoft//dtd internet explorer 2.0 html//en",
402-
"-//microsoft//dtd internet explorer 2.0 tables//en",
403-
"-//microsoft//dtd internet explorer 3.0 html strict//en",
404-
"-//microsoft//dtd internet explorer 3.0 html//en",
405-
"-//microsoft//dtd internet explorer 3.0 tables//en",
406-
"-//netscape comm. corp.//dtd html//en",
407-
"-//netscape comm. corp.//dtd strict html//en",
408-
"-//o'reilly and associates//dtd html 2.0//en",
409-
"-//o'reilly and associates//dtd html extended 1.0//en",
410-
"-//spyglass//dtd html 2.0 extended//en",
411-
"-//sq//dtd html 2.0 hotmetal + extensions//en",
412-
"-//sun microsystems corp.//dtd hotjava html//en",
413-
"-//sun microsystems corp.//dtd hotjava strict html//en",
414-
"-//w3c//dtd html 3 1995-03-24//en",
415-
"-//w3c//dtd html 3.2 draft//en",
416-
"-//w3c//dtd html 3.2 final//en",
417-
"-//w3c//dtd html 3.2//en",
418-
"-//w3c//dtd html 3.2s draft//en",
419-
"-//w3c//dtd html 4.0 frameset//en",
420-
"-//w3c//dtd html 4.0 transitional//en",
421-
"-//w3c//dtd html experimental 19960712//en",
422-
"-//w3c//dtd html experimental 970421//en",
423-
"-//w3c//dtd w3 html//en",
424-
"-//w3o//dtd w3 html 3.0//en",
425-
"-//w3o//dtd w3 html 3.0//en//",
426-
"-//w3o//dtd w3 html strict 3.0//en//",
427-
"-//webtechs//dtd mozilla html 2.0//en",
428-
"-//webtechs//dtd mozilla html//en",
429-
"-/w3c/dtd html 4.0 transitional/en",
430-
"html")\
431-
or (publicId in\
432-
("-//w3c//dtd html 4.01 frameset//EN",
433-
"-//w3c//dtd html 4.01 transitional//EN") and systemId == None)\
434-
or (systemId != None and\
435-
systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
436-
self.compatMode = "quirks"
437-
elif publicId in\
360+
if ((not correct) or nameLower != "html"
361+
or publicId in
362+
("+//silmaril//dtd html pro v0r11 19970101//en",
363+
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
364+
"-//as//dtd html 3.0 aswedit + extensions//en",
365+
"-//ietf//dtd html 2.0 level 1//en",
366+
"-//ietf//dtd html 2.0 level 2//en",
367+
"-//ietf//dtd html 2.0 strict level 1//en",
368+
"-//ietf//dtd html 2.0 strict level 2//en",
369+
"-//ietf//dtd html 2.0 strict//en",
370+
"-//ietf//dtd html 2.0//en",
371+
"-//ietf//dtd html 2.1e//en",
372+
"-//ietf//dtd html 3.0//en",
373+
"-//ietf//dtd html 3.0//en//",
374+
"-//ietf//dtd html 3.2 final//en",
375+
"-//ietf//dtd html 3.2//en",
376+
"-//ietf//dtd html 3//en",
377+
"-//ietf//dtd html level 0//en",
378+
"-//ietf//dtd html level 0//en//2.0",
379+
"-//ietf//dtd html level 1//en",
380+
"-//ietf//dtd html level 1//en//2.0",
381+
"-//ietf//dtd html level 2//en",
382+
"-//ietf//dtd html level 2//en//2.0",
383+
"-//ietf//dtd html level 3//en",
384+
"-//ietf//dtd html level 3//en//3.0",
385+
"-//ietf//dtd html strict level 0//en",
386+
"-//ietf//dtd html strict level 0//en//2.0",
387+
"-//ietf//dtd html strict level 1//en",
388+
"-//ietf//dtd html strict level 1//en//2.0",
389+
"-//ietf//dtd html strict level 2//en",
390+
"-//ietf//dtd html strict level 2//en//2.0",
391+
"-//ietf//dtd html strict level 3//en",
392+
"-//ietf//dtd html strict level 3//en//3.0",
393+
"-//ietf//dtd html strict//en",
394+
"-//ietf//dtd html strict//en//2.0",
395+
"-//ietf//dtd html strict//en//3.0",
396+
"-//ietf//dtd html//en",
397+
"-//ietf//dtd html//en//2.0",
398+
"-//ietf//dtd html//en//3.0",
399+
"-//metrius//dtd metrius presentational//en",
400+
"-//microsoft//dtd internet explorer 2.0 html strict//en",
401+
"-//microsoft//dtd internet explorer 2.0 html//en",
402+
"-//microsoft//dtd internet explorer 2.0 tables//en",
403+
"-//microsoft//dtd internet explorer 3.0 html strict//en",
404+
"-//microsoft//dtd internet explorer 3.0 html//en",
405+
"-//microsoft//dtd internet explorer 3.0 tables//en",
406+
"-//netscape comm. corp.//dtd html//en",
407+
"-//netscape comm. corp.//dtd strict html//en",
408+
"-//o'reilly and associates//dtd html 2.0//en",
409+
"-//o'reilly and associates//dtd html extended 1.0//en",
410+
"-//spyglass//dtd html 2.0 extended//en",
411+
"-//sq//dtd html 2.0 hotmetal + extensions//en",
412+
"-//sun microsystems corp.//dtd hotjava html//en",
413+
"-//sun microsystems corp.//dtd hotjava strict html//en",
414+
"-//w3c//dtd html 3 1995-03-24//en",
415+
"-//w3c//dtd html 3.2 draft//en",
416+
"-//w3c//dtd html 3.2 final//en",
417+
"-//w3c//dtd html 3.2//en",
418+
"-//w3c//dtd html 3.2s draft//en",
419+
"-//w3c//dtd html 4.0 frameset//en",
420+
"-//w3c//dtd html 4.0 transitional//en",
421+
"-//w3c//dtd html experimental 19960712//en",
422+
"-//w3c//dtd html experimental 970421//en",
423+
"-//w3c//dtd w3 html//en",
424+
"-//w3o//dtd w3 html 3.0//en",
425+
"-//w3o//dtd w3 html 3.0//en//",
426+
"-//w3o//dtd w3 html strict 3.0//en//",
427+
"-//webtechs//dtd mozilla html 2.0//en",
428+
"-//webtechs//dtd mozilla html//en",
429+
"-/w3c/dtd html 4.0 transitional/en",
430+
"html")
431+
or (publicId in
432+
("-//w3c//dtd html 4.01 frameset//EN",
433+
"-//w3c//dtd html 4.01 transitional//EN") and systemId == None)
434+
or (systemId != None and
435+
systemId ==
436+
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")):
437+
self.parser.compatMode = "quirks"
438+
elif (publicId in
438439
("-//w3c//dtd xhtml 1.0 frameset//EN",
439-
"-//w3c//dtd xhtml 1.0 transitional//EN")\
440-
or (publicId in\
441-
("-//w3c//dtd html 4.01 frameset//EN",
442-
"-//w3c//dtd html 4.01 transitional//EN") and systemId == None):
443-
self.compatMode = "limited quirks"
440+
"-//w3c//dtd xhtml 1.0 transitional//EN")
441+
or (publicId in
442+
("-//w3c//dtd html 4.01 frameset//EN",
443+
"-//w3c//dtd html 4.01 transitional//EN") and systemId == None)):
444+
self.parser.compatMode = "limited quirks"
444445

445446
self.parser.phase = self.parser.phases["beforeHtml"]
446447

@@ -449,21 +450,21 @@ def processSpaceCharacters(self, data):
449450

450451
def processCharacters(self, data):
451452
self.parser.parseError("expected-doctype-but-got-chars")
452-
self.compatMode = "quirks"
453+
self.parser.compatMode = "quirks"
453454
self.parser.phase = self.parser.phases["beforeHtml"]
454455
self.parser.phase.processCharacters(data)
455456

456457
def processStartTag(self, name, attributes):
457458
self.parser.parseError("expected-doctype-but-got-start-tag",
458459
{"name": name})
459-
self.compatMode = "quirks"
460+
self.parser.compatMode = "quirks"
460461
self.parser.phase = self.parser.phases["beforeHtml"]
461462
self.parser.phase.processStartTag(name, attributes)
462463

463464
def processEndTag(self, name):
464465
self.parser.parseError("expected-doctype-but-got-end-tag",
465466
{"name": name})
466-
self.compatMode = "quirks"
467+
self.parser.compatMode = "quirks"
467468
self.parser.phase = self.parser.phases["beforeHtml"]
468469
self.parser.phase.processEndTag(name)
469470

@@ -602,7 +603,8 @@ def startTagMeta(self, name, attributes):
602603
if "charset" in attributes:
603604
self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
604605
elif "content" in attributes:
605-
data = inputstream.EncodingBytes(attributes["content"])
606+
data = inputstream.EncodingBytes(
607+
attributes["content"].encode(self.parser.tokenizer.stream.charEncoding[0]))
606608
parser = inputstream.ContentAttrParser(data)
607609
codec = parser.parse()
608610
self.parser.tokenizer.stream.changeEncoding(codec)

0 commit comments

Comments
 (0)