@@ -39,10 +39,11 @@ class HTMLTokenizer:
3939 # XXX need to fix documentation
4040
4141 def __init__ (self , stream , encoding = None , parseMeta = True , useChardet = True ,
42- lowercaseElementName = True , lowercaseAttrName = True ):
42+ lowercaseElementName = True , lowercaseAttrName = True , parser = None ):
4343
4444 self .stream = HTMLInputStream (stream , encoding , parseMeta , useChardet )
45-
45+ self .parser = parser
46+
4647 #Perform case conversions?
4748 self .lowercaseElementName = lowercaseElementName
4849 self .lowercaseAttrName = lowercaseAttrName
@@ -1062,6 +1063,19 @@ def markupDeclarationOpenState(self):
10621063 "correct" : True }
10631064 self .state = self .doctypeState
10641065 return True
1066+ elif (charStack [- 1 ] == "[" and
1067+ self .parser is not None and
1068+ self .parser .phase == self .parser .phases ["inForeignContent" ] and
1069+ self .parser .tree .openElements [- 1 ].namespace != self .parser .tree .defaultNamespace ):
1070+ matched = True
1071+ for expected in ["C" , "D" , "A" , "T" , "A" , "[" ]:
1072+ charStack .append (self .stream .char ())
1073+ if charStack [- 1 ] != expected :
1074+ matched = False
1075+ break
1076+ if matched :
1077+ self .state = self .cdataSectionState
1078+ return True
10651079
10661080 self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
10671081 "expected-dashes-or-doctype" })
@@ -1563,3 +1577,29 @@ def bogusDoctypeState(self):
15631577 else :
15641578 pass
15651579 return True
1580+
def cdataSectionState(self):
    """Consume a CDATA section body and queue it as one Characters token.

    Reads from ``self.stream`` until the terminator ``]]>`` or EOF is
    reached.  The terminator itself is dropped; everything before it is
    joined and, when non-empty, appended to ``self.tokenQueue`` as a
    ``Characters`` token.  On EOF, any ``]`` characters already consumed
    are kept as text.  The tokenizer is then switched back to
    ``self.dataState``.

    Always returns True.
    """
    data = []
    while True:
        # Everything up to (not including) the next "]" is plain text.
        data.append(self.stream.charsUntil(u"]"))

        # Count the whole run of consecutive "]" characters.  Looking at
        # the full run (rather than a fixed three-character window) is
        # what lets input such as "]]]>" terminate correctly: only the
        # last two brackets before ">" belong to the terminator.
        brackets = 0
        char = self.stream.char()
        while char == "]":
            brackets += 1
            char = self.stream.char()

        if char == EOF:
            # Unterminated section: keep the brackets already consumed.
            data.append("]" * brackets)
            break

        if char == ">" and brackets >= 2:
            # "]]>" found; brackets beyond the final two are text.
            data.append("]" * (brackets - 2))
            break

        # Not a terminator: the brackets and the character that followed
        # them are ordinary text, so keep scanning.
        data.append("]" * brackets)
        data.append(char)

    data = "".join(data)
    if data:
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": data})
    self.state = self.dataState
    return True
0 commit comments