@@ -70,6 +70,8 @@ def __init__(self, stream, encoding=None, parseMeta=True):
7070
7171 # Setup the initial tokenizer state
7272 self .contentModelFlag = contentModelFlags ["PCDATA" ]
73+ self .escapeFlag = False
74+ self .lastFourChars = []
7375 self .state = self .states ["data" ]
7476
7577 # The current token being created
@@ -273,12 +275,30 @@ def emitCurrentToken(self):
273275
274276 def dataState (self ):
275277 data = self .stream .char ()
276- if data == u"&" and self .contentModelFlag in \
278+ if self .contentModelFlag in \
279+ (contentModelFlags ["CDATA" ], contentModelFlags ["RCDATA" ]):
280+ if len (self .lastFourChars ) == 4 :
281+ self .lastFourChars .pop (0 )
282+ self .lastFourChars .append (data )
283+ if data == "&" and self .contentModelFlag in \
277284 (contentModelFlags ["PCDATA" ], contentModelFlags ["RCDATA" ]):
278285 self .state = self .states ["entityData" ]
279- elif data == u"<" and self .contentModelFlag != \
280- contentModelFlags ["PLAINTEXT" ]:
286+ elif data == "-" and self .contentModelFlag in \
287+ (contentModelFlags ["CDATA" ], contentModelFlags ["RCDATA" ]) and \
288+ self .escapeFlag == False and \
289+ "" .join (self .lastFourChars ) == "<!--" :
290+ self .escapeFlag = True
291+ self .tokenQueue .append ({"type" : "Characters" , "data" :data })
292+ elif data == "<" and (self .contentModelFlag == \
293+ contentModelFlags ["PCDATA" ] or (self .contentModelFlag in
294+ (contentModelFlags ["CDATA" ], contentModelFlags ["RCDATA" ]) and \
295+ self .escapeFlag == False )):
281296 self .state = self .states ["tagOpen" ]
297+ elif data == ">" and self .contentModelFlag in \
298+ (contentModelFlags ["CDATA" ], contentModelFlags ["RCDATA" ]) and \
299+ self .escapeFlag == True and "" .join (self .lastFourChars )[1 :] == "-->" :
300+ self .escapeFlag = False
301+ self .tokenQueue .append ({"type" : "Characters" , "data" :data })
282302 elif data == EOF :
283303 # Tokenization ends.
284304 return False
@@ -292,7 +312,7 @@ def dataState(self):
292312 data + self .stream .charsUntil (spaceCharacters , True )})
293313 else :
294314 self .tokenQueue .append ({"type" : "Characters" , "data" :
295- data + self .stream .charsUntil ((u "&" , u"< " ))})
315+ data + self .stream .charsUntil (("&" , "<" , ">" , "- " ))})
296316 return True
297317
298318 def entityDataState (self ):
0 commit comments