@@ -354,15 +354,22 @@ def dataState(self):
354354 self .tokenQueue .append ({"type" : "SpaceCharacters" , "data" :
355355 data + self .stream .charsUntil (spaceCharacters , True )})
356356 # No need to update lastFourChars here, since the first space will
357- # have already broken any <!-- or --> sequences
357+ # have already been appended to lastFourChars and will have broken
358+ # any <!-- or --> sequences
358359 else :
359- chars = self .stream .charsUntil (("&" , "<" , ">" , "-" ))
360- self .tokenQueue .append ({"type" : "Characters" , "data" :
360+ if self .contentModelFlag in \
361+ (contentModelFlags ["CDATA" ], contentModelFlags ["RCDATA" ]):
362+ chars = self .stream .charsUntil ((u"&" , u"<" , u">" , u"-" ))
363+ self .lastFourChars += chars [- 4 :]
364+ self .lastFourChars = self .lastFourChars [- 4 :]
365+ else :
366+ chars = self .stream .charsUntil ((u"&" , u"<" ))
367+ # lastFourChars only needs to be kept up-to-date if we're
368+ # in CDATA or RCDATA, so ignore it here
369+ self .tokenQueue .append ({"type" : "Characters" , "data" :
361370 data + chars })
362- self .lastFourChars += chars [- 4 :]
363- self .lastFourChars = self .lastFourChars [- 4 :]
364371 return True
365-
372+
366373 def entityDataState (self ):
367374 entity = self .consumeEntity ()
368375 if entity :
0 commit comments