@@ -178,7 +178,8 @@ def consumeNumberEntity(self, isHex):
178178
179179 # Consume all the characters that are in range.
180180 c = self .consumeChar ()
181- while c in range :
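181+ #XXX Explicit check for EOF. NOTE(review): `c in range` is evaluated before the EOF test -- confirm `range` accepts an EOF operand, else test EOF first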
182+ while c in range and c is not EOF :
182183 charStack .append (c )
183184 c = self .consumeChar ()
184185
@@ -216,20 +217,29 @@ def consumeEntity(self):
216217 if charStack [0 ] == u"#" :
217218 charStack .append (self .consumeChar ())
218219 charStack .append (self .consumeChar ())
219- if charStack [1 ].lower () == u"x" \
220- and charStack [2 ] in string .hexdigits :
221- # Hexadecimal entity detected.
222- self .characterQueue .append (charStack [2 ])
223- char = self .consumeNumberEntity (True )
224- elif charStack [1 ] in string .digits :
225- # Decimal entity detected.
226- self .characterQueue .append (charStack [1 ])
227- self .characterQueue .append (charStack [2 ])
228- char = self .consumeNumberEntity (False )
229- else :
230- # No number entity detected.
220+ if EOF in charStack :
221+ #If we reach the end of the file put everything up to EOF
222+ #back in the queue
223+ charStack = charStack [:charStack .index (EOF )]
231224 self .characterQueue .extend (charStack )
232225 self .parser .parseError ()
226+ else :
227+ if charStack [1 ].lower () == u"x" \
228+ and charStack [2 ] in string .hexdigits :
229+ # Hexadecimal entity detected.
230+ self .characterQueue .append (charStack [2 ])
231+ char = self .consumeNumberEntity (True )
232+ elif charStack [1 ] in string .digits :
233+ # Decimal entity detected.
234+ self .characterQueue .extend (charStack [1 :])
235+ char = self .consumeNumberEntity (False )
236+ else :
237+ # No number entity detected.
238+ self .characterQueue .extend (charStack )
239+ self .parser .parseError ()
240+ #Break out if we reach the end of the file
241+ elif charStack [0 ] == EOF :
242+ self .parser .parseError ()
233243 else :
234244 # At this point in the process might have named entity. Entities
235245 # are stored in the global variable "entities".
@@ -241,22 +251,30 @@ def consumeEntity(self):
241251 def entitiesStartingWith (name ):
242252 return [e for e in filteredEntityList if e .startswith (name )]
243253
254+ EOFReached = False
244255 while entitiesStartingWith ("" .join (charStack )):
245256 charStack .append (self .consumeChar ())
246-
257+ if charStack [- 1 ] == EOF :
258+ EOFReached = True
259+ break
260+
247261 # At this point we have the name of the named entity or nothing.
248- possibleEntityName = "" .join (charStack )[:- 1 ]
249- if possibleEntityName in entities :
250- char = entities [possibleEntityName ]
251-
252- # Check whether or not the last character returned can be
253- # discarded or needs to be put back.
254- if not charStack [- 1 ] == ";" :
255- self .parser .parseError ()
256- self .characterQueue .append (charStack [- 1 ])
257- else :
262+ if EOFReached :
258263 self .parser .parseError ()
259264 self .characterQueue .extend (charStack )
265+ else :
266+ possibleEntityName = "" .join (charStack )[:- 1 ]
267+ if possibleEntityName in entities :
268+ char = entities [possibleEntityName ]
269+
270+ # Check whether or not the last character returned can be
271+ # discarded or needs to be put back.
272+ if not charStack [- 1 ] == ";" :
273+ self .parser .parseError ()
274+ self .characterQueue .append (charStack [- 1 ])
275+ else :
276+ self .parser .parseError ()
277+ self .characterQueue .extend (charStack )
260278 return char
261279
262280 def processEntityInAttribute (self ):
@@ -571,7 +589,7 @@ def attributeValueUnQuotedState(self):
571589 def bogusCommentState (self ):
572590 assert self .contentModelFlag == contentModelFlags ["PCDATA" ]
573591
574- charStack = [self .ConsumeChar ()]
592+ charStack = [self .consumeChar ()]
575593 while charStack [- 1 ] not in [u">" , EOF ]:
576594 charStack .append (self .consumeChar ())
577595
@@ -595,7 +613,9 @@ def markupDeclarationOpenState(self):
595613 else :
596614 for x in xrange (5 ):
597615 charStack .append (self .consumeChar ())
598- if "" .join (charStack ).upper () == u"DOCTYPE" :
616+ #XXX - explicit EOF check before joining charStack
617+ if (not EOF in charStack and
618+ "" .join (charStack ).upper () == u"DOCTYPE" ):
599619 self .changeState ("doctype" )
600620 else :
601621 self .parser .parseError ()
0 commit comments