Skip to content

Commit c093cb8

Browse files
committed
Check for EOF in a few places when consuming code, with accompanying tests for several cases (need more partial entity tests in particular)
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%4087
1 parent 59c8512 commit c093cb8

1 file changed

Lines changed: 46 additions & 26 deletions

File tree

tokenizer.py

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,8 @@ def consumeNumberEntity(self, isHex):
178178

179179
# Consume all the characters that are in range.
180180
c = self.consumeChar()
181-
while c in range:
181+
#XXX Explicit check for EOF
182+
while c in range and c is not EOF:
182183
charStack.append(c)
183184
c = self.consumeChar()
184185

@@ -216,20 +217,29 @@ def consumeEntity(self):
216217
if charStack[0] == u"#":
217218
charStack.append(self.consumeChar())
218219
charStack.append(self.consumeChar())
219-
if charStack[1].lower() == u"x" \
220-
and charStack[2] in string.hexdigits:
221-
# Hexadecimal entity detected.
222-
self.characterQueue.append(charStack[2])
223-
char = self.consumeNumberEntity(True)
224-
elif charStack[1] in string.digits:
225-
# Decimal entity detected.
226-
self.characterQueue.append(charStack[1])
227-
self.characterQueue.append(charStack[2])
228-
char = self.consumeNumberEntity(False)
229-
else:
230-
# No number entity detected.
220+
if EOF in charStack:
221+
#If we reach the end of the file put everything up to EOF
222+
#back in the queue
223+
charStack = charStack[:charStack.index(EOF)]
231224
self.characterQueue.extend(charStack)
232225
self.parser.parseError()
226+
else:
227+
if charStack[1].lower() == u"x" \
228+
and charStack[2] in string.hexdigits:
229+
# Hexadecimal entity detected.
230+
self.characterQueue.append(charStack[2])
231+
char = self.consumeNumberEntity(True)
232+
elif charStack[1] in string.digits:
233+
# Decimal entity detected.
234+
self.characterQueue.extend(charStack[1:])
235+
char = self.consumeNumberEntity(False)
236+
else:
237+
# No number entity detected.
238+
self.characterQueue.extend(charStack)
239+
self.parser.parseError()
240+
#Break out if we reach the end of the file
241+
elif charStack[0] == EOF:
242+
self.parser.parseError()
233243
else:
234244
# At this point in the process might have named entity. Entities
235245
# are stored in the global variable "entities".
@@ -241,22 +251,30 @@ def consumeEntity(self):
241251
def entitiesStartingWith(name):
242252
return [e for e in filteredEntityList if e.startswith(name)]
243253

254+
EOFReached = False
244255
while entitiesStartingWith("".join(charStack)):
245256
charStack.append(self.consumeChar())
246-
257+
if charStack[-1] == EOF:
258+
EOFReached = True
259+
break
260+
247261
# At this point we have the name of the named entity or nothing.
248-
possibleEntityName = "".join(charStack)[:-1]
249-
if possibleEntityName in entities:
250-
char = entities[possibleEntityName]
251-
252-
# Check whether or not the last character returned can be
253-
# discarded or needs to be put back.
254-
if not charStack[-1] == ";":
255-
self.parser.parseError()
256-
self.characterQueue.append(charStack[-1])
257-
else:
262+
if EOFReached:
258263
self.parser.parseError()
259264
self.characterQueue.extend(charStack)
265+
else:
266+
possibleEntityName = "".join(charStack)[:-1]
267+
if possibleEntityName in entities:
268+
char = entities[possibleEntityName]
269+
270+
# Check whether or not the last character returned can be
271+
# discarded or needs to be put back.
272+
if not charStack[-1] == ";":
273+
self.parser.parseError()
274+
self.characterQueue.append(charStack[-1])
275+
else:
276+
self.parser.parseError()
277+
self.characterQueue.extend(charStack)
260278
return char
261279

262280
def processEntityInAttribute(self):
@@ -571,7 +589,7 @@ def attributeValueUnQuotedState(self):
571589
def bogusCommentState(self):
572590
assert self.contentModelFlag == contentModelFlags["PCDATA"]
573591

574-
charStack = [self.ConsumeChar()]
592+
charStack = [self.consumeChar()]
575593
while charStack[-1] not in [u">", EOF]:
576594
charStack.append(self.consumeChar())
577595

@@ -595,7 +613,9 @@ def markupDeclarationOpenState(self):
595613
else:
596614
for x in xrange(5):
597615
charStack.append(self.consumeChar())
598-
if "".join(charStack).upper() == u"DOCTYPE":
616+
#XXX - put in explicit None check
617+
if (not EOF in charStack and
618+
"".join(charStack).upper() == u"DOCTYPE"):
599619
self.changeState("doctype")
600620
else:
601621
self.parser.parseError()

0 commit comments

Comments
 (0)