66from constants import encodings
77from utils import MethodDispatcher
88
# Byte-string (non-unicode) counterparts of the character-class constants.
# The encoding pre-parser compares the current byte of the raw stream
# against these lists.
spaceCharactersBytes = list(map(str, spaceCharacters))
asciiLettersBytes = list(map(str, asciiLetters))
asciiUppercaseBytes = list(map(str, asciiUppercase))
13+
914try :
1015 from collections import deque
1116except ImportError :
@@ -357,7 +362,7 @@ def getCurrentByte(self):
357362
358363 currentByte = property (getCurrentByte )
359364
360- def skip (self , chars = spaceCharacters ):
365+ def skip (self , chars = spaceCharactersBytes ):
361366 """Skip past a list of characters"""
362367 while self .currentByte in chars :
363368 self .position += 1
@@ -432,7 +437,7 @@ def handleComment(self):
432437 return self .data .jumpTo ("-->" )
433438
434439 def handleMeta (self ):
435- if self .data .currentByte not in spaceCharacters :
440+ if self .data .currentByte not in spaceCharactersBytes :
436441 #<meta is not followed by a space, so just keep going
437442 return True
438443 #We have a valid meta element we want to search for attributes
@@ -462,7 +467,7 @@ def handlePossibleEndTag(self):
462467 return self .handlePossibleTag (True )
463468
464469 def handlePossibleTag (self , endTag ):
465- if self .data .currentByte not in asciiLetters :
470+ if self .data .currentByte not in asciiLettersBytes :
466471 #If the next byte is not an ascii letter either ignore this
467472 #fragment (possible start tag case) or treat it according to
468473 #handleOther
@@ -471,7 +476,7 @@ def handlePossibleTag(self, endTag):
471476 self .handleOther ()
472477 return True
473478
474- self .data .findNext (list (spaceCharacters ) + ["<" , ">" ])
479+ self .data .findNext (list (spaceCharactersBytes ) + ["<" , ">" ])
475480 if self .data .currentByte == "<" :
476481 #return to the first step in the overall "two step" algorithm
477482 #reprocessing the < byte
@@ -489,7 +494,7 @@ def handleOther(self):
489494 def getAttribute (self ):
490495 """Return a name,value pair for the next attribute in the stream,
491496 if one is found, or None"""
492- self .data .skip (list (spaceCharacters )+ ["/" ])
497+ self .data .skip (list (spaceCharactersBytes )+ ["/" ])
493498 if self .data .currentByte == "<" :
494499 self .data .position -= 1
495500 return None
@@ -502,12 +507,12 @@ def getAttribute(self):
502507 while True :
503508 if self .data .currentByte == "=" and attrName :
504509 break
505- elif self .data .currentByte in spaceCharacters :
510+ elif self .data .currentByte in spaceCharactersBytes :
506511 spaceFound = True
507512 break
508513 elif self .data .currentByte in ("/" , "<" , ">" ):
509514 return "" .join (attrName ), ""
510- elif self .data .currentByte in asciiUppercase :
515+ elif self .data .currentByte in asciiUppercaseBytes :
511516 attrName .extend (self .data .currentByte .lower ())
512517 else :
513518 attrName .extend (self .data .currentByte )
@@ -536,23 +541,23 @@ def getAttribute(self):
536541 self .data .position += 1
537542 return "" .join (attrName ), "" .join (attrValue )
538543 #11.4
539- elif self .data .currentByte in asciiUppercase :
544+ elif self .data .currentByte in asciiUppercaseBytes :
540545 attrValue .extend (self .data .currentByte .lower ())
541546 #11.5
542547 else :
543548 attrValue .extend (self .data .currentByte )
544549 elif self .data .currentByte in (">" , "<" ):
545550 return "" .join (attrName ), ""
546- elif self .data .currentByte in asciiUppercase :
551+ elif self .data .currentByte in asciiUppercaseBytes :
547552 attrValue .extend (self .data .currentByte .lower ())
548553 else :
549554 attrValue .extend (self .data .currentByte )
550555 while True :
551556 self .data .position += 1
552557 if self .data .currentByte in (
553- list (spaceCharacters ) + [">" , "<" ]):
558+ list (spaceCharactersBytes ) + [">" , "<" ]):
554559 return "" .join (attrName ), "" .join (attrValue )
555- elif self .data .currentByte in asciiUppercase :
560+ elif self .data .currentByte in asciiUppercaseBytes :
556561 attrValue .extend (self .data .currentByte .lower ())
557562 else :
558563 attrValue .extend (self .data .currentByte )
@@ -588,7 +593,7 @@ def parse(self):
588593 #Unquoted value
589594 oldPosition = self .data .position
590595 try :
591- self .data .findNext (spaceCharacters )
596+ self .data .findNext (spaceCharactersBytes )
592597 return self .data [oldPosition :self .data .position ]
593598 except StopIteration :
594599 #Return the whole remaining value
0 commit comments