@@ -139,8 +139,8 @@ def __init__(self, stream):
139139 }
140140
141141 # Setup the initial tokenizer state
142- self .contentModelFlag = contentModelFlags [' PCDATA' ]
143- self .state = self .states [' data' ]
142+ self .contentModelFlag = contentModelFlags [" PCDATA" ]
143+ self .state = self .states [" data" ]
144144
145145 # The current token being created
146146 self .currentToken = None
@@ -163,10 +163,6 @@ def __iter__(self):
163163 while self .tokenQueue :
164164 yield self .tokenQueue .pop (0 )
165165
166- def changeState (self , state ):
167- self .state = self .states [state ]
168-
169-
170166 # Below are various helper functions the tokenizer states use worked out.
171167 def processSolidusInTag (self ):
172168 """When a solidus (/) is encountered within a tag name what happens
@@ -344,7 +340,7 @@ def emitCurrentToken(self):
344340 # Add token to the queue to be yielded
345341 self .tokenQueue .append (token )
346342
347- self .changeState ( "data" )
343+ self .state = self . states [ "data" ]
348344
349345 def emitCurrentTokenWithParseError (self , data = None ):
350346 """This method is equivalent to emitCurrentToken (well, it invokes it)
@@ -358,7 +354,7 @@ def emitCurrentTokenWithParseError(self, data=None):
358354 def attributeValueQuotedStateHandler (self , quoteType ):
359355 data = self .stream .char ()
360356 if data == quoteType :
361- self .changeState ( "beforeAttributeName" )
357+ self .state = self . states [ "beforeAttributeName" ]
362358 elif data == u"&" :
363359 self .processEntityInAttribute ()
364360 elif data == EOF :
@@ -378,10 +374,10 @@ def dataState(self):
378374 if (data == u"&" and
379375 (self .contentModelFlag in
380376 (contentModelFlags ["PCDATA" ], contentModelFlags ["RCDATA" ]))):
381- self .changeState ( "entityData" )
377+ self .state = self . states [ "entityData" ]
382378 elif (data == u"<" and
383379 self .contentModelFlag != contentModelFlags ["PLAINTEXT" ]):
384- self .changeState ( "tagOpen" )
380+ self .state = self . states [ "tagOpen" ]
385381 elif data == EOF :
386382 # Tokenization ends.
387383 return False
@@ -401,40 +397,40 @@ def entityDataState(self):
401397 self .tokenQueue .append (Characters (entity ))
402398 else :
403399 self .tokenQueue .append (Characters (u"&" ))
404- self .changeState ( "data" )
400+ self .state = self . states [ "data" ]
405401 return True
406402
407403 def tagOpenState (self ):
408404 data = self .stream .char ()
409405 if (self .contentModelFlag in
410406 (contentModelFlags ["RCDATA" ], contentModelFlags ["CDATA" ])):
411407 if data == u"/" :
412- self .changeState ( "closeTagOpen" )
408+ self .state = self . states [ "closeTagOpen" ]
413409 else :
414410 self .tokenQueue .append (Characters (u"<" ))
415411 self .stream .queue .append (data )
416- self .changeState ( "data" )
412+ self .state = self . states [ "data" ]
417413 elif self .contentModelFlag == contentModelFlags ['PCDATA' ]:
418414 if data == u"!" :
419- self .changeState ( "markupDeclarationOpen" )
415+ self .state = self . states [ "markupDeclarationOpen" ]
420416 elif data == u"/" :
421- self .changeState ( "closeTagOpen" )
417+ self .state = self . states [ "closeTagOpen" ]
422418 elif data in asciiLetters :
423419 self .currentToken = StartTag (data .lower ())
424- self .changeState ( "tagName" )
420+ self .state = self . states [ "tagName" ]
425421 elif data == u">" :
426422 self .tokenQueue .append (ParseError ())
427423 self .tokenQueue .append (Characters (u"<>" ))
428- self .changeState ( "data" )
424+ self .state = self . states [ "data" ]
429425 elif data == u"?" :
430426 self .tokenQueue .append (ParseError ())
431427 self .stream .queue .append (data )
432- self .changeState ( "bogusComment" )
428+ self .state = self . states [ "bogusComment" ]
433429 else :
434430 self .tokenQueue .append (ParseError ())
435431 self .tokenQueue .append (Characters (u"<" ))
436432 self .stream .queue .append (data )
437- self .changeState ( "data" )
433+ self .state = self . states [ "data" ]
438434 else :
439435 assert False
440436 return True
@@ -469,7 +465,7 @@ def closeTagOpenState(self):
469465 else :
470466 self .tokenQueue .append (ParseError ())
471467 self .tokenQueue .append (Characters (u"</" ))
472- self .changeState ( "data" )
468+ self .state = self . states [ "data" ]
473469
474470 # Need to return here since we don't want the rest of the
475471 # method to be walked through.
@@ -479,25 +475,25 @@ def closeTagOpenState(self):
479475 data = self .stream .char ()
480476 if data in asciiLetters :
481477 self .currentToken = EndTag (data .lower ())
482- self .changeState ( "tagName" )
478+ self .state = self . states [ "tagName" ]
483479 elif data == u">" :
484480 self .tokenQueue .append (ParseError ())
485- self .changeState ( "data" )
481+ self .state = self . states [ "data" ]
486482 elif data == EOF :
487483 self .tokenQueue .append (ParseError ())
488484 self .tokenQueue .append (Characters (u"</" ))
489485 self .stream .queue .append (data )
490- self .changeState ( "data" )
486+ self .state = self . states [ "data" ]
491487 else :
492488 self .tokenQueue .append (ParseError ())
493489 self .stream .queue .append (data )
494- self .changeState ( "bogusComment" )
490+ self .state = self . states [ "bogusComment" ]
495491 return True
496492
497493 def tagNameState (self ):
498494 data = self .stream .char ()
499495 if data in spaceCharacters :
500- self .changeState ( "beforeAttributeName" )
496+ self .state = self . states [ "beforeAttributeName" ]
501497 elif data in asciiLowercase :
502498 data += self .stream .charsUntil (asciiLowercase , True )
503499 self .currentToken .name += data
@@ -510,7 +506,7 @@ def tagNameState(self):
510506 self .emitCurrentTokenWithParseError (data )
511507 elif data == u"/" :
512508 self .processSolidusInTag ()
513- self .changeState ( "beforeAttributeName" )
509+ self .state = self . states [ "beforeAttributeName" ]
514510 else :
515511 self .currentToken .name += data
516512 return True
@@ -521,7 +517,7 @@ def beforeAttributeNameState(self):
521517 pass
522518 elif data in asciiUppercase :
523519 self .currentToken .data .append ([data .lower (), "" ])
524- self .changeState ( "attributeName" )
520+ self .state = self . states [ "attributeName" ]
525521 elif data == u">" :
526522 self .emitCurrentToken ()
527523 elif data == u"/" :
@@ -530,14 +526,14 @@ def beforeAttributeNameState(self):
530526 self .emitCurrentTokenWithParseError (data )
531527 else :
532528 self .currentToken .data .append ([data , "" ])
533- self .changeState ( "attributeName" )
529+ self .state = self . states [ "attributeName" ]
534530 return True
535531
536532 def attributeNameState (self ):
537533 data = self .stream .char ()
538534 leavingThisState = True
539535 if data == u"=" :
540- self .changeState ( "beforeAttributeValue" )
536+ self .state = self . states [ "beforeAttributeValue" ]
541537 elif data in asciiLowercase :
542538 self .currentToken .data [- 1 ][0 ] += data + self .stream .charsUntil (
543539 asciiLowercase , True )
@@ -552,10 +548,10 @@ def attributeNameState(self):
552548 # because data is a dict not a list
553549 pass
554550 elif data in spaceCharacters :
555- self .changeState ( "afterAttributeName" )
551+ self .state = self . states [ "afterAttributeName" ]
556552 elif data == u"/" :
557553 self .processSolidusInTag ()
558- self .changeState ( "beforeAttributeName" )
554+ self .state = self . states [ "beforeAttributeName" ]
559555 elif data == u"<" or data == EOF :
560556 self .emitCurrentTokenWithParseError (data )
561557 leavingThisState = False
@@ -580,40 +576,40 @@ def afterAttributeNameState(self):
580576 if data in spaceCharacters :
581577 pass
582578 elif data == u"=" :
583- self .changeState ( "beforeAttributeValue" )
579+ self .state = self . states [ "beforeAttributeValue" ]
584580 elif data == u">" :
585581 self .emitCurrentToken ()
586582 elif data in asciiUppercase :
587583 self .currentToken .data .append ([data .lower (), "" ])
588- self .changeState ( "attributeName" )
584+ self .state = self . states [ "attributeName" ]
589585 elif data == u"/" :
590586 self .processSolidusInTag ()
591- self .changeState ( "beforeAttributeName" )
587+ self .state = self . states [ "beforeAttributeName" ]
592588 elif data == u"<" or data == EOF :
593589 self .emitCurrentTokenWithParseError (data )
594590 else :
595591 self .currentToken .data .append ([data , "" ])
596- self .changeState ( "attributeName" )
592+ self .state = self . states [ "attributeName" ]
597593 return True
598594
599595 def beforeAttributeValueState (self ):
600596 data = self .stream .char ()
601597 if data in spaceCharacters :
602598 pass
603599 elif data == u"\" " :
604- self .changeState ( "attributeValueDoubleQuoted" )
600+ self .state = self . states [ "attributeValueDoubleQuoted" ]
605601 elif data == u"&" :
606- self .changeState ( "attributeValueUnQuoted" )
602+ self .state = self . states [ "attributeValueUnQuoted" ]
607603 self .stream .queue .append (data );
608604 elif data == u"'" :
609- self .changeState ( "attributeValueSingleQuoted" )
605+ self .state = self . states [ "attributeValueSingleQuoted" ]
610606 elif data == u">" :
611607 self .emitCurrentToken ()
612608 elif data == u"<" or data == EOF :
613609 self .emitCurrentTokenWithParseError (data )
614610 else :
615611 self .currentToken .data [- 1 ][1 ] += data
616- self .changeState ( "attributeValueUnQuoted" )
612+ self .state = self . states [ "attributeValueUnQuoted" ]
617613 return True
618614
619615 def attributeValueDoubleQuotedState (self ):
@@ -630,7 +626,7 @@ def attributeValueSingleQuotedState(self):
630626 def attributeValueUnQuotedState (self ):
631627 data = self .stream .char ()
632628 if data in spaceCharacters :
633- self .changeState ( "beforeAttributeName" )
629+ self .state = self . states [ "beforeAttributeName" ]
634630 elif data == u"&" :
635631 self .processEntityInAttribute ()
636632 elif data == u">" :
@@ -661,24 +657,24 @@ def markupDeclarationOpenState(self):
661657 charStack = [self .stream .char (), self .stream .char ()]
662658 if charStack == [u"-" , u"-" ]:
663659 self .currentToken = Comment ()
664- self .changeState ( "comment" )
660+ self .state = self . states [ "comment" ]
665661 else :
666662 for x in xrange (5 ):
667663 charStack .append (self .stream .char ())
668664 # Put in explicit EOF check
669665 if (not EOF in charStack and
670666 "" .join (charStack ).upper () == u"DOCTYPE" ):
671- self .changeState ( "doctype" )
667+ self .state = self . states [ "doctype" ]
672668 else :
673669 self .tokenQueue .append (ParseError ())
674670 self .stream .queue .extend (charStack )
675- self .changeState ( "bogusComment" )
671+ self .state = self . states [ "bogusComment" ]
676672 return True
677673
678674 def commentState (self ):
679675 data = self .stream .char ()
680676 if data == u"-" :
681- self .changeState ( "commentDash" )
677+ self .state = self . states [ "commentDash" ]
682678 elif data == EOF :
683679 self .emitCurrentTokenWithParseError (data )
684680 else :
@@ -688,7 +684,7 @@ def commentState(self):
688684 def commentDashState (self ):
689685 data = self .stream .char ()
690686 if data == u"-" :
691- self .changeState ( "commentEnd" )
687+ self .state = self . states [ "commentEnd" ]
692688 elif data == EOF :
693689 self .emitCurrentTokenWithParseError (data )
694690 else :
@@ -712,17 +708,17 @@ def commentEndState(self):
712708 else :
713709 self .tokenQueue .append (ParseError ())
714710 self .currentToken .data += u"--" + data
715- self .changeState ( "comment" )
711+ self .state = self . states [ "comment" ]
716712 return True
717713
718714 def doctypeState (self ):
719715 data = self .stream .char ()
720716 if data in spaceCharacters :
721- self .changeState ( "beforeDoctypeName" )
717+ self .state = self . states [ "beforeDoctypeName" ]
722718 else :
723719 self .tokenQueue .append (ParseError ())
724720 self .stream .queue .append (data )
725- self .changeState ( "beforeDoctypeName" )
721+ self .state = self . states [ "beforeDoctypeName" ]
726722 return True
727723
728724 def beforeDoctypeNameState (self ):
@@ -731,7 +727,7 @@ def beforeDoctypeNameState(self):
731727 pass
732728 elif data in asciiLowercase :
733729 self .currentToken = Doctype (data .upper ())
734- self .changeState ( "doctypeName" )
730+ self .state = self . states [ "doctypeName" ]
735731 elif data == u">" :
736732 # Character needs to be consumed per the specification so don't
737733 # invoke emitCurrentTokenWithParseError with "data" as argument.
@@ -740,14 +736,14 @@ def beforeDoctypeNameState(self):
740736 self .emitCurrentTokenWithParseError (data )
741737 else :
742738 self .currentToken = Doctype (data )
743- self .changeState ( "doctypeName" )
739+ self .state = self . states [ "doctypeName" ]
744740 return True
745741
746742 def doctypeNameState (self ):
747743 data = self .stream .char ()
748744 needsDoctypeCheck = False
749745 if data in spaceCharacters :
750- self .changeState ( "afterDoctypeName" )
746+ self .state = self . states [ "afterDoctypeName" ]
751747 needsDoctypeCheck = True
752748 elif data == u">" :
753749 self .emitCurrentToken ()
@@ -779,7 +775,7 @@ def afterDoctypeNameState(self):
779775 else :
780776 self .tokenQueue .append (ParseError ())
781777 self .currentToken .data = True
782- self .changeState ( "bogusDoctype" )
778+ self .state = self . states [ "bogusDoctype" ]
783779 return True
784780
785781 def bogusDoctypeState (self ):
0 commit comments