@@ -51,6 +51,7 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
5151 "attributeValueDoubleQuoted" :self .attributeValueDoubleQuotedState ,
5252 "attributeValueSingleQuoted" :self .attributeValueSingleQuotedState ,
5353 "attributeValueUnQuoted" :self .attributeValueUnQuotedState ,
54+ "afterAttributeValue" :self .afterAttributeValueState ,
5455 "bogusComment" :self .bogusCommentState ,
5556 "markupDeclarationOpen" :self .markupDeclarationOpenState ,
5657 "commentStart" :self .commentStartState ,
@@ -185,10 +186,11 @@ def consumeNumberEntity(self, isHex):
185186
186187 return char
187188
188- def consumeEntity (self , fromAttribute = False ):
189+ def consumeEntity (self , allowedChar = None , fromAttribute = False ):
189190 char = None
190191 charStack = [self .stream .char ()]
191- if charStack [0 ] in spaceCharacters or charStack [0 ] in (EOF , "<" , "&" ):
192+ if charStack [0 ] in spaceCharacters or charStack [0 ] in (EOF , "<" , "&" )\
193+ or (allowedChar is not None and allowedChar == charStack [0 ]):
192194 self .stream .unget (charStack )
193195 elif charStack [0 ] == u"#" :
194196 # We might have a number entity here.
@@ -260,10 +262,10 @@ def entitiesStartingWith(name):
260262 self .stream .unget (charStack )
261263 return char
262264
263- def processEntityInAttribute (self ):
265+ def processEntityInAttribute (self , allowedChar ):
264266 """This method replaces the need for "entityInAttributeValueState".
265267 """
266- entity = self .consumeEntity (True )
268+ entity = self .consumeEntity (allowedChar = allowedChar , fromAttribute = True )
267269 if entity :
268270 self .currentToken ["data" ][- 1 ][1 ] += entity
269271 else :
@@ -479,6 +481,11 @@ def beforeAttributeNameState(self):
479481 self .emitCurrentToken ()
480482 elif data == u"/" :
481483 self .processSolidusInTag ()
484+ elif data == u"'" or data == u'"' or data == u"=" :
485+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
486+ "invalid-character-in-attribute-name" })
487+ self .currentToken ["data" ].append ([data , "" ])
488+ self .state = self .states ["attributeName" ]
482489 elif data == EOF :
483490 self .tokenQueue .append ({"type" : "ParseError" , "data" :
484491 "expected-attribute-name-but-got-eof" })
@@ -508,6 +515,11 @@ def attributeNameState(self):
508515 elif data == u"/" :
509516 self .processSolidusInTag ()
510517 self .state = self .states ["beforeAttributeName" ]
518+ elif data == u"'" or data == u'"' :
519+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
520+ "invalid-character-in-attribute-name" })
521+ self .currentToken ["data" ][- 1 ][0 ] += data
522+ leavingThisState = False
511523 elif data == EOF :
512524 self .tokenQueue .append ({"type" : "ParseError" , "data" :
513525 "eof-in-attribute-name" })
@@ -570,6 +582,11 @@ def beforeAttributeValueState(self):
570582 self .state = self .states ["attributeValueSingleQuoted" ]
571583 elif data == u">" :
572584 self .emitCurrentToken ()
585+ elif data == u"=" :
586+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
587+ "equals-in-unquoted-attribute-value" })
588+ self .currentToken ["data" ][- 1 ][1 ] += data
589+ self .state = self .states ["attributeValueUnQuoted" ]
573590 elif data == EOF :
574591 self .tokenQueue .append ({"type" : "ParseError" , "data" :
575592 "expected-attribute-value-but-got-eof" })
@@ -582,9 +599,9 @@ def beforeAttributeValueState(self):
582599 def attributeValueDoubleQuotedState (self ):
583600 data = self .stream .char ()
584601 if data == "\" " :
585- self .state = self .states ["beforeAttributeName " ]
602+ self .state = self .states ["afterAttributeValue " ]
586603 elif data == u"&" :
587- self .processEntityInAttribute ()
604+ self .processEntityInAttribute (u'"' )
588605 elif data == EOF :
589606 self .tokenQueue .append ({"type" : "ParseError" , "data" :
590607 "eof-in-attribute-value-double-quote" })
@@ -597,9 +614,9 @@ def attributeValueDoubleQuotedState(self):
597614 def attributeValueSingleQuotedState (self ):
598615 data = self .stream .char ()
599616 if data == "'" :
600- self .state = self .states ["beforeAttributeName " ]
617+ self .state = self .states ["afterAttributeValue " ]
601618 elif data == u"&" :
602- self .processEntityInAttribute ()
619+ self .processEntityInAttribute (u"'" )
603620 elif data == EOF :
604621 self .tokenQueue .append ({"type" : "ParseError" , "data" :
605622 "eof-in-attribute-value-single-quote" })
@@ -614,16 +631,37 @@ def attributeValueUnQuotedState(self):
614631 if data in spaceCharacters :
615632 self .state = self .states ["beforeAttributeName" ]
616633 elif data == u"&" :
617- self .processEntityInAttribute ()
634+ self .processEntityInAttribute (None )
618635 elif data == u">" :
619636 self .emitCurrentToken ()
637+ elif data == u'"' or data == u"'" or data == u"=" :
638+ self .tokenQueue .append ({"type" : "ParseError" , "data" :
639+ "unexpected-character-in-unquoted-attribute-value" })
640+ self .currentToken ["data" ][- 1 ][1 ] += data
620641 elif data == EOF :
621642 self .tokenQueue .append ({"type" : "ParseError" , "data" :
622643 "eof-in-attribute-value-no-quotes" })
623644 self .emitCurrentToken ()
624645 else :
625646 self .currentToken ["data" ][- 1 ][1 ] += data + self .stream .charsUntil ( \
626- frozenset (("&" , ">" ,"<" )) | spaceCharacters )
647+ frozenset (("&" , ">" , "<" , "=" , "'" , '"' )) | spaceCharacters )
648+ return True
649+
def afterAttributeValueState(self):
    """Handle the character that follows a quoted attribute value.

    Reads one character from the stream and picks the next tokenizer
    state:

    * a space character -> "beforeAttributeName"
    * ">"               -> emit the current token, then "data"
    * "/"               -> processSolidusInTag(), then
                           "beforeAttributeName"
    * anything else     -> queue a ParseError token, push the character
                           back onto the stream, then
                           "beforeAttributeName" (which re-reads it)

    Always returns True.
    """
    # NOTE(review): unlike the sibling attribute states there is no
    # dedicated EOF branch here; EOF presumably ends up in the final
    # branch below -- confirm that this is intended.
    char = self.stream.char()

    if char in spaceCharacters:
        nextState = "beforeAttributeName"
    elif char == u">":
        self.emitCurrentToken()
        nextState = "data"
    elif char == u"/":
        self.processSolidusInTag()
        nextState = "beforeAttributeName"
    else:
        parseError = {
            "type": "ParseError",
            "data": "unexpected-character-after-attribute-value",
        }
        self.tokenQueue.append(parseError)
        # Put the character back so the next state consumes it again.
        self.stream.unget(char)
        nextState = "beforeAttributeName"

    # Look the state up only after the branch's side effects, matching
    # the original order of operations.
    self.state = self.states[nextState]
    return True
628666
629667 def bogusCommentState (self ):
0 commit comments