99
1010from constants import contentModelFlags , spaceCharacters
1111from constants import entitiesWindows1252 , entities , voidElements
12- from constants import asciiLowercase , asciiUppercase , asciiLetters
12+ from constants import asciiLowercase , asciiLetters
1313from constants import digits , hexDigits , EOF
1414
1515from inputstream import HTMLInputStream
@@ -104,6 +104,10 @@ def processSolidusInTag(self):
104104 self .tokenQueue .append ({"type" : "ParseError" , "data" :
105105 _ ("Solidus (/) incorrectly placed in tag." )})
106106
107+ # XML/XHTML enablement hook
108+ if self .currentToken ["type" ] == "StartTag" and data == u">" :
109+ self .currentToken ["type" ] = "EmptyTag"
110+
107111 # The character we just consumed need to be put back on the stack so it
108112 # doesn't get lost...
109113 self .stream .queue .append (data )
@@ -259,17 +263,10 @@ def emitCurrentToken(self):
259263 # internal usage.
260264
261265 token = self .currentToken
262- # For start tags convert attribute list into a distinct dictionary
263- if token ["type" ] == "StartTag" :
264- # We need to remove the duplicate attributes and convert attributes
265- # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
266-
267- # AT When Python 2.4 is widespread we should use
268- # dict(reversed(token.data))
269- token ["data" ] = dict (token ["data" ][::- 1 ])
266+
270267 # If an end tag has attributes it's a parse error and they should
271268 # be removed
272- elif token ["type" ] == "EndTag" and token ["data" ]:
269+ if token ["type" ] == "EndTag" and token ["data" ]:
273270 self .tokenQueue .append ({"type" : "ParseError" , "data" :
274271 _ ("End tag contains unexpected attributes." )})
275272 token ["data" ] = {}
@@ -349,7 +346,7 @@ def tagOpenState(self):
349346 self .state = self .states ["closeTagOpen" ]
350347 elif data in asciiLetters :
351348 self .currentToken = \
352- {"type" : "StartTag" , "name" : data . lower () , "data" : []}
349+ {"type" : "StartTag" , "name" : data , "data" : []}
353350 self .state = self .states ["tagName" ]
354351 elif data == u">" :
355352 # XXX In theory it could be something besides a tag name. But
@@ -405,7 +402,7 @@ def closeTagOpenState(self):
405402 # the stack.
406403 self .stream .queue .extend (charStack )
407404
408- if self .currentToken ["name" ] == "" .join (charStack [:- 1 ]).lower () \
405+ if self .currentToken ["name" ]. lower () == "" .join (charStack [:- 1 ]).lower () \
409406 and charStack [- 1 ] in (spaceCharacters |
410407 frozenset ((u">" , u"/" , u"<" , EOF ))):
411408 # Because the characters are correct we can safely switch to
@@ -426,7 +423,7 @@ def closeTagOpenState(self):
426423 data = self .stream .char ()
427424 if data in asciiLetters :
428425 self .currentToken = \
429- {"type" : "EndTag" , "name" : data . lower () , "data" : []}
426+ {"type" : "EndTag" , "name" : data , "data" : []}
430427 self .state = self .states ["tagName" ]
431428 elif data == u">" :
432429 self .tokenQueue .append ({"type" : "ParseError" , "data" :
@@ -449,12 +446,9 @@ def tagNameState(self):
449446 data = self .stream .char ()
450447 if data in spaceCharacters :
451448 self .state = self .states ["beforeAttributeName" ]
452- elif data in asciiLowercase :
449+ elif data in asciiLetters :
453450 self .currentToken ["name" ] += data + \
454- self .stream .charsUntil (asciiLowercase , True )
455- elif data in asciiUppercase :
456- self .currentToken ["name" ] += data .lower () + \
457- self .stream .charsUntil (asciiLetters , True ).lower ()
451+ self .stream .charsUntil (asciiLetters , True )
458452 elif data == u">" :
459453 self .emitCurrentToken ()
460454 elif data == u"<" or data == EOF :
@@ -470,8 +464,8 @@ def beforeAttributeNameState(self):
470464 data = self .stream .char ()
471465 if data in spaceCharacters :
472466 self .stream .charsUntil (spaceCharacters , True )
473- elif data in asciiUppercase :
474- self .currentToken ["data" ].append ([data . lower () , "" ])
467+ elif data in asciiLetters :
468+ self .currentToken ["data" ].append ([data , "" ])
475469 self .state = self .states ["attributeName" ]
476470 elif data == u">" :
477471 self .emitCurrentToken ()
@@ -489,13 +483,9 @@ def attributeNameState(self):
489483 leavingThisState = True
490484 if data == u"=" :
491485 self .state = self .states ["beforeAttributeValue" ]
492- elif data in asciiLowercase :
486+ elif data in asciiLetters :
493487 self .currentToken ["data" ][- 1 ][0 ] += data + \
494- self .stream .charsUntil (asciiLowercase , True )
495- leavingThisState = False
496- elif data in asciiUppercase :
497- self .currentToken ["data" ][- 1 ][0 ] += data .lower () + \
498- self .stream .charsUntil (asciiLetters , True ).lower ()
488+ self .stream .charsUntil (asciiLetters , True )
499489 leavingThisState = False
500490 elif data == u">" :
501491 # XXX If we emit here the attributes are converted to a dict
@@ -535,8 +525,8 @@ def afterAttributeNameState(self):
535525 self .state = self .states ["beforeAttributeValue" ]
536526 elif data == u">" :
537527 self .emitCurrentToken ()
538- elif data in asciiUppercase :
539- self .currentToken ["data" ].append ([data . lower () , "" ])
528+ elif data in asciiLetters :
529+ self .currentToken ["data" ].append ([data , "" ])
540530 self .state = self .states ["attributeName" ]
541531 elif data == u"/" :
542532 self .processSolidusInTag ()
0 commit comments