2222import iso639codes
2323import rfc3987
2424import rfc2046
25- from html5lib .constants import E , spaceCharacters , digits
25+ from html5lib .constants import E , spaceCharacters , digits , tokenTypes
2626from html5lib import tokenizer
2727import gettext
2828_ = gettext .gettext
@@ -267,8 +267,9 @@ def __init__(self, stream, encoding, parseMeta, **kwargs):
267267 self .IDsWeHaveKnownAndLoved = []
268268
269269 def __iter__ (self ):
270+ types = dict ((v ,k ) for k ,v in tokenTypes .iteritems ())
270271 for token in _base .Filter .__iter__ (self ):
271- fakeToken = {"type" : token .get ("type" , "-" ),
272+ fakeToken = {"type" : types . get ( token .get ("type" , "-" ) , "-" ),
272273 "name" : token .get ("name" , "-" ).capitalize ()}
273274 method = getattr (self , "validate%(type)s%(name)s" % fakeToken , None )
274275 if method :
@@ -301,23 +302,23 @@ def validateStartTagInput(self, token):
301302 attrDict = dict ([(name .lower (), value ) for name , value in token .get ("data" , [])])
302303 inputType = attrDict .get ("type" , "text" )
303304 if inputType not in inputTypeAllowedAttributeMap .keys ():
304- yield {"type" : "ParseError" ,
305+ yield {"type" : tokenTypes [ "ParseError" ] ,
305306 "data" : "unknown-input-type" ,
306307 "datavars" : {"attrValue" : inputType }}
307308 allowedAttributes = inputTypeAllowedAttributeMap .get (inputType , [])
308309 for attrName , attrValue in attrDict .items ():
309310 if attrName not in allowedAttributeMap ['input' ]:
310- yield {"type" : "ParseError" ,
311+ yield {"type" : tokenTypes [ "ParseError" ] ,
311312 "data" : "unknown-attribute" ,
312313 "datavars" : {"tagName" : "input" ,
313314 "attributeName" : attrName }}
314315 elif attrName not in allowedAttributes :
315- yield {"type" : "ParseError" ,
316+ yield {"type" : tokenTypes [ "ParseError" ] ,
316317 "data" : "attribute-not-allowed-on-this-input-type" ,
317318 "datavars" : {"attributeName" : attrName ,
318319 "inputType" : inputType }}
319320 if attrName in inputTypeDeprecatedAttributeMap .get (inputType , []):
320- yield {"type" : "ParseError" ,
321+ yield {"type" : tokenTypes [ "ParseError" ] ,
321322 "data" : "deprecated-attribute" ,
322323 "datavars" : {"attributeName" : attrName ,
323324 "inputType" : inputType }}
@@ -330,7 +331,7 @@ def checkUnknownStartTag(self, token):
330331 # check for recognized tag name
331332 name = token .get ("name" , "" ).lower ()
332333 if name not in allowedAttributeMap .keys ():
333- yield {"type" : "ParseError" ,
334+ yield {"type" : tokenTypes [ "ParseError" ] ,
334335 "data" : "unknown-start-tag" ,
335336 "datavars" : {"tagName" : name }}
336337
@@ -342,7 +343,7 @@ def checkStartTagRequiredAttributes(self, token):
342343 in token .get ("data" , [])]
343344 for attrName in requiredAttributeMap [name ]:
344345 if attrName not in attrsPresent :
345- yield {"type" : "ParseError" ,
346+ yield {"type" : tokenTypes [ "ParseError" ] ,
346347 "data" : "missing-required-attribute" ,
347348 "datavars" : {"tagName" : name ,
348349 "attributeName" : attrName }}
@@ -353,7 +354,7 @@ def checkStartTagUnknownAttributes(self, token):
353354 allowedAttributes = globalAttributes | allowedAttributeMap .get (name , frozenset (()))
354355 for attrName , attrValue in token .get ("data" , []):
355356 if attrName .lower () not in allowedAttributes :
356- yield {"type" : "ParseError" ,
357+ yield {"type" : tokenTypes [ "ParseError" ] ,
357358 "data" : "unknown-attribute" ,
358359 "datavars" : {"tagName" : name ,
359360 "attributeName" : attrName }}
@@ -365,40 +366,40 @@ def checkStartTagUnknownAttributes(self, token):
365366# def checkURI(self, token, tagName, attrName, attrValue):
366367# isValid, errorCode = rfc3987.isValidURI(attrValue)
367368# if not isValid:
368- # yield {"type": "ParseError",
369+ # yield {"type": tokenTypes[ "ParseError"] ,
369370# "data": errorCode,
370371# "datavars": {"tagName": tagName,
371372# "attributeName": attrName}}
372- # yield {"type": "ParseError",
373+ # yield {"type": tokenTypes[ "ParseError"] ,
373374# "data": "invalid-attribute-value",
374375# "datavars": {"tagName": tagName,
375376# "attributeName": attrName}}
376377
def checkIRI(self, token, tagName, attrName, attrValue):
    """Yield ParseError tokens when ``attrValue`` is not a valid IRI.

    Validity is decided by ``rfc3987.isValidIRI``, which returns an
    ``(isValid, errorCode)`` pair.  For an invalid value two tokens are
    produced, in this order: one whose ``data`` is the reported error
    code, then a generic ``"invalid-attribute-value"`` one.  Both carry
    the tag and attribute names in ``datavars``.  Nothing is yielded for
    a valid value.  ``token`` is accepted but not used here.
    """
    # NOTE(review): statement nesting was reconstructed from a
    # whitespace-stripped diff; both yields are assumed to sit under the
    # "not valid" branch -- confirm against the real file.
    isValid, errorCode = rfc3987.isValidIRI(attrValue)
    if isValid:
        return
    for errorData in (errorCode, "invalid-attribute-value"):
        # A fresh datavars dict per token, so consumers never share state.
        yield {"type": tokenTypes["ParseError"],
               "data": errorData,
               "datavars": {"tagName": tagName,
                            "attributeName": attrName}}
388389
389390 def checkID (self , token , tagName , attrName , attrValue ):
390391 if not attrValue :
391- yield {"type" : "ParseError" ,
392+ yield {"type" : tokenTypes [ "ParseError" ] ,
392393 "data" : "attribute-value-can-not-be-blank" ,
393394 "datavars" : {"tagName" : tagName ,
394395 "attributeName" : attrName }}
395396 for c in attrValue :
396397 if c in spaceCharacters :
397- yield {"type" : "ParseError" ,
398+ yield {"type" : tokenTypes [ "ParseError" ] ,
398399 "data" : "space-in-id" ,
399400 "datavars" : {"tagName" : tagName ,
400401 "attributeName" : attrName }}
401- yield {"type" : "ParseError" ,
402+ yield {"type" : tokenTypes [ "ParseError" ] ,
402403 "data" : "invalid-attribute-value" ,
403404 "datavars" : {"tagName" : tagName ,
404405 "attributeName" : attrName }}
@@ -427,7 +428,7 @@ def checkTokenList(self, tagName, attrName, attrValue):
427428 valueDict = {}
428429 for currentValue in valueList :
429430 if valueDict .has_key (currentValue ):
430- yield {"type" : "ParseError" ,
431+ yield {"type" : tokenTypes [ "ParseError" ] ,
431432 "data" : "duplicate-value-in-token-list" ,
432433 "datavars" : {"tagName" : tagName ,
433434 "attributeName" : attrName ,
@@ -437,32 +438,32 @@ def checkTokenList(self, tagName, attrName, attrValue):
437438
def checkEnumeratedValue(self, token, tagName, attrName, attrValue, enumeratedValues):
    """Yield ParseError tokens unless ``attrValue`` is an allowed keyword.

    A blank value is reported as ``"attribute-value-can-not-be-blank"``
    (and nothing else) unless the empty string is itself a member of
    ``enumeratedValues``.  Otherwise the value is lower-cased before the
    membership test; a miss produces ``"invalid-enumerated-value"``
    (with the allowed values attached as a tuple) followed by
    ``"invalid-attribute-value"``.  ``token`` is accepted but not used.
    """
    # NOTE(review): statement nesting was reconstructed from a
    # whitespace-stripped diff -- confirm against the real file.
    def buildError(code, **extraVars):
        # Every error token shares the tag/attribute context.
        datavars = {"tagName": tagName,
                    "attributeName": attrName}
        datavars.update(extraVars)
        return {"type": tokenTypes["ParseError"],
                "data": code,
                "datavars": datavars}

    blankAllowed = '' in enumeratedValues
    if not attrValue and not blankAllowed:
        yield buildError("attribute-value-can-not-be-blank")
        return

    normalized = attrValue.lower()
    if normalized in enumeratedValues:
        return

    yield buildError("invalid-enumerated-value",
                     enumeratedValues=tuple(enumeratedValues))
    yield buildError("invalid-attribute-value")
456457
def checkBoolean(self, token, tagName, attrName, attrValue):
    """Yield ParseError tokens unless ``attrValue`` is a valid boolean value.

    The only accepted values are the empty string and the attribute's
    own name (compared exactly, with no case folding).  Anything else
    produces ``"invalid-boolean-value"`` (with the accepted values
    attached as a tuple) followed by ``"invalid-attribute-value"``.
    ``token`` is accepted but not used.
    """
    # NOTE(review): statement nesting was reconstructed from a
    # whitespace-stripped diff -- confirm against the real file.
    enumeratedValues = frozenset((attrName, ''))
    if attrValue in enumeratedValues:
        return

    specificError = {"type": tokenTypes["ParseError"],
                     "data": "invalid-boolean-value",
                     "datavars": {"tagName": tagName,
                                  "attributeName": attrName,
                                  "enumeratedValues": tuple(enumeratedValues)}}
    yield specificError

    genericError = {"type": tokenTypes["ParseError"],
                    "data": "invalid-attribute-value",
                    "datavars": {"tagName": tagName,
                                 "attributeName": attrName}}
    yield genericError
@@ -471,7 +472,7 @@ def checkInteger(self, token, tagName, attrName, attrValue):
471472 sign = 1
472473 numberString = ''
473474 state = 'begin' # ('begin', 'initial-number', 'number', 'trailing-junk')
474- error = {"type" : "ParseError" ,
475+ error = {"type" : tokenTypes [ "ParseError" ] ,
475476 "data" : "invalid-integer-value" ,
476477 "datavars" : {"tagName" : tagName ,
477478 "attributeName" : attrName ,
@@ -503,7 +504,7 @@ def checkInteger(self, token, tagName, attrName, attrValue):
503504 elif state == 'trailing-junk' :
504505 pass
505506 if not numberString :
506- yield {"type" : "ParseError" ,
507+ yield {"type" : tokenTypes [ "ParseError" ] ,
507508 "data" : "attribute-value-can-not-be-blank" ,
508509 "datavars" : {"tagName" : tagName ,
509510 "attributeName" : attrName }}
@@ -517,15 +518,15 @@ def checkBrowsingContext(self, token, tagName, attrName, attrValue):
517518 if attrValue [0 ] != '_' : return
518519 attrValue = attrValue .lower ()
519520 if attrValue in frozenset (('_self' , '_parent' , '_top' , '_blank' )): return
520- yield {"type" : "ParseError" ,
521+ yield {"type" : tokenTypes [ "ParseError" ] ,
521522 "data" : "invalid-browsing-context" ,
522523 "datavars" : {"tagName" : tagName ,
523524 "attributeName" : attrName }}
524525
525526 def checkLangCode (self , token , tagName , attrName , attrValue ):
526527 if not attrValue : return # blank is OK
527528 if not iso639codes .isValidLangCode (attrValue ):
528- yield {"type" : "ParseError" ,
529+ yield {"type" : tokenTypes [ "ParseError" ] ,
529530 "data" : "invalid-lang-code" ,
530531 "datavars" : {"tagName" : tagName ,
531532 "attributeName" : attrName ,
@@ -534,13 +535,13 @@ def checkLangCode(self, token, tagName, attrName, attrValue):
534535 def checkMIMEType (self , token , tagName , attrName , attrValue ):
535536 # XXX needs tests
536537 if not attrValue :
537- yield {"type" : "ParseError" ,
538+ yield {"type" : tokenTypes [ "ParseError" ] ,
538539 "data" : "attribute-value-can-not-be-blank" ,
539540 "datavars" : {"tagName" : tagName ,
540541 "attributeName" : attrName }}
541542
542543 if not rfc2046 .isValidMIMEType (attrValue ):
543- yield {"type" : "ParseError" ,
544+ yield {"type" : tokenTypes [ "ParseError" ] ,
544545 "data" : "invalid-mime-type" ,
545546 "datavars" : {"tagName" : tagName ,
546547 "attributeName" : attrName ,
@@ -556,7 +557,7 @@ def checkLinkRelation(self, token, tagName, attrName, attrValue):
556557 allowedValues = (tagName == 'link' ) and linkRelValues or aRelValues
557558 for currentValue in valueList :
558559 if currentValue not in allowedValues :
559- yield {"type" : "ParseError" ,
560+ yield {"type" : tokenTypes [ "ParseError" ] ,
560561 "data" : "invalid-rel" ,
561562 "datavars" : {"tagName" : tagName ,
562563 "attributeName" : attrName }}
@@ -593,7 +594,7 @@ def checkAttributeValues(self, token):
593594 def validateAttributeValueClass (self , token , tagName , attrName , attrValue ):
594595 for t in self .checkTokenList (tagName , attrName , attrValue ) or []:
595596 yield t
596- yield {"type" : "ParseError" ,
597+ yield {"type" : tokenTypes [ "ParseError" ] ,
597598 "data" : "invalid-attribute-value" ,
598599 "datavars" : {"tagName" : tagName ,
599600 "attributeName" : attrName }}
@@ -623,7 +624,7 @@ def validateAttributeValueId(self, token, tagName, attrName, attrValue):
623624 for t in self .checkID (token , tagName , attrName , attrValue ) or []: yield t
624625 if not attrValue : return
625626 if attrValue in self .IDsWeHaveKnownAndLoved :
626- yield {"type" : "ParseError" ,
627+ yield {"type" : tokenTypes [ "ParseError" ] ,
627628 "data" : "duplicate-id" ,
628629 "datavars" : {"tagName" : tagName }}
629630 self .IDsWeHaveKnownAndLoved .append (attrValue )
@@ -641,7 +642,7 @@ def validateAttributeValueTemplate(self, token, tagName, attrName, attrValue):
641642
642643 def validateAttributeValueHtmlXmlns (self , token , tagName , attrName , attrValue ):
643644 if attrValue != "http://www.w3.org/1999/xhtml" :
644- yield {"type" : "ParseError" ,
645+ yield {"type" : tokenTypes [ "ParseError" ] ,
645646 "data" : "invalid-root-namespace" ,
646647 "datavars" : {"tagName" : tagName ,
647648 "attributeName" : attrName }}
@@ -699,7 +700,7 @@ def eof(self):
699700 # hooray for obscure side effects!
700701 attrValue = attrsDict .get ("contextmenu" , "" )
701702 if attrValue and (attrValue not in self .IDsWeHaveKnownAndLoved ):
702- yield {"type" : "ParseError" ,
703+ yield {"type" : tokenTypes [ "ParseError" ] ,
703704 "data" : "id-does-not-exist" ,
704705 "datavars" : {"tagName" : tagName ,
705706 "attributeName" : "contextmenu" ,
@@ -710,6 +711,6 @@ def eof(self):
710711 if not id : continue
711712 if id == attrValue :
712713 if refToken .get ("name" , "" ).lower () != "menu" :
713- yield {"type" : "ParseError" ,
714+ yield {"type" : tokenTypes [ "ParseError" ] ,
714715 "data" : "contextmenu-must-point-to-menu" }
715716 break
0 commit comments