# Work from the directory that contains this file so that the relative
# paths used further down resolve the same way wherever the tests are
# launched from.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# Put the sibling "src" directory (one level up) first on the import path.
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
88
9+ #RELEASE remove
10+ import html5parser
11+ #Run tests over all treebuilders
12+ #XXX - it would be nice to automate finding all treebuilders or to allow running just one
13+
14+ import treebuilders
15+ #END RELEASE
16+
17+ #RELEASE add
18+ #import html5lib
19+ #from html5lib import html5parser
20+ #from html5lib.treebuilders import simpletree, etreefull, dom
21+ #END RELEASE
22+
923try :
1024 import simplejson
1125except :
@@ -17,5 +31,85 @@ def load(f):
1731 return eval (input .replace ('\r ' ,'' ))
1832 load = staticmethod (load )
1933
# Build a dict of available trees, keyed by the name used to refer to them.
# The simpletree and DOM builders are registered unconditionally.
treeTypes = {}
treeTypes["simpletree"] = treebuilders.getTreeBuilder("simpletree")
treeTypes["DOM"] = treebuilders.getTreeBuilder("dom")

# Try whatever etree implementations are available from a list that are
# "supposed" to work. Each one is optional: an ImportError just means the
# corresponding entry is left out of treeTypes.

# ElementTree: the xml.etree location first, then the standalone package.
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
        "etree", ElementTree, fullTree=True)
except ImportError:
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
            "etree", ElementTree, fullTree=True)
    except ImportError:
        pass

# cElementTree: the xml.etree location first, then the standalone module.
try:
    import xml.etree.cElementTree as cElementTree
    treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
        "etree", cElementTree, fullTree=True)
except ImportError:
    try:
        import cElementTree
        treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
            "etree", cElementTree, fullTree=True)
    except ImportError:
        pass

# lxml's etree module, driven through the same "etree" builder.
try:
    import lxml.etree as lxml
    treeTypes['lxml'] = treebuilders.getTreeBuilder(
        "etree", lxml, fullTree=True)
except ImportError:
    pass

# BeautifulSoup is imported here only to find out whether it is installed.
try:
    import BeautifulSoup
    treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder(
        "beautifulsoup", fullTree=True)
except ImportError:
    pass
71+
def html5lib_test_files(subdirectory, files='*.dat'):
    """Return the test-data files found in one testdata subdirectory.

    The search is made relative to the current working directory, in
    ``../../testdata/<subdirectory>``.

    subdirectory -- name of the directory below ``testdata`` to search
    files -- glob pattern the file names must match (default ``*.dat``)

    Returns the list produced by glob.glob, which is empty when nothing
    matches.
    """
    testdata_dir = os.path.join(os.path.pardir, os.path.pardir, 'testdata')
    pattern = os.path.join(testdata_dir, subdirectory, files)
    return glob.glob(pattern)
74+
class TestData(object):
    """Iterable over the test cases stored in a sectioned test-data file.

    A file is a sequence of sections. A section starts with a line of the
    form ``#<heading>`` where ``<heading>`` is one of ``sections``; every
    following line up to the next heading is that section's content. A
    heading equal to ``sections[0]`` also starts a new test case.

    Iterating yields one dict per test case, mapping every name in
    ``sections`` to its text, or to None if the case has no such section.
    """

    def __init__(self, filename, sections):
        """Open ``filename`` for reading.

        filename -- path of the test-data file
        sections -- sequence of recognised section headings, without the
                    leading "#"; the first one marks the start of a case

        Raises whatever open() raises if the file cannot be opened.
        """
        self.filename = filename
        self.f = open(filename)
        self.sections = sections

    def __iter__(self):
        """Yield one normalised dict for each test case in the file.

        The whole file is read and the handle closed before the first case
        is produced, so no file stays open while the caller works through
        the cases. Iterating again re-reads the file from the start.
        """
        if self.f.closed:
            self.f = open(self.filename)
        try:
            lines = self.f.readlines()
        finally:
            self.f.close()

        data = {}
        key = None
        for line in lines:
            heading = self.isSectionHeading(line)
            if heading:
                if data and heading == self.sections[0]:
                    # A new case is starting: drop the last character of
                    # the section that was being collected (the newline of
                    # the line separating two cases), then emit the case.
                    data[key] = data[key][:-1]
                    yield self.normaliseOutput(data)
                    data = {}
                key = heading
                data[key] = ""
            elif key is not None:
                # Lines seen before the first heading are ignored.
                data[key] += line
        if data:
            # The last case has no following heading to trigger its yield.
            yield self.normaliseOutput(data)

    def isSectionHeading(self, line):
        """If the current line is a test section heading return the heading,
        otherwise return False"""
        line = line.strip()
        if line.startswith("#") and line[1:] in self.sections:
            return line[1:]
        return False

    def normaliseOutput(self, data):
        """Tidy up one collected test case in place and return it.

        Removes a single trailing newline from every section and adds a
        None entry for every name in ``self.sections`` that is missing.
        """
        # Remove trailing newlines. Iterate over a snapshot of the items:
        # the dict is assigned to inside the loop, and items() exists on
        # both Python 2 and Python 3 whereas iteritems() does not.
        for key, value in list(data.items()):
            if value.endswith("\n"):
                data[key] = value[:-1]
        for heading in self.sections:
            if heading not in data:
                data[heading] = None
        return data
0 commit comments