# -*- coding: utf-8 -*-
"""Tests that parse input with html5lib's ``liberalxmlparser`` parsers and
compare the serialised DOM with an expected string."""

# NOTE(review): `support` is kept ahead of the html5lib imports, as in the
# original; presumably it prepares the import path -- confirm before reordering.
import support
from html5lib.treebuilders import dom
from html5lib.liberalxmlparser import *

import unittest
import re

# NOTE(review): many fixture strings below are empty or contain only text.
# It looks like the markup they were meant to carry has been lost somewhere
# along the way -- confirm against the upstream html5lib test suite before
# trusting these tests. They are reproduced here exactly as found.


def sortattrs(match):
    """Return the tag matched by ``xmlelem`` with its attributes sorted.

    Used as a ``re.sub`` callback so that two serialisations that differ only
    in attribute order compare equal.

    ``match`` must expose three groups: the element name, the raw attribute
    text, and an optional trailing ``/``. When no ``name="value"`` pairs are
    found in the attribute text the tag is rebuilt from the groups untouched.
    """
    name, raw_attrs, closer = match.groups()
    pairs = re.findall(r'([-:\w]+)="([^"]*)"', raw_attrs)
    if not pairs:
        return "<%s%s%s>" % (name, raw_attrs, closer)
    # Sorting the (name, value) tuples orders by name first, then by value.
    joined = ' '.join(['%s="%s"' % (n, v) for n, v in sorted(pairs)])
    return "<%s %s%s>" % (name, joined, closer)


def ncr(match):
    """Return the UTF-8 encoding of the decimal code point in group 1.

    Used as a ``re.sub`` callback for decimal numeric character references.
    """
    return unichr(int(match.group(1))).encode('utf-8')


# A start (or empty-element) tag carrying at least one attribute, where each
# attribute is preceded by a single space and its value is double-quoted.
xmlelem = re.compile(r'<(\w+)((?: [-:\w]+="[^"]*")+)(/?)>')


class Xhtml5Test(unittest.TestCase):
    """Base class providing parse-and-compare assertions."""

    def assertXmlEquals(self, input, expected=None, parser=XMLParser):
        """Parse ``input`` and compare the serialised root element.

        ``parser`` is instantiated with ``tree=dom.TreeBuilder``; the parsed
        document's ``documentElement`` is serialised with ``toxml('utf-8')``.

        When ``expected`` is falsy (``None`` or an empty string) the
        expectation is derived from ``input``: attributes are sorted and
        decimal numeric character references are replaced by their UTF-8
        bytes, and the output is attribute-sorted as well before comparing.
        Otherwise ``expected`` is compared with the raw serialisation.
        """
        document = parser(tree=dom.TreeBuilder).parse(input).documentElement
        serialised = document.toxml('utf-8')
        if not expected:
            expected = xmlelem.sub(sortattrs, input)
            expected = re.sub(r'&#(\d+);', ncr, expected)
            output = xmlelem.sub(sortattrs, serialised)
            self.assertEqual(expected, output)
        else:
            self.assertEqual(expected, serialised)

    def assertXhtmlEquals(self, input, expected=None, parser=XHTMLParser):
        """Same as ``assertXmlEquals`` but defaulting to ``XHTMLParser``."""
        self.assertXmlEquals(input, expected, parser)


class BasicXhtml5Test(Xhtml5Test):

    def test_title_body_mismatched_close(self):
        self.assertXhtmlEquals(
            'Xhtmlcontent',
            ''
            'Xhtml' +
            'content' +
            '')

    def test_title_body_named_charref(self):
        self.assertXhtmlEquals(
            'ntildeA ñ B',
            ''
            'ntilde' +
            'A ' + unichr(0xf1).encode('utf-8') + ' B' +
            '')


class BasicXmlTest(Xhtml5Test):

    def test_comment(self):
        self.assertXmlEquals("")

    def test_cdata(self):
        self.assertXmlEquals("", "foo")

    def test_simple_text(self):
        self.assertXmlEquals("\n\nfoo\n\n", "\n\nfoo\n\n")

    def test_optional_close(self):
        self.assertXmlEquals("\n\nfoo", "\n\nfoo\n\n")

    def test_html_mismatched(self):
        self.assertXmlEquals("foo", "foo")


class OpmlTest(Xhtml5Test):

    def test_mixedCaseElement(self):
        self.assertXmlEquals(
            '' +
            'Dave Winer' +
            '')

    def test_mixedCaseAttribute(self):
        self.assertXmlEquals(
            '' +
            '' +
            '')

    def test_malformed(self):
        self.assertXmlEquals(
            '' +
            '' +
            '',
            '' +
            '' +
            '',)


class XhtmlTest(Xhtml5Test):

    def test_mathml(self):
        self.assertXhtmlEquals("""MathML x = - b ± b 2 - 4 a c 2 a """)

    def test_svg(self):
        self.assertXhtmlEquals("""SVG """)

    def test_xlink(self):
        self.assertXhtmlEquals("""XLINK """)

    def test_br(self):
        self.assertXhtmlEquals("""BR\n""")

    def test_strong(self):
        self.assertXhtmlEquals("""STRONG """)

    def test_script(self):
        self.assertXhtmlEquals("""SCRIPT """)

    def test_script_src(self):
        self.assertXhtmlEquals(""" SCRIPT """)

    def test_title(self):
        self.assertXhtmlEquals("""1 < 2 & 3 """)

    def test_prolog(self):
        self.assertXhtmlEquals(""" PROLOG """, """PROLOG """)

    def test_tagsoup(self):
        self.assertXhtmlEquals(""" TAGSOUP\n\n""", """TAGSOUP\n\n""")


def buildTestSuite():
    """Return a suite of every test discoverable in this module by name."""
    return unittest.defaultTestLoader.loadTestsFromName(__name__)


def main():
    """Run the module's tests through ``unittest.main()``."""
    # The original built a suite here and threw it away; unittest.main()
    # performs its own discovery, so the discarded call is dropped.
    unittest.main()


if __name__ == '__main__':
    main()