1313from xml .sax .saxutils import XMLGenerator , escape , unescape , quoteattr , \
1414 XMLFilterBase , prepare_input_source
1515from xml .sax .expatreader import create_parser
16- from xml .sax .handler import feature_namespaces , feature_external_ges
16+ from xml .sax .handler import (feature_namespaces , feature_external_ges ,
17+ LexicalHandler )
1718from xml .sax .xmlreader import InputSource , AttributesImpl , AttributesNSImpl
1819from io import BytesIO , StringIO
1920import codecs
@@ -1356,6 +1357,155 @@ def test_nsattrs_wattr(self):
13561357 self .assertEqual (attrs .getQNameByName ((ns_uri , "attr" )), "ns:attr" )
13571358
13581359
1360+ class LexicalHandlerTest (unittest .TestCase ):
1361+ def setUp (self ):
1362+ self .parser = None
1363+
1364+ self .specified_version = '1.0'
1365+ self .specified_encoding = 'UTF-8'
1366+ self .specified_doctype = 'wish'
1367+ self .specified_entity_names = ('nbsp' , 'source' , 'target' )
1368+ self .specified_comment = ('Comment in a DTD' ,
1369+ 'Really! You think so?' )
1370+ self .test_data = StringIO ()
1371+ self .test_data .write ('<?xml version="{}" encoding="{}"?>\n ' .
1372+ format (self .specified_version ,
1373+ self .specified_encoding ))
1374+ self .test_data .write ('<!DOCTYPE {} [\n ' .
1375+ format (self .specified_doctype ))
1376+ self .test_data .write ('<!-- {} -->\n ' .
1377+ format (self .specified_comment [0 ]))
1378+ self .test_data .write ('<!ELEMENT {} (to,from,heading,body,footer)>\n ' .
1379+ format (self .specified_doctype ))
1380+ self .test_data .write ('<!ELEMENT to (#PCDATA)>\n ' )
1381+ self .test_data .write ('<!ELEMENT from (#PCDATA)>\n ' )
1382+ self .test_data .write ('<!ELEMENT heading (#PCDATA)>\n ' )
1383+ self .test_data .write ('<!ELEMENT body (#PCDATA)>\n ' )
1384+ self .test_data .write ('<!ELEMENT footer (#PCDATA)>\n ' )
1385+ self .test_data .write ('<!ENTITY {} " ">\n ' .
1386+ format (self .specified_entity_names [0 ]))
1387+ self .test_data .write ('<!ENTITY {} "Written by: Alexander.">\n ' .
1388+ format (self .specified_entity_names [1 ]))
1389+ self .test_data .write ('<!ENTITY {} "Hope it gets to: Aristotle.">\n ' .
1390+ format (self .specified_entity_names [2 ]))
1391+ self .test_data .write (']>\n ' )
1392+ self .test_data .write ('<{}>' .format (self .specified_doctype ))
1393+ self .test_data .write ('<to>Aristotle</to>\n ' )
1394+ self .test_data .write ('<from>Alexander</from>\n ' )
1395+ self .test_data .write ('<heading>Supplication</heading>\n ' )
1396+ self .test_data .write ('<body>Teach me patience!</body>\n ' )
1397+ self .test_data .write ('<footer>&{};&{};&{};</footer>\n ' .
1398+ format (self .specified_entity_names [1 ],
1399+ self .specified_entity_names [0 ],
1400+ self .specified_entity_names [2 ]))
1401+ self .test_data .write ('<!-- {} -->\n ' .format (self .specified_comment [1 ]))
1402+ self .test_data .write ('</{}>\n ' .format (self .specified_doctype ))
1403+ self .test_data .seek (0 )
1404+
1405+ # Data received from handlers - to be validated
1406+ self .version = None
1407+ self .encoding = None
1408+ self .standalone = None
1409+ self .doctype = None
1410+ self .publicID = None
1411+ self .systemID = None
1412+ self .end_of_dtd = False
1413+ self .comments = []
1414+
1415+ def test_handlers (self ):
1416+ class TestLexicalHandler (LexicalHandler ):
1417+ def __init__ (self , test_harness , * args , ** kwargs ):
1418+ super ().__init__ (* args , ** kwargs )
1419+ self .test_harness = test_harness
1420+
1421+ def startDTD (self , doctype , publicID , systemID ):
1422+ self .test_harness .doctype = doctype
1423+ self .test_harness .publicID = publicID
1424+ self .test_harness .systemID = systemID
1425+
1426+ def endDTD (self ):
1427+ self .test_harness .end_of_dtd = True
1428+
1429+ def comment (self , text ):
1430+ self .test_harness .comments .append (text )
1431+
1432+ self .parser = create_parser ()
1433+ self .parser .setContentHandler (ContentHandler ())
1434+ self .parser .setProperty (
1435+ 'http://xml.org/sax/properties/lexical-handler' ,
1436+ TestLexicalHandler (self ))
1437+ source = InputSource ()
1438+ source .setCharacterStream (self .test_data )
1439+ self .parser .parse (source )
1440+ self .assertEqual (self .doctype , self .specified_doctype )
1441+ self .assertIsNone (self .publicID )
1442+ self .assertIsNone (self .systemID )
1443+ self .assertTrue (self .end_of_dtd )
1444+ self .assertEqual (len (self .comments ),
1445+ len (self .specified_comment ))
1446+ self .assertEqual (f' { self .specified_comment [0 ]} ' , self .comments [0 ])
1447+
1448+
1449+ class CDATAHandlerTest (unittest .TestCase ):
1450+ def setUp (self ):
1451+ self .parser = None
1452+ self .specified_chars = []
1453+ self .specified_chars .append (('Parseable character data' , False ))
1454+ self .specified_chars .append (('<> &% - assorted other XML junk.' , True ))
1455+ self .char_index = 0 # Used to index specified results within handlers
1456+ self .test_data = StringIO ()
1457+ self .test_data .write ('<root_doc>\n ' )
1458+ self .test_data .write ('<some_pcdata>\n ' )
1459+ self .test_data .write (f'{ self .specified_chars [0 ][0 ]} \n ' )
1460+ self .test_data .write ('</some_pcdata>\n ' )
1461+ self .test_data .write ('<some_cdata>\n ' )
1462+ self .test_data .write (f'<![CDATA[{ self .specified_chars [1 ][0 ]} ]]>\n ' )
1463+ self .test_data .write ('</some_cdata>\n ' )
1464+ self .test_data .write ('</root_doc>\n ' )
1465+ self .test_data .seek (0 )
1466+
1467+ # Data received from handlers - to be validated
1468+ self .chardata = []
1469+ self .in_cdata = False
1470+
1471+ def test_handlers (self ):
1472+ class TestLexicalHandler (LexicalHandler ):
1473+ def __init__ (self , test_harness , * args , ** kwargs ):
1474+ super ().__init__ (* args , ** kwargs )
1475+ self .test_harness = test_harness
1476+
1477+ def startCDATA (self ):
1478+ self .test_harness .in_cdata = True
1479+
1480+ def endCDATA (self ):
1481+ self .test_harness .in_cdata = False
1482+
1483+ class TestCharHandler (ContentHandler ):
1484+ def __init__ (self , test_harness , * args , ** kwargs ):
1485+ super ().__init__ (* args , ** kwargs )
1486+ self .test_harness = test_harness
1487+
1488+ def characters (self , content ):
1489+ if content != '\n ' :
1490+ h = self .test_harness
1491+ t = h .specified_chars [h .char_index ]
1492+ h .assertEqual (t [0 ], content )
1493+ h .assertEqual (t [1 ], h .in_cdata )
1494+ h .char_index += 1
1495+
1496+ self .parser = create_parser ()
1497+ self .parser .setContentHandler (TestCharHandler (self ))
1498+ self .parser .setProperty (
1499+ 'http://xml.org/sax/properties/lexical-handler' ,
1500+ TestLexicalHandler (self ))
1501+ source = InputSource ()
1502+ source .setCharacterStream (self .test_data )
1503+ self .parser .parse (source )
1504+
1505+ self .assertFalse (self .in_cdata )
1506+ self .assertEqual (self .char_index , 2 )
1507+
1508+
13591509def test_main ():
13601510 run_unittest (MakeParserTest ,
13611511 ParseTest ,
@@ -1368,7 +1518,10 @@ def test_main():
13681518 StreamReaderWriterXmlgenTest ,
13691519 ExpatReaderTest ,
13701520 ErrorReportingTest ,
1371- XmlReaderTest )
1521+ XmlReaderTest ,
1522+ LexicalHandlerTest ,
1523+ CDATAHandlerTest )
1524+
13721525
13731526if __name__ == "__main__" :
13741527 test_main ()
0 commit comments