1010
1111from lxml import etree
1212
13- from docx .oxml import oxml_parser
13+ from docx .oxml import oxml_parser , parse_xml , register_custom_element_class
14+ from docx .oxml .shared import OxmlBaseElement
1415
1516
1617class DescribeOxmlParser (object ):
@@ -32,3 +33,47 @@ def whitespace_fixture(self):
3233 )
3334 stripped_xml_text = '<foø><bår>text</bår></foø>'
3435 return pretty_xml_text , stripped_xml_text
36+
37+
class DescribeParseXml(object):
    """Behavior checks for ``parse_xml``."""

    def it_accepts_bytes_and_assumes_utf8_encoding(self, xml_bytes):
        """Parse the UTF-8 encoded fixture bytes.

        There is no explicit assertion; the check passes when the call
        completes without raising.
        """
        parse_xml(xml_bytes)

    def it_accepts_unicode_providing_there_is_no_encoding_declaration(self):
        """Parse text input with and without an encoding declaration.

        The bare body and the body preceded by an XML declaration lacking
        an ``encoding`` attribute must both parse; the same body preceded
        by a declaration carrying ``encoding="UTF-8"`` must raise
        ``ValueError``.
        """
        body = '<foo><bar>føøbår</bar></foo>'
        decl_plain = '<?xml version="1.0" standalone="yes"?>'
        decl_enc = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'

        # Inputs expected to parse cleanly, in order: the body alone, then
        # the body behind a declaration that names no encoding.
        accepted = (body, '%s\n %s' % (decl_plain, body))
        for text in accepted:
            parse_xml(text)

        # Built outside the ``raises`` block so only the parse call itself
        # is under test.
        rejected = '%s\n %s' % (decl_enc, body)
        with pytest.raises(ValueError):
            parse_xml(rejected)

    def it_uses_registered_element_classes(self, xml_bytes):
        """Check that a class registered for ``a:foo`` is used when parsing.

        The fixture XML has an ``a:foo`` root, so the object returned by
        ``parse_xml`` is expected to be a ``CustElmCls`` instance.
        """
        # NOTE(review): the registration is not undone after the test;
        # presumably it persists for the rest of the run -- confirm.
        register_custom_element_class('a:foo', CustElmCls)
        assert isinstance(parse_xml(xml_bytes), CustElmCls)

    # fixture components ---------------------------------------------

    @pytest.fixture
    def xml_bytes(self):
        """Return a small namespaced XML document encoded as UTF-8 bytes."""
        xml_text = (
            '<a:foo xmlns:a="http://schemas.openxmlformats.org/drawingml/200'
            '6/main">\n '
            ' <a:bar>foøbår</a:bar>\n '
            '</a:foo>\n '
        )
        return xml_text.encode('utf-8')
72+
73+
74+ # ===========================================================================
75+ # static fixture
76+ # ===========================================================================
77+
class CustElmCls(OxmlBaseElement):
    """Empty ``OxmlBaseElement`` subclass used as a custom element class."""
0 commit comments