Skip to content

Commit 259fcfd

Browse files
author
Steve Canny
committed
oxml: add XmlString for xml __eq__ override
lxml on Python 3 is not predictable in how it orders namespace declarations when serializing XML output. Because the unit tests generally rely on a string compare to confirm two XML elements are equal, this override on the OxmlBaseElement.xml.__eq__() is required to avoid false positives when namespace and/or attribute order differs but their contents do not.
1 parent 6de4426 commit 259fcfd

2 files changed

Lines changed: 133 additions & 1 deletion

File tree

docx/oxml/shared.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from lxml import etree
88

9+
import re
10+
911
from .exceptions import ValidationError
1012

1113

@@ -145,13 +147,79 @@ def serialize_for_reading(element):
145147
Serialize *element* to human-readable XML suitable for tests. No XML
146148
declaration.
147149
"""
148-
return etree.tostring(element, encoding='unicode', pretty_print=True)
150+
xml = etree.tostring(element, encoding='unicode', pretty_print=True)
151+
return XmlString(xml)
149152

150153

151154
def _SubElement(parent, tag):
    """
    Return a new child element appended to *parent* by
    `etree.SubElement()`. The element name is whatever `qn(tag)` returns
    (presumably the namespace-qualified form of *tag* -- confirm against
    `qn`), and the module-level `nsmap` is passed as the namespace map.
    """
    qualified_tag = qn(tag)
    return etree.SubElement(parent, qualified_tag, nsmap=nsmap)
153156

154157

158+
class XmlString(str):
    """
    String subclass providing an equality override suitable for comparing
    serialized, pretty-printed XML in tests.

    Two values compare equal when they have the same number of lines and
    each pair of corresponding lines is equivalent, where the order of
    attributes and namespace declarations within an element is ignored.
    Lines that do not look like an XML element (plain text, blank lines,
    comments) are compared as ordinary strings.
    """

    # Each element line is split into four parts, for example the line
    #     '    <w:xyz xmlns:a="http://ns/decl/a" attr_name="val">text</w:xyz>'
    # is parsed as
    #     front: '    <w:xyz'      indent plus '<' or '</' and the tag name
    #     attrs: ' xmlns:a="http://ns/decl/a" attr_name="val"'
    #     close: '>'               or '/>' for an empty element
    #     text:  'text</w:xyz>'    optional text content and closing tag
    #
    # The pattern is a raw string so '\w' is not treated as a (invalid)
    # string escape, the closing tag accepts a namespace prefix, and the
    # trailing '$' prevents unparsed trailing content from being ignored.
    _xml_elm_line_patt = re.compile(
        r'( *</?[\w:]+)(.*?)(/?>)([^<]*</[\w:]+>)?$'
    )

    # Equality here is attribute-order-insensitive, so the hash inherited
    # from str would be inconsistent with it. Python 3 already removes the
    # inherited hash when __eq__ is overridden; this makes that explicit.
    __hash__ = None

    def __eq__(self, other):
        """
        Return True if *other* is a string containing XML equivalent to
        this one, line by line. Return NotImplemented when *other* is not
        a string so Python can fall back to its default comparison.
        """
        if not isinstance(other, (str, type(u''))):
            return NotImplemented
        lines = self.splitlines()
        lines_other = other.splitlines()
        if len(lines) != len(lines_other):
            return False
        return all(
            self._eq_elm_strs(line, line_other)
            for line, line_other in zip(lines, lines_other)
        )

    def __ne__(self, other):
        """
        Return the logical inverse of `__eq__()`, passing NotImplemented
        through unchanged rather than negating it.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def _attr_seq(self, attrs):
        """
        Return a sorted list of the whitespace-delimited attribute strings
        found in *attrs*. Sorting makes the result independent of the
        order in which the attributes were serialized.
        """
        return sorted(attrs.split())

    def _eq_elm_strs(self, line, line_2):
        """
        Return True if the element in *line_2* is XML equivalent to the
        element in *line*. When either line cannot be parsed as an
        element, the two lines are compared as plain strings.
        """
        parsed = self._parse_line(line)
        parsed_2 = self._parse_line(line_2)
        if parsed is None or parsed_2 is None:
            # `line` and `line_2` are plain str values produced by
            # splitlines(), so this does not recurse into __eq__().
            return line == line_2
        front, attrs, close, text = parsed
        front_2, attrs_2, close_2, text_2 = parsed_2
        return (
            front == front_2
            and self._attr_seq(attrs) == self._attr_seq(attrs_2)
            and close == close_2
            and text == text_2
        )

    def _parse_line(self, line):
        """
        Return front, attrs, close, text 4-tuple result of parsing XML
        element string *line*, or None if *line* does not have the form
        of an XML element. *text* is None when the line has no text
        content followed by a closing tag.
        """
        match = self._xml_elm_line_patt.match(line)
        if match is None:
            return None
        return match.groups()
221+
222+
155223
# ===========================================================================
156224
# shared custom element classes
157225
# ===========================================================================

tests/oxml/test_shared.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# encoding: utf-8
2+
3+
"""
4+
Test suite for docx.oxml.shared
5+
"""
6+
7+
from __future__ import (
8+
absolute_import, division, print_function, unicode_literals
9+
)
10+
11+
import pytest
12+
13+
from docx.oxml.shared import XmlString
14+
15+
16+
class DescribeXmlString(object):
    """
    Unit tests for the XML-aware equality comparison of `XmlString`.
    """

    def it_knows_if_two_xml_lines_are_equivalent(self, xml_line_case):
        subject_line, equivalent_line, different_line = xml_line_case
        xml_string = XmlString(subject_line)
        assert xml_string == equivalent_line
        assert xml_string != different_line

    # fixtures ---------------------------------------------

    @pytest.fixture(params=[
        'simple_elm', 'nsp_tagname', 'indent', 'attrs', 'nsdecl_order',
        'closing_elm',
    ])
    def xml_line_case(self, request):
        """
        Return a (line, equivalent line, differing line) 3-tuple for the
        case named by the fixture parameter.
        """
        cases_by_name = {
            'simple_elm': (
                '<name/>',
                '<name/>',
                '<name>',
            ),
            'nsp_tagname': (
                '<xyz:name/>',
                '<xyz:name/>',
                '<abc:name/>',
            ),
            'indent': (
                ' <xyz:name/>',
                ' <xyz:name/>',
                '<xyz:name/>',
            ),
            'attrs': (
                ' <abc:Name foo="bar" bar="foo">',
                ' <abc:Name bar="foo" foo="bar">',
                ' <abc:Name far="boo" foo="bar">',
            ),
            'nsdecl_order': (
                ' <name xmlns:a="http://ns/1" xmlns:b="http://ns/2"/>',
                ' <name xmlns:b="http://ns/2" xmlns:a="http://ns/1"/>',
                ' <name xmlns:b="http://ns/2" xmlns:a="http://ns/1">',
            ),
            'closing_elm': (
                '</xyz:name>',
                '</xyz:name>',
                '<xyz:name>',
            ),
        }
        return cases_by_name[request.param]

0 commit comments

Comments
 (0)