Skip to content

Commit 85abe74

Browse files
author
j.s@google.com
committed
Added unit tests for character encoding combinations used in the v2 atom core.
1 parent 7db56ef commit 85abe74

2 files changed

Lines changed: 113 additions & 8 deletions

File tree

src/atom/core.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,14 @@
3535
from elementtree import ElementTree
3636

3737

38+
STRING_ENCODING = 'utf-8'
39+
40+
3841
class XmlElement(object):
42+
"""Represents an element node in an XML document.
43+
44+
The text member is a unicode object or an encoded str (assumed UTF-8 unless an encoding is passed to to_string).
45+
"""
3946
_qname = None
4047
_other_elements = None
4148
_other_attributes = None
@@ -243,12 +250,12 @@ def _harvest_tree(self, tree, version=1):
243250
if tree.text:
244251
self.text = tree.text
245252

246-
def _to_tree(self, version=1):
253+
def _to_tree(self, version=1, encoding=None):
247254
new_tree = ElementTree.Element(_get_qname(self, version))
248-
self._attach_members(new_tree, version)
255+
self._attach_members(new_tree, version, encoding)
249256
return new_tree
250257

251-
def _attach_members(self, tree, version=1):
258+
def _attach_members(self, tree, version=1, encoding=None):
252259
"""Convert members to XML elements/attributes and add them to the tree.
253260
254261
Args:
@@ -282,11 +289,16 @@ def _attach_members(self, tree, version=1):
282289
for key, value in self._other_attributes.iteritems():
283290
tree.attrib[key] = value
284291
if self.text:
285-
tree.text = self.text
292+
if isinstance(self.text, unicode):
293+
tree.text = self.text
294+
elif encoding is not None:
295+
tree.text = self.text.decode(encoding)
296+
else:
297+
tree.text = self.text.decode(STRING_ENCODING)
286298

287-
def to_string(self, version=1):
299+
def to_string(self, version=1, encoding=None):
288300
"""Converts this object to XML."""
289-
return ElementTree.tostring(self._to_tree(version))
301+
return ElementTree.tostring(self._to_tree(version, encoding))
290302

291303
ToString = to_string
292304

@@ -356,7 +368,7 @@ def _qname_matches(tag, namespace, qname):
356368

357369

358370
def xml_element_from_string(xml_string, target_class,
359-
version=1, encoding='UTF-8'):
371+
version=1, encoding=None):
360372
"""Parses the XML string according to the rules for the target_class.
361373
362374
Args:
@@ -365,6 +377,11 @@ def xml_element_from_string(xml_string, target_class,
365377
version: int (optional) The version of the schema which should be used when
366378
converting the XML into an object. The default is 1.
367379
"""
380+
if isinstance(xml_string, unicode):
381+
if encoding is None:
382+
xml_string = xml_string.encode(STRING_ENCODING)
383+
else:
384+
xml_string = xml_string.encode(encoding)
368385
tree = ElementTree.fromstring(xml_string)
369386
return _xml_element_from_tree(tree, target_class, version)
370387

tests/atom_tests/core_test.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,9 +303,97 @@ def testMatchQnames(self):
303303
'foo', '', 'bar') == False)
304304

305305

306+
class Chars(atom.core.XmlElement):
307+
_qname = u'{http://example.com/}chars'
308+
y = 'y'
309+
alpha = 'a'
310+
311+
312+
class Strs(atom.core.XmlElement):
313+
_qname = '{http://example.com/}strs'
314+
chars = [Chars]
315+
delta = u'd'
316+
317+
318+
def parse(string):
319+
return atom.core.xml_element_from_string(string, atom.core.XmlElement)
320+
321+
322+
def create(tag, string):
323+
element = atom.core.XmlElement(text=string)
324+
element._qname = tag
325+
return element
326+
327+
328+
class CharacterEncodingTest(unittest.TestCase):
329+
330+
def testUnicodeInputString(self):
331+
# Test parsing the inner text.
332+
self.assertEqual(parse(u'<x>&#948;</x>').text, u'\u03b4')
333+
self.assertEqual(parse(u'<x>\u03b4</x>').text, u'\u03b4')
334+
335+
# Test output valid XML.
336+
self.assertEqual(parse(u'<x>&#948;</x>').to_string(), '<x>&#948;</x>')
337+
self.assertEqual(parse(u'<x>\u03b4</x>').to_string(), '<x>&#948;</x>')
338+
339+
# Test setting the inner text and output valid XML.
340+
e = create(u'x', u'\u03b4')
341+
self.assertEqual(e.to_string(), '<x>&#948;</x>')
342+
self.assertEqual(e.text, u'\u03b4')
343+
self.assertTrue(isinstance(e.text, unicode))
344+
self.assertEqual(create(u'x', '\xce\xb4'.decode('utf-8')).to_string(),
345+
'<x>&#948;</x>')
346+
347+
348+
def testUtf8InputString(self):
349+
# Test parsing inner text.
350+
self.assertEqual(parse('<x>&#948;</x>').text, u'\u03b4')
351+
self.assertEqual(parse(u'<x>\u03b4</x>'.encode('utf-8')).text, u'\u03b4')
352+
self.assertEqual(parse('<x>\xce\xb4</x>').text, u'\u03b4')
353+
354+
# Test output valid XML.
355+
self.assertEqual(parse('<x>&#948;</x>').to_string(), '<x>&#948;</x>')
356+
self.assertEqual(parse(u'<x>\u03b4</x>'.encode('utf-8')).to_string(),
357+
'<x>&#948;</x>')
358+
self.assertEqual(parse('<x>\xce\xb4</x>').to_string(), '<x>&#948;</x>')
359+
360+
# Test setting the inner text and output valid XML.
361+
e = create('x', '\xce\xb4')
362+
self.assertEqual(e.to_string(), '<x>&#948;</x>')
363+
# Don't change the encoding until we convert to an XML string.
364+
self.assertEqual(e.text, '\xce\xb4')
365+
self.assertTrue(isinstance(e.text, str))
366+
self.assertTrue(isinstance(e.to_string(), str))
367+
self.assertEqual(create('x', u'\u03b4'.encode('utf-8')).to_string(),
368+
'<x>&#948;</x>')
369+
370+
371+
372+
def testOtherEncodingOnInputString(self):
373+
# Test parsing inner text.
374+
self.assertEqual(parse(u'<x>\u03b4</x>'.encode('utf-16')).text, u'\u03b4')
375+
376+
# Test output valid XML.
377+
self.assertEqual(parse(u'<x>\u03b4</x>'.encode('utf-16')).to_string(),
378+
'<x>&#948;</x>')
379+
380+
# Test setting the inner text and output valid XML.
381+
e = create('x', u'\u03b4'.encode('utf-16'))
382+
self.assertEqual(e.to_string(encoding='utf-16'), '<x>&#948;</x>')
383+
# Don't change the encoding until we convert to an XML string.
384+
self.assertEqual(e.text, '\xff\xfe\xb4\x03')
385+
self.assertTrue(isinstance(e.text, str))
386+
self.assertTrue(isinstance(e.to_string(encoding='utf-16'), str))
387+
self.assertEqual(
388+
create('x', '\xff\xfe\xb4\x03').to_string(encoding='utf-16'),
389+
'<x>&#948;</x>')
390+
391+
392+
306393
def suite():
307394
return unittest.TestSuite((unittest.makeSuite(XmlElementTest, 'test'),
308-
unittest.makeSuite(UtilityFunctionTest, 'test')))
395+
unittest.makeSuite(UtilityFunctionTest, 'test'),
396+
unittest.makeSuite(CharacterEncodingTest, 'test'),))
309397

310398

311399
if __name__ == '__main__':

0 commit comments

Comments
 (0)