Skip to content

Commit 094ad8b

Browse files
author
api.jscudder
committed
Adding dbrattli's patch for improving unicode support in the XML object members.
1 parent a3f0109 commit 094ad8b

2 files changed

Lines changed: 60 additions & 16 deletions

File tree

src/atom/__init__.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,11 @@
6565
# This encoding is used for converting strings before translating the XML
6666
# into an object.
6767
XML_STRING_ENCODING = 'utf-8'
68-
# The desired string encoding for object members.
68+
# The desired string encoding for object members. Set or monkey-patch this to
69+
# unicode if you want object members to be Python unicode strings, instead of
70+
# encoded strings
6971
MEMBER_STRING_ENCODING = 'utf-8'
70-
72+
#MEMBER_STRING_ENCODING = unicode
7173

7274
def CreateClassFromXMLString(target_class, xml_string, string_encoding=None):
7375
"""Creates an instance of the target class from the string contents.
@@ -147,7 +149,10 @@ def _HarvestElementTree(self, tree):
147149
self._ConvertElementAttributeToMember(attribute, value)
148150
# Encode the text string according to the desired encoding type (default UTF-8).
149151
if tree.text:
150-
self.text = tree.text.encode(MEMBER_STRING_ENCODING)
152+
if MEMBER_STRING_ENCODING is unicode:
153+
self.text = tree.text
154+
else:
155+
self.text = tree.text.encode(MEMBER_STRING_ENCODING)
151156

152157
def _ConvertElementTreeToMember(self, child_tree, current_class=None):
153158
self.extension_elements.append(_ExtensionElementFromElementTree(
@@ -156,7 +161,10 @@ def _ConvertElementTreeToMember(self, child_tree, current_class=None):
156161
def _ConvertElementAttributeToMember(self, attribute, value):
157162
# Encode the attribute value's string with the desired type (default UTF-8).
158163
if value:
159-
self.extension_attributes[attribute] = value.encode(
164+
if MEMBER_STRING_ENCODING is unicode:
165+
self.extension_attributes[attribute] = value
166+
else:
167+
self.extension_attributes[attribute] = value.encode(
160168
MEMBER_STRING_ENCODING)
161169

162170
# One method to create an ElementTree from an object
@@ -165,15 +173,16 @@ def _AddMembersToElementTree(self, tree):
165173
child._BecomeChildElement(tree)
166174
for attribute, value in self.extension_attributes.iteritems():
167175
if value:
168-
# Decode the value from the desired encoding (default UTF-8).
169-
if not isinstance(value, unicode):
170-
tree.attrib[attribute] = value.decode(MEMBER_STRING_ENCODING)
171-
else:
176+
if isinstance(value, unicode) or MEMBER_STRING_ENCODING is unicode:
172177
tree.attrib[attribute] = value
173-
if self.text and not isinstance(self.text, unicode):
174-
tree.text = self.text.decode(MEMBER_STRING_ENCODING)
175-
else:
176-
tree.text = self.text
178+
else:
179+
# Decode the value from the desired encoding (default UTF-8).
180+
tree.attrib[attribute] = value.decode(MEMBER_STRING_ENCODING)
181+
if self.text:
182+
if isinstance(self.text, unicode) or MEMBER_STRING_ENCODING is unicode:
183+
tree.text = self.text
184+
else:
185+
tree.text = self.text.decode(MEMBER_STRING_ENCODING)
177186

178187
def FindExtensions(self, tag=None, namespace=None):
179188
"""Searches extension elements for child nodes with the desired name.
@@ -252,7 +261,10 @@ def _ConvertElementAttributeToMember(self, attribute, value):
252261
# desired value (using self.__dict__).
253262
if value:
254263
# Encode the string to capture non-ascii characters (default UTF-8)
255-
setattr(self, self.__class__._attributes[attribute],
264+
if MEMBER_STRING_ENCODING is unicode:
265+
setattr(self, self.__class__._attributes[attribute], value)
266+
else:
267+
setattr(self, self.__class__._attributes[attribute],
256268
value.encode(MEMBER_STRING_ENCODING))
257269
else:
258270
ExtensionContainer._ConvertElementAttributeToMember(self, attribute,
@@ -278,10 +290,10 @@ def _AddMembersToElementTree(self, tree):
278290
for xml_attribute, member_name in self.__class__._attributes.iteritems():
279291
member = getattr(self, member_name)
280292
if member is not None:
281-
if not isinstance(member, unicode):
282-
tree.attrib[xml_attribute] = member.decode(MEMBER_STRING_ENCODING)
283-
else:
293+
if isinstance(member, unicode) or MEMBER_STRING_ENCODING is unicode:
284294
tree.attrib[xml_attribute] = member
295+
else:
296+
tree.attrib[xml_attribute] = member.decode(MEMBER_STRING_ENCODING)
285297
# Lastly, call the ExtensionContainer's _AddMembersToElementTree to
286298
# convert any extension attributes.
287299
ExtensionContainer._AddMembersToElementTree(self, tree)

tests/atom_test.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,38 @@ def testMemberStringEncoding(self):
590590
self.assert_(atom_entry.title.type == u'\u03B1\u03BB\u03C6\u03B1'.encode('utf-8'))
591591
self.assert_(atom_entry.title.text == u'\u03B1\u03BB\u03C6\u03B1'.encode('utf-8'))
592592

593+
# Setting object members to unicode strings is supported even if
594+
# MEMBER_STRING_ENCODING is set to 'utf-8' (should it be?)
595+
atom_entry.title.type = u'\u03B1\u03BB\u03C6\u03B1'
596+
xml = atom_entry.ToString()
597+
self.assert_(u'\u03B1\u03BB\u03C6\u03B1'.encode('utf-8') in xml)
598+
599+
# Make sure that we can use plain text when MEMBER_STRING_ENCODING is utf8
600+
atom_entry.title.type = "plain text"
601+
atom_entry.title.text = "more text"
602+
xml = atom_entry.ToString()
603+
self.assert_("plain text" in xml)
604+
self.assert_("more text" in xml)
605+
606+
# Test something other than utf-8
607+
atom.MEMBER_STRING_ENCODING = 'iso8859_7'
608+
atom_entry = atom.EntryFromString(self.test_xml)
609+
self.assert_(atom_entry.title.type == u'\u03B1\u03BB\u03C6\u03B1'.encode('iso8859_7'))
610+
self.assert_(atom_entry.title.text == u'\u03B1\u03BB\u03C6\u03B1'.encode('iso8859_7'))
611+
612+
# Test using unicode strings directly for object members
613+
atom.MEMBER_STRING_ENCODING = unicode
614+
atom_entry = atom.EntryFromString(self.test_xml)
615+
self.assert_(atom_entry.title.type == u'\u03B1\u03BB\u03C6\u03B1')
616+
self.assert_(atom_entry.title.text == u'\u03B1\u03BB\u03C6\u03B1')
617+
618+
# Make sure that we can use plain text when MEMBER_STRING_ENCODING is unicode
619+
atom_entry.title.type = "plain text"
620+
atom_entry.title.text = "more text"
621+
xml = atom_entry.ToString()
622+
self.assert_("plain text" in xml)
623+
self.assert_("more text" in xml)
624+
593625
def testConvertExampleXML(self):
594626
try:
595627
entry = atom.CreateClassFromXMLString(atom.Entry, test_data.GBASE_STRING_ENCODING_ENTRY)

0 commit comments

Comments
 (0)