|
1 | 1 | from test import test_support |
2 | 2 | import unittest |
3 | 3 | import codecs |
4 | | -import StringIO |
| 4 | +import sys, StringIO |
5 | 5 |
|
6 | 6 | class Queue(object): |
7 | 7 | """ |
@@ -453,6 +453,54 @@ def test_decode(self): |
453 | 453 | for uni, puny in punycode_testcases: |
454 | 454 | self.assertEquals(uni, puny.decode("punycode")) |
455 | 455 |
|
| 456 | +class UnicodeInternalTest(unittest.TestCase): |
| 457 | + def test_bug1251300(self): |
| 458 | + # Decoding with unicode_internal used to not correctly handle "code |
| 459 | + # points" above 0x10ffff on UCS-4 builds. |
| 460 | + if sys.maxunicode > 0xffff: |
| 461 | + ok = [ |
| 462 | + ("\x00\x10\xff\xff", u"\U0010ffff"), |
| 463 | + ("\x00\x00\x01\x01", u"\U00000101"), |
| 464 | + ("", u""), |
| 465 | + ] |
| 466 | + not_ok = [ |
| 467 | + "\x7f\xff\xff\xff", |
| 468 | + "\x80\x00\x00\x00", |
| 469 | + "\x81\x00\x00\x00", |
| 470 | + "\x00", |
| 471 | + "\x00\x00\x00\x00\x00", |
| 472 | + ] |
| 473 | + for internal, uni in ok: |
| 474 | + if sys.byteorder == "little": |
| 475 | + internal = "".join(reversed(internal)) |
| 476 | + self.assertEquals(uni, internal.decode("unicode_internal")) |
| 477 | + for internal in not_ok: |
| 478 | + if sys.byteorder == "little": |
| 479 | + internal = "".join(reversed(internal)) |
| 480 | + self.assertRaises(UnicodeDecodeError, internal.decode, |
| 481 | + "unicode_internal") |
| 482 | + |
| 483 | + def test_decode_error_attributes(self): |
| 484 | + if sys.maxunicode > 0xffff: |
| 485 | + try: |
| 486 | + "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") |
| 487 | + except UnicodeDecodeError, ex: |
| 488 | + self.assertEquals("unicode_internal", ex.encoding) |
| 489 | + self.assertEquals("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object) |
| 490 | + self.assertEquals(4, ex.start) |
| 491 | + self.assertEquals(8, ex.end) |
| 492 | + else: |
| 493 | + self.fail() |
| 494 | + |
| 495 | + def test_decode_callback(self): |
| 496 | + if sys.maxunicode > 0xffff: |
| 497 | + codecs.register_error("UnicodeInternalTest", codecs.ignore_errors) |
| 498 | + decoder = codecs.getdecoder("unicode_internal") |
| 499 | + ab = u"ab".encode("unicode_internal") |
| 500 | + ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), |
| 501 | + "UnicodeInternalTest") |
| 502 | + self.assertEquals((u"ab", 12), ignored) |
| 503 | + |
456 | 504 | # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html |
457 | 505 | nameprep_tests = [ |
458 | 506 | # 3.1 Map to nothing. |
@@ -885,6 +933,7 @@ def test_main(): |
885 | 933 | EscapeDecodeTest, |
886 | 934 | RecodingTest, |
887 | 935 | PunycodeTest, |
| 936 | + UnicodeInternalTest, |
888 | 937 | NameprepTest, |
889 | 938 | CodecTest, |
890 | 939 | CodecsModuleTest, |
|
0 commit comments