@@ -573,93 +573,82 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
573573 if (PyObject_IsInstance (exc , PyExc_UnicodeEncodeError )) {
574574 PyObject * restuple ;
575575 PyObject * object ;
576+ Py_ssize_t i , o ;
576577 Py_ssize_t start ;
577578 Py_ssize_t end ;
578579 PyObject * res ;
579- Py_UNICODE * p ;
580- Py_UNICODE * startp ;
581- Py_UNICODE * outp ;
580+ unsigned char * outp ;
582581 int ressize ;
582+ Py_UCS4 ch ;
583583 if (PyUnicodeEncodeError_GetStart (exc , & start ))
584584 return NULL ;
585585 if (PyUnicodeEncodeError_GetEnd (exc , & end ))
586586 return NULL ;
587587 if (!(object = PyUnicodeEncodeError_GetObject (exc )))
588588 return NULL ;
589- startp = PyUnicode_AS_UNICODE (object );
590- for (p = startp + start , ressize = 0 ; p < startp + end ; ++ p ) {
591- if (* p < 10 )
589+ for (i = start , ressize = 0 ; i < end ; ++ i ) {
590+ /* object is guaranteed to be "ready" */
591+ ch = PyUnicode_READ_CHAR (object , i );
592+ if (ch < 10 )
592593 ressize += 2 + 1 + 1 ;
593- else if (* p < 100 )
594+ else if (ch < 100 )
594595 ressize += 2 + 2 + 1 ;
595- else if (* p < 1000 )
596+ else if (ch < 1000 )
596597 ressize += 2 + 3 + 1 ;
597- else if (* p < 10000 )
598+ else if (ch < 10000 )
598599 ressize += 2 + 4 + 1 ;
599- #ifndef Py_UNICODE_WIDE
600- else
601- ressize += 2 + 5 + 1 ;
602- #else
603- else if (* p < 100000 )
600+ else if (ch < 100000 )
604601 ressize += 2 + 5 + 1 ;
605- else if (* p < 1000000 )
602+ else if (ch < 1000000 )
606603 ressize += 2 + 6 + 1 ;
607604 else
608605 ressize += 2 + 7 + 1 ;
609- #endif
610606 }
611607 /* allocate replacement */
612- res = PyUnicode_FromUnicode ( NULL , ressize );
608+ res = PyUnicode_New ( ressize , 127 );
613609 if (res == NULL ) {
614610 Py_DECREF (object );
615611 return NULL ;
616612 }
613+ outp = PyUnicode_1BYTE_DATA (res );
617614 /* generate replacement */
618- for (p = startp + start , outp = PyUnicode_AS_UNICODE (res );
619- p < startp + end ; ++ p ) {
620- Py_UNICODE c = * p ;
615+ for (i = start , o = 0 ; i < end ; ++ i ) {
616+ ch = PyUnicode_READ_CHAR (object , i );
621617 int digits ;
622618 int base ;
623619 * outp ++ = '&' ;
624620 * outp ++ = '#' ;
625- if (* p < 10 ) {
621+ if (ch < 10 ) {
626622 digits = 1 ;
627623 base = 1 ;
628624 }
629- else if (* p < 100 ) {
625+ else if (ch < 100 ) {
630626 digits = 2 ;
631627 base = 10 ;
632628 }
633- else if (* p < 1000 ) {
629+ else if (ch < 1000 ) {
634630 digits = 3 ;
635631 base = 100 ;
636632 }
637- else if (* p < 10000 ) {
633+ else if (ch < 10000 ) {
638634 digits = 4 ;
639635 base = 1000 ;
640636 }
641- #ifndef Py_UNICODE_WIDE
642- else {
643- digits = 5 ;
644- base = 10000 ;
645- }
646- #else
647- else if (* p < 100000 ) {
637+ else if (ch < 100000 ) {
648638 digits = 5 ;
649639 base = 10000 ;
650640 }
651- else if (* p < 1000000 ) {
641+ else if (ch < 1000000 ) {
652642 digits = 6 ;
653643 base = 100000 ;
654644 }
655645 else {
656646 digits = 7 ;
657647 base = 1000000 ;
658648 }
659- #endif
660649 while (digits -- > 0 ) {
661- * outp ++ = '0' + c /base ;
662- c %= base ;
650+ * outp ++ = '0' + ch /base ;
651+ ch %= base ;
663652 base /= 10 ;
664653 }
665654 * outp ++ = ';' ;
@@ -677,58 +666,41 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
677666
678667PyObject * PyCodec_BackslashReplaceErrors (PyObject * exc )
679668{
680- #ifndef Py_UNICODE_WIDE
681- #define IS_SURROGATE_PAIR (p , end ) \
682- (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \
683- *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF)
684- #else
685- #define IS_SURROGATE_PAIR (p , end ) 0
686- #endif
687669 if (PyObject_IsInstance (exc , PyExc_UnicodeEncodeError )) {
688670 PyObject * restuple ;
689671 PyObject * object ;
672+ Py_ssize_t i ;
690673 Py_ssize_t start ;
691674 Py_ssize_t end ;
692675 PyObject * res ;
693- Py_UNICODE * p ;
694- Py_UNICODE * startp ;
695- Py_UNICODE * outp ;
676+ unsigned char * outp ;
696677 int ressize ;
678+ Py_UCS4 c ;
697679 if (PyUnicodeEncodeError_GetStart (exc , & start ))
698680 return NULL ;
699681 if (PyUnicodeEncodeError_GetEnd (exc , & end ))
700682 return NULL ;
701683 if (!(object = PyUnicodeEncodeError_GetObject (exc )))
702684 return NULL ;
703- startp = PyUnicode_AS_UNICODE ( object );
704- for ( p = startp + start , ressize = 0 ; p < startp + end ; ++ p ) {
705- #ifdef Py_UNICODE_WIDE
706- if (* p >= 0x00010000 )
685+ for ( i = start , ressize = 0 ; i < end ; ++ i ) {
686+ /* object is guaranteed to be "ready" */
687+ c = PyUnicode_READ_CHAR ( object , i );
688+ if (c >= 0x10000 ) {
707689 ressize += 1 + 1 + 8 ;
708- else
709- #endif
710- if (* p >= 0x100 ) {
711- if (IS_SURROGATE_PAIR (p , startp + end )) {
712- ressize += 1 + 1 + 8 ;
713- ++ p ;
714- }
715- else
716- ressize += 1 + 1 + 4 ;
690+ }
691+ else if (c >= 0x100 ) {
692+ ressize += 1 + 1 + 4 ;
717693 }
718694 else
719695 ressize += 1 + 1 + 2 ;
720696 }
721- res = PyUnicode_FromUnicode ( NULL , ressize );
697+ res = PyUnicode_New ( ressize , 127 );
722698 if (res == NULL )
723699 return NULL ;
724- for (p = startp + start , outp = PyUnicode_AS_UNICODE (res );
725- p < startp + end ; ++ p ) {
726- Py_UCS4 c = ( Py_UCS4 ) * p ;
700+ for (i = start , outp = PyUnicode_1BYTE_DATA (res );
701+ i < end ; ++ i ) {
702+ c = PyUnicode_READ_CHAR ( object , i ) ;
727703 * outp ++ = '\\' ;
728- if (IS_SURROGATE_PAIR (p , startp + end )) {
729- c = ((* p & 0x3FF ) << 10 ) + (* (p + 1 ) & 0x3FF ) + 0x10000 ;
730- ++ p ;
731- }
732704 if (c >= 0x00010000 ) {
733705 * outp ++ = 'U' ;
734706 * outp ++ = Py_hexdigits [(c >>28 )& 0xf ];
@@ -758,7 +730,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
758730 wrong_exception_type (exc );
759731 return NULL ;
760732 }
761- #undef IS_SURROGATE_PAIR
762733}
763734
764735/* This handler is declared static until someone demonstrates
@@ -768,28 +739,27 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
768739{
769740 PyObject * restuple ;
770741 PyObject * object ;
742+ Py_ssize_t i ;
771743 Py_ssize_t start ;
772744 Py_ssize_t end ;
773745 PyObject * res ;
774746 if (PyObject_IsInstance (exc , PyExc_UnicodeEncodeError )) {
775- Py_UNICODE * p ;
776- Py_UNICODE * startp ;
777747 char * outp ;
778748 if (PyUnicodeEncodeError_GetStart (exc , & start ))
779749 return NULL ;
780750 if (PyUnicodeEncodeError_GetEnd (exc , & end ))
781751 return NULL ;
782752 if (!(object = PyUnicodeEncodeError_GetObject (exc )))
783753 return NULL ;
784- startp = PyUnicode_AS_UNICODE (object );
785754 res = PyBytes_FromStringAndSize (NULL , 3 * (end - start ));
786755 if (!res ) {
787756 Py_DECREF (object );
788757 return NULL ;
789758 }
790759 outp = PyBytes_AsString (res );
791- for (p = startp + start ; p < startp + end ; p ++ ) {
792- Py_UNICODE ch = * p ;
760+ for (i = start ; i < end ; i ++ ) {
761+ /* object is guaranteed to be "ready" */
762+ Py_UCS4 ch = PyUnicode_READ_CHAR (object , i );
793763 if (ch < 0xd800 || ch > 0xdfff ) {
794764 /* Not a surrogate, fail with original exception */
795765 PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
@@ -847,28 +817,27 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc)
847817{
848818 PyObject * restuple ;
849819 PyObject * object ;
820+ Py_ssize_t i ;
850821 Py_ssize_t start ;
851822 Py_ssize_t end ;
852823 PyObject * res ;
853824 if (PyObject_IsInstance (exc , PyExc_UnicodeEncodeError )) {
854- Py_UNICODE * p ;
855- Py_UNICODE * startp ;
856825 char * outp ;
857826 if (PyUnicodeEncodeError_GetStart (exc , & start ))
858827 return NULL ;
859828 if (PyUnicodeEncodeError_GetEnd (exc , & end ))
860829 return NULL ;
861830 if (!(object = PyUnicodeEncodeError_GetObject (exc )))
862831 return NULL ;
863- startp = PyUnicode_AS_UNICODE (object );
864832 res = PyBytes_FromStringAndSize (NULL , end - start );
865833 if (!res ) {
866834 Py_DECREF (object );
867835 return NULL ;
868836 }
869837 outp = PyBytes_AsString (res );
870- for (p = startp + start ; p < startp + end ; p ++ ) {
871- Py_UNICODE ch = * p ;
838+ for (i = start ; i < end ; i ++ ) {
839+ /* object is guaranteed to be "ready" */
840+ Py_UCS4 ch = PyUnicode_READ_CHAR (object , i );
872841 if (ch < 0xdc80 || ch > 0xdcff ) {
873842 /* Not a UTF-8b surrogate, fail with original exception */
874843 PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
0 commit comments