Skip to content

Commit 1db7c13

Browse files
committed
Port encoders from Py_UNICODE API to unicode object API.
1 parent: df8077e; commit: 1db7c13

3 files changed

Lines changed: 249 additions & 264 deletions

File tree

Include/unicodeobject.h

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1088,6 +1088,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
10881088
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
10891089
const char *errors /* error handling */
10901090
);
1091+
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
1092+
PyObject *unicode, /* Unicode object */
1093+
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
1094+
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
1095+
const char *errors /* error handling */
1096+
);
10911097
#endif
10921098

10931099
/* --- UTF-8 Codecs ------------------------------------------------------- */
@@ -1195,6 +1201,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
11951201
const char *errors, /* error handling */
11961202
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
11971203
);
1204+
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
1205+
PyObject *object, /* Unicode object */
1206+
const char *errors, /* error handling */
1207+
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1208+
);
11981209
#endif
11991210

12001211
/* --- UTF-16 Codecs ------------------------------------------------------ */
@@ -1275,6 +1286,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
12751286
const char *errors, /* error handling */
12761287
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
12771288
);
1289+
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
1290+
PyObject* unicode, /* Unicode object */
1291+
const char *errors, /* error handling */
1292+
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1293+
);
12781294
#endif
12791295

12801296
/* --- Unicode-Escape Codecs ---------------------------------------------- */

Modules/_codecsmodule.c

Lines changed: 46 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -235,8 +235,10 @@ unicode_internal_decode(PyObject *self,
235235
return NULL;
236236

237237
if (PyUnicode_Check(obj)) {
238+
if (PyUnicode_READY(obj) < 0)
239+
return NULL;
238240
Py_INCREF(obj);
239-
return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
241+
return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
240242
}
241243
else {
242244
if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
@@ -676,10 +678,12 @@ unicode_internal_encode(PyObject *self,
676678
return NULL;
677679

678680
if (PyUnicode_Check(obj)) {
681+
if (PyUnicode_READY(obj) < 0)
682+
return NULL;
679683
data = PyUnicode_AS_DATA(obj);
680684
size = PyUnicode_GET_DATA_SIZE(obj);
681685
return codec_tuple(PyBytes_FromStringAndSize(data, size),
682-
PyUnicode_GET_SIZE(obj));
686+
PyUnicode_GET_LENGTH(obj));
683687
}
684688
else {
685689
if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
@@ -700,14 +704,10 @@ utf_7_encode(PyObject *self,
700704
return NULL;
701705

702706
str = PyUnicode_FromObject(str);
703-
if (str == NULL)
704-
return NULL;
705-
v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
706-
PyUnicode_GET_SIZE(str),
707-
0,
708-
0,
709-
errors),
710-
PyUnicode_GET_SIZE(str));
707+
if (str == NULL || PyUnicode_READY(str) < 0)
708+
return NULL;
709+
v = codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
710+
PyUnicode_GET_LENGTH(str));
711711
Py_DECREF(str);
712712
return v;
713713
}
@@ -752,13 +752,10 @@ utf_16_encode(PyObject *self,
752752
return NULL;
753753

754754
str = PyUnicode_FromObject(str);
755-
if (str == NULL)
755+
if (str == NULL || PyUnicode_READY(str) < 0)
756756
return NULL;
757-
v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
758-
PyUnicode_GET_SIZE(str),
759-
errors,
760-
byteorder),
761-
PyUnicode_GET_SIZE(str));
757+
v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
758+
PyUnicode_GET_LENGTH(str));
762759
Py_DECREF(str);
763760
return v;
764761
}
@@ -775,13 +772,10 @@ utf_16_le_encode(PyObject *self,
775772
return NULL;
776773

777774
str = PyUnicode_FromObject(str);
778-
if (str == NULL)
775+
if (str == NULL || PyUnicode_READY(str) < 0)
779776
return NULL;
780-
v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
781-
PyUnicode_GET_SIZE(str),
782-
errors,
783-
-1),
784-
PyUnicode_GET_SIZE(str));
777+
v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
778+
PyUnicode_GET_LENGTH(str));
785779
Py_DECREF(str);
786780
return v;
787781
}
@@ -798,13 +792,10 @@ utf_16_be_encode(PyObject *self,
798792
return NULL;
799793

800794
str = PyUnicode_FromObject(str);
801-
if (str == NULL)
795+
if (str == NULL || PyUnicode_READY(str) < 0)
802796
return NULL;
803-
v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
804-
PyUnicode_GET_SIZE(str),
805-
errors,
806-
+1),
807-
PyUnicode_GET_SIZE(str));
797+
v = codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
798+
PyUnicode_GET_LENGTH(str));
808799
Py_DECREF(str);
809800
return v;
810801
}
@@ -829,13 +820,10 @@ utf_32_encode(PyObject *self,
829820
return NULL;
830821

831822
str = PyUnicode_FromObject(str);
832-
if (str == NULL)
823+
if (str == NULL || PyUnicode_READY(str) < 0)
833824
return NULL;
834-
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
835-
PyUnicode_GET_SIZE(str),
836-
errors,
837-
byteorder),
838-
PyUnicode_GET_SIZE(str));
825+
v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
826+
PyUnicode_GET_LENGTH(str));
839827
Py_DECREF(str);
840828
return v;
841829
}
@@ -852,13 +840,10 @@ utf_32_le_encode(PyObject *self,
852840
return NULL;
853841

854842
str = PyUnicode_FromObject(str);
855-
if (str == NULL)
843+
if (str == NULL || PyUnicode_READY(str) < 0)
856844
return NULL;
857-
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
858-
PyUnicode_GET_SIZE(str),
859-
errors,
860-
-1),
861-
PyUnicode_GET_SIZE(str));
845+
v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
846+
PyUnicode_GET_LENGTH(str));
862847
Py_DECREF(str);
863848
return v;
864849
}
@@ -875,13 +860,10 @@ utf_32_be_encode(PyObject *self,
875860
return NULL;
876861

877862
str = PyUnicode_FromObject(str);
878-
if (str == NULL)
863+
if (str == NULL || PyUnicode_READY(str) < 0)
879864
return NULL;
880-
v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
881-
PyUnicode_GET_SIZE(str),
882-
errors,
883-
+1),
884-
PyUnicode_GET_SIZE(str));
865+
v = codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
866+
PyUnicode_GET_LENGTH(str));
885867
Py_DECREF(str);
886868
return v;
887869
}
@@ -898,11 +880,10 @@ unicode_escape_encode(PyObject *self,
898880
return NULL;
899881

900882
str = PyUnicode_FromObject(str);
901-
if (str == NULL)
883+
if (str == NULL || PyUnicode_READY(str) < 0)
902884
return NULL;
903-
v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
904-
PyUnicode_GET_SIZE(str)),
905-
PyUnicode_GET_SIZE(str));
885+
v = codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
886+
PyUnicode_GET_LENGTH(str));
906887
Py_DECREF(str);
907888
return v;
908889
}
@@ -919,12 +900,10 @@ raw_unicode_escape_encode(PyObject *self,
919900
return NULL;
920901

921902
str = PyUnicode_FromObject(str);
922-
if (str == NULL)
903+
if (str == NULL || PyUnicode_READY(str) < 0)
923904
return NULL;
924-
v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
925-
PyUnicode_AS_UNICODE(str),
926-
PyUnicode_GET_SIZE(str)),
927-
PyUnicode_GET_SIZE(str));
905+
v = codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
906+
PyUnicode_GET_LENGTH(str));
928907
Py_DECREF(str);
929908
return v;
930909
}
@@ -941,13 +920,10 @@ latin_1_encode(PyObject *self,
941920
return NULL;
942921

943922
str = PyUnicode_FromObject(str);
944-
if (str == NULL)
923+
if (str == NULL || PyUnicode_READY(str) < 0)
945924
return NULL;
946-
v = codec_tuple(PyUnicode_EncodeLatin1(
947-
PyUnicode_AS_UNICODE(str),
948-
PyUnicode_GET_SIZE(str),
949-
errors),
950-
PyUnicode_GET_SIZE(str));
925+
v = codec_tuple(_PyUnicode_AsLatin1String(str, errors),
926+
PyUnicode_GET_LENGTH(str));
951927
Py_DECREF(str);
952928
return v;
953929
}
@@ -964,13 +940,10 @@ ascii_encode(PyObject *self,
964940
return NULL;
965941

966942
str = PyUnicode_FromObject(str);
967-
if (str == NULL)
943+
if (str == NULL || PyUnicode_READY(str) < 0)
968944
return NULL;
969-
v = codec_tuple(PyUnicode_EncodeASCII(
970-
PyUnicode_AS_UNICODE(str),
971-
PyUnicode_GET_SIZE(str),
972-
errors),
973-
PyUnicode_GET_SIZE(str));
945+
v = codec_tuple(_PyUnicode_AsASCIIString(str, errors),
946+
PyUnicode_GET_LENGTH(str));
974947
Py_DECREF(str);
975948
return v;
976949
}
@@ -990,10 +963,10 @@ charmap_encode(PyObject *self,
990963
mapping = NULL;
991964

992965
str = PyUnicode_FromObject(str);
993-
if (str == NULL)
966+
if (str == NULL || PyUnicode_READY(str) < 0)
994967
return NULL;
995968
v = codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
996-
PyUnicode_GET_SIZE(str));
969+
PyUnicode_GET_LENGTH(str));
997970
Py_DECREF(str);
998971
return v;
999972
}
@@ -1021,13 +994,10 @@ mbcs_encode(PyObject *self,
1021994
return NULL;
1022995

1023996
str = PyUnicode_FromObject(str);
1024-
if (str == NULL)
997+
if (str == NULL || PyUnicode_READY(str) < 0)
1025998
return NULL;
1026-
v = codec_tuple(PyUnicode_EncodeMBCS(
1027-
PyUnicode_AS_UNICODE(str),
1028-
PyUnicode_GET_SIZE(str),
1029-
errors),
1030-
PyUnicode_GET_SIZE(str));
999+
v = codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
1000+
PyUnicode_GET_LENGTH(str));
10311001
Py_DECREF(str);
10321002
return v;
10331003
}
@@ -1045,7 +1015,7 @@ code_page_encode(PyObject *self,
10451015
return NULL;
10461016

10471017
str = PyUnicode_FromObject(str);
1048-
if (str == NULL)
1018+
if (str == NULL || PyUnicode_READY(str) < 0)
10491019
return NULL;
10501020
v = codec_tuple(PyUnicode_EncodeCodePage(code_page,
10511021
str,

0 commit comments

Comments
 (0)