@@ -145,6 +145,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
145145# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
146146# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
147147# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
148+ # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
148149# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
149150# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
150151# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
@@ -159,6 +160,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
159160# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
160161# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
161162# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
163+ # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
164+ # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
162165# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
163166# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
164167# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
@@ -170,6 +173,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
170173# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
171174# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
172175# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
176+ # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
173177# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
174178# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
175179# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
@@ -223,6 +227,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
223227# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
224228# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
225229# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
230+ # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
226231# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
227232# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
228233# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
@@ -237,6 +242,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
237242# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
238243# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
239244# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
245+ # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
246+ # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
240247# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
241248# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
242249# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
@@ -248,6 +255,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
248255# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
249256# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
250257# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
258+ # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
251259# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
252260# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
253261# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
@@ -701,6 +709,80 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
701709 const char * errors /* error handling */
702710 );
703711
712+ /* --- UTF-32 Codecs ------------------------------------------------------ */
713+
714+ /* Decodes length bytes from a UTF-32 encoded buffer string and returns
715+ the corresponding Unicode object.
716+
717+ errors (if non-NULL) defines the error handling. It defaults
718+ to "strict".
719+
720+ If byteorder is non-NULL, the decoder starts decoding using the
721+ byte order given in *byteorder:
722+
723+ *byteorder == -1: little endian
724+ *byteorder == 0: native order
725+ *byteorder == 1: big endian
726+
727+ In native mode, the first four bytes of the stream are checked for a
728+ byte order mark (BOM). If one is found, it is analysed, the byte order
729+ is adjusted accordingly and the BOM is skipped. In the other modes, no
730+ BOM interpretation is done. After completion, *byteorder is set to the
731+ current byte order at the end of input data.
732+
733+ If byteorder is NULL, the codec starts in native order mode.
734+
735+ */
736+
737+ PyAPI_FUNC (PyObject * ) PyUnicode_DecodeUTF32 (
738+ const char * string , /* UTF-32 encoded input buffer */
739+ Py_ssize_t length , /* size of string in bytes */
740+ const char * errors , /* error handling; NULL means "strict" */
741+ int * byteorder /* pointer to the byte order to use:
742+ 0=native;-1=LE,1=BE; updated on
743+ exit; may be NULL (native order) */
744+ );
745+
746+ PyAPI_FUNC (PyObject * ) PyUnicode_DecodeUTF32Stateful (
747+ const char * string , /* UTF-32 encoded input buffer */
748+ Py_ssize_t length , /* size of string */
749+ const char * errors , /* error handling */
750+ int * byteorder , /* pointer to the byte order to use:
751+ 0=native;-1=LE,1=BE; updated on
752+ exit */
753+ Py_ssize_t * consumed /* bytes consumed; presumably an output value -- confirm against the implementation */
754+ );
755+
756+ /* Returns a Python string using the UTF-32 encoding in native byte
757+ order. The string always starts with a byte order mark (BOM). */
758+
759+ PyAPI_FUNC (PyObject * ) PyUnicode_AsUTF32String (
760+ PyObject * unicode /* Unicode object to encode */
761+ );
762+
763+ /* Returns a Python string object holding the UTF-32 encoded value of
764+ the Unicode data.
765+
766+ The byteorder argument selects the byte order of the output and
767+ whether a byte order mark (BOM) is written:
768+
769+ byteorder == -1: little endian
770+ byteorder == 0: native byte order (writes a BOM)
771+ byteorder == 1: big endian
772+
773+ If byteorder is 0, the output string will always start with the
774+ Unicode BOM (U+FEFF). In the other two modes, no BOM is
775+ prepended.
776+
777+ */
778+
779+ PyAPI_FUNC (PyObject * ) PyUnicode_EncodeUTF32 (
780+ const Py_UNICODE * data , /* Unicode char buffer */
781+ Py_ssize_t length , /* number of Py_UNICODE chars to encode */
782+ const char * errors , /* error handling */
783+ int byteorder /* byte order to use: 0=BOM+native;-1=LE,1=BE */
784+ );
785+
704786/* --- UTF-16 Codecs ------------------------------------------------------ */
705787
706788/* Decodes length bytes from a UTF-16 encoded buffer string and returns
0 commit comments