Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
c84f314
gh-119609: Add PyUnicode_Export() function
vstinner May 27, 2024
d0cdbd1
Address reviews
vstinner Sep 5, 2024
9b33dca
Exclude from limited C API 3.13 and older
vstinner Sep 5, 2024
cf1f74a
Replace PyErr_Format() with PyErr_SetString()
vstinner Sep 5, 2024
93d4470
Fix test_collections: implement UserString.__release_buffer__()
vstinner Sep 5, 2024
17ad7b9
Add format parameter to PyUnicode_Export()
vstinner Sep 9, 2024
d683d0a
format must not be NULL
vstinner Sep 9, 2024
78a70fa
Fix memory leak in unicode_releasebuffer()
vstinner Sep 10, 2024
79207f5
Remove PyUnicode_GetBufferFormat() documentation
vstinner Sep 10, 2024
bc0fb69
Apply suggestions from code review
vstinner Sep 10, 2024
2cdbc27
Set format to 0 on error
vstinner Sep 10, 2024
b5be22d
Remove trailing space
vstinner Sep 10, 2024
2960b25
Change constant values
vstinner Sep 10, 2024
bcb41f3
Update constants value in the doc
vstinner Sep 11, 2024
44cb702
Remove unicode_releasebuffer(); use bytes instead
vstinner Sep 12, 2024
1809d8d
PyUnicode_Export() returns the format
vstinner Sep 12, 2024
6707ef4
Fix PyUnicode_Export() signature in doc
vstinner Sep 12, 2024
abf5c58
Use _PyUnicode_EncodeUTF16() and _PyUnicode_EncodeUTF32()
vstinner Sep 12, 2024
033fc07
Use signed int in C tests
vstinner Sep 12, 2024
078dfcf
Update stable_abi: remove PyUnicode_GetBufferFormat()
vstinner Sep 12, 2024
79c6d01
Revert "Use _PyUnicode_EncodeUTF16() and _PyUnicode_EncodeUTF32()"
vstinner Sep 12, 2024
5479ab2
Allow surrogate characters in UTF-8
vstinner Sep 12, 2024
ab2f9b0
Merge branch 'main' into unicode_view
vstinner Sep 13, 2024
f71f230
Avoid a second copy in the UTF-8 export
vstinner Sep 13, 2024
492f10a
UCS-4 export: remove one memory copy
vstinner Sep 13, 2024
b031163
Update Py_buffer format
vstinner Sep 16, 2024
21e6012
Add PyUnicode_EXPORT_COPY flag
vstinner Sep 23, 2024
3267ce6
doc
vstinner Sep 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
PyUnicode_Export() returns the format
Use signed int32_t for the format.
  • Loading branch information
vstinner committed Sep 12, 2024
commit 1809d8d1eecc3cb6f2035ebc50a4640d04cb36b7
11 changes: 6 additions & 5 deletions Doc/c-api/unicode.rst
Original file line number Diff line number Diff line change
Expand Up @@ -341,12 +341,12 @@ APIs:
.. versionadded:: 3.3


.. c:function:: int PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_buffer *view, uint32_t *format)
.. c:function:: int PyUnicode_Export(PyObject *unicode, int32_t requested_formats, Py_buffer *view)
Comment thread
vstinner marked this conversation as resolved.
Outdated

Export the contents of the *unicode* string in one of the *requested_formats*.

* On success, fill *view*, set *\*format*, and return ``0``.
* On error, set an exception, set *\*format* to 0, and return ``-1``.
* On success, fill *view*, and return the selected export format (a value greater than ``0``).
* On error, set an exception, and return ``-1``.
*view* is left unchanged.

After a successful call to :c:func:`PyUnicode_Export`,
Expand Down Expand Up @@ -375,14 +375,15 @@ APIs:

*requested_formats* can be a single format or a bitwise combination of the
formats in the table above.
On success, *\*format* will be set to a single one of the requested flags.
On success, the returned value is exactly one of the requested
formats.

Note that future versions of Python may introduce additional formats.

.. versionadded:: 3.14


.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, int32_t format)

Create a Unicode string object from a buffer in a supported format.

Expand Down
9 changes: 4 additions & 5 deletions Include/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,15 +255,14 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
#define PyUnicode_FORMAT_UTF8 0x08 // char*
#define PyUnicode_FORMAT_ASCII 0x10 // char* (ASCII string)

PyAPI_FUNC(int) PyUnicode_Export(
PyAPI_FUNC(int32_t) PyUnicode_Export(
PyObject *unicode,
uint32_t requested_formats,
Py_buffer *view,
uint32_t *format);
int32_t requested_formats,
Py_buffer *view);
PyAPI_FUNC(PyObject*) PyUnicode_Import(
const void *data,
Py_ssize_t nbytes,
uint32_t format);
int32_t format);
#endif

/* --- wchar_t support for platforms which support it --------------------- */
Expand Down
7 changes: 3 additions & 4 deletions Modules/_testlimitedcapi/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1849,9 +1849,8 @@ unicode_export(PyObject *self, PyObject *args)
}

Py_buffer view;
uint32_t format = (uint32_t)UNINITIALIZED_INT;
if (PyUnicode_Export(obj, requested_formats, &view, &format) < 0) {
assert(format == 0);
int32_t format = PyUnicode_Export(obj, requested_formats, &view);
if (format < 0) {
return NULL;
}

Expand Down Expand Up @@ -1899,7 +1898,7 @@ unicode_import(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "y#I", &data, &nbytes, &format)) {
return NULL;
}
return PyUnicode_Import(data, nbytes, format);
return PyUnicode_Import(data, nbytes, (int32_t)format);
}


Expand Down
54 changes: 24 additions & 30 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2332,27 +2332,25 @@ PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
}


static int
unicode_export(PyObject *obj, Py_buffer *view, uint32_t *pformat,
static int32_t
unicode_export(PyObject *obj, Py_buffer *view,
Py_ssize_t len, const void *buf,
int itemsize, const char *format, uint32_t internal_format)
int itemsize, const char *format, int32_t internal_format)
{
if (PyBuffer_FillInfo(view, obj, (void*)buf, len,
1, PyBUF_SIMPLE) < 0) {
*pformat = 0;
return -1;
}
view->itemsize = itemsize;
view->format = (char*)format;
view->internal = (void*)(uintptr_t)internal_format;
Comment thread
vstinner marked this conversation as resolved.
Outdated
*pformat = internal_format;
return 0;
return internal_format;
}


int
PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
Py_buffer *view, uint32_t *format)
int32_t
PyUnicode_Export(PyObject *unicode, int32_t requested_formats,
Py_buffer *view)
{
#if SIZEOF_INT == 4
# define BUFFER_UCS4 "I"
Expand All @@ -2364,15 +2362,15 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,

if (!PyUnicode_Check(unicode)) {
PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
goto error;
return -1;
}
Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);

// Native ASCII
if (PyUnicode_IS_ASCII(unicode)
&& (requested_formats & PyUnicode_FORMAT_ASCII))
{
return unicode_export(unicode, view, format,
return unicode_export(unicode, view,
len, PyUnicode_1BYTE_DATA(unicode),
1, "B", PyUnicode_FORMAT_ASCII);
}
Expand All @@ -2382,7 +2380,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
if (kind == PyUnicode_1BYTE_KIND
&& (requested_formats & PyUnicode_FORMAT_UCS1))
{
return unicode_export(unicode, view, format,
return unicode_export(unicode, view,
len, PyUnicode_1BYTE_DATA(unicode),
1, "B", PyUnicode_FORMAT_UCS1);
}
Expand All @@ -2391,7 +2389,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
if (kind == PyUnicode_2BYTE_KIND
&& (requested_formats & PyUnicode_FORMAT_UCS2))
{
return unicode_export(unicode, view, format,
return unicode_export(unicode, view,
len, PyUnicode_2BYTE_DATA(unicode),
2, "H", PyUnicode_FORMAT_UCS2);
}
Expand All @@ -2402,7 +2400,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
{
PyObject *bytes = PyBytes_FromStringAndSize(NULL, (len + 1) * 2);
Comment thread
vstinner marked this conversation as resolved.
if (!bytes) {
goto error;
return -1;
}
Py_UCS2 *ucs2 = (Py_UCS2*)PyBytes_AS_STRING(bytes);

Expand All @@ -2412,9 +2410,9 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
ucs2);
ucs2[len] = 0;

int res = unicode_export(bytes, view, format,
len, ucs2,
2, "H", PyUnicode_FORMAT_UCS2);
int32_t res = unicode_export(bytes, view,
len, ucs2,
2, "H", PyUnicode_FORMAT_UCS2);
Py_DECREF(bytes);
return res;
}
Expand All @@ -2423,7 +2421,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
if (kind == PyUnicode_4BYTE_KIND
&& (requested_formats & PyUnicode_FORMAT_UCS4))
{
return unicode_export(unicode, view, format,
return unicode_export(unicode, view,
len, PyUnicode_4BYTE_DATA(unicode),
4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
}
Expand All @@ -2432,19 +2430,19 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
if (requested_formats & PyUnicode_FORMAT_UCS4) {
Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
Comment thread
vstinner marked this conversation as resolved.
Outdated
if (ucs4 == NULL) {
goto error;
return -1;
}

PyObject *bytes = PyBytes_FromStringAndSize((char*)ucs4, (len + 1) * 4);
PyMem_Free(ucs4);
if (bytes == NULL) {
goto error;
return -1;
}
ucs4 = (Py_UCS4*)PyBytes_AS_STRING(bytes);

int res = unicode_export(bytes, view, format,
len, ucs4,
4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
int32_t res = unicode_export(bytes, view,
len, ucs4,
4, BUFFER_UCS4, PyUnicode_FORMAT_UCS4);
Py_DECREF(bytes);
return res;
}
Expand All @@ -2454,19 +2452,15 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,
Py_ssize_t nbytes;
const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &nbytes);
if (utf8 == NULL) {
goto error;
return -1;
}
return unicode_export(unicode, view, format,
return unicode_export(unicode, view,
nbytes, utf8,
1, "B", PyUnicode_FORMAT_UTF8);
}

PyErr_SetString(PyExc_ValueError,
"unable to find a matching export format");
goto error;

error:
*format = 0;
return -1;

#undef BUFFER_UCS4
Expand All @@ -2475,7 +2469,7 @@ PyUnicode_Export(PyObject *unicode, uint32_t requested_formats,

PyObject*
PyUnicode_Import(const void *data, Py_ssize_t nbytes,
uint32_t format)
int32_t format)
{
if (nbytes < 0) {
PyErr_SetString(PyExc_ValueError, "Negative nbytes");
Expand Down