Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion Doc/c-api/sys.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,18 @@ Operating System Utilities
surrogate character, escape the bytes using the surrogateescape error
handler instead of decoding them.

Encoding, highest priority to lowest priority:

* ``UTF-8`` on macOS and Android;
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
and :c:func:`mbstowcs` and :c:func:`wcstombs` functions use the
``ISO-8859-1`` encoding.
* the current locale encoding (``LC_CTYPE`` locale).

Return a pointer to a newly allocated wide character string, use
:c:func:`PyMem_RawFree` to free the memory. If size is not ``NULL``, write
the number of wide characters excluding the null character into ``*size``
the number of wide characters excluding the null character into ``*size``.

Return ``NULL`` on decoding error or memory allocation error. If *size* is
not ``NULL``, ``*size`` is set to ``(size_t)-1`` on memory error or set to
Expand All @@ -94,6 +103,15 @@ Operating System Utilities
:ref:`surrogateescape error handler <surrogateescape>`: surrogate characters
in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

Encoding, highest priority to lowest priority:

* ``UTF-8`` on macOS and Android;
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
and :c:func:`mbstowcs` and :c:func:`wcstombs` functions use the
``ISO-8859-1`` encoding.
* the current locale encoding.

Return a pointer to a newly allocated byte string, use :c:func:`PyMem_Free`
to free the memory. Return ``NULL`` on encoding error or memory allocation
error
Expand Down
12 changes: 12 additions & 0 deletions Doc/c-api/unicode.rst
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,12 @@ system.

.. versionadded:: 3.3

.. versionchanged:: 3.6.5
The function now also uses the current locale encoding for the
``surrogateescape`` error handler. Previously, :c:func:`Py_DecodeLocale`
was used for the ``surrogateescape`` error handler, and the current locale
encoding was used only for the ``strict`` error handler.


.. c:function:: PyObject* PyUnicode_DecodeLocale(const char *str, const char *errors)

Expand Down Expand Up @@ -800,6 +806,12 @@ system.

.. versionadded:: 3.3

.. versionchanged:: 3.6.5
The function now also uses the current locale encoding for the
``surrogateescape`` error handler. Previously, :c:func:`Py_EncodeLocale`
was used for the ``surrogateescape`` error handler, and the current locale
encoding was used only for the ``strict`` error handler.


File System Encoding
""""""""""""""""""""
Expand Down
10 changes: 10 additions & 0 deletions Include/fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ PyAPI_FUNC(char*) Py_EncodeLocale(

#ifndef Py_LIMITED_API

/* Private variant of Py_DecodeLocale() that takes an extra current_locale
   flag.  In unicodeobject.c the PyUnicode_DecodeLocale*() entry points pass 1
   and PyUnicode_DecodeFSDefaultAndSize() passes 0; presumably non-zero means
   "always use the current locale encoding" and 0 keeps the encoding
   selection of Py_DecodeLocale() -- TODO confirm against the definition.
   Returns NULL on failure; the caller in unicodeobject.c treats
   *size == (size_t)-1 as a memory allocation error. */
PyAPI_FUNC(wchar_t *) _Py_DecodeLocaleEx(
const char *arg,
size_t *size,
int current_locale);

/* Private variant of Py_EncodeLocale() that takes the same current_locale
   flag (1 from PyUnicode_EncodeLocale(), 0 from PyUnicode_EncodeFSDefault()).
   Returns NULL on failure; the caller in unicodeobject.c treats
   *error_pos == (size_t)-1 as a memory allocation error. */
PyAPI_FUNC(char*) _Py_EncodeLocaleEx(
const wchar_t *text,
size_t *error_pos,
int current_locale);

PyAPI_FUNC(PyObject *) _Py_device_encoding(int);

#ifdef MS_WINDOWS
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
On FreeBSD and Solaris, os.strerror() now always decodes the byte string from
the current locale encoding, rather than using ASCII/surrogateescape in some
cases.
36 changes: 26 additions & 10 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3439,8 +3439,9 @@ locale_error_handler(const char *errors, int *surrogateescape)
}
}

PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
static PyObject *
unicode_encode_locale(PyObject *unicode, const char *errors,
int current_locale)
{
Py_ssize_t wlen, wlen2;
wchar_t *wstr;
Expand Down Expand Up @@ -3469,7 +3470,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
/* "surrogateescape" error handler */
char *str;

str = Py_EncodeLocale(wstr, &error_pos);
str = _Py_EncodeLocaleEx(wstr, &error_pos, current_locale);
if (str == NULL) {
if (error_pos == (size_t)-1) {
PyErr_NoMemory();
Expand Down Expand Up @@ -3549,6 +3550,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
return NULL;
}

/* Public entry point: encode `unicode` with the `errors` error handler by
   delegating to unicode_encode_locale() with current_locale=1.
   PyUnicode_EncodeFSDefault() calls the same helper with current_locale=0. */
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{
return unicode_encode_locale(unicode, errors, 1);
}

PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
Expand All @@ -3571,7 +3578,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
Py_FileSystemDefaultEncodeErrors);
}
else {
return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors);
return unicode_encode_locale(unicode,
Py_FileSystemDefaultEncodeErrors, 0);
}
#endif
}
Expand Down Expand Up @@ -3741,9 +3749,9 @@ mbstowcs_errorpos(const char *str, size_t len)
return 0;
}

PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
static PyObject*
unicode_decode_locale(const char *str, Py_ssize_t len,
const char *errors, int current_locale)
{
wchar_t smallbuf[256];
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
Expand All @@ -3766,7 +3774,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,

if (surrogateescape) {
/* "surrogateescape" error handler */
wstr = Py_DecodeLocale(str, &wlen);
wstr = _Py_DecodeLocaleEx(str, &wlen, current_locale);
if (wstr == NULL) {
if (wlen == (size_t)-1)
PyErr_NoMemory();
Expand Down Expand Up @@ -3844,11 +3852,18 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
return NULL;
}

/* Public entry point: decode `size` bytes of `str` with the `errors` error
   handler by delegating to unicode_decode_locale() with current_locale=1.
   PyUnicode_DecodeFSDefaultAndSize() calls the same helper with
   current_locale=0. */
PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t size,
const char *errors)
{
return unicode_decode_locale(str, size, errors, 1);
}

/* Decode a NUL-terminated byte string with the `errors` error handler.
   The length is measured with strlen(), so `str` must not be NULL and the
   input ends at the first NUL byte.  Delegates to unicode_decode_locale()
   with current_locale=1, the same value PyUnicode_DecodeLocaleAndSize()
   passes. */
PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors)
{
    Py_ssize_t size = (Py_ssize_t)strlen(str);
    return unicode_decode_locale(str, size, errors, 1);
}


Expand Down Expand Up @@ -3880,7 +3895,8 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
Py_FileSystemDefaultEncodeErrors);
}
else {
return PyUnicode_DecodeLocaleAndSize(s, size, Py_FileSystemDefaultEncodeErrors);
return unicode_decode_locale(s, size,
Py_FileSystemDefaultEncodeErrors, 0);
}
#endif
}
Expand Down
Loading