Skip to content

Commit 2660e42

Browse files
committed
(Merge 3.2) Issue #16416: On Mac OS X, operating system data are now always
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if no locale environment variable is set), to avoid inconsistencies with os.fsencode() and os.fsdecode() functions which are already using UTF-8/surrogateescape.
2 parents a2816c2 + 27b1ca2 commit 2660e42

4 files changed

Lines changed: 65 additions & 18 deletions

File tree

Misc/NEWS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ What's New in Python 3.3.1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #16416: On Mac OS X, operating system data are now always
16+
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
17+
(which may be ASCII if no locale environment variable is set), to avoid
18+
inconsistencies with os.fsencode() and os.fsdecode() functions which are
19+
already using UTF-8/surrogateescape.
20+
1521
- Issue #16588: Silence unused-but-set warnings in Python/thread_pthread
1622

1723
- Issue #16546: Fix: ast.YieldFrom argument is now mandatory.

Modules/python.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ wmain(int argc, wchar_t **argv)
1515
}
1616
#else
1717

18-
#ifdef __APPLE__
19-
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
20-
#endif
21-
2218
int
2319
main(int argc, char **argv)
2420
{
@@ -45,11 +41,7 @@ main(int argc, char **argv)
4541
oldloc = strdup(setlocale(LC_ALL, NULL));
4642
setlocale(LC_ALL, "");
4743
for (i = 0; i < argc; i++) {
48-
#ifdef __APPLE__
49-
argv_copy[i] = _Py_DecodeUTF8_surrogateescape(argv[i], strlen(argv[i]));
50-
#else
5144
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
52-
#endif
5345
if (!argv_copy[i]) {
5446
free(oldloc);
5547
fprintf(stderr, "Fatal Python error: "

Objects/unicodeobject.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4809,7 +4809,10 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
48094809
#ifdef __APPLE__
48104810

48114811
/* Simplified UTF-8 decoder using surrogateescape error handler,
4812-
used to decode the command line arguments on Mac OS X. */
4812+
used to decode the command line arguments on Mac OS X.
4813+
4814+
Return a pointer to a newly allocated wide character string (use
4815+
PyMem_Free() to free the memory), or NULL on memory allocation error. */
48134816

48144817
wchar_t*
48154818
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@@ -4820,10 +4823,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
48204823

48214824
/* Note: size will always be longer than the resulting Unicode
48224825
character count */
4823-
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
4824-
PyErr_NoMemory();
4826+
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
48254827
return NULL;
4826-
}
48274828
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
48284829
if (!unicode)
48294830
return NULL;

Python/fileutils.c

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#include <langinfo.h>
99
#endif
1010

11+
#ifdef __APPLE__
12+
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
13+
#endif
14+
1115
PyObject *
1216
_Py_device_encoding(int fd)
1317
{
@@ -60,6 +64,17 @@ _Py_device_encoding(int fd)
6064
wchar_t*
6165
_Py_char2wchar(const char* arg, size_t *size)
6266
{
67+
#ifdef __APPLE__
68+
wchar_t *wstr;
69+
wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
70+
if (size != NULL) {
71+
if (wstr != NULL)
72+
*size = wcslen(wstr);
73+
else
74+
*size = (size_t)-1;
75+
}
76+
return wstr;
77+
#else
6378
wchar_t *res;
6479
#ifdef HAVE_BROKEN_MBSTOWCS
6580
/* Some platforms have a broken implementation of
@@ -145,7 +160,7 @@ _Py_char2wchar(const char* arg, size_t *size)
145160
argsize -= converted;
146161
out++;
147162
}
148-
#else
163+
#else /* HAVE_MBRTOWC */
149164
/* Cannot use C locale for escaping; manually escape as if charset
150165
is ASCII (i.e. escape all bytes > 128). This will still roundtrip
151166
correctly in the locale's charset, which must be an ASCII superset. */
@@ -160,14 +175,15 @@ _Py_char2wchar(const char* arg, size_t *size)
160175
else
161176
*out++ = 0xdc00 + *in++;
162177
*out = 0;
163-
#endif
178+
#endif /* HAVE_MBRTOWC */
164179
if (size != NULL)
165180
*size = out - res;
166181
return res;
167182
oom:
168183
if (size != NULL)
169184
*size = (size_t)-1;
170185
return NULL;
186+
#endif /* __APPLE__ */
171187
}
172188

173189
/* Encode a (wide) character string to the locale encoding with the
@@ -184,14 +200,42 @@ _Py_char2wchar(const char* arg, size_t *size)
184200
char*
185201
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
186202
{
203+
#ifdef __APPLE__
204+
Py_ssize_t len;
205+
PyObject *unicode, *bytes = NULL;
206+
char *cpath;
207+
208+
unicode = PyUnicode_FromWideChar(text, wcslen(text));
209+
if (unicode == NULL)
210+
return NULL;
211+
212+
bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
213+
Py_DECREF(unicode);
214+
if (bytes == NULL) {
215+
PyErr_Clear();
216+
if (error_pos != NULL)
217+
*error_pos = (size_t)-1;
218+
return NULL;
219+
}
220+
221+
len = PyBytes_GET_SIZE(bytes);
222+
cpath = PyMem_Malloc(len+1);
223+
if (cpath == NULL) {
224+
PyErr_Clear();
225+
Py_DECREF(bytes);
226+
if (error_pos != NULL)
227+
*error_pos = (size_t)-1;
228+
return NULL;
229+
}
230+
memcpy(cpath, PyBytes_AsString(bytes), len + 1);
231+
Py_DECREF(bytes);
232+
return cpath;
233+
#else /* __APPLE__ */
187234
const size_t len = wcslen(text);
188235
char *result = NULL, *bytes = NULL;
189236
size_t i, size, converted;
190237
wchar_t c, buf[2];
191238

192-
if (error_pos != NULL)
193-
*error_pos = (size_t)-1;
194-
195239
/* The function works in two steps:
196240
1. compute the length of the output buffer in bytes (size)
197241
2. outputs the bytes */
@@ -238,11 +282,15 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
238282

239283
size += 1; /* nul byte at the end */
240284
result = PyMem_Malloc(size);
241-
if (result == NULL)
285+
if (result == NULL) {
286+
if (error_pos != NULL)
287+
*error_pos = (size_t)-1;
242288
return NULL;
289+
}
243290
bytes = result;
244291
}
245292
return result;
293+
#endif /* __APPLE__ */
246294
}
247295

248296
/* In principle, this should use HAVE__WSTAT, and _wstat

0 commit comments

Comments
 (0)