Skip to content

Commit 3c9ce74

Browse files
Issue #23908: os functions, open() and the io.FileIO constructor now reject
unicode paths with embedded null character on Windows instead of silently truncate them.
1 parent 3626e5e commit 3c9ce74

9 files changed

Lines changed: 125 additions & 57 deletions

File tree

Lib/test/test_fileio.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,11 @@ def testBytesOpen(self):
361361
finally:
362362
os.unlink(TESTFN)
363363

364+
def testConstructorHandlesNULChars(self):
365+
fn_with_NUL = 'foo\0bar'
366+
self.assertRaises(TypeError, _FileIO, fn_with_NUL, 'w')
367+
self.assertRaises(TypeError, _FileIO, fn_with_NUL.encode('ascii'), 'w')
368+
364369
def testInvalidFd(self):
365370
self.assertRaises(ValueError, _FileIO, -10)
366371
self.assertRaises(OSError, _FileIO, make_bad_fd())

Lib/test/test_getargs2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ def test_et_hash(self):
779779
def test_u(self):
780780
from _testcapi import getargs_u
781781
self.assertEqual(getargs_u(u'abc\xe9'), u'abc\xe9')
782-
self.assertEqual(getargs_u(u'nul:\0'), u'nul:')
782+
self.assertRaises(TypeError, getargs_u, u'nul:\0')
783783
self.assertRaises(TypeError, getargs_u, 'bytes')
784784
self.assertRaises(TypeError, getargs_u, bytearray('bytearray'))
785785
self.assertRaises(TypeError, getargs_u, memoryview('memoryview'))

Lib/test/test_io.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,15 @@ def test_invalid_operations(self):
349349
self.assertRaises(IOError, fp.write, "blah")
350350
self.assertRaises(IOError, fp.writelines, ["blah\n"])
351351

352+
def test_open_handles_NUL_chars(self):
353+
fn_with_NUL = 'foo\0bar'
354+
self.assertRaises(TypeError, self.open, fn_with_NUL, 'w')
355+
356+
bytes_fn = fn_with_NUL.encode('ascii')
357+
with warnings.catch_warnings():
358+
warnings.simplefilter("ignore", DeprecationWarning)
359+
self.assertRaises(TypeError, self.open, bytes_fn, 'w')
360+
352361
def test_raw_file_io(self):
353362
with self.open(support.TESTFN, "wb", buffering=0) as f:
354363
self.assertEqual(f.readable(), False)

Lib/test/test_posix.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,44 @@ def test_getgroups(self):
577577
set([int(x) for x in groups.split()]),
578578
set(posix.getgroups() + [posix.getegid()]))
579579

580+
@test_support.requires_unicode
581+
def test_path_with_null_unicode(self):
582+
fn = test_support.TESTFN_UNICODE
583+
fn_with_NUL = fn + u'\0'
584+
self.addCleanup(test_support.unlink, fn)
585+
test_support.unlink(fn)
586+
fd = None
587+
try:
588+
with self.assertRaises(TypeError):
589+
fd = os.open(fn_with_NUL, os.O_WRONLY | os.O_CREAT) # raises
590+
finally:
591+
if fd is not None:
592+
os.close(fd)
593+
self.assertFalse(os.path.exists(fn))
594+
self.assertRaises(TypeError, os.mkdir, fn_with_NUL)
595+
self.assertFalse(os.path.exists(fn))
596+
open(fn, 'wb').close()
597+
self.assertRaises(TypeError, os.stat, fn_with_NUL)
598+
599+
def test_path_with_null_byte(self):
600+
fn = test_support.TESTFN
601+
fn_with_NUL = fn + '\0'
602+
self.addCleanup(test_support.unlink, fn)
603+
test_support.unlink(fn)
604+
fd = None
605+
try:
606+
with self.assertRaises(TypeError):
607+
fd = os.open(fn_with_NUL, os.O_WRONLY | os.O_CREAT) # raises
608+
finally:
609+
if fd is not None:
610+
os.close(fd)
611+
self.assertFalse(os.path.exists(fn))
612+
self.assertRaises(TypeError, os.mkdir, fn_with_NUL)
613+
self.assertFalse(os.path.exists(fn))
614+
open(fn, 'wb').close()
615+
self.assertRaises(TypeError, os.stat, fn_with_NUL)
616+
617+
580618
class PosixGroupsTester(unittest.TestCase):
581619

582620
def setUp(self):

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ What's New in Python 2.7.13?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #23908: os functions, open() and the io.FileIO constructor now reject
14+
unicode paths with embedded null character on Windows instead of silently
15+
truncate them.
16+
1317
Library
1418
-------
1519

Modules/_io/fileio.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,13 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
224224
}
225225

226226
#ifdef MS_WINDOWS
227-
if (PyUnicode_Check(nameobj))
227+
if (PyUnicode_Check(nameobj)) {
228228
widename = PyUnicode_AS_UNICODE(nameobj);
229+
if (wcslen(widename) != (size_t)PyUnicode_GET_SIZE(nameobj)) {
230+
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
231+
return -1;
232+
}
233+
}
229234
if (widename == NULL)
230235
#endif
231236
if (fd < 0)
@@ -234,6 +239,10 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
234239
Py_ssize_t namelen;
235240
if (PyObject_AsCharBuffer(nameobj, &name, &namelen) < 0)
236241
return -1;
242+
if (strlen(name) != (size_t)namelen) {
243+
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
244+
return -1;
245+
}
237246
}
238247
else {
239248
PyObject *u = PyUnicode_FromObject(nameobj);
@@ -252,6 +261,10 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
252261
goto error;
253262
}
254263
name = PyBytes_AS_STRING(stringobj);
264+
if (strlen(name) != (size_t)PyBytes_GET_SIZE(stringobj)) {
265+
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
266+
goto error;
267+
}
255268
}
256269
}
257270

Modules/posixmodule.c

Lines changed: 34 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,13 +1646,9 @@ posix_do_stat(PyObject *self, PyObject *args,
16461646
PyObject *result;
16471647

16481648
#ifdef MS_WINDOWS
1649-
PyUnicodeObject *po;
1650-
if (PyArg_ParseTuple(args, wformat, &po)) {
1651-
Py_UNICODE *wpath = PyUnicode_AS_UNICODE(po);
1652-
1649+
Py_UNICODE *wpath;
1650+
if (PyArg_ParseTuple(args, wformat, &wpath)) {
16531651
Py_BEGIN_ALLOW_THREADS
1654-
/* PyUnicode_AS_UNICODE result OK without
1655-
thread lock as it is a simple dereference. */
16561652
res = wstatfunc(wpath, &st);
16571653
Py_END_ALLOW_THREADS
16581654

@@ -1706,12 +1702,10 @@ posix_access(PyObject *self, PyObject *args)
17061702

17071703
#ifdef MS_WINDOWS
17081704
DWORD attr;
1709-
PyUnicodeObject *po;
1710-
if (PyArg_ParseTuple(args, "Ui:access", &po, &mode)) {
1705+
Py_UNICODE *wpath;
1706+
if (PyArg_ParseTuple(args, "ui:access", &wpath, &mode)) {
17111707
Py_BEGIN_ALLOW_THREADS
1712-
/* PyUnicode_AS_UNICODE OK without thread lock as
1713-
it is a simple dereference. */
1714-
attr = GetFileAttributesW(PyUnicode_AS_UNICODE(po));
1708+
attr = GetFileAttributesW(wpath);
17151709
Py_END_ALLOW_THREADS
17161710
goto finish;
17171711
}
@@ -1858,23 +1852,22 @@ posix_chmod(PyObject *self, PyObject *args)
18581852
int res;
18591853
#ifdef MS_WINDOWS
18601854
DWORD attr;
1861-
PyUnicodeObject *po;
1862-
if (PyArg_ParseTuple(args, "Ui|:chmod", &po, &i)) {
1855+
Py_UNICODE *wpath;
1856+
if (PyArg_ParseTuple(args, "ui|:chmod", &wpath, &i)) {
18631857
Py_BEGIN_ALLOW_THREADS
1864-
attr = GetFileAttributesW(PyUnicode_AS_UNICODE(po));
1858+
attr = GetFileAttributesW(wpath);
18651859
if (attr != 0xFFFFFFFF) {
18661860
if (i & _S_IWRITE)
18671861
attr &= ~FILE_ATTRIBUTE_READONLY;
18681862
else
18691863
attr |= FILE_ATTRIBUTE_READONLY;
1870-
res = SetFileAttributesW(PyUnicode_AS_UNICODE(po), attr);
1864+
res = SetFileAttributesW(wpath, attr);
18711865
}
18721866
else
18731867
res = 0;
18741868
Py_END_ALLOW_THREADS
18751869
if (!res)
1876-
return win32_error_unicode("chmod",
1877-
PyUnicode_AS_UNICODE(po));
1870+
return win32_error_unicode("chmod", wpath);
18781871
Py_INCREF(Py_None);
18791872
return Py_None;
18801873
}
@@ -2300,18 +2293,18 @@ posix_listdir(PyObject *self, PyObject *args)
23002293
char *bufptr = namebuf;
23012294
Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */
23022295

2303-
PyObject *po;
2304-
if (PyArg_ParseTuple(args, "U:listdir", &po)) {
2296+
Py_UNICODE *wpath;
2297+
if (PyArg_ParseTuple(args, "u:listdir", &wpath)) {
23052298
WIN32_FIND_DATAW wFileData;
23062299
Py_UNICODE *wnamebuf;
23072300
/* Overallocate for \\*.*\0 */
2308-
len = PyUnicode_GET_SIZE(po);
2301+
len = wcslen(wpath);
23092302
wnamebuf = malloc((len + 5) * sizeof(wchar_t));
23102303
if (!wnamebuf) {
23112304
PyErr_NoMemory();
23122305
return NULL;
23132306
}
2314-
wcscpy(wnamebuf, PyUnicode_AS_UNICODE(po));
2307+
wcscpy(wnamebuf, wpath);
23152308
if (len > 0) {
23162309
Py_UNICODE wch = wnamebuf[len-1];
23172310
if (wch != L'/' && wch != L'\\' && wch != L':')
@@ -2615,9 +2608,8 @@ posix__getfullpathname(PyObject *self, PyObject *args)
26152608
char outbuf[MAX_PATH*2];
26162609
char *temp;
26172610

2618-
PyUnicodeObject *po;
2619-
if (PyArg_ParseTuple(args, "U|:_getfullpathname", &po)) {
2620-
Py_UNICODE *wpath = PyUnicode_AS_UNICODE(po);
2611+
Py_UNICODE *wpath;
2612+
if (PyArg_ParseTuple(args, "u|:_getfullpathname", &wpath)) {
26212613
Py_UNICODE woutbuf[MAX_PATH*2], *woutbufp = woutbuf;
26222614
Py_UNICODE *wtemp;
26232615
DWORD result;
@@ -2670,15 +2662,13 @@ posix_mkdir(PyObject *self, PyObject *args)
26702662
int mode = 0777;
26712663

26722664
#ifdef MS_WINDOWS
2673-
PyUnicodeObject *po;
2674-
if (PyArg_ParseTuple(args, "U|i:mkdir", &po, &mode)) {
2665+
Py_UNICODE *wpath;
2666+
if (PyArg_ParseTuple(args, "u|i:mkdir", &wpath, &mode)) {
26752667
Py_BEGIN_ALLOW_THREADS
2676-
/* PyUnicode_AS_UNICODE OK without thread lock as
2677-
it is a simple dereference. */
2678-
res = CreateDirectoryW(PyUnicode_AS_UNICODE(po), NULL);
2668+
res = CreateDirectoryW(wpath, NULL);
26792669
Py_END_ALLOW_THREADS
26802670
if (!res)
2681-
return win32_error_unicode("mkdir", PyUnicode_AS_UNICODE(po));
2671+
return win32_error_unicode("mkdir", wpath);
26822672
Py_INCREF(Py_None);
26832673
return Py_None;
26842674
}
@@ -2689,8 +2679,6 @@ posix_mkdir(PyObject *self, PyObject *args)
26892679
Py_FileSystemDefaultEncoding, &path, &mode))
26902680
return NULL;
26912681
Py_BEGIN_ALLOW_THREADS
2692-
/* PyUnicode_AS_UNICODE OK without thread lock as
2693-
it is a simple dereference. */
26942682
res = CreateDirectoryA(path, NULL);
26952683
Py_END_ALLOW_THREADS
26962684
if (!res) {
@@ -2833,7 +2821,7 @@ static PyObject *
28332821
posix_stat(PyObject *self, PyObject *args)
28342822
{
28352823
#ifdef MS_WINDOWS
2836-
return posix_do_stat(self, args, "et:stat", STAT, "U:stat", win32_wstat);
2824+
return posix_do_stat(self, args, "et:stat", STAT, "u:stat", win32_wstat);
28372825
#else
28382826
return posix_do_stat(self, args, "et:stat", STAT, NULL, NULL);
28392827
#endif
@@ -2969,7 +2957,6 @@ posix_utime(PyObject *self, PyObject *args)
29692957
{
29702958
#ifdef MS_WINDOWS
29712959
PyObject *arg;
2972-
PyUnicodeObject *obwpath;
29732960
wchar_t *wpath = NULL;
29742961
char *apath = NULL;
29752962
HANDLE hFile;
@@ -2978,8 +2965,7 @@ posix_utime(PyObject *self, PyObject *args)
29782965
FILETIME atime, mtime;
29792966
PyObject *result = NULL;
29802967

2981-
if (PyArg_ParseTuple(args, "UO|:utime", &obwpath, &arg)) {
2982-
wpath = PyUnicode_AS_UNICODE(obwpath);
2968+
if (PyArg_ParseTuple(args, "uO|:utime", &wpath, &arg)) {
29832969
Py_BEGIN_ALLOW_THREADS
29842970
hFile = CreateFileW(wpath, FILE_WRITE_ATTRIBUTES, 0,
29852971
NULL, OPEN_EXISTING,
@@ -4440,14 +4426,11 @@ PyDoc_STRVAR(posix__isdir__doc__,
44404426
static PyObject *
44414427
posix__isdir(PyObject *self, PyObject *args)
44424428
{
4443-
PyObject *opath;
44444429
char *path;
4445-
PyUnicodeObject *po;
4430+
Py_UNICODE *wpath;
44464431
DWORD attributes;
44474432

4448-
if (PyArg_ParseTuple(args, "U|:_isdir", &po)) {
4449-
Py_UNICODE *wpath = PyUnicode_AS_UNICODE(po);
4450-
4433+
if (PyArg_ParseTuple(args, "u|:_isdir", &wpath)) {
44514434
attributes = GetFileAttributesW(wpath);
44524435
if (attributes == INVALID_FILE_ATTRIBUTES)
44534436
Py_RETURN_FALSE;
@@ -6326,7 +6309,7 @@ posix_lstat(PyObject *self, PyObject *args)
63266309
return posix_do_stat(self, args, "et:lstat", lstat, NULL, NULL);
63276310
#else /* !HAVE_LSTAT */
63286311
#ifdef MS_WINDOWS
6329-
return posix_do_stat(self, args, "et:lstat", STAT, "U:lstat", win32_wstat);
6312+
return posix_do_stat(self, args, "et:lstat", STAT, "u:lstat", win32_wstat);
63306313
#else
63316314
return posix_do_stat(self, args, "et:lstat", STAT, NULL, NULL);
63326315
#endif
@@ -6600,12 +6583,10 @@ posix_open(PyObject *self, PyObject *args)
66006583
int fd;
66016584

66026585
#ifdef MS_WINDOWS
6603-
PyUnicodeObject *po;
6604-
if (PyArg_ParseTuple(args, "Ui|i:mkdir", &po, &flag, &mode)) {
6586+
Py_UNICODE *wpath;
6587+
if (PyArg_ParseTuple(args, "ui|i:mkdir", &wpath, &flag, &mode)) {
66056588
Py_BEGIN_ALLOW_THREADS
6606-
/* PyUnicode_AS_UNICODE OK without thread
6607-
lock as it is a simple dereference. */
6608-
fd = _wopen(PyUnicode_AS_UNICODE(po), flag, mode);
6589+
fd = _wopen(wpath, flag, mode);
66096590
Py_END_ALLOW_THREADS
66106591
if (fd < 0)
66116592
return posix_error();
@@ -8662,12 +8643,13 @@ static PyObject *
86628643
win32_startfile(PyObject *self, PyObject *args)
86638644
{
86648645
char *filepath;
8646+
Py_UNICODE *wpath;
86658647
char *operation = NULL;
86668648
HINSTANCE rc;
86678649

8668-
PyObject *unipath, *woperation = NULL;
8669-
if (!PyArg_ParseTuple(args, "U|s:startfile",
8670-
&unipath, &operation)) {
8650+
PyObject *woperation = NULL;
8651+
if (!PyArg_ParseTuple(args, "u|s:startfile",
8652+
&wpath, &operation)) {
86718653
PyErr_Clear();
86728654
goto normal;
86738655
}
@@ -8684,14 +8666,13 @@ win32_startfile(PyObject *self, PyObject *args)
86848666

86858667
Py_BEGIN_ALLOW_THREADS
86868668
rc = ShellExecuteW((HWND)0, woperation ? PyUnicode_AS_UNICODE(woperation) : 0,
8687-
PyUnicode_AS_UNICODE(unipath),
8669+
wpath,
86888670
NULL, NULL, SW_SHOWNORMAL);
86898671
Py_END_ALLOW_THREADS
86908672

86918673
Py_XDECREF(woperation);
86928674
if (rc <= (HINSTANCE)32) {
8693-
PyObject *errval = win32_error_unicode("startfile",
8694-
PyUnicode_AS_UNICODE(unipath));
8675+
PyObject *errval = win32_error_unicode("startfile", wpath);
86958676
return errval;
86968677
}
86978678
Py_INCREF(Py_None);

Objects/fileobject.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2394,7 +2394,8 @@ file_init(PyObject *self, PyObject *args, PyObject *kwds)
23942394

23952395
#ifdef MS_WINDOWS
23962396
if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2397-
kwlist, &po, &mode, &bufsize)) {
2397+
kwlist, &po, &mode, &bufsize) &&
2398+
wcslen(PyUnicode_AS_UNICODE(po)) == (size_t)PyUnicode_GET_SIZE(po)) {
23982399
wideargument = 1;
23992400
if (fill_file_fields(foself, NULL, po, mode,
24002401
fclose) == NULL)

0 commit comments

Comments
 (0)