Skip to content

Commit c2e2e03

Browse files
author
amaury.forgeotdarc
committed
#2798: PyArg_ParseTuple did not correctly handle the "s" code for unicode strings
containing characters outside 7-bit ASCII (s# was already correct). This is necessary to allow Python to run from a non-ASCII directory, and seems to be enough on some platforms, probably those where the default PyUnicode encoding (utf-8) is also the default filesystem encoding. git-svn-id: http://svn.python.org/projects/python/branches/py3k@63161 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 65cea6f commit c2e2e03

3 files changed

Lines changed: 45 additions & 8 deletions

File tree

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ What's new in Python 3.0b1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue 2798: When parsing arguments with PyArg_ParseTuple, the "s" code now
16+
allows any unicode string and returns a utf-8 encoded buffer, just like the
17+
"s#" code already does. The "z" code was corrected as well.
18+
1519
- Issue 2801: fix bug in the float.is_integer method where a ValueError
1620
was sometimes incorrectly raised.
1721

Modules/_testcapimodule.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,38 @@ test_k_code(PyObject *self)
475475
}
476476

477477

478+
/* Test the s and z codes for PyArg_ParseTuple.
479+
*/
480+
static PyObject *
481+
test_s_code(PyObject *self)
482+
{
483+
/* Unicode strings should be accepted */
484+
PyObject *tuple, *obj;
485+
char *value;
486+
487+
tuple = PyTuple_New(1);
488+
if (tuple == NULL)
489+
return NULL;
490+
491+
obj = PyUnicode_Decode("t\xeate", strlen("t\xeate"),
492+
"latin-1", NULL);
493+
if (obj == NULL)
494+
return NULL;
495+
496+
PyTuple_SET_ITEM(tuple, 0, obj);
497+
498+
/* These two blocks used to raise a TypeError:
499+
* "argument must be string without null bytes, not str"
500+
*/
501+
if (PyArg_ParseTuple(tuple, "s:test_s_code1", &value) < 0)
502+
return NULL;
503+
504+
if (PyArg_ParseTuple(tuple, "z:test_s_code2", &value) < 0)
505+
return NULL;
506+
507+
Py_RETURN_NONE;
508+
}
509+
478510
/* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
479511
of an error.
480512
*/
@@ -952,6 +984,7 @@ static PyMethodDef TestMethods[] = {
952984
{"codec_incrementaldecoder",
953985
(PyCFunction)codec_incrementaldecoder, METH_VARARGS},
954986
#endif
987+
{"test_s_code", (PyCFunction)test_s_code, METH_NOARGS},
955988
{"test_u_code", (PyCFunction)test_u_code, METH_NOARGS},
956989
{"test_Z_code", (PyCFunction)test_Z_code, METH_NOARGS},
957990
#ifdef WITH_THREAD

Python/getargs.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -822,10 +822,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
822822
}
823823
else
824824
return converterr("string", arg, msgbuf, bufsize);
825-
/* XXX(gb): this test is completely wrong -- p is a
826-
* byte string while arg is a Unicode. I *think* it should
827-
* check against the size of uarg... */
828-
if ((Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
825+
if ((Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
829826
return converterr("string without null bytes",
830827
arg, msgbuf, bufsize);
831828
}
@@ -874,11 +871,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
874871
format++;
875872
} else {
876873
char **p = va_arg(*p_va, char **);
874+
uarg = NULL;
877875

878876
if (arg == Py_None)
879877
*p = 0;
880-
else if (PyString_Check(arg))
878+
else if (PyString_Check(arg)) {
879+
/* Enable null byte check below */
880+
uarg = arg;
881881
*p = PyString_AS_STRING(arg);
882+
}
882883
else if (PyUnicode_Check(arg)) {
883884
uarg = UNICODE_DEFAULT_ENCODING(arg);
884885
if (uarg == NULL)
@@ -900,9 +901,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
900901
}
901902
format++;
902903
}
903-
/* XXX(gb): same comment as for 's' applies here... */
904-
else if (*p != NULL &&
905-
(Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
904+
else if (*p != NULL && uarg != NULL &&
905+
(Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
906906
return converterr(
907907
"string without null bytes or None",
908908
arg, msgbuf, bufsize);

0 commit comments

Comments
 (0)