Skip to content

Commit d1ba443

Browse files
committed
This patch adds a new Python C API called PyString_AsStringAndSize()
which implements the automatic conversion from Unicode to a string object using the default encoding. The new API is then put to use to have eval() and exec accept Unicode objects as the code parameter. This closes bugs #110924 and #113890. As a side effect, the traditional C APIs PyString_Size() and PyString_AsString() will also accept Unicode objects as parameters.
1 parent f8d0713 commit d1ba443

File tree

8 files changed

+126
-19
lines changed

8 files changed

+126
-19
lines changed

Doc/api/api.tex

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2105,6 +2105,23 @@ \subsection{String Objects \label{stringObjects}}
21052105
checking.
21062106
\end{cfuncdesc}
21072107

2108+
\begin{cfuncdesc}{int}{PyString_AsStringAndSize}{PyObject *obj,
2109+
char **buffer,
2110+
int *length}
2111+
Returns a null-terminated representation of the contents of the object
2112+
\var{obj} through the output variables \var{buffer} and \var{length}.
2113+
2114+
The function accepts both string and Unicode objects as input. For
2115+
Unicode objects it returns the default encoded version of the object.
2116+
If \var{length} is set to \NULL{}, the resulting buffer must not contain
2117+
null characters; if it does, the function returns -1 and a
2118+
TypeError is raised.
2119+
2120+
The buffer refers to an internal string buffer of \var{obj}, not a
2121+
copy. The data must not be modified in any way. It must not be
2122+
de-allocated.
2123+
\end{cfuncdesc}
2124+
21082125
\begin{cfuncdesc}{void}{PyString_Concat}{PyObject **string,
21092126
PyObject *newpart}
21102127
Creates a new string object in \var{*string} containing the

Doc/api/refcounts.dat

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,11 @@ PyString_AS_STRING:PyObject*:string:0:
760760
PyString_AsString:char*:::
761761
PyString_AsString:PyObject*:string:0:
762762

763+
PyString_AsStringAndSize:int:::
764+
PyString_AsStringAndSize:PyObject*:obj:0:
765+
PyString_AsStringAndSize:char**:buffer::
766+
PyString_AsStringAndSize:int*:length::
767+
763768
PyString_Check:int:::
764769
PyString_Check:PyObject*:o:0:
765770

Include/stringobject.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,21 @@ extern DL_IMPORT(PyObject*) PyString_AsEncodedString(
103103
const char *errors /* error handling */
104104
);
105105

106+
/* Provides access to the internal data buffer and size of a string
107+
object or the default encoded version of a Unicode object. Passing
108+
NULL as the len parameter will force the string buffer to be
109+
0-terminated (passing a string with embedded null characters will
110+
cause an exception). */
111+
112+
extern DL_IMPORT(int) PyString_AsStringAndSize(
113+
register PyObject *obj, /* string or Unicode object */
114+
register char **s, /* pointer to buffer variable */
115+
register int *len /* pointer to length variable or NULL
116+
(only possible for 0-terminated
117+
strings) */
118+
);
119+
120+
106121
#ifdef __cplusplus
107122
}
108123
#endif

Lib/test/test_b1.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,18 @@ def __complex__(self): return 3.14j
161161
raise TestFailed, "eval(3)"
162162
if eval('c', globals, locals) <> 300:
163163
raise TestFailed, "eval(4)"
164+
if eval(u'1+1') <> 2: raise TestFailed, 'eval(u\'1+1\')'
165+
if eval(u' 1+1\n') <> 2: raise TestFailed, 'eval(u\' 1+1\\n\')'
166+
globals = {'a': 1, 'b': 2}
167+
locals = {'b': 200, 'c': 300}
168+
if eval(u'a', globals) <> 1:
169+
raise TestFailed, "eval(1) == %s" % eval(u'a', globals)
170+
if eval(u'a', globals, locals) <> 1:
171+
raise TestFailed, "eval(2)"
172+
if eval(u'b', globals, locals) <> 200:
173+
raise TestFailed, "eval(3)"
174+
if eval(u'c', globals, locals) <> 300:
175+
raise TestFailed, "eval(4)"
164176

165177
print 'execfile'
166178
z = 0

Lib/test/test_grammar.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,13 @@ def f():
355355
del z
356356
exec 'z=1+1'
357357
if z <> 2: raise TestFailed, 'exec \'z=1+1\''
358+
z = None
359+
del z
360+
exec u'z=1+1\n'
361+
if z <> 2: raise TestFailed, 'exec u\'z=1+1\'\\n'
362+
del z
363+
exec u'z=1+1'
364+
if z <> 2: raise TestFailed, 'exec u\'z=1+1\''
358365
f()
359366
g = {}
360367
exec 'z = 1' in g

Objects/stringobject.c

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -239,24 +239,80 @@ string_dealloc(PyObject *op)
239239
PyObject_DEL(op);
240240
}
241241

242+
static int
243+
string_getsize(register PyObject *op)
244+
{
245+
char *s;
246+
int len;
247+
if (PyString_AsStringAndSize(op, &s, &len))
248+
return -1;
249+
return len;
250+
}
251+
252+
static /*const*/ char *
253+
string_getbuffer(register PyObject *op)
254+
{
255+
char *s;
256+
int len;
257+
if (PyString_AsStringAndSize(op, &s, &len))
258+
return NULL;
259+
return s;
260+
}
261+
242262
int
243263
PyString_Size(register PyObject *op)
244264
{
245-
if (!PyString_Check(op)) {
246-
PyErr_BadInternalCall();
247-
return -1;
248-
}
265+
if (!PyString_Check(op))
266+
return string_getsize(op);
249267
return ((PyStringObject *)op) -> ob_size;
250268
}
251269

252270
/*const*/ char *
253271
PyString_AsString(register PyObject *op)
254272
{
255-
if (!PyString_Check(op)) {
273+
if (!PyString_Check(op))
274+
return string_getbuffer(op);
275+
return ((PyStringObject *)op) -> ob_sval;
276+
}
277+
278+
/* Internal API needed by PyString_AsStringAndSize(): */
279+
extern
280+
PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
281+
const char *errors);
282+
283+
int
284+
PyString_AsStringAndSize(register PyObject *obj,
285+
register char **s,
286+
register int *len)
287+
{
288+
if (s == NULL) {
256289
PyErr_BadInternalCall();
257-
return NULL;
290+
return -1;
258291
}
259-
return ((PyStringObject *)op) -> ob_sval;
292+
293+
if (!PyString_Check(obj)) {
294+
if (PyUnicode_Check(obj)) {
295+
obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
296+
if (obj == NULL)
297+
return -1;
298+
}
299+
else {
300+
PyErr_Format(PyExc_TypeError,
301+
"expected string or Unicode object, "
302+
"%.200s found", obj->ob_type->tp_name);
303+
return -1;
304+
}
305+
}
306+
307+
*s = PyString_AS_STRING(obj);
308+
if (len != NULL)
309+
*len = PyString_GET_SIZE(obj);
310+
else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
311+
PyErr_SetString(PyExc_TypeError,
312+
"expected string without null bytes");
313+
return -1;
314+
}
315+
return 0;
260316
}
261317

262318
/* Methods */

Python/bltinmodule.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -748,17 +748,14 @@ builtin_eval(PyObject *self, PyObject *args)
748748
}
749749
if (PyCode_Check(cmd))
750750
return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals);
751-
if (!PyString_Check(cmd)) {
751+
if (!PyString_Check(cmd) &&
752+
!PyUnicode_Check(cmd)) {
752753
PyErr_SetString(PyExc_TypeError,
753754
"eval() argument 1 must be string or code object");
754755
return NULL;
755756
}
756-
str = PyString_AsString(cmd);
757-
if (strlen(str) != (size_t)PyString_Size(cmd)) {
758-
PyErr_SetString(PyExc_ValueError,
759-
"embedded '\\0' in string arg");
757+
if (PyString_AsStringAndSize(cmd, &str, NULL))
760758
return NULL;
761-
}
762759
while (*str == ' ' || *str == '\t')
763760
str++;
764761
return PyRun_String(str, Py_eval_input, globals, locals);

Python/ceval.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3042,6 +3042,7 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals,
30423042
else if (locals == Py_None)
30433043
locals = globals;
30443044
if (!PyString_Check(prog) &&
3045+
!PyUnicode_Check(prog) &&
30453046
!PyCode_Check(prog) &&
30463047
!PyFile_Check(prog)) {
30473048
PyErr_SetString(PyExc_TypeError,
@@ -3064,13 +3065,10 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals,
30643065
v = PyRun_File(fp, name, Py_file_input, globals, locals);
30653066
}
30663067
else {
3067-
char *s = PyString_AsString(prog);
3068-
if (strlen(s) != (size_t)PyString_Size(prog)) {
3069-
PyErr_SetString(PyExc_ValueError,
3070-
"embedded '\\0' in exec string");
3068+
char *str;
3069+
if (PyString_AsStringAndSize(prog, &str, NULL))
30713070
return -1;
3072-
}
3073-
v = PyRun_String(s, Py_file_input, globals, locals);
3071+
v = PyRun_String(str, Py_file_input, globals, locals);
30743072
}
30753073
if (plain)
30763074
PyFrame_LocalsToFast(f, 0);

0 commit comments

Comments
 (0)