Skip to content

Commit 7d270ee

Browse files
committed
Issue python#16129: Add Py_SetStandardStreamEncoding
This new pre-initialization API allows embedding applications like Blender to force a particular encoding and error handler for the standard IO streams. Also refactors Modules/_testembed.c to let us start testing multiple embedding scenarios. (Initial patch by Bastien Montagne)
1 parent 26f9268 commit 7d270ee

File tree

8 files changed

+266
-48
lines changed

8 files changed

+266
-48
lines changed

Doc/c-api/init.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,33 @@ Process-wide parameters
8686
=======================
8787

8888

89+
.. c:function:: int Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
90+
91+
.. index::
92+
single: Py_Initialize()
93+
single: main()
94+
triple: stdin; stdout; stderr
95+
96+
This function should be called before :c:func:`Py_Initialize`. It
97+
specifies which encoding and error handling to use with standard io,
98+
with the same meanings as in :func:`str.encode`.
99+
100+
It overrides :envvar:`PYTHONIOENCODING` values, and allows embedding code
101+
to control io encoding when the environment variable does not work.
102+
103+
``encoding`` and/or ``errors`` may be NULL to use
104+
:envvar:`PYTHONIOENCODING` and/or default values (depending on other
105+
settings).
106+
107+
Note that :data:`sys.stderr` always uses the "backslashreplace" error
108+
handler, regardless of this (or any other) setting.
109+
110+
If :c:func:`Py_Finalize` is called, this function will need to be called
111+
again in order to affect subsequent calls to :c:func:`Py_Initialize`.
112+
113+
Returns 0 if successful, a nonzero value on error (e.g. calling after the interpreter has already been initialized).
114+
115+
89116
.. c:function:: void Py_SetProgramName(wchar_t *name)
90117
91118
.. index::

Doc/whatsnew/3.4.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,10 @@ Build and C API Changes
564564

565565
Changes to Python's build process and to the C API include:
566566

567-
* None yet.
567+
* The new :c:func:`Py_SetStandardStreamEncoding` pre-initialization API
568+
allows applications embedding the CPython interpreter to reliably force
569+
a particular encoding and error handler for the standard streams.
570+
(Contributed by Bastien Montagne and Nick Coghlan in :issue:`16129`.)
568571

569572

570573
Deprecated

Include/pythonrun.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ PyAPI_FUNC(int) Py_IsInitialized(void);
3838
PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void);
3939
PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *);
4040

41+
PyAPI_FUNC(int) Py_SetStandardStreamEncoding(const char *encoding, const char *errors);
42+
4143
#ifndef Py_LIMITED_API
4244
PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *);
4345
PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *);

Lib/test/test_capi.py

Lines changed: 71 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import sys
1010
import time
1111
import unittest
12+
import textwrap
1213
from test import support
1314
try:
1415
import _posixsubprocess
@@ -218,36 +219,81 @@ def test(self):
218219
self.assertEqual(_testcapi.argparsing("Hello", "World"), 1)
219220

220221

221-
class EmbeddingTest(unittest.TestCase):
222+
@unittest.skipIf(
223+
sys.platform.startswith('win'),
224+
"interpreter embedding tests aren't built under Windows")
225+
class EmbeddingTests(unittest.TestCase):
226+
# XXX only tested under Unix checkouts
222227

223-
@unittest.skipIf(
224-
sys.platform.startswith('win'),
225-
"test doesn't work under Windows")
226-
def test_subinterps(self):
227-
# XXX only tested under Unix checkouts
228+
def setUp(self):
228229
basepath = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
229-
oldcwd = os.getcwd()
230+
self.test_exe = exe = os.path.join(basepath, "Modules", "_testembed")
231+
if not os.path.exists(exe):
232+
self.skipTest("%r doesn't exist" % exe)
230233
# This is needed otherwise we get a fatal error:
231234
# "Py_Initialize: Unable to get the locale encoding
232235
# LookupError: no codec search functions registered: can't find encoding"
236+
self.oldcwd = os.getcwd()
233237
os.chdir(basepath)
234-
try:
235-
exe = os.path.join(basepath, "Modules", "_testembed")
236-
if not os.path.exists(exe):
237-
self.skipTest("%r doesn't exist" % exe)
238-
p = subprocess.Popen([exe],
239-
stdout=subprocess.PIPE,
240-
stderr=subprocess.PIPE)
241-
(out, err) = p.communicate()
242-
self.assertEqual(p.returncode, 0,
243-
"bad returncode %d, stderr is %r" %
244-
(p.returncode, err))
245-
if support.verbose:
246-
print()
247-
print(out.decode('latin1'))
248-
print(err.decode('latin1'))
249-
finally:
250-
os.chdir(oldcwd)
238+
239+
def tearDown(self):
240+
os.chdir(self.oldcwd)
241+
242+
def run_embedded_interpreter(self, *args):
243+
"""Runs a test in the embedded interpreter"""
244+
cmd = [self.test_exe]
245+
cmd.extend(args)
246+
p = subprocess.Popen(cmd,
247+
stdout=subprocess.PIPE,
248+
stderr=subprocess.PIPE)
249+
(out, err) = p.communicate()
250+
self.assertEqual(p.returncode, 0,
251+
"bad returncode %d, stderr is %r" %
252+
(p.returncode, err))
253+
return out.decode("latin1"), err.decode("latin1")
254+
255+
def test_subinterps(self):
256+
# This is just a "don't crash" test
257+
out, err = self.run_embedded_interpreter()
258+
if support.verbose:
259+
print()
260+
print(out)
261+
print(err)
262+
263+
def test_forced_io_encoding(self):
264+
# Checks forced configuration of embedded interpreter IO streams
265+
out, err = self.run_embedded_interpreter("forced_io_encoding")
266+
if support.verbose:
267+
print()
268+
print(out)
269+
print(err)
270+
expected_output = textwrap.dedent("""\
271+
--- Use defaults ---
272+
Expected encoding: default
273+
Expected errors: default
274+
stdin: {0.stdin.encoding}:strict
275+
stdout: {0.stdout.encoding}:strict
276+
stderr: {0.stderr.encoding}:backslashreplace
277+
--- Set errors only ---
278+
Expected encoding: default
279+
Expected errors: surrogateescape
280+
stdin: {0.stdin.encoding}:surrogateescape
281+
stdout: {0.stdout.encoding}:surrogateescape
282+
stderr: {0.stderr.encoding}:backslashreplace
283+
--- Set encoding only ---
284+
Expected encoding: latin-1
285+
Expected errors: default
286+
stdin: latin-1:strict
287+
stdout: latin-1:strict
288+
stderr: latin-1:backslashreplace
289+
--- Set encoding and errors ---
290+
Expected encoding: latin-1
291+
Expected errors: surrogateescape
292+
stdin: latin-1:surrogateescape
293+
stdout: latin-1:surrogateescape
294+
stderr: latin-1:backslashreplace""").format(sys)
295+
296+
self.assertEqual(out.strip(), expected_output)
251297

252298
class SkipitemTest(unittest.TestCase):
253299

@@ -358,7 +404,7 @@ def callback():
358404

359405
def test_main():
360406
support.run_unittest(CAPITest, TestPendingCalls, Test6012,
361-
EmbeddingTest, SkipitemTest, TestThreadState,
407+
EmbeddingTests, SkipitemTest, TestThreadState,
362408
SubinterpreterTest)
363409

364410
for name in dir(_testcapi):

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,7 @@ Zubin Mithra
872872
Florian Mladitsch
873873
Doug Moen
874874
The Dragon De Monsyne
875+
Bastien Montagne
875876
Skip Montanaro
876877
Peter Moody
877878
Paul Moore

Misc/NEWS

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,14 @@ Library
120120
- Issue #4366: Fix building extensions on all platforms when --enable-shared
121121
is used.
122122

123+
C API
124+
-----
125+
126+
- Issue #16129: Added a `Py_SetStandardStreamEncoding` pre-initialization API
127+
to allow embedding applications like Blender to force a particular
128+
encoding and error handler for the standard IO streams (initial patch by
129+
Bastien Montagne)
130+
123131
Tests
124132
-----
125133

Modules/_testembed.c

Lines changed: 88 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,26 @@
11
#include <Python.h>
22
#include <stdio.h>
33

4-
void print_subinterp(void)
4+
/*********************************************************
5+
* Embedded interpreter tests that need a custom exe
6+
*
7+
* Executed via 'EmbeddingTests' in Lib/test/test_capi.py
8+
*********************************************************/
9+
10+
static void _testembed_Py_Initialize(void)
11+
{
12+
/* HACK: the "./" at front avoids a search along the PATH in
13+
Modules/getpath.c */
14+
Py_SetProgramName(L"./_testembed");
15+
Py_Initialize();
16+
}
17+
18+
19+
/*****************************************************
20+
* Test repeated initialisation and subinterpreters
21+
*****************************************************/
22+
23+
static void print_subinterp(void)
524
{
625
/* Just output some debug stuff */
726
PyThreadState *ts = PyThreadState_Get();
@@ -14,7 +33,7 @@ void print_subinterp(void)
1433
);
1534
}
1635

17-
int main(int argc, char *argv[])
36+
static void test_repeated_init_and_subinterpreters(void)
1837
{
1938
PyThreadState *mainstate, *substate;
2039
#ifdef WITH_THREAD
@@ -24,10 +43,7 @@ int main(int argc, char *argv[])
2443

2544
for (i=0; i<3; i++) {
2645
printf("--- Pass %d ---\n", i);
27-
/* HACK: the "./" at front avoids a search along the PATH in
28-
Modules/getpath.c */
29-
Py_SetProgramName(L"./_testembed");
30-
Py_Initialize();
46+
_testembed_Py_Initialize();
3147
mainstate = PyThreadState_Get();
3248

3349
#ifdef WITH_THREAD
@@ -54,5 +70,71 @@ int main(int argc, char *argv[])
5470
PyEval_RestoreThread(mainstate);
5571
Py_Finalize();
5672
}
73+
}
74+
75+
/*****************************************************
76+
* Test forcing a particular IO encoding
77+
*****************************************************/
78+
79+
static void check_stdio_details(const char *encoding, const char * errors)
80+
{
81+
/* Output info for the test case to check */
82+
if (encoding) {
83+
printf("Expected encoding: %s\n", encoding);
84+
} else {
85+
printf("Expected encoding: default\n");
86+
}
87+
if (errors) {
88+
printf("Expected errors: %s\n", errors);
89+
} else {
90+
printf("Expected errors: default\n");
91+
}
92+
fflush(stdout);
93+
/* Force the given IO encoding */
94+
Py_SetStandardStreamEncoding(encoding, errors);
95+
_testembed_Py_Initialize();
96+
PyRun_SimpleString(
97+
"import sys;"
98+
"print('stdin: {0.encoding}:{0.errors}'.format(sys.stdin));"
99+
"print('stdout: {0.encoding}:{0.errors}'.format(sys.stdout));"
100+
"print('stderr: {0.encoding}:{0.errors}'.format(sys.stderr));"
101+
"sys.stdout.flush()"
102+
);
103+
Py_Finalize();
104+
}
105+
106+
static void test_forced_io_encoding(void)
107+
{
108+
/* Check various combinations */
109+
printf("--- Use defaults ---\n");
110+
check_stdio_details(NULL, NULL);
111+
printf("--- Set errors only ---\n");
112+
check_stdio_details(NULL, "surrogateescape");
113+
printf("--- Set encoding only ---\n");
114+
check_stdio_details("latin-1", NULL);
115+
printf("--- Set encoding and errors ---\n");
116+
check_stdio_details("latin-1", "surrogateescape");
117+
118+
/* Check calling after initialization fails */
119+
Py_Initialize();
120+
121+
if (Py_SetStandardStreamEncoding(NULL, NULL) == 0) {
122+
printf("Unexpected success calling Py_SetStandardStreamEncoding");
123+
}
124+
Py_Finalize();
125+
}
126+
127+
/* Different embedding tests */
128+
int main(int argc, char *argv[])
129+
{
130+
131+
/* TODO: Check the argument string to allow for more test cases */
132+
if (argc > 1) {
133+
/* For now: assume "forced_io_encoding" */
134+
test_forced_io_encoding();
135+
} else {
136+
/* Run the original embedding test case by default */
137+
test_repeated_init_and_subinterpreters();
138+
}
57139
return 0;
58140
}

0 commit comments

Comments
 (0)