Skip to content

Commit 0314055

Browse files
author
martin.v.loewis
committed
New environment variable PYTHONIOENCODING.
git-svn-id: http://svn.python.org/projects/python/trunk@63846 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 2dd6006 commit 0314055

10 files changed

Lines changed: 153 additions & 52 deletions

File tree

Doc/c-api/file.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ change in future releases of Python.
130130
.. versionadded:: 2.3
131131

132132

133+
.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, char *errors)
134+
135+
Set the file's encoding for Unicode output to *enc*, and its error
136+
mode to *errors*. Return 1 on success and 0 on failure.
137+
138+
.. versionadded:: 2.6
139+
140+
133141
.. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag)
134142

135143
.. index:: single: softspace (file attribute)

Doc/library/stdtypes.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2165,6 +2165,13 @@ the particular object.
21652165
.. versionadded:: 2.3
21662166

21672167

2168+
.. attribute:: file.errors
2169+
2170+
The Unicode error handler used along with the encoding.
2171+
2172+
.. versionadded:: 2.6
2173+
2174+
21682175
.. attribute:: file.mode
21692176

21702177
The I/O mode for the file. If the file was created using the :func:`open`

Doc/using/cmdline.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,13 @@ These environment variables influence Python's behavior.
481481

482482
.. versionadded:: 2.6
483483

484+
.. envvar:: PYTHONIOENCODING
485+
486+
Overrides the encoding used for stdin/stdout/stderr, in the syntax
487+
encodingname:errorhandler, with the :errorhandler part being optional.
488+
489+
.. versionadded:: 2.6
490+
484491

485492
.. envvar:: PYTHONNOUSERSITE
486493

Include/fileobject.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ typedef struct {
2424
int f_newlinetypes; /* Types of newlines seen */
2525
int f_skipnextlf; /* Skip next \n */
2626
PyObject *f_encoding;
27+
PyObject *f_errors;
2728
PyObject *weakreflist; /* List of weak references */
2829
int unlocked_count; /* Num. currently running sections of code
2930
using f_fp with the GIL released. */
@@ -37,6 +38,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
3738
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
3839
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
3940
PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
41+
PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors);
4042
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
4143
int (*)(FILE *));
4244
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);

Lib/test/test_sys.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,26 @@ def test_compact_freelists(self):
385385
## self.assert_(r[0][2] > 100, r[0][2])
386386
## self.assert_(r[1][2] > 100, r[1][2])
387387

388+
def test_ioencoding(self):
389+
import subprocess,os
390+
env = dict(os.environ)
391+
392+
# Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
393+
# not representable in ASCII.
394+
395+
env["PYTHONIOENCODING"] = "cp424"
396+
p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
397+
stdout = subprocess.PIPE, env=env)
398+
out = p.stdout.read().strip()
399+
self.assertEqual(out, unichr(0xa2).encode("cp424"))
400+
401+
env["PYTHONIOENCODING"] = "ascii:replace"
402+
p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
403+
stdout = subprocess.PIPE, env=env)
404+
out = p.stdout.read().strip()
405+
self.assertEqual(out, '?')
406+
407+
388408
def test_main():
389409
test.test_support.run_unittest(SysModuleTest)
390410

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ What's New in Python 2.6 beta 1?
1212
Core and Builtins
1313
-----------------
1414

15+
- New environment variable PYTHONIOENCODING.
16+
1517
- Patch #2488: Add sys.maxsize.
1618

1719
- Issue #2353: file.xreadlines() now emits a Py3k warning.

Modules/main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ static char *usage_5 = "\
9999
PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
100100
The default module search path uses %s.\n\
101101
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
102+
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
102103
";
103104

104105

Objects/fileobject.c

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
155155
Py_DECREF(f->f_name);
156156
Py_DECREF(f->f_mode);
157157
Py_DECREF(f->f_encoding);
158+
Py_DECREF(f->f_errors);
158159

159160
Py_INCREF(name);
160161
f->f_name = name;
@@ -170,6 +171,8 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
170171
f->f_skipnextlf = 0;
171172
Py_INCREF(Py_None);
172173
f->f_encoding = Py_None;
174+
Py_INCREF(Py_None);
175+
f->f_errors = Py_None;
173176

174177
if (f->f_mode == NULL)
175178
return NULL;
@@ -435,19 +438,38 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
435438
}
436439

437440
/* Set the encoding used to output Unicode strings.
438-
Returh 1 on success, 0 on failure. */
441+
Return 1 on success, 0 on failure. */
439442

440443
int
441444
PyFile_SetEncoding(PyObject *f, const char *enc)
445+
{
446+
return PyFile_SetEncodingAndErrors(f, enc, NULL);
447+
}
448+
449+
int
450+
PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
442451
{
443452
PyFileObject *file = (PyFileObject*)f;
444-
PyObject *str = PyBytes_FromString(enc);
453+
PyObject *str, *oerrors;
445454

446455
assert(PyFile_Check(f));
456+
str = PyBytes_FromString(enc);
447457
if (!str)
448458
return 0;
459+
if (errors) {
460+
oerrors = PyString_FromString(errors);
461+
if (!oerrors) {
462+
Py_DECREF(str);
463+
return 0;
464+
}
465+
} else {
466+
oerrors = Py_None;
467+
Py_INCREF(Py_None);
468+
}
449469
Py_DECREF(file->f_encoding);
450470
file->f_encoding = str;
471+
Py_DECREF(file->f_errors);
472+
file->f_errors = oerrors;
451473
return 1;
452474
}
453475

@@ -491,6 +513,7 @@ file_dealloc(PyFileObject *f)
491513
Py_XDECREF(f->f_name);
492514
Py_XDECREF(f->f_mode);
493515
Py_XDECREF(f->f_encoding);
516+
Py_XDECREF(f->f_errors);
494517
drop_readahead(f);
495518
Py_TYPE(f)->tp_free((PyObject *)f);
496519
}
@@ -1879,6 +1902,8 @@ static PyMemberDef file_memberlist[] = {
18791902
"file name"},
18801903
{"encoding", T_OBJECT, OFF(f_encoding), RO,
18811904
"file encoding"},
1905+
{"errors", T_OBJECT, OFF(f_errors), RO,
1906+
"Unicode error handler"},
18821907
/* getattr(f, "closed") is implemented without this table */
18831908
{NULL} /* Sentinel */
18841909
};
@@ -2093,6 +2118,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
20932118
((PyFileObject *)self)->f_mode = not_yet_string;
20942119
Py_INCREF(Py_None);
20952120
((PyFileObject *)self)->f_encoding = Py_None;
2121+
Py_INCREF(Py_None);
2122+
((PyFileObject *)self)->f_errors = Py_None;
20962123
((PyFileObject *)self)->weakreflist = NULL;
20972124
((PyFileObject *)self)->unlocked_count = 0;
20982125
}
@@ -2295,7 +2322,9 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
22952322
if ((flags & Py_PRINT_RAW) &&
22962323
PyUnicode_Check(v) && enc != Py_None) {
22972324
char *cenc = PyBytes_AS_STRING(enc);
2298-
value = PyUnicode_AsEncodedString(v, cenc, "strict");
2325+
char *errors = fobj->f_errors == Py_None ?
2326+
"strict" : PyBytes_AS_STRING(fobj->f_errors);
2327+
value = PyUnicode_AsEncodedString(v, cenc, errors);
22992328
if (value == NULL)
23002329
return -1;
23012330
} else {

Python/pythonrun.c

Lines changed: 74 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,19 @@ Py_InitializeEx(int install_sigs)
132132
PyThreadState *tstate;
133133
PyObject *bimod, *sysmod;
134134
char *p;
135+
char *icodeset; /* On Windows, input codeset may theoretically
136+
differ from output codeset. */
137+
char *codeset = NULL;
138+
char *errors = NULL;
139+
int free_codeset = 0;
140+
int overridden = 0;
135141
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
136-
char *codeset;
137-
char *saved_locale;
142+
char *saved_locale, *loc_codeset;
138143
PyObject *sys_stream, *sys_isatty;
144+
#endif
145+
#ifdef MS_WINDOWS
146+
char ibuf[128];
147+
char buf[128];
139148
#endif
140149
extern void _Py_ReadyTypes(void);
141150

@@ -238,38 +247,75 @@ Py_InitializeEx(int install_sigs)
238247
_PyGILState_Init(interp, tstate);
239248
#endif /* WITH_THREAD */
240249

250+
if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
251+
p = icodeset = codeset = strdup(p);
252+
free_codeset = 1;
253+
errors = strchr(p, ':');
254+
if (errors) {
255+
*errors = '\0';
256+
errors++;
257+
}
258+
overridden = 1;
259+
}
260+
241261
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
242262
/* On Unix, set the file system encoding according to the
243263
user's preference, if the CODESET names a well-known
244264
Python codec, and Py_FileSystemDefaultEncoding isn't
245265
initialized by other means. Also set the encoding of
246-
stdin and stdout if these are terminals. */
247-
248-
saved_locale = strdup(setlocale(LC_CTYPE, NULL));
249-
setlocale(LC_CTYPE, "");
250-
codeset = nl_langinfo(CODESET);
251-
if (codeset && *codeset) {
252-
PyObject *enc = PyCodec_Encoder(codeset);
253-
if (enc) {
254-
codeset = strdup(codeset);
255-
Py_DECREF(enc);
256-
} else {
257-
codeset = NULL;
258-
PyErr_Clear();
266+
stdin and stdout if these are terminals, unless overridden. */
267+
268+
if (!overridden || !Py_FileSystemDefaultEncoding) {
269+
saved_locale = strdup(setlocale(LC_CTYPE, NULL));
270+
setlocale(LC_CTYPE, "");
271+
loc_codeset = nl_langinfo(CODESET);
272+
if (loc_codeset && *loc_codeset) {
273+
PyObject *enc = PyCodec_Encoder(loc_codeset);
274+
if (enc) {
275+
loc_codeset = strdup(loc_codeset);
276+
Py_DECREF(enc);
277+
} else {
278+
loc_codeset = NULL;
279+
PyErr_Clear();
280+
}
281+
} else
282+
loc_codeset = NULL;
283+
setlocale(LC_CTYPE, saved_locale);
284+
free(saved_locale);
285+
286+
if (!overridden) {
287+
codeset = icodeset = loc_codeset;
288+
free_codeset = 1;
289+
}
290+
291+
/* Initialize Py_FileSystemDefaultEncoding from
292+
locale even if PYTHONIOENCODING is set. */
293+
if (!Py_FileSystemDefaultEncoding) {
294+
Py_FileSystemDefaultEncoding = loc_codeset;
295+
if (!overridden)
296+
free_codeset = 0;
259297
}
260-
} else
261-
codeset = NULL;
262-
setlocale(LC_CTYPE, saved_locale);
263-
free(saved_locale);
298+
}
299+
#endif
300+
301+
#ifdef MS_WINDOWS
302+
if (!overridden) {
303+
icodeset = ibuf;
304+
encoding = buf;
305+
sprintf(ibuf, "cp%d", GetConsoleCP());
306+
sprintf(buf, "cp%d", GetConsoleOutputCP());
307+
}
308+
#endif
264309

265310
if (codeset) {
266311
sys_stream = PySys_GetObject("stdin");
267312
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
268313
if (!sys_isatty)
269314
PyErr_Clear();
270-
if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
315+
if ((overridden ||
316+
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
271317
PyFile_Check(sys_stream)) {
272-
if (!PyFile_SetEncoding(sys_stream, codeset))
318+
if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
273319
Py_FatalError("Cannot set codeset of stdin");
274320
}
275321
Py_XDECREF(sys_isatty);
@@ -278,9 +324,10 @@ Py_InitializeEx(int install_sigs)
278324
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
279325
if (!sys_isatty)
280326
PyErr_Clear();
281-
if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
327+
if ((overridden ||
328+
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
282329
PyFile_Check(sys_stream)) {
283-
if (!PyFile_SetEncoding(sys_stream, codeset))
330+
if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
284331
Py_FatalError("Cannot set codeset of stdout");
285332
}
286333
Py_XDECREF(sys_isatty);
@@ -289,19 +336,17 @@ Py_InitializeEx(int install_sigs)
289336
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
290337
if (!sys_isatty)
291338
PyErr_Clear();
292-
if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
339+
if((overridden ||
340+
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
293341
PyFile_Check(sys_stream)) {
294-
if (!PyFile_SetEncoding(sys_stream, codeset))
342+
if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
295343
Py_FatalError("Cannot set codeset of stderr");
296344
}
297345
Py_XDECREF(sys_isatty);
298346

299-
if (!Py_FileSystemDefaultEncoding)
300-
Py_FileSystemDefaultEncoding = codeset;
301-
else
347+
if (free_codeset)
302348
free(codeset);
303349
}
304-
#endif
305350
}
306351

307352
void

Python/sysmodule.c

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,9 +1232,6 @@ _PySys_Init(void)
12321232
PyObject *m, *v, *sysdict;
12331233
PyObject *sysin, *sysout, *syserr;
12341234
char *s;
1235-
#ifdef MS_WINDOWS
1236-
char buf[128];
1237-
#endif
12381235

12391236
m = Py_InitModule3("sys", sys_methods, sys_doc);
12401237
if (m == NULL)
@@ -1272,23 +1269,6 @@ _PySys_Init(void)
12721269
syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush);
12731270
if (PyErr_Occurred())
12741271
return NULL;
1275-
#ifdef MS_WINDOWS
1276-
if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) {
1277-
sprintf(buf, "cp%d", GetConsoleCP());
1278-
if (!PyFile_SetEncoding(sysin, buf))
1279-
return NULL;
1280-
}
1281-
if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) {
1282-
sprintf(buf, "cp%d", GetConsoleOutputCP());
1283-
if (!PyFile_SetEncoding(sysout, buf))
1284-
return NULL;
1285-
}
1286-
if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) {
1287-
sprintf(buf, "cp%d", GetConsoleOutputCP());
1288-
if (!PyFile_SetEncoding(syserr, buf))
1289-
return NULL;
1290-
}
1291-
#endif
12921272

12931273
PyDict_SetItemString(sysdict, "stdin", sysin);
12941274
PyDict_SetItemString(sysdict, "stdout", sysout);

0 commit comments

Comments
 (0)