Skip to content

Commit c7797dc

Browse files
Issue #19543: Emit deprecation warning for known non-text encodings.
Backported issue #19619: encode() and decode() methods and constructors of str, unicode and bytearray classes now emit a deprecation warning for known non-text encodings when Python is run with the -3 option. Backported issue #20404: io.TextIOWrapper (and hence io.open()) now uses the internal codec marking system added to emit a deprecation warning for known non-text encodings at stream construction time when Python is run with the -3 option.
1 parent cfb7028 commit c7797dc

22 files changed

+391
-70
lines changed

Include/codecs.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,51 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode(
8181
const char *errors
8282
);
8383

84+
/* Text codec specific encoding and decoding API.
85+
86+
Checks the encoding against a list of codecs which do not
87+
implement a unicode<->bytes encoding before attempting the
88+
operation.
89+
90+
Please note that these APIs are internal and should not
91+
be used in Python C extensions.
92+
93+
XXX (ncoghlan): should we make these, or something like them, public
94+
in Python 3.5+?
95+
96+
*/
97+
PyAPI_FUNC(PyObject *) _PyCodec_LookupTextEncoding(
98+
const char *encoding,
99+
const char *alternate_command
100+
);
101+
102+
PyAPI_FUNC(PyObject *) _PyCodec_EncodeText(
103+
PyObject *object,
104+
const char *encoding,
105+
const char *errors
106+
);
107+
108+
PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(
109+
PyObject *object,
110+
const char *encoding,
111+
const char *errors
112+
);
113+
114+
/* These two aren't actually text encoding specific, but _io.TextIOWrapper
115+
* is the only current API consumer.
116+
*/
117+
PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalDecoder(
118+
PyObject *codec_info,
119+
const char *errors
120+
);
121+
122+
PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalEncoder(
123+
PyObject *codec_info,
124+
const char *errors
125+
);
126+
127+
128+
84129
/* --- Codec Lookup APIs --------------------------------------------------
85130
86131
All APIs return a codec object with incremented refcount and are

Lib/_pyio.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import os
88
import abc
99
import codecs
10+
import sys
1011
import warnings
1112
import errno
1213
# Import thread instead of threading to reduce startup cost
@@ -1497,6 +1498,11 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
14971498
if not isinstance(encoding, basestring):
14981499
raise ValueError("invalid encoding: %r" % encoding)
14991500

1501+
if sys.py3kwarning and not codecs.lookup(encoding)._is_text_encoding:
1502+
msg = ("%r is not a text encoding; "
1503+
"use codecs.open() to handle arbitrary codecs")
1504+
warnings.warnpy3k(msg % encoding, stacklevel=2)
1505+
15001506
if errors is None:
15011507
errors = "strict"
15021508
else:

Lib/codecs.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,19 @@
7979
### Codec base classes (defining the API)
8080

8181
class CodecInfo(tuple):
82+
"""Codec details when looking up the codec registry"""
83+
84+
# Private API to allow Python to blacklist the known non-Unicode
85+
# codecs in the standard library. A more general mechanism to
86+
# reliably distinguish text encodings from other codecs will hopefully
87+
# be defined for Python 3.5
88+
#
89+
# See http://bugs.python.org/issue19619
90+
_is_text_encoding = True # Assume codecs are text encodings by default
8291

8392
def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
84-
incrementalencoder=None, incrementaldecoder=None, name=None):
93+
incrementalencoder=None, incrementaldecoder=None, name=None,
94+
_is_text_encoding=None):
8595
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
8696
self.name = name
8797
self.encode = encode
@@ -90,6 +100,8 @@ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
90100
self.incrementaldecoder = incrementaldecoder
91101
self.streamwriter = streamwriter
92102
self.streamreader = streamreader
103+
if _is_text_encoding is not None:
104+
self._is_text_encoding = _is_text_encoding
93105
return self
94106

95107
def __repr__(self):

Lib/encodings/base64_codec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,5 @@ def getregentry():
7676
incrementaldecoder=IncrementalDecoder,
7777
streamwriter=StreamWriter,
7878
streamreader=StreamReader,
79+
_is_text_encoding=False,
7980
)

Lib/encodings/bz2_codec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,5 @@ def getregentry():
9999
incrementaldecoder=IncrementalDecoder,
100100
streamwriter=StreamWriter,
101101
streamreader=StreamReader,
102+
_is_text_encoding=False,
102103
)

Lib/encodings/hex_codec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,5 @@ def getregentry():
7676
incrementaldecoder=IncrementalDecoder,
7777
streamwriter=StreamWriter,
7878
streamreader=StreamReader,
79+
_is_text_encoding=False,
7980
)

Lib/encodings/quopri_codec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,5 @@ def getregentry():
7272
incrementaldecoder=IncrementalDecoder,
7373
streamwriter=StreamWriter,
7474
streamreader=StreamReader,
75+
_is_text_encoding=False,
7576
)

Lib/encodings/rot_13.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def getregentry():
4444
incrementaldecoder=IncrementalDecoder,
4545
streamwriter=StreamWriter,
4646
streamreader=StreamReader,
47+
_is_text_encoding=False,
4748
)
4849

4950
### Decoding Map

Lib/encodings/uu_codec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,4 +126,5 @@ def getregentry():
126126
incrementaldecoder=IncrementalDecoder,
127127
streamreader=StreamReader,
128128
streamwriter=StreamWriter,
129+
_is_text_encoding=False,
129130
)

Lib/encodings/zlib_codec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,5 @@ def getregentry():
9999
incrementaldecoder=IncrementalDecoder,
100100
streamreader=StreamReader,
101101
streamwriter=StreamWriter,
102+
_is_text_encoding=False,
102103
)

0 commit comments

Comments
 (0)