Skip to content

Commit 1d6a1ca

Browse files
author
marc-andre.lemburg
committed
Slightly revised version of patch #1538956:
Replace UnicodeDecodeErrors raised during == and != compares of Unicode and other objects with a new UnicodeWarning. All other comparisons continue to raise exceptions. Exceptions other than UnicodeDecodeErrors are also left untouched. git-svn-id: http://svn.python.org/projects/python/trunk@51276 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent e5efb81 commit 1d6a1ca

11 files changed

Lines changed: 171 additions & 37 deletions

File tree

Doc/api/concrete.tex

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,6 +1560,31 @@ \subsubsection{Methods and Slot Functions \label{unicodeMethodsAndSlots}}
15601560
greater than, respectively.
15611561
\end{cfuncdesc}
15621562

1563+
\begin{cfuncdesc}{int}{PyUnicode_RichCompare}{PyObject *left,
1564+
PyObject *right,
1565+
int op}
1566+
1567+
% This entry could use some polishing - my TeX is too
1568+
% rusty these days... (MAL)
1569+
1570+
Rich compare two strings and return one of the following:
1571+
\begin{verbatim}
1572+
- NULL in case an exception was raised
1573+
- Py_True or Py_False for successful comparisons
1574+
- Py_NotImplemented in case the type combination is unknown
1575+
\end{verbatim}
1576+
1577+
Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1578+
case the conversion of the arguments to Unicode fails with a
1579+
UnicodeDecodeError.
1580+
1581+
Possible values for \var{op}:
1582+
\begin{verbatim}
1583+
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1584+
\end{verbatim}
1585+
1586+
\end{cfuncdesc}
1587+
15631588
\begin{cfuncdesc}{PyObject*}{PyUnicode_Format}{PyObject *format,
15641589
PyObject *args}
15651590
Return a new string object from \var{format} and \var{args}; this

Doc/api/exceptions.tex

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,10 +288,11 @@ \chapter{Exception Handling \label{exceptionHandling}}
288288
names are \samp{PyExc_} followed by the Python exception name.
289289
These have the type \ctype{PyObject*}; they are all class objects.
290290
Their names are \cdata{PyExc_Warning}, \cdata{PyExc_UserWarning},
291-
\cdata{PyExc_DeprecationWarning}, \cdata{PyExc_SyntaxWarning},
292-
\cdata{PyExc_RuntimeWarning}, and \cdata{PyExc_FutureWarning}.
293-
\cdata{PyExc_Warning} is a subclass of \cdata{PyExc_Exception}; the
294-
other warning categories are subclasses of \cdata{PyExc_Warning}.
291+
\cdata{PyExc_UnicodeWarning}, \cdata{PyExc_DeprecationWarning},
292+
\cdata{PyExc_SyntaxWarning}, \cdata{PyExc_RuntimeWarning}, and
293+
\cdata{PyExc_FutureWarning}. \cdata{PyExc_Warning} is a subclass of
294+
\cdata{PyExc_Exception}; the other warning categories are subclasses
295+
of \cdata{PyExc_Warning}.
295296

296297
For information about warning control, see the documentation for the
297298
\module{warnings} module and the \programopt{-W} option in the

Doc/lib/libexcs.tex

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,11 @@ \section{Built-in Exceptions}
456456
\versionadded{2.5}
457457
\end{excdesc}
458458

459+
\begin{excdesc}{UnicodeWarning}
460+
Base class for warnings related to Unicode.
461+
\versionadded{2.5}
462+
\end{excdesc}
463+
459464
The class hierarchy for built-in exceptions is:
460465

461466
\verbatiminput{../../Lib/test/exception_hierarchy.txt}

Doc/lib/libwarnings.tex

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ \subsection{Warning Categories \label{warning-categories}}
7676

7777
\lineii{ImportWarning}{Base category for warnings triggered during the
7878
process of importing a module (ignored by default).}
79+
80+
\lineii{UnicodeWarning}{Base category for warnings related to Unicode.}
81+
7982
\end{tableii}
8083

8184
While these are technically built-in exceptions, they are documented

Include/pyerrors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ PyAPI_DATA(PyObject *) PyExc_SyntaxWarning;
173173
PyAPI_DATA(PyObject *) PyExc_RuntimeWarning;
174174
PyAPI_DATA(PyObject *) PyExc_FutureWarning;
175175
PyAPI_DATA(PyObject *) PyExc_ImportWarning;
176+
PyAPI_DATA(PyObject *) PyExc_UnicodeWarning;
176177

177178

178179
/* Convenience functions */

Include/unicodeobject.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
189189
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
190190
# define PyUnicode_Replace PyUnicodeUCS2_Replace
191191
# define PyUnicode_Resize PyUnicodeUCS2_Resize
192+
# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
192193
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
193194
# define PyUnicode_Split PyUnicodeUCS2_Split
194195
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
@@ -266,6 +267,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
266267
# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
267268
# define PyUnicode_Replace PyUnicodeUCS4_Replace
268269
# define PyUnicode_Resize PyUnicodeUCS4_Resize
270+
# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
269271
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
270272
# define PyUnicode_Split PyUnicodeUCS4_Split
271273
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
@@ -1139,6 +1141,28 @@ PyAPI_FUNC(int) PyUnicode_Compare(
11391141
PyObject *right /* Right string */
11401142
);
11411143

1144+
/* Rich compare two strings and return one of the following:
1145+
1146+
- NULL in case an exception was raised
1147+
- Py_True or Py_False for successful comparisons
1148+
- Py_NotImplemented in case the type combination is unknown
1149+
1150+
Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1151+
case the conversion of the arguments to Unicode fails with a
1152+
UnicodeDecodeError.
1153+
1154+
Possible values for op:
1155+
1156+
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1157+
1158+
*/
1159+
1160+
PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
1161+
PyObject *left, /* Left string */
1162+
PyObject *right, /* Right string */
1163+
int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
1164+
);
1165+
11421166
/* Apply an argument tuple or dictionary to a format string and return
11431167
the resulting Unicode string. */
11441168

Lib/test/exception_hierarchy.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ BaseException
4545
+-- UserWarning
4646
+-- FutureWarning
4747
+-- ImportWarning
48+
+-- UnicodeWarning

Misc/NEWS

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@ What's New in Python 2.5 release candidate 1?
1212
Core and builtins
1313
-----------------
1414

15-
- Fix segfault when doing string formatting on subclasses of long.
16-
17-
- Fix bug related to __len__ functions using values > 2**32 on 64-bit machines
18-
with new-style classes.
19-
20-
- Fix bug related to __len__ functions returning negative values with
21-
classic classes.
22-
23-
- Patch #1538606, Fix __index__() clipping. There were some problems
24-
discovered with the API and how integers that didn't fit into Py_ssize_t
25-
were handled. This patch attempts to provide enough alternatives
26-
to effectively use __index__.
15+
- Unicode objects will no longer raise an exception when being
16+
compared equal or unequal to a string and causing a
17+
UnicodeDecodeError exception, e.g. as a result of a decoding failure.
18+
19+
Instead, the equal (==) and unequal (!=) comparison operators will
20+
now issue a UnicodeWarning and interpret the two objects as
21+
unequal. The UnicodeWarning can be filtered as desired using
22+
the warning framework, e.g. silenced completely, turned into an
23+
exception, logged, etc.
24+
25+
Note that compare operators other than equal and unequal will still
26+
raise UnicodeDecodeError exceptions as they've always done.
2727

2828
- Bug #1536021: __hash__ may now return long int; the final hash
2929
value is obtained by invoking hash on the long int.
@@ -99,6 +99,8 @@ Build
9999
C API
100100
-----
101101

102+
- New API for Unicode rich comparisons: PyUnicode_RichCompare()
103+
102104
- Bug #1069160. Internal correctness changes were made to
103105
``PyThreadState_SetAsyncExc()``. A test case was added, and
104106
the documentation was changed to state that the return value

Objects/exceptions.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1948,6 +1948,14 @@ SimpleExtendsException(PyExc_Warning, ImportWarning,
19481948
"Base class for warnings about probable mistakes in module imports");
19491949

19501950

1951+
/*
1952+
* UnicodeWarning extends Warning
1953+
*/
1954+
SimpleExtendsException(PyExc_Warning, UnicodeWarning,
1955+
"Base class for warnings about Unicode related problems, mostly\n"
1956+
"related to conversion problems.");
1957+
1958+
19511959
/* Pre-computed MemoryError instance. Best to create this as early as
19521960
* possible and not wait until a MemoryError is actually raised!
19531961
*/
@@ -2048,6 +2056,7 @@ _PyExc_Init(void)
20482056
PRE_INIT(RuntimeWarning)
20492057
PRE_INIT(FutureWarning)
20502058
PRE_INIT(ImportWarning)
2059+
PRE_INIT(UnicodeWarning)
20512060

20522061
m = Py_InitModule4("exceptions", functions, exceptions_doc,
20532062
(PyObject *)NULL, PYTHON_API_VERSION);
@@ -2113,6 +2122,7 @@ _PyExc_Init(void)
21132122
POST_INIT(RuntimeWarning)
21142123
POST_INIT(FutureWarning)
21152124
POST_INIT(ImportWarning)
2125+
POST_INIT(UnicodeWarning)
21162126

21172127
PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL);
21182128
if (!PyExc_MemoryErrorInst)

Objects/object.c

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -731,23 +731,6 @@ default_3way_compare(PyObject *v, PyObject *w)
731731
return (vv < ww) ? -1 : (vv > ww) ? 1 : 0;
732732
}
733733

734-
#ifdef Py_USING_UNICODE
735-
/* Special case for Unicode */
736-
if (PyUnicode_Check(v) || PyUnicode_Check(w)) {
737-
c = PyUnicode_Compare(v, w);
738-
if (!PyErr_Occurred())
739-
return c;
740-
/* TypeErrors are ignored: if Unicode coercion fails due
741-
to one of the arguments not having the right type, we
742-
continue as defined by the coercion protocol (see
743-
above). Luckily, decoding errors are reported as
744-
ValueErrors and are not masked by this technique. */
745-
if (!PyErr_ExceptionMatches(PyExc_TypeError))
746-
return -2;
747-
PyErr_Clear();
748-
}
749-
#endif
750-
751734
/* None is smaller than anything */
752735
if (v == Py_None)
753736
return -1;

0 commit comments

Comments
 (0)