Address review comments.

python · vstinner · May 4, 2023 · Aug 13, 2018 · Dec 3, 2022 · Dec 3, 2022
commit 1c1775c667edeeebe3c0ee46d030e9759e3c9438
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
@@ -16,18 +16,14 @@ class Str(str):
 
 
 class CAPITest(unittest.TestCase):
-    # TODO: Test the following function:
-    #
-    #   PyUnicode_ClearFreeList
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_new(self):
         """Test PyUnicode_New()"""
         from _testcapi import unicode_new as new
 
-        for maxchar in 0, 0x61, 0xa1, 0x4f60, 0x1f600:
-            # XXX assertIs?
+        for maxchar in 0, 0x61, 0xa1, 0x4f60, 0x1f600, 0x10ffff:
             self.assertEqual(new(0, maxchar), '')
             self.assertEqual(new(5, maxchar), chr(maxchar)*5)
         self.assertEqual(new(0, 0x110000), '')
@@ -41,6 +37,7 @@ def test_fill(self):
         from _testcapi import unicode_fill as fill
 
         strings = [
+            # all strings have exactly 5 characters
             'abcde', '\xa1\xa2\xa3\xa4\xa5',
             '\u4f60\u597d\u4e16\u754c\uff01',
             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
@@ -70,7 +67,7 @@ def test_fill(self):
         self.assertRaises(SystemError, fill, [], 0, 0, 0x78)
         # CRASHES fill(s, 0, NULL, 0, 0)
         # CRASHES fill(NULL, 0, 0, 0x78)
-        # TODO: Test PyUnicode_CopyCharacters() with non-modifiable unicode.
+        # TODO: Test PyUnicode_Fill() with non-modifiable unicode.
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -79,9 +76,11 @@ def test_writechar(self):
         from _testcapi import unicode_writechar as writechar
 
         strings = [
+            # one string for every kind
             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
             '\U0001f600\U0001f601\U0001f602'
         ]
+        # one character for every kind + out of range code
         chars = [0x78, 0xa9, 0x20ac, 0x1f638, 0x110000]
         for i, s in enumerate(strings):
             for j, c in enumerate(chars):
@@ -106,6 +105,7 @@ def test_resize(self):
         from _testcapi import unicode_resize as resize
 
         strings = [
+            # all strings have exactly 3 characters
             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
             '\U0001f600\U0001f601\U0001f602'
         ]
@@ -133,7 +133,8 @@ def test_append(self):
         for left in strings:
             left = left[::-1]
             for right in strings:
-                self.assertEqual(append(left, right), left + right)
+                expected = left + right
+                self.assertEqual(append(left, right), expected)
 
         self.assertRaises(SystemError, append, 'abc', b'abc')
         self.assertRaises(SystemError, append, b'abc', 'abc')
@@ -608,15 +609,9 @@ def test_fromwidechar(self):
         from _testcapi import SIZEOF_WCHAR_T
 
         if SIZEOF_WCHAR_T == 2:
-            if sys.byteorder == 'little':
-                encoding = 'utf-16le'
-            elif sys.byteorder == 'little':
-                encoding = 'utf-16be'
+            encoding = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
         elif SIZEOF_WCHAR_T == 4:
-            if sys.byteorder == 'little':
-                encoding = 'utf-32le'
-            elif sys.byteorder == 'little':
-                encoding = 'utf-32be'
+            encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
 
         for s in '', 'abc', '\xa1\xa2', '\u4f60', '\U0001f600':
             b = s.encode(encoding)
@@ -1289,6 +1284,7 @@ def test_copycharacters(self):
         from _testcapi import unicode_copycharacters
 
         strings = [
+            # all strings have exactly 5 characters
             'abcde', '\xa1\xa2\xa3\xa4\xa5',
             '\u4f60\u597d\u4e16\u754c\uff01',
             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'

diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
@@ -250,9 +250,6 @@ unicode_append(PyObject *self, PyObject *args)
         return NULL;
     }
     PyUnicode_Append(&left_copy, right);
-    if (PyErr_Occurred()) {
-        Py_XDECREF(left_copy);
-    }
     return left_copy;
 }
 
@@ -272,31 +269,33 @@ unicode_appendanddel(PyObject *self, PyObject *args)
     }
     Py_XINCREF(right);
     PyUnicode_AppendAndDel(&left_copy, right);
-    if (PyErr_Occurred()) {
-        Py_XDECREF(left_copy);
-    }
     return left_copy;
 }
 
 /* Test PyUnicode_FromStringAndSize() */
 static PyObject *
 unicode_fromstringandsize(PyObject *self, PyObject *args)
 {
-    const char *s = NULL;
+    const char *s;
+    Py_ssize_t bsize;
     Py_ssize_t size = -100;
 
-    if (!PyArg_ParseTuple(args, "z#|n", &s, &size, &size)) {
+    if (!PyArg_ParseTuple(args, "z#|n", &s, &bsize, &size)) {
         return NULL;
     }
+
+    if (size == -100) {
+        size = bsize;
+    }
     return PyUnicode_FromStringAndSize(s, size);
 }
 
 /* Test PyUnicode_FromString() */
 static PyObject *
 unicode_fromstring(PyObject *self, PyObject *arg)
 {
-    const char *s = NULL;
-    Py_ssize_t size = -100;
+    const char *s;
+    Py_ssize_t size;
 
     if (!PyArg_Parse(arg, "z#", &s, &size)) {
         return NULL;
@@ -310,14 +309,19 @@ unicode_fromkindanddata(PyObject *self, PyObject *args)
 {
     int kind;
     void *buffer;
+    Py_ssize_t bsize;
     Py_ssize_t size = -100;
 
-    if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &size, &size)) {
+    if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &bsize, &size)) {
         return NULL;
     }
 
+    if (size == -100) {
+        size = bsize;
+    }
     if (kind && size % kind) {
-        PyErr_SetString(PyExc_SystemError, "invalid size in unicode_fromkindanddata()");
+        PyErr_SetString(PyExc_AssertionError,
+                        "invalid size in unicode_fromkindanddata()");
         return NULL;
     }
     return PyUnicode_FromKindAndData(kind, buffer, kind ? size / kind : 0);
@@ -392,8 +396,8 @@ unicode_interninplace(PyObject *self, PyObject *arg)
 static PyObject *
 unicode_internfromstring(PyObject *self, PyObject *arg)
 {
-    const char *s = NULL;
-    Py_ssize_t size = -100;
+    const char *s;
+    Py_ssize_t size;
 
     if (!PyArg_Parse(arg, "z#", &s, &size)) {
         return NULL;
@@ -405,14 +409,19 @@ unicode_internfromstring(PyObject *self, PyObject *arg)
 static PyObject *
 unicode_fromwidechar(PyObject *self, PyObject *args)
 {
-    const char *s = NULL;
-    Py_ssize_t bsize = -100;
+    const char *s;
+    Py_ssize_t bsize;
     Py_ssize_t size = -100;
 
     if (!PyArg_ParseTuple(args, "z#|n", &s, &bsize, &size)) {
         return NULL;
     }
     if (size == -100) {
+        if (bsize % SIZEOF_WCHAR_T) {
+            PyErr_SetString(PyExc_AssertionError,
+                            "invalid size in unicode_fromwidechar()");
+            return NULL;
+        }
         size = bsize / SIZEOF_WCHAR_T;
     }
     return PyUnicode_FromWideChar((const wchar_t *)s, size);
@@ -536,12 +545,12 @@ unicode_asucs4(PyObject *self, PyObject *args)
     buffer[str_len] = 0xffffU;
 
     if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
-        PyMem_FREE(buffer);
+        PyMem_Free(buffer);
         return NULL;
     }
 
     result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
-    PyMem_FREE(buffer);
+    PyMem_Free(buffer);
     return result;
 }