Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address review comments.
  • Loading branch information
serhiy-storchaka committed Dec 3, 2022
commit 1c1775c667edeeebe3c0ee46d030e9759e3c9438
26 changes: 11 additions & 15 deletions Lib/test/test_capi/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,14 @@ class Str(str):


class CAPITest(unittest.TestCase):
# TODO: Test the following function:
#
# PyUnicode_ClearFreeList

@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
def test_new(self):
"""Test PyUnicode_New()"""
from _testcapi import unicode_new as new

for maxchar in 0, 0x61, 0xa1, 0x4f60, 0x1f600:
# XXX assertIs?
for maxchar in 0, 0x61, 0xa1, 0x4f60, 0x1f600, 0x10ffff:
self.assertEqual(new(0, maxchar), '')
self.assertEqual(new(5, maxchar), chr(maxchar)*5)
self.assertEqual(new(0, 0x110000), '')
Comment thread
vstinner marked this conversation as resolved.
Expand All @@ -41,6 +37,7 @@ def test_fill(self):
from _testcapi import unicode_fill as fill

strings = [
# all strings have exactly 5 characters
'abcde', '\xa1\xa2\xa3\xa4\xa5',
'\u4f60\u597d\u4e16\u754c\uff01',
'\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
Expand Down Expand Up @@ -70,7 +67,7 @@ def test_fill(self):
self.assertRaises(SystemError, fill, [], 0, 0, 0x78)
# CRASHES fill(s, 0, NULL, 0, 0)
# CRASHES fill(NULL, 0, 0, 0x78)
# TODO: Test PyUnicode_CopyCharacters() with non-modifiable unicode.
# TODO: Test PyUnicode_Fill() with non-modifiable unicode.

@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
Expand All @@ -79,9 +76,11 @@ def test_writechar(self):
from _testcapi import unicode_writechar as writechar

strings = [
# one string for every kind
'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
'\U0001f600\U0001f601\U0001f602'
]
# one character for every kind + out of range code
chars = [0x78, 0xa9, 0x20ac, 0x1f638, 0x110000]
for i, s in enumerate(strings):
for j, c in enumerate(chars):
Expand All @@ -106,6 +105,7 @@ def test_resize(self):
from _testcapi import unicode_resize as resize

strings = [
Comment thread
vstinner marked this conversation as resolved.
# all strings have exactly 3 characters
'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
'\U0001f600\U0001f601\U0001f602'
]
Expand Down Expand Up @@ -133,7 +133,8 @@ def test_append(self):
for left in strings:
left = left[::-1]
for right in strings:
self.assertEqual(append(left, right), left + right)
expected = left + right
self.assertEqual(append(left, right), expected)

self.assertRaises(SystemError, append, 'abc', b'abc')
self.assertRaises(SystemError, append, b'abc', 'abc')
Expand Down Expand Up @@ -608,15 +609,9 @@ def test_fromwidechar(self):
from _testcapi import SIZEOF_WCHAR_T

if SIZEOF_WCHAR_T == 2:
if sys.byteorder == 'little':
encoding = 'utf-16le'
elif sys.byteorder == 'little':
encoding = 'utf-16be'
encoding = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
elif SIZEOF_WCHAR_T == 4:
if sys.byteorder == 'little':
encoding = 'utf-32le'
elif sys.byteorder == 'little':
encoding = 'utf-32be'
encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'

for s in '', 'abc', '\xa1\xa2', '\u4f60', '\U0001f600':
b = s.encode(encoding)
Expand Down Expand Up @@ -1289,6 +1284,7 @@ def test_copycharacters(self):
from _testcapi import unicode_copycharacters

strings = [
# all strings have exactly 5 characters
'abcde', '\xa1\xa2\xa3\xa4\xa5',
'\u4f60\u597d\u4e16\u754c\uff01',
'\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
Expand Down
45 changes: 27 additions & 18 deletions Modules/_testcapi/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,6 @@ unicode_append(PyObject *self, PyObject *args)
return NULL;
}
PyUnicode_Append(&left_copy, right);
if (PyErr_Occurred()) {
Py_XDECREF(left_copy);
}
return left_copy;
}

Expand All @@ -272,31 +269,33 @@ unicode_appendanddel(PyObject *self, PyObject *args)
}
Py_XINCREF(right);
PyUnicode_AppendAndDel(&left_copy, right);
if (PyErr_Occurred()) {
Py_XDECREF(left_copy);
}
return left_copy;
}

/* Test PyUnicode_FromStringAndSize() */
static PyObject *
unicode_fromstringandsize(PyObject *self, PyObject *args)
{
const char *s = NULL;
const char *s;
Py_ssize_t bsize;
Py_ssize_t size = -100;

if (!PyArg_ParseTuple(args, "z#|n", &s, &size, &size)) {
if (!PyArg_ParseTuple(args, "z#|n", &s, &bsize, &size)) {
return NULL;
}

if (size == -100) {
size = bsize;
}
return PyUnicode_FromStringAndSize(s, size);
}

/* Test PyUnicode_FromString() */
static PyObject *
unicode_fromstring(PyObject *self, PyObject *arg)
{
const char *s = NULL;
Py_ssize_t size = -100;
const char *s;
Py_ssize_t size;

if (!PyArg_Parse(arg, "z#", &s, &size)) {
return NULL;
Expand All @@ -310,14 +309,19 @@ unicode_fromkindanddata(PyObject *self, PyObject *args)
{
int kind;
void *buffer;
Py_ssize_t bsize;
Py_ssize_t size = -100;

if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &size, &size)) {
if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &bsize, &size)) {
return NULL;
}

if (size == -100) {
size = bsize;
}
if (kind && size % kind) {
PyErr_SetString(PyExc_SystemError, "invalid size in unicode_fromkindanddata()");
PyErr_SetString(PyExc_AssertionError,
"invalid size in unicode_fromkindanddata()");
return NULL;
}
return PyUnicode_FromKindAndData(kind, buffer, kind ? size / kind : 0);
Expand Down Expand Up @@ -392,8 +396,8 @@ unicode_interninplace(PyObject *self, PyObject *arg)
static PyObject *
unicode_internfromstring(PyObject *self, PyObject *arg)
{
const char *s = NULL;
Py_ssize_t size = -100;
const char *s;
Py_ssize_t size;

if (!PyArg_Parse(arg, "z#", &s, &size)) {
return NULL;
Expand All @@ -405,14 +409,19 @@ unicode_internfromstring(PyObject *self, PyObject *arg)
static PyObject *
unicode_fromwidechar(PyObject *self, PyObject *args)
{
const char *s = NULL;
Py_ssize_t bsize = -100;
const char *s;
Py_ssize_t bsize;
Py_ssize_t size = -100;

if (!PyArg_ParseTuple(args, "z#|n", &s, &bsize, &size)) {
return NULL;
}
if (size == -100) {
if (bsize % SIZEOF_WCHAR_T) {
PyErr_SetString(PyExc_AssertionError,
"invalid size in unicode_fromwidechar()");
return NULL;
}
size = bsize / SIZEOF_WCHAR_T;
Comment thread
vstinner marked this conversation as resolved.
}
return PyUnicode_FromWideChar((const wchar_t *)s, size);
Expand Down Expand Up @@ -536,12 +545,12 @@ unicode_asucs4(PyObject *self, PyObject *args)
buffer[str_len] = 0xffffU;

if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
PyMem_FREE(buffer);
PyMem_Free(buffer);
return NULL;
}

result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
PyMem_FREE(buffer);
PyMem_Free(buffer);
return result;
}

Expand Down