Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Create ``any_unicode_count`` private helper for
both ``PyUnicode_Count`` and ``unicode_count`` in
``unicodeobject.c``.
80 changes: 23 additions & 57 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -8964,21 +8964,19 @@ _PyUnicode_InsertThousandsGrouping(
return count;
}


Py_ssize_t
PyUnicode_Count(PyObject *str,
PyObject *substr,
Py_ssize_t start,
Py_ssize_t end)
static Py_ssize_t
any_unicode_count(PyObject *str,
Comment thread
sobolevn marked this conversation as resolved.
Outdated
PyObject *substr,
Py_ssize_t start,
Py_ssize_t end)
{
// You must ensure that `str` and `substr` are both unicode objects
// before calling this function.
Py_ssize_t result;
int kind1, kind2;
const void *buf1 = NULL, *buf2 = NULL;
Py_ssize_t len1, len2;

if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
return -1;

kind1 = PyUnicode_KIND(str);
kind2 = PyUnicode_KIND(substr);
if (kind1 < kind2)
Expand Down Expand Up @@ -9039,6 +9037,18 @@ PyUnicode_Count(PyObject *str,
return -1;
}

/* Public entry point for counting `substr` within `str`.
 *
 * Both arguments are checked with ensure_unicode() first; `str` is
 * checked before `substr`, and `substr` is not checked at all when the
 * check on `str` fails.  If either check reports failure (a negative
 * result) this function returns -1 without doing any counting.
 * NOTE(review): presumably ensure_unicode() leaves an exception set in
 * that case -- confirm against its definition.
 *
 * Otherwise `str`, `substr`, `start` and `end` are forwarded unchanged to
 * any_unicode_count(), and its return value is returned as-is.
 */
Py_ssize_t
PyUnicode_Count(PyObject *str,
                PyObject *substr,
                Py_ssize_t start,
                Py_ssize_t end)
{
    /* Validate the haystack first, then the needle. */
    if (ensure_unicode(str) < 0) {
        return -1;
    }
    if (ensure_unicode(substr) < 0) {
        return -1;
    }

    /* Both objects passed the check; the shared helper does the counting. */
    return any_unicode_count(str, substr, start, end);
}

Py_ssize_t
PyUnicode_Find(PyObject *str,
PyObject *substr,
Expand Down Expand Up @@ -10858,60 +10868,16 @@ unicode_count(PyObject *self, PyObject *args)
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
int kind1, kind2;
const void *buf1, *buf2;
Py_ssize_t len1, len2, iresult;
Py_ssize_t iresult;

if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
return NULL;

kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
if (kind1 < kind2)
return PyLong_FromLong(0);

len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
ADJUST_INDICES(start, end, len1);
if (end - start < len2)
return PyLong_FromLong(0);

buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
switch (kind1) {
case PyUnicode_1BYTE_KIND:
iresult = ucs1lib_count(
((const Py_UCS1*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_2BYTE_KIND:
iresult = ucs2lib_count(
((const Py_UCS2*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_4BYTE_KIND:
iresult = ucs4lib_count(
((const Py_UCS4*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
default:
Py_UNREACHABLE();
}
iresult = any_unicode_count(self, substring, start, end);
if (iresult == -1)
return NULL;

result = PyLong_FromSsize_t(iresult);
Comment thread
sobolevn marked this conversation as resolved.
Outdated

assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring)));
if (kind2 != kind1)
PyMem_Free((void *)buf2);

return result;
}

Expand Down