Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,13 @@ def test_isidentifier(self):
self.assertFalse("©".isidentifier())
self.assertFalse("0".isidentifier())

@support.cpython_only
def test_isidentifier_legacy(self):
    """Check str.isidentifier() on a string built by the legacy test helper.

    The identifier is made only of characters outside the Basic
    Multilingual Plane, and it is checked both as an ordinary str and as
    the object returned by ``_testcapi.unicode_legacy_string()``.
    """
    import _testcapi
    # Mathematical bold Fraktur letters: each one is a non-BMP code point.
    identifier = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
    self.assertTrue(identifier.isidentifier())
    legacy_identifier = _testcapi.unicode_legacy_string(identifier)
    self.assertTrue(legacy_identifier.isidentifier())

def test_isprintable(self):
self.assertTrue("".isprintable())
self.assertTrue(" ".isprintable())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixed :meth:`str.isidentifier` for non-canonicalized strings containing
non-BMP characters on Windows.
20 changes: 17 additions & 3 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -12356,20 +12356,34 @@ PyUnicode_IsIdentifier(PyObject *self)
return len && i == len;
}
else {
Py_ssize_t i, len = PyUnicode_GET_SIZE(self);
Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
if (len == 0) {
/* an empty string is not a valid identifier */
return 0;
}

const wchar_t *wstr = _PyUnicode_WSTR(self);
Py_UCS4 ch = wstr[0];
Py_UCS4 ch = wstr[i++];
if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
&& i < len
&& Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
{
ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
i++;
}
if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
return 0;
}

for (i = 1; i < len; i++) {
for (; i < len; i++) {
ch = wstr[i];
if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
&& i < len
&& Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
{
ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
i++;
}
if (!_PyUnicode_IsXidContinue(ch)) {
return 0;
}
Expand Down