Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Doc/library/stdtypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1653,6 +1653,15 @@ expression support in the :mod:`re` module).
from the "Alphabetic" property defined in the Unicode Standard.


.. method:: str.isascii()

Return true if the string is empty or all characters in the string are ASCII,
false otherwise.
ASCII characters have code points in the range U+0000-U+007F.

.. versionadded:: 3.7


.. method:: str.isdecimal()

Return true if all characters in the string are decimal
Expand Down Expand Up @@ -2941,6 +2950,16 @@ place, and instead produce new objects.
False


.. method:: bytes.isascii()
bytearray.isascii()

Return true if the sequence is empty or all bytes in the sequence are ASCII,
false otherwise.
ASCII bytes are in the range 0-0x7F.

.. versionadded:: 3.7


.. method:: bytes.isdigit()
bytearray.isdigit()

Expand Down
2 changes: 2 additions & 0 deletions Include/bytes_methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len);
extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len);
extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len);
extern PyObject* _Py_bytes_isascii(const char *cptr, Py_ssize_t len);
extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len);
extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len);
extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len);
Expand Down Expand Up @@ -37,6 +38,7 @@ extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to);
extern const char _Py_isspace__doc__[];
extern const char _Py_isalpha__doc__[];
extern const char _Py_isalnum__doc__[];
extern const char _Py_isascii__doc__[];
extern const char _Py_isdigit__doc__[];
extern const char _Py_islower__doc__[];
extern const char _Py_isupper__doc__[];
Expand Down
1 change: 1 addition & 0 deletions Lib/collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1214,6 +1214,7 @@ def index(self, sub, start=0, end=_sys.maxsize):
return self.data.index(sub, start, end)
# Each predicate below forwards to the same-named method on self.data
# and returns that method's result unchanged.
def isalpha(self): return self.data.isalpha()
def isalnum(self): return self.data.isalnum()
def isascii(self): return self.data.isascii()
def isdecimal(self): return self.data.isdecimal()
def isdigit(self): return self.data.isdigit()
def isidentifier(self): return self.data.isidentifier()
Expand Down
8 changes: 8 additions & 0 deletions Lib/test/string_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,14 @@ def test_isalnum(self):
self.checkequal(False, 'abc\n', 'isalnum')
self.checkraises(TypeError, 'abc', 'isalnum', 42)

def test_isascii(self):
self.checkequal(True, '', 'isascii')
self.checkequal(True, '\x00', 'isascii')
self.checkequal(True, '\x7f', 'isascii')
self.checkequal(True, '\x00\x7f', 'isascii')
self.checkequal(False, '\x80', 'isascii')
self.checkequal(False, '\xe9', 'isascii')

def test_isdigit(self):
self.checkequal(False, '', 'isdigit')
self.checkequal(False, 'a', 'isdigit')
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_doctest.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def non_Python_modules(): r"""

>>> import builtins
>>> tests = doctest.DocTestFinder().find(builtins)
>>> 790 < len(tests) < 810 # approximate number of objects with docstrings
>>> 800 < len(tests) < 820 # approximate number of objects with docstrings
True
>>> real_tests = [t for t in tests if len(t.examples) > 0]
>>> len(real_tests) # objects that actually have doctests
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,11 @@ def test_isalpha(self):
self.assertFalse('\U0001F40D'.isalpha())
self.assertFalse('\U0001F46F'.isalpha())

def test_isascii(self):
    # Run the inherited checks first, then add characters that only a
    # str can hold (code points above U+00FF).
    super().test_isascii()
    for non_ascii in ("\u20ac", "\U0010ffff"):
        self.assertFalse(non_ascii.isascii())

def test_isdecimal(self):
self.checkequalnofix(False, '', 'isdecimal')
self.checkequalnofix(False, 'a', 'isdecimal')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add ``.isascii()`` method to ``str``, ``bytes`` and ``bytearray``.
It can be used to test whether a string contains only ASCII characters.
2 changes: 2 additions & 0 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2159,6 +2159,8 @@ bytearray_methods[] = {
_Py_isalnum__doc__},
{"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
_Py_isalpha__doc__},
{"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
_Py_isascii__doc__},
{"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
_Py_isdigit__doc__},
{"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
Expand Down
20 changes: 20 additions & 0 deletions Objects/bytes_methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,26 @@ _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
}


PyDoc_STRVAR_shared(_Py_isascii__doc__,
"B.isascii() -> bool\n\
\n\
Return True if B is empty or all characters in B are ASCII,\n\
False otherwise.");

/* Scan the `len` bytes starting at `cptr` and return a Python false value
   as soon as a byte with value 128 or above is found.  When the loop
   finishes without finding one (including when `len` is 0 and the loop
   body never runs) the function falls through to return a Python true
   value. */
PyObject*
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{
    /* Read the data as unsigned bytes so the comparison against 128 does
       not depend on whether plain `char` is signed.  The cast keeps the
       `const` qualifier of the parameter instead of dropping it. */
    const unsigned char *p = (const unsigned char *) cptr;
    const unsigned char *e = p + len;
    for (; p < e; p++) {
        if (*p >= 128) {
            Py_RETURN_FALSE;
        }
    }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to optimize this function, I suggest you look at ascii_decode() in Objects/unicodeobject.c, which is heavily optimized to scan ASCII characters in a uint8_t* string. It works on "unsigned long" words rather than on individual bytes.

But that should be done in a second PR. Right now, I would prefer to push this PR before 3.7b1 (Monday).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree.

Py_RETURN_TRUE;
}


PyDoc_STRVAR_shared(_Py_isdigit__doc__,
"B.isdigit() -> bool\n\
\n\
Expand Down
2 changes: 2 additions & 0 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2459,6 +2459,8 @@ bytes_methods[] = {
_Py_isalnum__doc__},
{"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
_Py_isalpha__doc__},
{"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
_Py_isascii__doc__},
{"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
_Py_isdigit__doc__},
{"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
Expand Down
23 changes: 22 additions & 1 deletion Objects/clinic/unicodeobject.c.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,27 @@ unicode_expandtabs(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyOb
return return_value;
}

PyDoc_STRVAR(unicode_isascii__doc__,
"isascii($self, /)\n"
"--\n"
"\n"
"Return True if all characters in the string are ASCII, False otherwise.\n"
"\n"
"ASCII characters have code points in the range U+0000-U+007F.\n"
"Empty string is ASCII too.");

#define UNICODE_ISASCII_METHODDEF \
{"isascii", (PyCFunction)unicode_isascii, METH_NOARGS, unicode_isascii__doc__},

static PyObject *
unicode_isascii_impl(PyObject *self);

static PyObject *
unicode_isascii(PyObject *self, PyObject *Py_UNUSED(ignored))
{
return unicode_isascii_impl(self);
}

PyDoc_STRVAR(unicode_islower__doc__,
"islower($self, /)\n"
"--\n"
Expand Down Expand Up @@ -930,4 +951,4 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored))
{
return unicode_sizeof_impl(self);
}
/*[clinic end generated code: output=1ad4e81b68194264 input=a9049054013a1b77]*/
/*[clinic end generated code: output=561c88c912b8fe3b input=a9049054013a1b77]*/
6 changes: 6 additions & 0 deletions Objects/stringlib/ctype.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ stringlib_isalnum(PyObject *self)
return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}

/* isascii() entry point referenced by the bytes and bytearray method
   tables: obtain the data pointer and length of `self` through the
   STRINGLIB_* macros and hand them to _Py_bytes_isascii(), returning
   whatever it returns. */
static PyObject*
stringlib_isascii(PyObject *self)
{
    const char *buffer = STRINGLIB_STR(self);
    Py_ssize_t size = STRINGLIB_LEN(self);

    return _Py_bytes_isascii(buffer, size);
}

static PyObject*
stringlib_isdigit(PyObject *self)
{
Expand Down
20 changes: 20 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -11611,6 +11611,25 @@ unicode_index(PyObject *self, PyObject *args)
return PyLong_FromSsize_t(result);
}

/*[clinic input]
str.isascii as unicode_isascii

Return True if all characters in the string are ASCII, False otherwise.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick, maybe copy from the doc: "Return true if the string is empty or all characters in the string are ASCII," rather than "Empty string is ASCII too." below.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Return true if the string is empty or all characters in the string are ASCII, False otherwise." overs 80 columns.
And Argument Clinic reports an error when I wrap the line.

All other docstrings in unicodeobject.c have short (<80 columns) summaries.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wow, that's a nasty issue. Ignore my comment and leave the docstring as it is ;-)


ASCII characters have code points in the range U+0000-U+007F.
Empty string is ASCII too.
[clinic start generated code]*/

static PyObject *
unicode_isascii_impl(PyObject *self)
/*[clinic end generated code: output=c5910d64b5a8003f input=5a43cbc6399621d5]*/
{
    int ascii_only;

    /* PyUnicode_READY() signals failure with -1; pass that on to the
       caller as NULL. */
    if (PyUnicode_READY(self) == -1) {
        return NULL;
    }

    /* Turn the result of the PyUnicode_IS_ASCII() check into a Python
       bool object. */
    ascii_only = PyUnicode_IS_ASCII(self);
    return PyBool_FromLong(ascii_only);
}

/*[clinic input]
str.islower as unicode_islower

Expand Down Expand Up @@ -13801,6 +13820,7 @@ static PyMethodDef unicode_methods[] = {
UNICODE_UPPER_METHODDEF
{"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
{"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
UNICODE_ISASCII_METHODDEF
UNICODE_ISLOWER_METHODDEF
UNICODE_ISUPPER_METHODDEF
UNICODE_ISTITLE_METHODDEF
Expand Down