Skip to content

Commit 38337d1

Browse files
committed
Issue #24000: Improved Argument Clinic's mapping of converters to legacy
"format units". Updated the documentation to match.
1 parent 95283fb commit 38337d1

File tree

7 files changed

+119
-98
lines changed

7 files changed

+119
-98
lines changed

Doc/howto/clinic.rst

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,14 @@ All Argument Clinic converters accept the following arguments:
758758
In addition, some converters accept additional arguments. Here is a list
759759
of these arguments, along with their meanings:
760760

761+
``accept``
762+
A set of Python types (and possibly pseudo-types);
763+
this restricts the allowable Python argument to values of these types.
764+
(This is not a general-purpose facility; as a rule it only supports
765+
specific lists of types as shown in the legacy converter table.)
766+
767+
To accept ``None``, add ``NoneType`` to this set.
768+
761769
``bitwise``
762770
Only supported for unsigned integers. The native integer value of this
763771
Python argument will be written to the parameter without any range checking,
@@ -772,39 +780,27 @@ of these arguments, along with their meanings:
772780
Only supported for strings. Specifies the encoding to use when converting
773781
this string from a Python str (Unicode) value into a C ``char *`` value.
774782

775-
``length``
776-
Only supported for strings. If true, requests that the length of the
777-
string be passed in to the impl function, just after the string parameter,
778-
in a parameter named ``<parameter_name>_length``.
779-
780-
``nullable``
781-
Only supported for strings. If true, this parameter may also be set to
782-
``None``, in which case the C parameter will be set to ``NULL``.
783783

784784
``subclass_of``
785785
Only supported for the ``object`` converter. Requires that the Python
786786
value be a subclass of a Python type, as expressed in C.
787787

788-
``types``
789-
Only supported for the ``object`` (and ``self``) converter. Specifies
788+
``type``
789+
Only supported for the ``object`` and ``self`` converters. Specifies
790790
the C type that will be used to declare the variable. Default value is
791791
``"PyObject *"``.
792792

793-
``types``
794-
A string containing a list of Python types (and possibly pseudo-types);
795-
this restricts the allowable Python argument to values of these types.
796-
(This is not a general-purpose facility; as a rule it only supports
797-
specific lists of types as shown in the legacy converter table.)
798-
799793
``zeroes``
800794
Only supported for strings. If true, embedded NUL bytes (``'\\0'``) are
801-
permitted inside the value.
795+
permitted inside the value. The length of the string will be passed in
796+
to the impl function, just after the string parameter, as a parameter named
797+
``<parameter_name>_length``.
802798

803799
Please note, not every possible combination of arguments will work.
804-
Often these arguments are implemented internally by specific ``PyArg_ParseTuple``
800+
Usually these arguments are implemented by specific ``PyArg_ParseTuple``
805801
*format units*, with specific behavior. For example, currently you cannot
806-
call ``str`` and pass in ``zeroes=True`` without also specifying an ``encoding``;
807-
although it's perfectly reasonable to think this would work, these semantics don't
802+
call ``unsigned_short`` without also specifying ``bitwise=True``.
803+
Although it's perfectly reasonable to think this would work, these semantics don't
808804
map to any existing format unit. So Argument Clinic doesn't support it. (Or, at
809805
least, not yet.)
810806

@@ -816,13 +812,13 @@ on the right is the text you'd replace it with.
816812
``'B'`` ``unsigned_char(bitwise=True)``
817813
``'b'`` ``unsigned_char``
818814
``'c'`` ``char``
819-
``'C'`` ``int(types='str')``
815+
``'C'`` ``int(accept={str})``
820816
``'d'`` ``double``
821817
``'D'`` ``Py_complex``
822-
``'es#'`` ``str(encoding='name_of_encoding', length=True, zeroes=True)``
823818
``'es'`` ``str(encoding='name_of_encoding')``
824-
``'et#'`` ``str(encoding='name_of_encoding', types='bytes bytearray str', length=True)``
825-
``'et'`` ``str(encoding='name_of_encoding', types='bytes bytearray str')``
819+
``'es#'`` ``str(encoding='name_of_encoding', zeroes=True)``
820+
``'et'`` ``str(encoding='name_of_encoding', accept={bytes, bytearray, str})``
821+
``'et#'`` ``str(encoding='name_of_encoding', accept={bytes, bytearray, str}, zeroes=True)``
826822
``'f'`` ``float``
827823
``'h'`` ``short``
828824
``'H'`` ``unsigned_short(bitwise=True)``
@@ -832,27 +828,27 @@ on the right is the text you'd replace it with.
832828
``'K'`` ``unsigned_PY_LONG_LONG(bitwise=True)``
833829
``'L'`` ``PY_LONG_LONG``
834830
``'n'`` ``Py_ssize_t``
831+
``'O'`` ``object``
835832
``'O!'`` ``object(subclass_of='&PySomething_Type')``
836833
``'O&'`` ``object(converter='name_of_c_function')``
837-
``'O'`` ``object``
838834
``'p'`` ``bool``
839-
``'s#'`` ``str(length=True)``
840835
``'S'`` ``PyBytesObject``
841836
``'s'`` ``str``
842-
``'s*'`` ``Py_buffer(types='str bytes bytearray buffer')``
843-
``'u#'`` ``Py_UNICODE(length=True)``
844-
``'u'`` ``Py_UNICODE``
837+
``'s#'`` ``str(accept={str, robuffer}, zeroes=True)``
838+
``'s*'`` ``Py_buffer(accept={buffer, str})``
845839
``'U'`` ``unicode``
846-
``'w*'`` ``Py_buffer(types='bytearray rwbuffer')``
847-
``'y#'`` ``str(types='bytes', length=True)``
840+
``'u'`` ``Py_UNICODE``
841+
``'u#'`` ``Py_UNICODE(zeroes=True)``
842+
``'w*'`` ``Py_buffer(accept={rwbuffer})``
848843
``'Y'`` ``PyByteArrayObject``
849-
``'y'`` ``str(types='bytes')``
844+
``'y'`` ``str(accept={robuffer})``
845+
``'y#'`` ``str(accept={robuffer}, zeroes=True)``
850846
``'y*'`` ``Py_buffer``
851-
``'Z#'`` ``Py_UNICODE(nullable=True, length=True)``
852-
``'z#'`` ``str(nullable=True, length=True)``
853-
``'Z'`` ``Py_UNICODE(nullable=True)``
854-
``'z'`` ``str(nullable=True)``
855-
``'z*'`` ``Py_buffer(types='str bytes bytearray buffer', nullable=True)``
847+
``'Z'`` ``Py_UNICODE(accept={str, NoneType})``
848+
``'Z#'`` ``Py_UNICODE(accept={str, NoneType}, zeroes=True)``
849+
``'z'`` ``str(accept={str, NoneType})``
850+
``'z#'`` ``str(accept={str, robuffer, NoneType}, zeroes=True)``
851+
``'z*'`` ``Py_buffer(accept={buffer, str, NoneType})``
856852
========= =================================================================================
857853

858854
As an example, here's our sample ``pickle.Pickler.dump`` using the proper

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ Documentation
8282
Tools/Demos
8383
-----------
8484

85+
- Issue #24000: Improved Argument Clinic's mapping of converters to legacy
86+
"format units". Updated the documentation to match.
87+
8588
- Issue #24001: Argument Clinic converters now use accept={type}
8689
instead of types={'type'} to specify the types the converter accepts.
8790

Modules/_dbmmodule.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ static PySequenceMethods dbm_as_sequence = {
274274
/*[clinic input]
275275
_dbm.dbm.get
276276
277-
key: str(accept={str, robuffer}, length=True)
277+
key: str(accept={str, robuffer}, zeroes=True)
278278
default: object(c_default="NULL") = b''
279279
/
280280
@@ -284,7 +284,8 @@ Return the value for key if present, otherwise default.
284284
static PyObject *
285285
_dbm_dbm_get_impl(dbmobject *self, const char *key,
286286
Py_ssize_clean_t key_length, PyObject *default_value)
287-
/*[clinic end generated code: output=b44f95eba8203d93 input=3c7c1afd9c508457]*/
287+
/*[clinic end generated code: output=b44f95eba8203d93 input=a3a279957f85eb6d]*/
288+
/*[clinic end generated code: output=4f5c0e523eaf1251 input=9402c0af8582dc69]*/
288289
{
289290
datum dbm_key, val;
290291

@@ -301,7 +302,7 @@ _dbm_dbm_get_impl(dbmobject *self, const char *key,
301302

302303
/*[clinic input]
303304
_dbm.dbm.setdefault
304-
key: str(accept={str, robuffer}, length=True)
305+
key: str(accept={str, robuffer}, zeroes=True)
305306
default: object(c_default="NULL") = b''
306307
/
307308
@@ -314,7 +315,7 @@ static PyObject *
314315
_dbm_dbm_setdefault_impl(dbmobject *self, const char *key,
315316
Py_ssize_clean_t key_length,
316317
PyObject *default_value)
317-
/*[clinic end generated code: output=52545886cf272161 input=a66fcb7f18ee2f50]*/
318+
/*[clinic end generated code: output=52545886cf272161 input=bf40c48edaca01d6]*/
318319
{
319320
datum dbm_key, val;
320321
Py_ssize_t tmp_size;

Modules/_gdbmmodule.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ _gdbm_gdbm_firstkey_impl(dbmobject *self)
383383
/*[clinic input]
384384
_gdbm.gdbm.nextkey
385385
386-
key: str(accept={str, robuffer}, length=True)
386+
key: str(accept={str, robuffer}, zeroes=True)
387387
/
388388
389389
Returns the key that follows key in the traversal.
@@ -400,7 +400,7 @@ to create a list in memory that contains them all:
400400
static PyObject *
401401
_gdbm_gdbm_nextkey_impl(dbmobject *self, const char *key,
402402
Py_ssize_clean_t key_length)
403-
/*[clinic end generated code: output=192ab892de6eb2f6 input=1eb2ff9b4b0e6ffd]*/
403+
/*[clinic end generated code: output=192ab892de6eb2f6 input=1f1606943614e36f]*/
404404
{
405405
PyObject *v;
406406
datum dbm_key, nextkey;

Modules/arraymodule.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,7 +1673,7 @@ array_array_tostring_impl(arrayobject *self)
16731673
/*[clinic input]
16741674
array.array.fromunicode
16751675
1676-
ustr: Py_UNICODE(length=True)
1676+
ustr: Py_UNICODE(zeroes=True)
16771677
/
16781678
16791679
Extends this array with data from the unicode string ustr.
@@ -1686,7 +1686,7 @@ some other type.
16861686
static PyObject *
16871687
array_array_fromunicode_impl(arrayobject *self, Py_UNICODE *ustr,
16881688
Py_ssize_clean_t ustr_length)
1689-
/*[clinic end generated code: output=ebb72fc16975e06d input=56bcedb5ef70139f]*/
1689+
/*[clinic end generated code: output=ebb72fc16975e06d input=150f00566ffbca6e]*/
16901690
{
16911691
char typecode;
16921692

Modules/unicodedata.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,7 +1215,7 @@ unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value)
12151215
unicodedata.UCD.lookup
12161216
12171217
self: self
1218-
name: str(accept={str, robuffer}, length=True)
1218+
name: str(accept={str, robuffer}, zeroes=True)
12191219
/
12201220
12211221
Look up character by name.
@@ -1227,7 +1227,7 @@ corresponding character. If not found, KeyError is raised.
12271227
static PyObject *
12281228
unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
12291229
Py_ssize_clean_t name_length)
1230-
/*[clinic end generated code: output=765cb8186788e6be input=2dfe682c2491447a]*/
1230+
/*[clinic end generated code: output=765cb8186788e6be input=a557be0f8607a0d6]*/
12311231
{
12321232
Py_UCS4 code;
12331233
unsigned int index;

Tools/clinic/clinic.py

Lines changed: 72 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2644,64 +2644,85 @@ class buffer: pass
26442644
class rwbuffer: pass
26452645
class robuffer: pass
26462646

2647-
@add_legacy_c_converter('s#', accept={str, robuffer}, length=True)
2648-
@add_legacy_c_converter('y', accept={robuffer})
2649-
@add_legacy_c_converter('y#', accept={robuffer}, length=True)
2650-
@add_legacy_c_converter('z', accept={str, NoneType})
2651-
@add_legacy_c_converter('z#', accept={str, NoneType}, length=True)
2652-
# add_legacy_c_converter not supported for es, es#, et, et#
2653-
# because of their extra encoding argument
2647+
def str_converter_key(types, encoding, zeroes):
2648+
return (frozenset(types), bool(encoding), bool(zeroes))
2649+
2650+
str_converter_argument_map = {}
2651+
26542652
class str_converter(CConverter):
26552653
type = 'const char *'
26562654
default_type = (str, Null, NoneType)
26572655
format_unit = 's'
26582656

2659-
def converter_init(self, *, encoding=None, accept={str}, length=False, zeroes=False):
2660-
2661-
self.length = bool(length)
2657+
def converter_init(self, *, accept={str}, encoding=None, zeroes=False):
26622658

2663-
is_b_or_ba = accept == {bytes, bytearray}
2664-
is_b_or_ba_or_none = accept == {bytes, bytearray, NoneType}
2665-
is_str = accept == {str}
2666-
is_str_or_none = accept == {str, NoneType}
2667-
is_robuffer = accept == {robuffer}
2668-
is_str_or_robuffer = accept == {str, robuffer}
2669-
is_str_or_robuffer_or_none = accept == {str, robuffer, NoneType}
2670-
2671-
format_unit = None
2659+
key = str_converter_key(accept, encoding, zeroes)
2660+
format_unit = str_converter_argument_map.get(key)
2661+
if not format_unit:
2662+
fail("str_converter: illegal combination of arguments", key)
26722663

2664+
self.format_unit = format_unit
2665+
self.length = bool(zeroes)
26732666
if encoding:
2667+
if self.default not in (Null, None, unspecified):
2668+
fail("str_converter: Argument Clinic doesn't support default values for encoded strings")
26742669
self.encoding = encoding
2670+
self.type = 'char *'
2671+
# sorry, clinic can't support preallocated buffers
2672+
# for es# and et#
2673+
self.c_default = "NULL"
26752674

2676-
if is_str and not length and not zeroes:
2677-
format_unit = 'es'
2678-
elif is_str_or_none and length and zeroes:
2679-
format_unit = 'es#'
2680-
elif is_b_or_ba and not length and not zeroes:
2681-
format_unit = 'et'
2682-
elif is_b_or_ba_or_none and length and zeroes:
2683-
format_unit = 'et#'
2684-
2685-
else:
2686-
if zeroes:
2687-
fail("str_converter: illegal combination of arguments (zeroes is only legal with an encoding)")
2688-
2689-
if is_str and not length:
2690-
format_unit = 's'
2691-
elif is_str_or_none and not length:
2692-
format_unit = 'z'
2693-
elif is_robuffer and not length:
2694-
format_unit = 'y'
2695-
elif is_robuffer and length:
2696-
format_unit = 'y#'
2697-
elif is_str_or_robuffer and length:
2698-
format_unit = 's#'
2699-
elif is_str_or_robuffer_or_none and length:
2700-
format_unit = 'z#'
2675+
def cleanup(self):
2676+
if self.encoding:
2677+
name = ensure_legal_c_identifier(self.name)
2678+
return "".join(["if (", name, ")\n PyMem_FREE(", name, ");\n"])
27012679

2702-
if not format_unit:
2703-
fail("str_converter: illegal combination of arguments")
2704-
self.format_unit = format_unit
2680+
#
2681+
# This is the fourth or fifth rewrite of registering all the
2682+
# crazy string converter format units. Previous approaches hid
2683+
# bugs--generally mismatches between the semantics of the format
2684+
# unit and the arguments necessary to represent those semantics
2685+
# properly. Hopefully with this approach we'll get it 100% right.
2686+
#
2687+
# The r() function (short for "register") both registers the
2688+
# mapping from arguments to format unit *and* registers the
2689+
# legacy C converter for that format unit.
2690+
#
2691+
def r(format_unit, *, accept, encoding=False, zeroes=False):
2692+
if not encoding and format_unit != 's':
2693+
# add the legacy c converters here too.
2694+
#
2695+
# note: add_legacy_c_converter can't work for
2696+
# es, es#, et, or et#
2697+
# because of their extra encoding argument
2698+
#
2699+
# also don't add the converter for 's' because
2700+
# the metaclass for CConverter adds it for us.
2701+
kwargs = {}
2702+
if accept != {str}:
2703+
kwargs['accept'] = accept
2704+
if zeroes:
2705+
kwargs['zeroes'] = True
2706+
added_f = functools.partial(str_converter, **kwargs)
2707+
legacy_converters[format_unit] = added_f
2708+
2709+
d = str_converter_argument_map
2710+
key = str_converter_key(accept, encoding, zeroes)
2711+
if key in d:
2712+
sys.exit("Duplicate keys specified for str_converter_argument_map!")
2713+
d[key] = format_unit
2714+
2715+
r('es', encoding=True, accept={str})
2716+
r('es#', encoding=True, zeroes=True, accept={str})
2717+
r('et', encoding=True, accept={bytes, bytearray, str})
2718+
r('et#', encoding=True, zeroes=True, accept={bytes, bytearray, str})
2719+
r('s', accept={str})
2720+
r('s#', zeroes=True, accept={robuffer, str})
2721+
r('y', accept={robuffer})
2722+
r('y#', zeroes=True, accept={robuffer})
2723+
r('z', accept={str, NoneType})
2724+
r('z#', zeroes=True, accept={robuffer, str, NoneType})
2725+
del r
27052726

27062727

27072728
class PyBytesObject_converter(CConverter):
@@ -2719,17 +2740,17 @@ class unicode_converter(CConverter):
27192740
default_type = (str, Null, NoneType)
27202741
format_unit = 'U'
27212742

2722-
@add_legacy_c_converter('u#', length=True)
2743+
@add_legacy_c_converter('u#', zeroes=True)
27232744
@add_legacy_c_converter('Z', accept={str, NoneType})
2724-
@add_legacy_c_converter('Z#', accept={str, NoneType}, length=True)
2745+
@add_legacy_c_converter('Z#', accept={str, NoneType}, zeroes=True)
27252746
class Py_UNICODE_converter(CConverter):
27262747
type = 'Py_UNICODE *'
27272748
default_type = (str, Null, NoneType)
27282749
format_unit = 'u'
27292750

2730-
def converter_init(self, *, accept={str}, length=False):
2751+
def converter_init(self, *, accept={str}, zeroes=False):
27312752
format_unit = 'Z' if accept=={str, NoneType} else 'u'
2732-
if length:
2753+
if zeroes:
27332754
format_unit += '#'
27342755
self.length = True
27352756
self.format_unit = format_unit

0 commit comments

Comments
 (0)