Skip to content

Commit 783b1a8

Browse files
dlech and dpgeorge
authored and committed
py/runtime: Allow multiple *args in a function call.
This is a partial implementation of PEP 448 to allow unpacking multiple star args in a function or method call.

This is implemented by changing the emitted bytecodes so that both positional args and star args are stored as positional args. A bitmap is added to indicate if an argument at a given position is a positional argument or a star arg.

In the generated code, this new bitmap takes the place of the old star arg. It is stored as a small int, so this means only the first N arguments can be star args where N is the number of bits in a small int.

The runtime is modified to interpret this new bytecode format while still trying to perform as few memory reallocations as possible.

Signed-off-by: David Lechner <david@pybricks.com>
1 parent 1e99d29 commit 783b1a8

10 files changed

Lines changed: 152 additions & 70 deletions

File tree

docs/differences/python_35.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Below is a list of finalised/accepted PEPs for Python 3.5 grouped into their imp
88
+----------------------------------------------------------------------------------------------------------+---------------+
99
| **Extensions to the syntax:** | **Status** |
1010
+--------------------------------------------------------+-------------------------------------------------+---------------+
11-
| `PEP 448 <https://www.python.org/dev/peps/pep-0448/>`_ | additional unpacking generalizations | |
11+
| `PEP 448 <https://www.python.org/dev/peps/pep-0448/>`_ | additional unpacking generalizations | Partial |
1212
+--------------------------------------------------------+-------------------------------------------------+---------------+
1313
| `PEP 465 <https://www.python.org/dev/peps/pep-0465/>`_ | a new matrix multiplication operator | Completed |
1414
+--------------------------------------------------------+-------------------------------------------------+---------------+

py/compile.c

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "py/asmbase.h"
3838
#include "py/nativeglue.h"
3939
#include "py/persistentcode.h"
40+
#include "py/smallint.h"
4041

4142
#if MICROPY_ENABLE_COMPILER
4243

@@ -2397,17 +2398,30 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
23972398
int n_positional = n_positional_extra;
23982399
uint n_keyword = 0;
23992400
uint star_flags = 0;
2400-
mp_parse_node_struct_t *star_args_node = NULL;
2401+
mp_uint_t star_args = 0;
24012402
for (size_t i = 0; i < n_args; i++) {
24022403
if (MP_PARSE_NODE_IS_STRUCT(args[i])) {
24032404
mp_parse_node_struct_t *pns_arg = (mp_parse_node_struct_t *)args[i];
24042405
if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_star) {
2405-
if (star_flags & MP_EMIT_STAR_FLAG_SINGLE) {
2406-
compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("can't have multiple *x"));
2406+
if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) {
2407+
compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("* arg after **"));
2408+
return;
2409+
}
2410+
#if MICROPY_DYNAMIC_COMPILER
2411+
if (i > mp_dynamic_compiler.small_int_bits)
2412+
#else
2413+
if (i > MP_SMALL_INT_BITS)
2414+
#endif
2415+
{
2416+
// If there are not enough bits in a small int to fit the flag, then we consider
2417+
// it a syntax error. It should be unlikely to have this many args in practice.
2418+
compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("too many args"));
24072419
return;
24082420
}
24092421
star_flags |= MP_EMIT_STAR_FLAG_SINGLE;
2410-
star_args_node = pns_arg;
2422+
star_args |= 1 << i;
2423+
compile_node(comp, pns_arg->nodes[0]);
2424+
n_positional++;
24112425
} else if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_dbl_star) {
24122426
star_flags |= MP_EMIT_STAR_FLAG_DOUBLE;
24132427
// double-star args are stored as kw arg with key of None
@@ -2438,27 +2452,22 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
24382452
}
24392453
} else {
24402454
normal_argument:
2441-
if (star_flags) {
2442-
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after */**"));
2455+
if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) {
2456+
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after **"));
24432457
return;
24442458
}
24452459
if (n_keyword > 0) {
2446-
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after keyword arg"));
2460+
compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after keyword arg"));
24472461
return;
24482462
}
24492463
compile_node(comp, args[i]);
24502464
n_positional++;
24512465
}
24522466
}
24532467

2454-
// compile the star/double-star arguments if we had them
2455-
// if we had one but not the other then we load "null" as a place holder
24562468
if (star_flags != 0) {
2457-
if (star_args_node == NULL) {
2458-
EMIT(load_null);
2459-
} else {
2460-
compile_node(comp, star_args_node->nodes[0]);
2461-
}
2469+
// one extra object that contains the star_args map
2470+
EMIT_ARG(load_const_small_int, star_args);
24622471
}
24632472

24642473
// emit the function/method call

py/runtime.c

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -701,9 +701,9 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
701701
}
702702
uint n_args = n_args_n_kw & 0xff;
703703
uint n_kw = (n_args_n_kw >> 8) & 0xff;
704-
mp_obj_t pos_seq = args[n_args + 2 * n_kw]; // may be MP_OBJ_NULL
704+
mp_uint_t star_args = mp_obj_get_int_truncated(args[n_args + 2 * n_kw]);
705705

706-
DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, seq=%p)\n", fun, self, n_args, n_kw, args, pos_seq);
706+
DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, map=%u)\n", fun, self, n_args, n_kw, args, star_args);
707707

708708
// We need to create the following array of objects:
709709
// args[0 .. n_args] unpacked(pos_seq) args[n_args .. n_args + 2 * n_kw] unpacked(kw_dict)
@@ -714,6 +714,20 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
714714
uint args2_alloc;
715715
uint args2_len = 0;
716716

717+
// Try to get a hint for unpacked * args length
718+
uint list_len = 0;
719+
720+
if (star_args != 0) {
721+
for (uint i = 0; i < n_args; i++) {
722+
if (star_args & (1 << i)) {
723+
mp_obj_t len = mp_obj_len_maybe(args[i]);
724+
if (len != MP_OBJ_NULL) {
725+
list_len += mp_obj_get_int(len);
726+
}
727+
}
728+
}
729+
}
730+
717731
// Try to get a hint for the size of the kw_dict
718732
uint kw_dict_len = 0;
719733

@@ -727,8 +741,8 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
727741

728742
// Extract the pos_seq sequence to the new args array.
729743
// Note that it can be arbitrary iterator.
730-
if (pos_seq == MP_OBJ_NULL) {
731-
// no sequence
744+
if (star_args == 0) {
745+
// no star args to unpack
732746

733747
// allocate memory for the new array of args
734748
args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len);
@@ -742,60 +756,69 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
742756
// copy the fixed pos args
743757
mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t);
744758
args2_len += n_args;
745-
746-
} else if (mp_obj_is_type(pos_seq, &mp_type_tuple) || mp_obj_is_type(pos_seq, &mp_type_list)) {
747-
// optimise the case of a tuple and list
748-
749-
// get the items
750-
size_t len;
751-
mp_obj_t *items;
752-
mp_obj_get_array(pos_seq, &len, &items);
753-
754-
// allocate memory for the new array of args
755-
args2_alloc = 1 + n_args + len + 2 * (n_kw + kw_dict_len);
756-
args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t));
757-
758-
// copy the self
759-
if (self != MP_OBJ_NULL) {
760-
args2[args2_len++] = self;
761-
}
762-
763-
// copy the fixed and variable position args
764-
mp_seq_cat(args2 + args2_len, args, n_args, items, len, mp_obj_t);
765-
args2_len += n_args + len;
766-
767759
} else {
768-
// generic iterator
760+
// at least one star arg to unpack
769761

770762
// allocate memory for the new array of args
771-
args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len) + 3;
763+
args2_alloc = 1 + n_args + list_len + 2 * (n_kw + kw_dict_len);
772764
args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t));
773765

774766
// copy the self
775767
if (self != MP_OBJ_NULL) {
776768
args2[args2_len++] = self;
777769
}
778770

779-
// copy the fixed position args
780-
mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t);
781-
args2_len += n_args;
782-
783-
// extract the variable position args from the iterator
784-
mp_obj_iter_buf_t iter_buf;
785-
mp_obj_t iterable = mp_getiter(pos_seq, &iter_buf);
786-
mp_obj_t item;
787-
while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
788-
if (args2_len >= args2_alloc) {
789-
args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t), args2_alloc * 2 * sizeof(mp_obj_t));
790-
args2_alloc *= 2;
771+
for (uint i = 0; i < n_args; i++) {
772+
mp_obj_t arg = args[i];
773+
if (star_args & (1 << i)) {
774+
// star arg
775+
if (mp_obj_is_type(arg, &mp_type_tuple) || mp_obj_is_type(arg, &mp_type_list)) {
776+
// optimise the case of a tuple and list
777+
778+
// get the items
779+
size_t len;
780+
mp_obj_t *items;
781+
mp_obj_get_array(arg, &len, &items);
782+
783+
// copy the items
784+
assert(args2_len + len <= args2_alloc);
785+
mp_seq_copy(args2 + args2_len, items, len, mp_obj_t);
786+
args2_len += len;
787+
} else {
788+
// generic iterator
789+
790+
// extract the variable position args from the iterator
791+
mp_obj_iter_buf_t iter_buf;
792+
mp_obj_t iterable = mp_getiter(arg, &iter_buf);
793+
mp_obj_t item;
794+
while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
795+
if (args2_len >= args2_alloc) {
796+
args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t),
797+
args2_alloc * 2 * sizeof(mp_obj_t));
798+
args2_alloc *= 2;
799+
}
800+
args2[args2_len++] = item;
801+
}
802+
}
803+
} else {
804+
// normal argument
805+
assert(args2_len < args2_alloc);
806+
args2[args2_len++] = arg;
791807
}
792-
args2[args2_len++] = item;
793808
}
794809
}
795810

796811
// The size of the args2 array now is the number of positional args.
797812
uint pos_args_len = args2_len;
798813

814+
// ensure there is still enough room for kw args
815+
if (args2_len + 2 * (n_kw + kw_dict_len) > args2_alloc) {
816+
uint new_alloc = args2_len + 2 * (n_kw + kw_dict_len);
817+
args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t),
818+
new_alloc * sizeof(mp_obj_t));
819+
args2_alloc = new_alloc;
820+
}
821+
799822
// Copy the kw args.
800823
for (uint i = 0; i < n_kw; i++) {
801824
mp_obj_t kw_key = args[n_args + i * 2];

py/vm.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -949,7 +949,7 @@ unwind_jump:;
949949
// unum & 0xff == n_positional
950950
// (unum >> 8) & 0xff == n_keyword
951951
// We have following stack layout here:
952-
// fun arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS
952+
// fun arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS
953953
sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 1;
954954
#if MICROPY_STACKLESS
955955
if (mp_obj_get_type(*sp) == &mp_type_fun_bc) {
@@ -1034,7 +1034,7 @@ unwind_jump:;
10341034
// unum & 0xff == n_positional
10351035
// (unum >> 8) & 0xff == n_keyword
10361036
// We have following stack layout here:
1037-
// fun self arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS
1037+
// fun self arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS
10381038
sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 2;
10391039
#if MICROPY_STACKLESS
10401040
if (mp_obj_get_type(*sp) == &mp_type_fun_bc) {

tests/basics/fun_callstar.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33
def foo(a, b, c):
44
print(a, b, c)
55

6+
foo(*(), 1, 2, 3)
7+
foo(*(1,), 2, 3)
8+
foo(*(1, 2), 3)
69
foo(*(1, 2, 3))
710
foo(1, *(2, 3))
811
foo(1, 2, *(3,))
912
foo(1, 2, 3, *())
13+
foo(*(1,), 2, *(3,))
14+
foo(*(1, 2), *(3,))
15+
foo(*(1,), *(2, 3))
1016

1117
# Another sequence type
1218
foo(1, 2, *[100])
@@ -29,10 +35,16 @@ def foo(self, a, b, c):
2935
print(a, b, c)
3036

3137
a = A()
38+
a.foo(*(), 1, 2, 3)
39+
a.foo(*(1,), 2, 3)
40+
a.foo(*(1, 2), 3)
3241
a.foo(*(1, 2, 3))
3342
a.foo(1, *(2, 3))
3443
a.foo(1, 2, *(3,))
3544
a.foo(1, 2, 3, *())
45+
a.foo(*(1,), 2, *(3,))
46+
a.foo(*(1, 2), *(3,))
47+
a.foo(*(1,), *(2, 3))
3648

3749
# Another sequence type
3850
a.foo(1, 2, *[100])

tests/basics/fun_callstardblstar.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ def f(a, b, c, d):
66
f(*(1, 2), **{'c':3, 'd':4})
77
f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)})
88

9+
try:
10+
eval("f(**{'a': 1}, *(2, 3, 4))")
11+
except SyntaxError:
12+
print("SyntaxError")
13+
914
# test calling a method with *tuple and **dict
1015

1116
class A:
@@ -15,3 +20,8 @@ def f(self, a, b, c, d):
1520
a = A()
1621
a.f(*(1, 2), **{'c':3, 'd':4})
1722
a.f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)})
23+
24+
try:
25+
eval("a.f(**{'a': 1}, *(2, 3, 4))")
26+
except SyntaxError:
27+
print("SyntaxError")

tests/basics/fun_kwvarargs.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,16 @@ def f4(*vargs, **kwargs):
2323
f4(*(1, 2))
2424
f4(kw_arg=3)
2525
f4(*(1, 2), kw_arg=3)
26+
27+
28+
# test evaluation order of arguments
29+
def f5(*vargs, **kwargs):
30+
print(vargs, kwargs)
31+
32+
33+
def print_ret(x):
34+
print(x)
35+
return x
36+
37+
38+
f5(*print_ret(["a", "b"]), kw_arg=print_ret(None))

tests/basics/python34.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,23 @@
66
print("SKIP")
77
raise SystemExit
88

9-
# from basics/fun_kwvarargs.py
10-
# test evaluation order of arguments (in 3.4 it's backwards, 3.5 it's fixed)
11-
def f4(*vargs, **kwargs):
12-
print(vargs, kwargs)
9+
1310
def print_ret(x):
1411
print(x)
1512
return x
16-
f4(*print_ret(['a', 'b']), kw_arg=print_ret(None))
1713

1814
# test evaluation order of dictionary key/value pair (in 3.4 it's backwards)
1915
{print_ret(1):print_ret(2)}
2016

17+
2118
# from basics/syntaxerror.py
2219
def test_syntax(code):
2320
try:
2421
exec(code)
2522
except SyntaxError:
2623
print("SyntaxError")
27-
test_syntax("f(*a, *b)") # can't have multiple * (in 3.5 we can)
28-
test_syntax("f(*a, b)") # can't have positional after *
24+
25+
2926
test_syntax("f(**a, b)") # can't have positional after **
3027
test_syntax("() = []") # can't assign to empty tuple (in 3.6 we can)
3128
test_syntax("del ()") # can't delete empty tuple (in 3.6 we can)

tests/basics/python34.py.exp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,8 @@
1-
None
2-
['a', 'b']
3-
('a', 'b') {'kw_arg': None}
41
2
52
1
63
SyntaxError
74
SyntaxError
85
SyntaxError
9-
SyntaxError
10-
SyntaxError
116
3.4
127
3 4
138
IndexError('foo',)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""
2+
categories: Syntax
3+
description: Argument unpacking does not work if the argument being unpacked is the nth or greater argument where n is the number of bits in an MP_SMALL_INT.
4+
cause: The implementation uses an MP_SMALL_INT to flag args that need to be unpacked.
5+
workaround: Use fewer arguments.
6+
"""
7+
8+
9+
def example(*args):
10+
print(len(args))
11+
12+
13+
MORE = ["a", "b", "c"]
14+
15+
# fmt: off
16+
example(
17+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
18+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
19+
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
20+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
21+
*MORE,
22+
)
23+
# fmt: on

0 commit comments

Comments
 (0)