Skip to content

Commit e3a29de

Browse files
pohmelie and dpgeorge
authored and committed
py/objstr: For str.format, add nested/computed fields support.
E.g.: '{:{}}'.format(123, '>20')

@pohmelie was the original author of this patch, but @dpgeorge made significant changes to reduce code size and improve efficiency.
1 parent 2bd758f commit e3a29de

2 files changed

Lines changed: 42 additions & 19 deletions

File tree

py/objstr.c

Lines changed: 34 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
#include "py/objlist.h"
3535
#include "py/runtime0.h"
3636
#include "py/runtime.h"
37+
#include "py/stackctrl.h"
3738

3839
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_obj_t *args, mp_obj_t dict);
3940

@@ -848,16 +849,12 @@ STATIC NORETURN void terse_str_format_value_error(void) {
848849
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "bad format string"));
849850
}
850851

851-
mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs) {
852-
assert(MP_OBJ_IS_STR_OR_BYTES(args[0]));
853-
854-
GET_STR_DATA_LEN(args[0], str, len);
855-
int arg_i = 0;
852+
vstr_t mp_obj_str_format_helper(const char *str, const char *top, int *arg_i, mp_uint_t n_args, const mp_obj_t *args, mp_map_t *kwargs) {
856853
vstr_t vstr;
857854
mp_print_t print;
858855
vstr_init_print(&vstr, 16, &print);
859856

860-
for (const byte *top = str + len; str < top; str++) {
857+
for (; str < top; str++) {
861858
if (*str == '}') {
862859
str++;
863860
if (str < top && *str == '}') {
@@ -886,7 +883,7 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
886883

887884
vstr_t *field_name = NULL;
888885
char conversion = '\0';
889-
vstr_t *format_spec = NULL;
886+
const char *format_spec = NULL;
890887

891888
if (str < top && *str != '}' && *str != '!' && *str != ':') {
892889
field_name = vstr_new();
@@ -927,9 +924,16 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
927924
// '{:d}'.format(True) returns '1'
928925
// So we treat {:} as {} and this later gets treated to be {!s}
929926
if (*str != '}') {
930-
format_spec = vstr_new();
931-
while (str < top && *str != '}') {
932-
vstr_add_byte(format_spec, *str++);
927+
format_spec = str;
928+
for (int nest = 1; str < top;) {
929+
if (*str == '{') {
930+
++nest;
931+
} else if (*str == '}') {
932+
if (--nest == 0) {
933+
break;
934+
}
935+
}
936+
++str;
933937
}
934938
}
935939
}
@@ -957,7 +961,7 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
957961
const char *field = vstr_null_terminated_str(field_name);
958962
const char *lookup = NULL;
959963
if (MP_LIKELY(unichar_isdigit(*field))) {
960-
if (arg_i > 0) {
964+
if (*arg_i > 0) {
961965
if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
962966
terse_str_format_value_error();
963967
} else {
@@ -970,7 +974,7 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
970974
nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
971975
}
972976
arg = args[index + 1];
973-
arg_i = -1;
977+
*arg_i = -1;
974978
} else {
975979
for (lookup = field; *lookup && *lookup != '.' && *lookup != '['; lookup++);
976980
mp_obj_t field_q = mp_obj_new_str(field, lookup - field, true/*?*/);
@@ -986,19 +990,19 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
986990
vstr_free(field_name);
987991
field_name = NULL;
988992
} else {
989-
if (arg_i < 0) {
993+
if (*arg_i < 0) {
990994
if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
991995
terse_str_format_value_error();
992996
} else {
993997
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError,
994998
"can't switch from manual field specification to automatic field numbering"));
995999
}
9961000
}
997-
if ((uint)arg_i >= n_args - 1) {
1001+
if ((uint)*arg_i >= n_args - 1) {
9981002
nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
9991003
}
1000-
arg = args[arg_i + 1];
1001-
arg_i++;
1004+
arg = args[(*arg_i) + 1];
1005+
(*arg_i)++;
10021006
}
10031007
if (!format_spec && !conversion) {
10041008
conversion = 's';
@@ -1037,7 +1041,10 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
10371041
// precision ::= integer
10381042
// type ::= "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"
10391043

1040-
const char *s = vstr_null_terminated_str(format_spec);
1044+
// recursively call the formatter to format any nested specifiers
1045+
MP_STACK_CHECK();
1046+
vstr_t format_spec_vstr = mp_obj_str_format_helper(format_spec, str, arg_i, n_args, args, kwargs);
1047+
const char *s = vstr_null_terminated_str(&format_spec_vstr);
10411048
if (isalignment(*s)) {
10421049
align = *s++;
10431050
} else if (*s && isalignment(s[1])) {
@@ -1084,8 +1091,7 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
10841091
"invalid format specifier"));
10851092
}
10861093
}
1087-
vstr_free(format_spec);
1088-
format_spec = NULL;
1094+
vstr_clear(&format_spec_vstr);
10891095
}
10901096
if (!align) {
10911097
if (arg_looks_numeric(arg)) {
@@ -1288,6 +1294,15 @@ mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs
12881294
}
12891295
}
12901296

1297+
return vstr;
1298+
}
1299+
1300+
mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs) {
1301+
assert(MP_OBJ_IS_STR_OR_BYTES(args[0]));
1302+
1303+
GET_STR_DATA_LEN(args[0], str, len);
1304+
int arg_i = 0;
1305+
vstr_t vstr = mp_obj_str_format_helper((const char*)str, (const char*)str + len, &arg_i, n_args, args, kwargs);
12911306
return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
12921307
}
12931308

tests/basics/string_format.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,14 @@ def test(fmt, *args):
6666
test("{:^20}", "foo")
6767
test("{:<20}", "foo")
6868

69+
# nested format specifiers
70+
print("{:{}}".format(123, '#>10'))
71+
print("{:{}{}{}}".format(123, '#', '>', '10'))
72+
print("{0:{1}{2}}".format(123, '#>', '10'))
73+
print("{text:{align}{width}}".format(text="foo", align="<", width=20))
74+
print("{text:{align}{width}}".format(text="foo", align="^", width=10))
75+
print("{text:{align}{width}}".format(text="foo", align=">", width=30))
76+
6977
print("{foo}/foo".format(foo="bar"))
7078
print("{}".format(123, foo="bar"))
7179
print("{}-{foo}".format(123, foo="bar"))

0 commit comments

Comments
 (0)