Skip to content

Commit 4d94474

Browse files
committed
Rewrite the parsing of field names to be more consistent with respect to recursive expansion
1 parent 4895363 commit 4d94474

2 files changed

Lines changed: 62 additions & 63 deletions

File tree

Lib/test/test_unicode.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,7 @@ def __format__(self, format_spec):
892892
self.assertRaises(ValueError, "{0".format)
893893
self.assertRaises(IndexError, "{0.}".format)
894894
self.assertRaises(ValueError, "{0.}".format, 0)
895-
self.assertRaises(IndexError, "{0[}".format)
895+
self.assertRaises(ValueError, "{0[}".format)
896896
self.assertRaises(ValueError, "{0[}".format, [])
897897
self.assertRaises(KeyError, "{0]}".format)
898898
self.assertRaises(ValueError, "{0.[]}".format, 0)
@@ -944,6 +944,14 @@ def __format__(self, format_spec):
944944
'')
945945

946946
self.assertEqual("{[{}]}".format({"{}": 5}), "5")
947+
self.assertEqual("{[{}]}".format({"{}" : "a"}), "a")
948+
self.assertEqual("{[{]}".format({"{" : "a"}), "a")
949+
self.assertEqual("{[}]}".format({"}" : "a"}), "a")
950+
self.assertEqual("{[[]}".format({"[" : "a"}), "a")
951+
self.assertEqual("{[!]}".format({"!" : "a"}), "a")
952+
self.assertRaises(ValueError, "{a{}b}".format, 42)
953+
self.assertRaises(ValueError, "{a{b}".format, 42)
954+
self.assertRaises(ValueError, "{[}".format, 42)
947955

948956
def test_format_map(self):
949957
self.assertEqual(''.format_map({}), '')

Objects/stringlib/unicode_format.h

Lines changed: 53 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
543543

544544
static int
545545
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
546-
Py_UCS4 *conversion)
546+
int *format_spec_needs_expanding, Py_UCS4 *conversion)
547547
{
548548
/* Note this function works if the field name is zero length,
549549
which is good. Zero length field names are handled later, in
@@ -561,6 +561,15 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
561561
field_name->start = str->start;
562562
while (str->start < str->end) {
563563
switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
564+
case '{':
565+
PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
566+
return 0;
567+
case '[':
568+
for (; str->start < str->end; str->start++)
569+
if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
570+
break;
571+
continue;
572+
case '}':
564573
case ':':
565574
case '!':
566575
break;
@@ -570,41 +579,62 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
570579
break;
571580
}
572581

582+
field_name->end = str->start - 1;
573583
if (c == '!' || c == ':') {
584+
Py_ssize_t count;
574585
/* we have a format specifier and/or a conversion */
575586
/* don't include the last character */
576-
field_name->end = str->start-1;
577-
578-
/* the format specifier is the rest of the string */
579-
format_spec->str = str->str;
580-
format_spec->start = str->start;
581-
format_spec->end = str->end;
582587

583588
/* see if there's a conversion specifier */
584589
if (c == '!') {
585590
/* there must be another character present */
586-
if (format_spec->start >= format_spec->end) {
591+
if (str->start >= str->end) {
587592
PyErr_SetString(PyExc_ValueError,
588-
"end of format while looking for conversion "
593+
"end of string while looking for conversion "
589594
"specifier");
590595
return 0;
591596
}
592-
*conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
597+
*conversion = PyUnicode_READ_CHAR(str->str, str->start++);
593598

594-
/* if there is another character, it must be a colon */
595-
if (format_spec->start < format_spec->end) {
596-
c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
599+
if (str->start < str->end) {
600+
c = PyUnicode_READ_CHAR(str->str, str->start++);
601+
if (c == '}')
602+
return 1;
597603
if (c != ':') {
598604
PyErr_SetString(PyExc_ValueError,
599-
"expected ':' after format specifier");
605+
"expected ':' after conversion specifier");
600606
return 0;
601607
}
602608
}
603609
}
610+
format_spec->str = str->str;
611+
format_spec->start = str->start;
612+
count = 1;
613+
while (str->start < str->end) {
614+
switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
615+
case '{':
616+
*format_spec_needs_expanding = 1;
617+
count++;
618+
break;
619+
case '}':
620+
count--;
621+
if (count == 0) {
622+
format_spec->end = str->start - 1;
623+
return 1;
624+
}
625+
break;
626+
default:
627+
break;
628+
}
629+
}
630+
631+
PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
632+
return 0;
633+
}
634+
else if (c != '}') {
635+
PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
636+
return 0;
604637
}
605-
else
606-
/* end of string, there's no format_spec or conversion */
607-
field_name->end = str->start;
608638

609639
return 1;
610640
}
@@ -638,10 +668,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
638668
SubString *format_spec, Py_UCS4 *conversion,
639669
int *format_spec_needs_expanding)
640670
{
641-
int at_end, hit_format_spec;
671+
int at_end;
642672
Py_UCS4 c = 0;
643673
Py_ssize_t start;
644-
int count;
645674
Py_ssize_t len;
646675
int markup_follows = 0;
647676

@@ -713,50 +742,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
713742
if (!markup_follows)
714743
return 2;
715744

716-
/* this is markup, find the end of the string by counting nested
717-
braces. note that this prohibits escaped braces, so that
718-
format_specs cannot have braces in them. */
745+
/* this is markup; parse the field */
719746
*field_present = 1;
720-
count = 1;
721-
722-
start = self->str.start;
723-
724-
/* we know we can't have a zero length string, so don't worry
725-
about that case */
726-
hit_format_spec = 0;
727-
while (self->str.start < self->str.end) {
728-
switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
729-
case ':':
730-
hit_format_spec = 1;
731-
count = 1;
732-
break;
733-
case '{':
734-
/* the format spec needs to be recursively expanded.
735-
this is an optimization, and not strictly needed */
736-
if (hit_format_spec)
737-
*format_spec_needs_expanding = 1;
738-
count++;
739-
break;
740-
case '}':
741-
count--;
742-
if (count <= 0) {
743-
/* we're done. parse and get out */
744-
SubString s;
745-
746-
SubString_init(&s, self->str.str, start, self->str.start - 1);
747-
if (parse_field(&s, field_name, format_spec, conversion) == 0)
748-
return 0;
749-
750-
/* success */
751-
return 2;
752-
}
753-
break;
754-
}
755-
}
756-
757-
/* end of string while searching for matching '}' */
758-
PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
759-
return 0;
747+
if (!parse_field(&self->str, field_name, format_spec,
748+
format_spec_needs_expanding, conversion))
749+
return 0;
750+
return 2;
760751
}
761752

762753

0 commit comments

Comments
 (0)