Skip to content

Commit 5858d8c

Browse files
committed
gh-87790: support underscore for formatting fractional part of floats
```pycon
>>> f"{123_456.123_456:_._f}"  # Whole and fractional
'123_456.123_456'
>>> f"{123_456.123_456:_f}"  # Integer component only
'123_456.123456'
>>> f"{123_456.123_456:._f}"  # Fractional component only
'123456.123_456'
>>> f"{123_456.123_456:.4_f}"  # with precision
'123456.1_235'
```
1 parent a726ce7 commit 5858d8c

File tree

4 files changed

+122
-28
lines changed

4 files changed

+122
-28
lines changed

Doc/library/string.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,12 +312,13 @@ non-empty format specification typically modifies the result.
312312
The general form of a *standard format specifier* is:
313313

314314
.. productionlist:: format-spec
315-
format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision`][`type`]
315+
format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision` [`fraction_grouping`]][`type`]
316316
fill: <any character>
317317
align: "<" | ">" | "=" | "^"
318318
sign: "+" | "-" | " "
319319
width: `~python-grammar:digit`+
320320
grouping_option: "_" | ","
321+
fraction_grouping: "_"
321322
precision: `~python-grammar:digit`+
322323
type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"
323324

@@ -448,6 +449,13 @@ indicates the maximum field size - in other words, how many characters will be
448449
used from the field content. The *precision* is not allowed for integer
449450
presentation types.
450451

452+
The ``'_'`` option after *precision* requests an underscore as the
453+
thousands separator for the fractional part of floating-point presentation
454+
types.
455+
456+
.. versionchanged:: 3.14
457+
Support underscore as a thousands separator for the fractional part.
458+
451459
Finally, the *type* determines how the data should be presented.
452460

453461
The available string presentation types are:

Lib/test/test_float.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,18 @@ def test_format(self):
754754
self.assertEqual(format(INF, 'f'), 'inf')
755755
self.assertEqual(format(INF, 'F'), 'INF')
756756

757+
# underscores
758+
x = 123_456.123_456
759+
self.assertEqual(format(x, '_f'), '123_456.123456')
760+
self.assertEqual(format(x, '._f'), '123456.123_456')
761+
self.assertEqual(format(x, '_._f'), '123_456.123_456')
762+
self.assertEqual(format(x, '.10_f'), '123456.1_234_560_000')
763+
self.assertEqual(format(x, '>21._f'), '       123456.123_456')
764+
self.assertEqual(format(x, '<21._f'), '123456.123_456       ')
765+
self.assertEqual(format(x, '+.11_e'), '+1.23_456_123_456e+05')
766+
767+
self.assertRaises(ValueError, format, x , '._6f')
768+
757769
@support.requires_IEEE_754
758770
def test_format_testfile(self):
759771
with open(format_testfile, encoding="utf-8") as testfile:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Support underscore as a thousands separator in the fractional part for
2+
floating-point presentation types of the new-style string formatting (with
3+
:func:`format` or :ref:`f-strings`). Patch by Sergey B Kirpichev.

Python/formatter_unicode.c

Lines changed: 98 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ typedef struct {
135135
Py_ssize_t width;
136136
enum LocaleType thousands_separators;
137137
Py_ssize_t precision;
138+
enum LocaleType frac_thousands_separator;
138139
Py_UCS4 type;
139140
} InternalFormatSpec;
140141

@@ -171,6 +172,7 @@ parse_internal_render_format_spec(PyObject *obj,
171172
format->sign = '\0';
172173
format->width = -1;
173174
format->thousands_separators = LT_NO_LOCALE;
175+
format->frac_thousands_separator = LT_NO_LOCALE;
174176
format->precision = -1;
175177
format->type = default_type;
176178

@@ -260,7 +262,16 @@ parse_internal_render_format_spec(PyObject *obj,
260262
/* Overflow error. Exception already set. */
261263
return 0;
262264

263-
/* Not having a precision after a dot is an error. */
265+
if (end-pos && READ_spec(pos) == '_') {
266+
if (consumed == 0) {
267+
format->precision = -1;
268+
}
269+
format->frac_thousands_separator = LT_UNDERSCORE_LOCALE;
270+
++pos;
271+
++consumed;
272+
}
273+
274+
/* Not having a precision or underscore after a dot is an error. */
264275
if (consumed == 0) {
265276
PyErr_Format(PyExc_ValueError,
266277
"Format specifier missing precision");
@@ -402,6 +413,7 @@ fill_padding(_PyUnicodeWriter *writer,
402413
typedef struct {
403414
PyObject *decimal_point;
404415
PyObject *thousands_sep;
416+
PyObject *frac_thousands_sep;
405417
const char *grouping;
406418
char *grouping_buffer;
407419
} LocaleInfo;
@@ -423,6 +435,8 @@ typedef struct {
423435
Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
424436
excluding the decimal itself, if
425437
present. */
438+
Py_ssize_t n_frac;
439+
Py_ssize_t n_grouped_frac_digits;
426440

427441
/* These 2 are not the widths of fields, but are needed by
428442
STRINGLIB_GROUPING. */
@@ -445,24 +459,32 @@ typedef struct {
445459
*/
446460
static void
447461
parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
448-
Py_ssize_t *n_remainder, int *has_decimal)
462+
Py_ssize_t *n_remainder, Py_ssize_t *n_frac, int *has_decimal)
449463
{
450-
Py_ssize_t remainder;
464+
Py_ssize_t frac;
451465
int kind = PyUnicode_KIND(s);
452466
const void *data = PyUnicode_DATA(s);
453467

454-
while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
468+
while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) {
455469
++pos;
456-
remainder = pos;
470+
}
471+
frac = pos;
457472

458473
/* Does remainder start with a decimal point? */
459-
*has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
474+
*has_decimal = pos<end && PyUnicode_READ(kind, data, frac) == '.';
460475

461476
/* Skip the decimal point. */
462-
if (*has_decimal)
463-
remainder++;
477+
if (*has_decimal) {
478+
frac++;
479+
pos++;
480+
}
481+
482+
while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) {
483+
++pos;
484+
}
464485

465-
*n_remainder = end - remainder;
486+
*n_frac = pos - frac;
487+
*n_remainder = end - pos;
466488
}
467489

468490
/* not all fields of format are used. for example, precision is
@@ -473,18 +495,19 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
473495
static Py_ssize_t
474496
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
475497
Py_UCS4 sign_char, Py_ssize_t n_start,
476-
Py_ssize_t n_end, Py_ssize_t n_remainder,
498+
Py_ssize_t n_end, Py_ssize_t n_remainder, Py_ssize_t n_frac,
477499
int has_decimal, const LocaleInfo *locale,
478500
const InternalFormatSpec *format, Py_UCS4 *maxchar)
479501
{
480502
Py_ssize_t n_non_digit_non_padding;
481503
Py_ssize_t n_padding;
482504

483-
spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
505+
spec->n_digits = n_end - n_start - n_frac - n_remainder - (has_decimal?1:0);
484506
spec->n_lpadding = 0;
485507
spec->n_prefix = n_prefix;
486508
spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
487509
spec->n_remainder = n_remainder;
510+
spec->n_frac = n_frac;
488511
spec->n_spadding = 0;
489512
spec->n_rpadding = 0;
490513
spec->sign = '\0';
@@ -530,7 +553,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
530553

531554
/* The number of chars used for non-digits and non-padding. */
532555
n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
533-
spec->n_remainder;
556+
+ spec->n_frac + spec->n_remainder;
534557

535558
/* min_width can go negative, that's okay. format->width == -1 means
536559
we don't care. */
@@ -557,12 +580,29 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
557580
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
558581
}
559582

583+
if (spec->n_frac == 0) {
584+
spec->n_grouped_frac_digits = 0;
585+
}
586+
else {
587+
Py_UCS4 grouping_maxchar;
588+
spec->n_grouped_frac_digits = _PyUnicode_InsertThousandsGrouping(
589+
NULL, 0,
590+
NULL, 0, spec->n_frac,
591+
spec->n_frac,
592+
locale->grouping, locale->frac_thousands_sep, &grouping_maxchar);
593+
if (spec->n_grouped_frac_digits == -1) {
594+
return -1;
595+
}
596+
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
597+
}
598+
560599
/* Given the desired width and the total of digit and non-digit
561600
space we consume, see if we need any padding. format->width can
562601
be negative (meaning no padding), but this code still works in
563602
that case. */
564603
n_padding = format->width -
565-
(n_non_digit_non_padding + spec->n_grouped_digits);
604+
(n_non_digit_non_padding + spec->n_grouped_digits
605+
+ spec->n_grouped_frac_digits - spec->n_frac);
566606
if (n_padding > 0) {
567607
/* Some padding is needed. Determine if it's left, space, or right. */
568608
switch (format->align) {
@@ -593,7 +633,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
593633

594634
return spec->n_lpadding + spec->n_sign + spec->n_prefix +
595635
spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
596-
spec->n_remainder + spec->n_rpadding;
636+
spec->n_grouped_frac_digits + spec->n_remainder + spec->n_rpadding;
597637
}
598638

599639
/* Fill in the digit parts of a number's string representation,
@@ -677,6 +717,19 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
677717
d_pos += 1;
678718
}
679719

720+
if (spec->n_frac) {
721+
r = _PyUnicode_InsertThousandsGrouping(
722+
writer, spec->n_grouped_frac_digits,
723+
digits, d_pos, spec->n_frac, spec->n_frac,
724+
locale->grouping, locale->frac_thousands_sep, NULL);
725+
if (r == -1) {
726+
return -1;
727+
}
728+
assert(r == spec->n_grouped_frac_digits);
729+
d_pos += spec->n_frac;
730+
writer->pos += spec->n_grouped_frac_digits;
731+
}
732+
680733
if (spec->n_remainder) {
681734
_PyUnicode_FastCopyCharacters(
682735
writer->buffer, writer->pos,
@@ -701,7 +754,8 @@ static const char no_grouping[1] = {CHAR_MAX};
701754
LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
702755
LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
703756
static int
704-
get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
757+
get_locale_info(enum LocaleType type, enum LocaleType frac_type,
758+
LocaleInfo *locale_info)
705759
{
706760
switch (type) {
707761
case LT_CURRENT_LOCALE: {
@@ -746,6 +800,15 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
746800
locale_info->grouping = no_grouping;
747801
break;
748802
}
803+
if (frac_type == LT_UNDERSCORE_LOCALE) {
804+
locale_info->frac_thousands_sep = PyUnicode_FromOrdinal('_');
805+
if (locale_info->grouping == no_grouping) {
806+
locale_info->grouping = "\3";
807+
}
808+
}
809+
else {
810+
locale_info->frac_thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
811+
}
749812
return 0;
750813
}
751814

@@ -754,6 +817,7 @@ free_locale_info(LocaleInfo *locale_info)
754817
{
755818
Py_XDECREF(locale_info->decimal_point);
756819
Py_XDECREF(locale_info->thousands_sep);
820+
Py_XDECREF(locale_info->frac_thousands_sep);
757821
PyMem_Free(locale_info->grouping_buffer);
758822
}
759823

@@ -1005,13 +1069,13 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
10051069

10061070
/* Determine the grouping, separator, and decimal point, if any. */
10071071
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1008-
format->thousands_separators,
1072+
format->thousands_separators, 0,
10091073
&locale) == -1)
10101074
goto done;
10111075

10121076
/* Calculate how much memory we'll need. */
10131077
n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1014-
inumeric_chars + n_digits, n_remainder, 0,
1078+
inumeric_chars + n_digits, n_remainder, 0, 0,
10151079
&locale, format, &maxchar);
10161080
if (n_total == -1) {
10171081
goto done;
@@ -1046,6 +1110,7 @@ format_float_internal(PyObject *value,
10461110
char *buf = NULL; /* buffer returned from PyOS_double_to_string */
10471111
Py_ssize_t n_digits;
10481112
Py_ssize_t n_remainder;
1113+
Py_ssize_t n_frac;
10491114
Py_ssize_t n_total;
10501115
int has_decimal;
10511116
double val;
@@ -1125,7 +1190,8 @@ format_float_internal(PyObject *value,
11251190
if (format->sign != '+' && format->sign != ' '
11261191
&& format->width == -1
11271192
&& format->type != 'n'
1128-
&& !format->thousands_separators)
1193+
&& !format->thousands_separators
1194+
&& !format->frac_thousands_separator)
11291195
{
11301196
/* Fast path */
11311197
result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
@@ -1151,18 +1217,20 @@ format_float_internal(PyObject *value,
11511217

11521218
/* Determine if we have any "remainder" (after the digits, might include
11531219
decimal or exponent or both (or neither)) */
1154-
parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1220+
parse_number(unicode_tmp, index, index + n_digits,
1221+
&n_remainder, &n_frac, &has_decimal);
11551222

11561223
/* Determine the grouping, separator, and decimal point, if any. */
11571224
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
11581225
format->thousands_separators,
1226+
format->frac_thousands_separator,
11591227
&locale) == -1)
11601228
goto done;
11611229

11621230
/* Calculate how much memory we'll need. */
11631231
n_total = calc_number_widths(&spec, 0, sign_char, index,
1164-
index + n_digits, n_remainder, has_decimal,
1165-
&locale, format, &maxchar);
1232+
index + n_digits, n_remainder, n_frac,
1233+
has_decimal, &locale, format, &maxchar);
11661234
if (n_total == -1) {
11671235
goto done;
11681236
}
@@ -1202,6 +1270,8 @@ format_complex_internal(PyObject *value,
12021270
Py_ssize_t n_im_digits;
12031271
Py_ssize_t n_re_remainder;
12041272
Py_ssize_t n_im_remainder;
1273+
Py_ssize_t n_re_frac;
1274+
Py_ssize_t n_im_frac;
12051275
Py_ssize_t n_re_total;
12061276
Py_ssize_t n_im_total;
12071277
int re_has_decimal;
@@ -1330,13 +1400,14 @@ format_complex_internal(PyObject *value,
13301400
/* Determine if we have any "remainder" (after the digits, might include
13311401
decimal or exponent or both (or neither)) */
13321402
parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1333-
&n_re_remainder, &re_has_decimal);
1403+
&n_re_remainder, &n_re_frac, &re_has_decimal);
13341404
parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1335-
&n_im_remainder, &im_has_decimal);
1405+
&n_im_remainder, &n_im_frac, &im_has_decimal);
13361406

13371407
/* Determine the grouping, separator, and decimal point, if any. */
13381408
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
13391409
format->thousands_separators,
1410+
format->frac_thousands_separator,
13401411
&locale) == -1)
13411412
goto done;
13421413

@@ -1349,8 +1420,8 @@ format_complex_internal(PyObject *value,
13491420
/* Calculate how much memory we'll need. */
13501421
n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
13511422
i_re, i_re + n_re_digits, n_re_remainder,
1352-
re_has_decimal, &locale, &tmp_format,
1353-
&maxchar);
1423+
n_re_frac, re_has_decimal, &locale,
1424+
&tmp_format, &maxchar);
13541425
if (n_re_total == -1) {
13551426
goto done;
13561427
}
@@ -1362,8 +1433,8 @@ format_complex_internal(PyObject *value,
13621433
tmp_format.sign = '+';
13631434
n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
13641435
i_im, i_im + n_im_digits, n_im_remainder,
1365-
im_has_decimal, &locale, &tmp_format,
1366-
&maxchar);
1436+
n_im_frac, im_has_decimal, &locale,
1437+
&tmp_format, &maxchar);
13671438
if (n_im_total == -1) {
13681439
goto done;
13691440
}

0 commit comments

Comments
 (0)