Skip to content

Commit 89e1b1a

Browse files
committed
Issue 27080: PEP 515: add '_' formatting option.
1 parent 37d398e commit 89e1b1a

File tree

4 files changed

+93
-22
lines changed

4 files changed

+93
-22
lines changed

Doc/library/string.rst

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ non-empty format string typically modifies the result.
300300
The general form of a *standard format specifier* is:
301301

302302
.. productionlist:: sf
303-
format_spec: [[`fill`]`align`][`sign`][#][0][`width`][,][.`precision`][`type`]
303+
format_spec: [[`fill`]`align`][`sign`][#][0][`width`][, | _][.`precision`][`type`]
304304
fill: <any character>
305305
align: "<" | ">" | "=" | "^"
306306
sign: "+" | "-" | " "
@@ -378,6 +378,16 @@ instead.
378378
.. versionchanged:: 3.1
379379
Added the ``','`` option (see also :pep:`378`).
380380

381+
The ``'_'`` option signals the use of an underscore for a thousands
382+
separator for floating point presentation types and for integer
383+
presentation type ``'d'``. For integer presentation types ``'b'``,
384+
``'o'``, ``'x'``, and ``'X'``, underscores will be inserted every 4
385+
digits. For other presentation types, specifying this option is an
386+
error.
387+
388+
.. versionchanged:: 3.6
389+
Added the ``'_'`` option (see also :pep:`515`).
390+
381391
*width* is a decimal integer defining the minimum field width. If not
382392
specified, then the field width will be determined by the content.
383393

Lib/test/test_long.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,8 @@ def __lt__(self, other):
621621
def test__format__(self):
622622
self.assertEqual(format(123456789, 'd'), '123456789')
623623
self.assertEqual(format(123456789, 'd'), '123456789')
624+
self.assertEqual(format(123456789, ','), '123,456,789')
625+
self.assertEqual(format(123456789, '_'), '123_456_789')
624626

625627
# sign and aligning are interdependent
626628
self.assertEqual(format(1, "-"), '1')
@@ -649,8 +651,25 @@ def test__format__(self):
649651
self.assertEqual(format(int('be', 16), "X"), "BE")
650652
self.assertEqual(format(-int('be', 16), "x"), "-be")
651653
self.assertEqual(format(-int('be', 16), "X"), "-BE")
654+
self.assertRaises(ValueError, format, 1234567890, ',x')
655+
self.assertEqual(format(1234567890, '_x'), '4996_02d2')
656+
self.assertEqual(format(1234567890, '_X'), '4996_02D2')
652657

653658
# octal
659+
self.assertEqual(format(3, "o"), "3")
660+
self.assertEqual(format(-3, "o"), "-3")
661+
self.assertEqual(format(1234, "o"), "2322")
662+
self.assertEqual(format(-1234, "o"), "-2322")
663+
self.assertEqual(format(1234, "-o"), "2322")
664+
self.assertEqual(format(-1234, "-o"), "-2322")
665+
self.assertEqual(format(1234, " o"), " 2322")
666+
self.assertEqual(format(-1234, " o"), "-2322")
667+
self.assertEqual(format(1234, "+o"), "+2322")
668+
self.assertEqual(format(-1234, "+o"), "-2322")
669+
self.assertRaises(ValueError, format, 1234567890, ',o')
670+
self.assertEqual(format(1234567890, '_o'), '111_4540_1322')
671+
672+
# binary
654673
self.assertEqual(format(3, "b"), "11")
655674
self.assertEqual(format(-3, "b"), "-11")
656675
self.assertEqual(format(1234, "b"), "10011010010")
@@ -661,12 +680,21 @@ def test__format__(self):
661680
self.assertEqual(format(-1234, " b"), "-10011010010")
662681
self.assertEqual(format(1234, "+b"), "+10011010010")
663682
self.assertEqual(format(-1234, "+b"), "-10011010010")
683+
self.assertRaises(ValueError, format, 1234567890, ',b')
684+
self.assertEqual(format(12345, '_b'), '11_0000_0011_1001')
664685

665686
# make sure these are errors
666687
self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed
688+
self.assertRaises(ValueError, format, 3, "_c") # underscore,
689+
self.assertRaises(ValueError, format, 3, ",c") # comma, and
667690
self.assertRaises(ValueError, format, 3, "+c") # sign not allowed
668691
# with 'c'
669692

693+
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,')
694+
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_')
695+
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,d')
696+
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_d')
697+
670698
# ensure that only int and float type specifiers work
671699
for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
672700
[chr(x) for x in range(ord('A'), ord('Z')+1)]):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #27080: Implement formatting support for PEP 515. Initial patch
14+
by Chris Angelico.
15+
1316
- Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput.
1417
Patch by Jason Fried.
1518

Python/formatter_unicode.c

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,20 @@ invalid_comma_type(Py_UCS4 presentation_type)
3232
{
3333
if (presentation_type > 32 && presentation_type < 128)
3434
PyErr_Format(PyExc_ValueError,
35-
"Cannot specify ',' with '%c'.",
35+
"Cannot specify ',' or '_' with '%c'.",
3636
(char)presentation_type);
3737
else
3838
PyErr_Format(PyExc_ValueError,
39-
"Cannot specify ',' with '\\x%x'.",
39+
"Cannot specify ',' or '_' with '\\x%x'.",
4040
(unsigned int)presentation_type);
4141
}
4242

43+
static void
44+
invalid_comma_and_underscore()
45+
{
46+
PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
47+
}
48+
4349
/*
4450
get_integer consumes 0 or more decimal digit characters from an
4551
input string, updates *result with the corresponding positive
@@ -108,6 +114,12 @@ is_sign_element(Py_UCS4 c)
108114
}
109115
}
110116

117+
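/* Locale type codes.  LT_NO_LOCALE must be zero: format->thousands_separators stores one of these codes, is tested against 0 to mean "no separator requested", and is passed directly to get_locale_info(). */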
118+
#define LT_NO_LOCALE 0
119+
#define LT_DEFAULT_LOCALE 1
120+
#define LT_UNDERSCORE_LOCALE 2
121+
#define LT_UNDER_FOUR_LOCALE 3
122+
#define LT_CURRENT_LOCALE 4
111123

112124
typedef struct {
113125
Py_UCS4 fill_char;
@@ -223,9 +235,22 @@ parse_internal_render_format_spec(PyObject *format_spec,
223235

224236
/* Comma signifies add thousands separators */
225237
if (end-pos && READ_spec(pos) == ',') {
226-
format->thousands_separators = 1;
238+
format->thousands_separators = LT_DEFAULT_LOCALE;
227239
++pos;
228240
}
241+
/* Underscore signifies add thousands separators */
242+
if (end-pos && READ_spec(pos) == '_') {
243+
if (format->thousands_separators != 0) {
244+
invalid_comma_and_underscore();
245+
return 0;
246+
}
247+
format->thousands_separators = LT_UNDERSCORE_LOCALE;
248+
++pos;
249+
}
250+
if (end-pos && READ_spec(pos) == ',') {
251+
invalid_comma_and_underscore();
252+
return 0;
253+
}
229254

230255
/* Parse field precision */
231256
if (end-pos && READ_spec(pos) == '.') {
@@ -275,6 +300,16 @@ parse_internal_render_format_spec(PyObject *format_spec,
275300
case '\0':
276301
/* These are allowed. See PEP 378.*/
277302
break;
303+
case 'b':
304+
case 'o':
305+
case 'x':
306+
case 'X':
307+
/* Underscores are allowed in bin/oct/hex. See PEP 515.  Any other separator deliberately falls through to the error in default. */
308+
if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
309+
/* Every four digits, not every three, in bin/oct/hex. */
310+
format->thousands_separators = LT_UNDER_FOUR_LOCALE;
311+
break;
312+
}
278313
default:
279314
invalid_comma_type(format->type);
280315
return 0;
@@ -351,11 +386,6 @@ fill_padding(_PyUnicodeWriter *writer,
351386
/*********** common routines for numeric formatting *********************/
352387
/************************************************************************/
353388

354-
/* Locale type codes. */
355-
#define LT_CURRENT_LOCALE 0
356-
#define LT_DEFAULT_LOCALE 1
357-
#define LT_NO_LOCALE 2
358-
359389
/* Locale info needed for formatting integers and the part of floats
360390
before and including the decimal. Note that locales only support
361391
8-bit chars, not unicode. */
@@ -667,8 +697,8 @@ static const char no_grouping[1] = {CHAR_MAX};
667697

668698
/* Find the decimal point character(s?), thousands_separator(s?), and
669699
grouping description, either for the current locale if type is
670-
LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
671-
none if LT_NO_LOCALE. */
700+
LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
701+
LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
672702
static int
673703
get_locale_info(int type, LocaleInfo *locale_info)
674704
{
@@ -691,16 +721,22 @@ get_locale_info(int type, LocaleInfo *locale_info)
691721
break;
692722
}
693723
case LT_DEFAULT_LOCALE:
724+
case LT_UNDERSCORE_LOCALE:
725+
case LT_UNDER_FOUR_LOCALE:
694726
locale_info->decimal_point = PyUnicode_FromOrdinal('.');
695-
locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
727+
locale_info->thousands_sep = PyUnicode_FromOrdinal(
728+
type == LT_DEFAULT_LOCALE ? ',' : '_');
696729
if (!locale_info->decimal_point || !locale_info->thousands_sep) {
697730
Py_XDECREF(locale_info->decimal_point);
698731
Py_XDECREF(locale_info->thousands_sep);
699732
return -1;
700733
}
701-
locale_info->grouping = "\3"; /* Group every 3 characters. The
734+
if (type != LT_UNDER_FOUR_LOCALE)
735+
locale_info->grouping = "\3"; /* Group every 3 characters. The
702736
(implicit) trailing 0 means repeat
703737
infinitely. */
738+
else
739+
locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
704740
break;
705741
case LT_NO_LOCALE:
706742
locale_info->decimal_point = PyUnicode_FromOrdinal('.');
@@ -952,9 +988,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
952988

953989
/* Determine the grouping, separator, and decimal point, if any. */
954990
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
955-
(format->thousands_separators ?
956-
LT_DEFAULT_LOCALE :
957-
LT_NO_LOCALE),
991+
format->thousands_separators,
958992
&locale) == -1)
959993
goto done;
960994

@@ -1099,9 +1133,7 @@ format_float_internal(PyObject *value,
10991133

11001134
/* Determine the grouping, separator, and decimal point, if any. */
11011135
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1102-
(format->thousands_separators ?
1103-
LT_DEFAULT_LOCALE :
1104-
LT_NO_LOCALE),
1136+
format->thousands_separators,
11051137
&locale) == -1)
11061138
goto done;
11071139

@@ -1277,9 +1309,7 @@ format_complex_internal(PyObject *value,
12771309

12781310
/* Determine the grouping, separator, and decimal point, if any. */
12791311
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1280-
(format->thousands_separators ?
1281-
LT_DEFAULT_LOCALE :
1282-
LT_NO_LOCALE),
1312+
format->thousands_separators,
12831313
&locale) == -1)
12841314
goto done;
12851315

0 commit comments

Comments
 (0)