Skip to content

Commit a67548c

Browse files
author
eric.smith
committed
Addresses issue 2802: 'n' formatting for integers.
Adds 'n' as a format specifier for integers, to mirror the same specifier which is already available for floats. 'n' is the same as 'd', but inserts the current locale-specific thousands grouping. I added this as a stringlib function, but it's only used by str type, not unicode. This is because of an implementation detail in unicode.format(), which does its own str->unicode conversion. But the unicode version will be needed in 3.0, and it may be needed by other code eventually in 2.6 (maybe decimal?), so I left it as a stringlib implementation. As long as the unicode version isn't instantiated, there's no overhead for this. git-svn-id: http://svn.python.org/projects/python/trunk@63078 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 801485f commit a67548c

9 files changed

Lines changed: 184 additions & 52 deletions

File tree

Include/stringobject.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,17 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
176176
(only possible for 0-terminated
177177
strings) */
178178
);
179-
179+
180+
/* Using the current locale, insert the thousands grouping
181+
into the string pointed to by buffer. For the argument descriptions,
182+
see Objects/stringlib/localeutil.h */
183+
184+
PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
185+
Py_ssize_t len,
186+
char *plast,
187+
Py_ssize_t buf_size,
188+
Py_ssize_t *count,
189+
int append_zero_char);
180190

181191
#ifdef __cplusplus
182192
}

Lib/test/test_types.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ def test(i, format_spec, result):
377377

378378
# ensure that float type specifiers work; format converts
379379
# the int to a float
380-
for format_spec in 'eEfFgGn%':
380+
for format_spec in 'eEfFgG%':
381381
for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
382382
self.assertEqual(value.__format__(format_spec),
383383
float(value).__format__(format_spec))
@@ -472,7 +472,7 @@ def test(i, format_spec, result):
472472

473473
# ensure that float type specifiers work; format converts
474474
# the long to a float
475-
for format_spec in 'eEfFgGn%':
475+
for format_spec in 'eEfFgG%':
476476
for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
477477
self.assertEqual(value.__format__(format_spec),
478478
float(value).__format__(format_spec))
@@ -486,6 +486,17 @@ def test_float__format__locale(self):
486486
self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
487487
self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
488488

489+
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
490+
def test_int__format__locale(self):
491+
# test locale support for __format__ code 'n' for integers
492+
493+
x = 123456789012345678901234567890
494+
for i in range(0, 30):
495+
self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
496+
497+
# move to the next integer to test
498+
x = x // 10
499+
489500
def test_float__format__(self):
490501
# these should be rewritten to use both format(x, spec) and
491502
# x.__format__(spec)

Makefile.pre.in

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,8 @@ STRINGLIB_HEADERS= \
549549
$(srcdir)/Objects/stringlib/stringdefs.h \
550550
$(srcdir)/Objects/stringlib/string_format.h \
551551
$(srcdir)/Objects/stringlib/transmogrify.h \
552-
$(srcdir)/Objects/stringlib/unicodedefs.h
552+
$(srcdir)/Objects/stringlib/unicodedefs.h \
553+
$(srcdir)/Objects/stringlib/localeutil.h
553554

554555
Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
555556
$(STRINGLIB_HEADERS)

Objects/stringlib/formatter.h

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,9 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
453453
Py_ssize_t n_digits; /* count of digits need from the computed
454454
string */
455455
Py_ssize_t n_leading_chars;
456+
Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
457+
allocate, used for 'n'
458+
formatting. */
456459
NumberFieldWidths spec;
457460
long x;
458461

@@ -523,6 +526,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
523526
break;
524527
default: /* shouldn't be needed, but stops a compiler warning */
525528
case 'd':
529+
case 'n':
526530
base = 10;
527531
leading_chars_to_skip = 0;
528532
break;
@@ -555,8 +559,15 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
555559
/* Calculate the widths of the various leading and trailing parts */
556560
calc_number_widths(&spec, sign, n_digits, format);
557561

562+
if (format->type == 'n')
563+
/* Compute how many additional chars we need to allocate
564+
to hold the thousands grouping. */
565+
STRINGLIB_GROUPING(pnumeric_chars, n_digits,
566+
pnumeric_chars+n_digits,
567+
0, &n_grouping_chars, 0);
568+
558569
/* Allocate a new string to hold the result */
559-
result = STRINGLIB_NEW(NULL, spec.n_total);
570+
result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
560571
if (!result)
561572
goto done;
562573
p = STRINGLIB_STR(result);
@@ -567,13 +578,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
567578
pnumeric_chars,
568579
n_digits * sizeof(STRINGLIB_CHAR));
569580

570-
/* if X, convert to uppercase */
581+
/* If type is 'X', convert to uppercase */
571582
if (format->type == 'X') {
572583
Py_ssize_t t;
573584
for (t = 0; t < n_digits; ++t)
574585
p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
575586
}
576587

588+
/* Insert the grouping, if any, after the uppercasing of 'X', so we can
589+
ensure that grouping chars won't be affected. */
590+
if (n_grouping_chars && format->type == 'n') {
591+
/* We know this can't fail, since we've already
592+
reserved enough space. */
593+
STRINGLIB_CHAR *pstart = p + n_leading_chars;
594+
int r = STRINGLIB_GROUPING(pstart, n_digits,
595+
pstart + n_digits,
596+
spec.n_total+n_grouping_chars-n_leading_chars,
597+
NULL, 0);
598+
assert(r);
599+
}
600+
577601
/* Fill in the non-digit parts */
578602
fill_number(p, &spec, n_digits,
579603
format->fill_char == '\0' ? ' ' : format->fill_char);
@@ -841,6 +865,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
841865
case 'o':
842866
case 'x':
843867
case 'X':
868+
case 'n':
844869
/* no type conversion needed, already an int (or long). do
845870
the formatting */
846871
result = format_int_or_long_internal(value, &format, tostring);
@@ -852,7 +877,6 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
852877
case 'F':
853878
case 'g':
854879
case 'G':
855-
case 'n':
856880
case '%':
857881
/* convert to float */
858882
tmp = PyNumber_Float(value);

Objects/stringlib/localeutil.h

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/* stringlib: locale related helpers implementation */
2+
3+
#ifndef STRINGLIB_LOCALEUTIL_H
4+
#define STRINGLIB_LOCALEUTIL_H
5+
6+
#include <locale.h>
7+
8+
/**
9+
* _Py_InsertThousandsGrouping:
10+
* @buffer: A pointer to the start of a string.
11+
* @len: The length of the string.
12+
* @plast: A pointer to the end of of the digits in the string. This
13+
* may be before the end of the string (if the string contains
14+
* decimals, for example).
15+
* @buf_size: The maximum size of the buffer pointed to by buffer.
16+
* @count: If non-NULL, points to a variable that will receive the
17+
* number of characters we need to insert (and no formatting
18+
* will actually occur).
19+
* @append_zero_char: If non-zero, put a trailing zero at the end of
20+
* of the resulting string, if and only if we modified the
21+
* string.
22+
*
23+
* Inserts thousand grouping characters (as defined in the current
24+
* locale) into the string between buffer and plast. If count is
25+
* non-NULL, don't do any formatting, just count the number of
26+
* characters to insert. This is used by the caller to appropriately
27+
* resize the buffer, if needed.
28+
*
29+
* Return value: 0 on error, else 1. Note that no error can occur if
30+
* count is non-NULL.
31+
*
32+
* This name won't be used, the includer of this file should define
33+
* it to be the actual function name, based on unicode or string.
34+
**/
35+
int
36+
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
37+
Py_ssize_t len,
38+
STRINGLIB_CHAR *plast,
39+
Py_ssize_t buf_size,
40+
Py_ssize_t *count,
41+
int append_zero_char)
42+
{
43+
struct lconv *locale_data = localeconv();
44+
const char *grouping = locale_data->grouping;
45+
const char *thousands_sep = locale_data->thousands_sep;
46+
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
47+
STRINGLIB_CHAR *pend = buffer + len; /* current end of buffer */
48+
STRINGLIB_CHAR *pmax = buffer + buf_size; /* max of buffer */
49+
char current_grouping;
50+
51+
/* Initialize the character count, if we're just counting. */
52+
if (count)
53+
*count = 0;
54+
55+
/* Starting at plast and working right-to-left, keep track of
56+
what grouping needs to be added and insert that. */
57+
current_grouping = *grouping++;
58+
59+
/* If the first character is 0, perform no grouping at all. */
60+
if (current_grouping == 0)
61+
return 1;
62+
63+
while (plast - buffer > current_grouping) {
64+
/* Always leave buffer and pend valid at the end of this
65+
loop, since we might leave with a return statement. */
66+
67+
plast -= current_grouping;
68+
if (count) {
69+
/* We're only counting, not touching the memory. */
70+
*count += thousands_sep_len;
71+
}
72+
else {
73+
/* Do the formatting. */
74+
75+
/* Is there room to insert thousands_sep_len chars? */
76+
if (pmax - pend < thousands_sep_len)
77+
/* No room. */
78+
return 0;
79+
80+
/* Move the rest of the string down. */
81+
memmove(plast + thousands_sep_len,
82+
plast,
83+
(pend - plast) * sizeof(STRINGLIB_CHAR));
84+
/* Copy the thousands_sep chars into the buffer. */
85+
#if STRINGLIB_IS_UNICODE
86+
/* Convert from the char's of the thousands_sep from
87+
the locale into unicode. */
88+
{
89+
Py_ssize_t i;
90+
for (i = 0; i < thousands_sep_len; ++i)
91+
plast[i] = thousands_sep[i];
92+
}
93+
#else
94+
/* No conversion, just memcpy the thousands_sep. */
95+
memcpy(plast, thousands_sep, thousands_sep_len);
96+
#endif
97+
}
98+
99+
/* Adjust end pointer. */
100+
pend += thousands_sep_len;
101+
102+
/* Move to the next grouping character, unless we're
103+
repeating (which is designated by a grouping of 0). */
104+
if (*grouping != 0) {
105+
current_grouping = *grouping++;
106+
if (current_grouping == CHAR_MAX)
107+
/* We're done. */
108+
break;
109+
}
110+
}
111+
if (append_zero_char) {
112+
/* Append a zero character to mark the end of the string,
113+
if there's room. */
114+
if (pend - plast < 1)
115+
/* No room, error. */
116+
return 0;
117+
*pend = 0;
118+
}
119+
return 1;
120+
}
121+
#endif /* STRINGLIB_LOCALEUTIL_H */

Objects/stringlib/stringdefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@
2323
#define STRINGLIB_CHECK PyString_Check
2424
#define STRINGLIB_CMP memcmp
2525
#define STRINGLIB_TOSTR PyObject_Str
26+
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
2627

2728
#endif /* !STRINGLIB_STRINGDEFS_H */

Objects/stringlib/unicodedefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#define STRINGLIB_NEW PyUnicode_FromUnicode
2222
#define STRINGLIB_RESIZE PyUnicode_Resize
2323
#define STRINGLIB_CHECK PyUnicode_Check
24+
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
2425

2526
#if PY_VERSION_HEX < 0x03000000
2627
#define STRINGLIB_TOSTR PyObject_Unicode

Objects/stringobject.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,10 @@ PyString_AsStringAndSize(register PyObject *obj,
784784
#include "stringlib/find.h"
785785
#include "stringlib/partition.h"
786786

787+
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
788+
#include "stringlib/localeutil.h"
789+
790+
787791

788792
static int
789793
string_print(PyStringObject *op, FILE *fp, int flags)

Python/pystrtod.c

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -343,14 +343,9 @@ ensure_decimal_point(char* buffer, size_t buf_size)
343343
Py_LOCAL_INLINE(int)
344344
add_thousands_grouping(char* buffer, size_t buf_size)
345345
{
346+
Py_ssize_t len = strlen(buffer);
346347
struct lconv *locale_data = localeconv();
347-
const char *grouping = locale_data->grouping;
348-
const char *thousands_sep = locale_data->thousands_sep;
349-
size_t thousands_sep_len = strlen(thousands_sep);
350348
const char *decimal_point = locale_data->decimal_point;
351-
char *pend = buffer + strlen(buffer); /* current end of buffer */
352-
char *pmax = buffer + buf_size; /* max of buffer */
353-
char current_grouping;
354349

355350
/* Find the decimal point, if any. We're only concerned
356351
about the characters to the left of the decimal when
@@ -364,49 +359,13 @@ add_thousands_grouping(char* buffer, size_t buf_size)
364359
if (!p)
365360
/* No exponent and no decimal. Use the entire
366361
string. */
367-
p = pend;
362+
p = buffer + len;
368363
}
369364
/* At this point, p points just past the right-most character we
370365
want to format. We need to add the grouping string for the
371366
characters between buffer and p. */
372-
373-
/* Starting at p and working right-to-left, keep track of
374-
what grouping needs to be added and insert that. */
375-
current_grouping = *grouping++;
376-
377-
/* If the first character is 0, perform no grouping at all. */
378-
if (current_grouping == 0)
379-
return 1;
380-
381-
while (p - buffer > current_grouping) {
382-
/* Always leave buffer and pend valid at the end of this
383-
loop, since we might leave with a return statement. */
384-
385-
/* Is there room to insert thousands_sep_len chars?. */
386-
if (pmax - pend <= thousands_sep_len)
387-
/* No room. */
388-
return 0;
389-
390-
/* Move the rest of the string down. */
391-
p -= current_grouping;
392-
memmove(p + thousands_sep_len,
393-
p,
394-
pend - p + 1);
395-
/* Adjust end pointer. */
396-
pend += thousands_sep_len;
397-
/* Copy the thousands_sep chars into the buffer. */
398-
memcpy(p, thousands_sep, thousands_sep_len);
399-
400-
/* Move to the next grouping character, unless we're
401-
repeating (which is designated by a grouping of 0). */
402-
if (*grouping != 0) {
403-
current_grouping = *grouping++;
404-
if (current_grouping == CHAR_MAX)
405-
/* We're done. */
406-
return 1;
407-
}
408-
}
409-
return 1;
367+
return _PyString_InsertThousandsGrouping(buffer, len, p,
368+
buf_size, NULL, 1);
410369
}
411370

412371
/* see FORMATBUFLEN in unicodeobject.c */

0 commit comments

Comments
 (0)