Skip to content

Commit c4029e5

Browse files
committed
Add string formatting support for longlong and mpz.
1 parent 6827f9f commit c4029e5

11 files changed

Lines changed: 302 additions & 94 deletions

File tree

py/mpz.c

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,34 +1216,51 @@ uint mpz_as_str_size(const mpz_t *i, uint base) {
12161216
return i->len * DIG_SIZE / log_base2_floor[base] + 2 + 1; // +1 for null byte termination
12171217
}
12181218

1219+
// Computes how many bytes to reserve for formatting i in the given base,
// including room for a sign, the terminating null byte, an optional prefix
// and, when comma is non-zero, one separator per three digits.
// Returns 0 if base is outside 2..32.
uint mpz_as_str_size_formatted(const mpz_t *i, uint base, const char *prefix, char comma) {
    if (!(base >= 2 && base <= 32)) {
        return 0;
    }

    // digit estimate derived from the number of mpz digits and floor(log2(base))
    uint digit_count = i->len * DIG_SIZE / log_base2_floor[base] + 1;

    // +1 for sign, +1 for null byte
    uint total = digit_count + 2;

    if (comma) {
        total += digit_count / 3;
    }
    if (prefix) {
        total += strlen(prefix);
    }
    return total;
}
1230+
12191231
// Formats i in the given base into a buffer newly allocated with m_new
// (sized by mpz_as_str_size) and returns that buffer.
// The formatting call uses an empty prefix, 'a' as the base character for
// digit values above 9, and no separator (comma == 0).
char *mpz_as_str(const mpz_t *i, uint base) {
12201232
char *s = m_new(char, mpz_as_str_size(i, base));
1221-
mpz_as_str_inpl(i, base, s);
1233+
mpz_as_str_inpl(i, base, "", 'a', 0, s);
12221234
return s;
12231235
}
12241236

12251237
// assumes enough space as calculated by mpz_as_str_size, or by mpz_as_str_size_formatted when a prefix or comma separator is used
12261238
// returns length of string, not including null byte
1227-
uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
1239+
uint mpz_as_str_inpl(const mpz_t *i, uint base, const char *prefix, char base_char, char comma, char *str) {
12281240
if (str == NULL || base < 2 || base > 32) {
12291241
str[0] = 0;
12301242
return 0;
12311243
}
12321244

12331245
uint ilen = i->len;
12341246

1247+
char *s = str;
12351248
if (ilen == 0) {
1236-
str[0] = '0';
1237-
str[1] = 0;
1238-
return 1;
1249+
if (prefix) {
1250+
while (*prefix)
1251+
*s++ = *prefix++;
1252+
}
1253+
*s++ = '0';
1254+
*s = '\0';
1255+
return s - str;
12391256
}
12401257

12411258
// make a copy of mpz digits
12421259
mpz_dig_t *dig = m_new(mpz_dig_t, ilen);
12431260
memcpy(dig, i->dig, ilen * sizeof(mpz_dig_t));
12441261

12451262
// convert
1246-
char *s = str;
1263+
char *last_comma = str;
12471264
bool done;
12481265
do {
12491266
mpz_dig_t *d = dig + ilen;
@@ -1259,7 +1276,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
12591276
// convert to character
12601277
a += '0';
12611278
if (a > '9') {
1262-
a += 'a' - '9' - 1;
1279+
a += base_char - '9' - 1;
12631280
}
12641281
*s++ = a;
12651282

@@ -1271,8 +1288,19 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
12711288
break;
12721289
}
12731290
}
1274-
} while (!done);
1291+
if (comma && (s - last_comma) == 3) {
1292+
*s++ = comma;
1293+
last_comma = s;
1294+
}
1295+
}
1296+
while (!done);
12751297

1298+
if (prefix) {
1299+
const char *p = &prefix[strlen(prefix)];
1300+
while (p > prefix) {
1301+
*s++ = *--p;
1302+
}
1303+
}
12761304
if (i->neg != 0) {
12771305
*s++ = '-';
12781306
}
@@ -1284,7 +1312,7 @@ uint mpz_as_str_inpl(const mpz_t *i, uint base, char *str) {
12841312
*v = temp;
12851313
}
12861314

1287-
s[0] = 0; // null termination
1315+
*s = '\0'; // null termination
12881316

12891317
return s - str;
12901318
}

py/mpz.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,6 @@ bool mpz_as_int_checked(const mpz_t *z, machine_int_t *value);
7676
mp_float_t mpz_as_float(const mpz_t *z);
7777
#endif
7878
uint mpz_as_str_size(const mpz_t *z, uint base);
79+
uint mpz_as_str_size_formatted(const mpz_t *i, uint base, const char *prefix, char comma);
7980
char *mpz_as_str(const mpz_t *z, uint base);
80-
uint mpz_as_str_inpl(const mpz_t *z, uint base, char *str);
81+
uint mpz_as_str_inpl(const mpz_t *z, uint base, const char *prefix, char base_char, char comma, char *str);

py/objint.c

Lines changed: 120 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <stdlib.h>
22
#include <stdint.h>
33
#include <assert.h>
4+
#include <string.h>
45

56
#include "nlr.h"
67
#include "misc.h"
@@ -53,14 +54,129 @@ STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, co
5354
}
5455
}
5556

56-
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
57-
5857
// Prints the integer object self_in in base 10 through the supplied print
// callback.  The text is produced by mp_obj_int_formatted (no prefix, no
// separator) using a small stack buffer; if that call replaced the buffer
// with an allocated one, it is freed here before returning.
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
59-
if (MP_OBJ_IS_SMALL_INT(self_in)) {
60-
print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in));
58+
// The size of this buffer is rather arbitrary. If it's not large
59+
// enough, a dynamic one will be allocated.
60+
char stack_buf[sizeof(machine_int_t) * 4];
61+
char *buf = stack_buf;
62+
int buf_size = sizeof(stack_buf);
63+
int fmt_size;
64+
65+
// str points at the formatted text inside buf; fmt_size is not used further here
char *str = mp_obj_int_formatted(&buf, &buf_size, &fmt_size, self_in, 10, NULL, '\0', '\0');
66+
print(env, "%s", str);
67+
68+
// buf differs from stack_buf only if mp_obj_int_formatted allocated a larger buffer
if (buf != stack_buf) {
69+
m_free(buf, buf_size);
70+
}
71+
}
72+
73+
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE || MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
74+
75+
// fmt_int_t is the integer type the formatting code in this section works
// with: the long-int implementation type when MICROPY_LONGINT_IMPL is
// MICROPY_LONGINT_IMPL_LONGLONG, otherwise the small-int type.
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
76+
typedef mp_longint_impl_t fmt_int_t;
77+
#else
78+
typedef mp_small_int_t fmt_int_t;
79+
#endif
80+
81+
// floor(log2(n)) indexed by n, for n = 0..32 (entries 0 and 1 are 0).
// Used to estimate how many digits a value needs in a given base.
static const uint log_base2_floor[] = {
    0,                      // 0
    0,                      // 1
    1, 1,                   // 2..3
    2, 2, 2, 2,             // 4..7
    3, 3, 3, 3, 3, 3, 3, 3, // 8..15
    4, 4, 4, 4, 4, 4, 4, 4, // 16..23
    4, 4, 4, 4, 4, 4, 4, 4, // 24..31
    5                       // 32
};
92+
93+
// Computes how many bytes to reserve for formatting any fmt_int_t value in
// the given base, including room for a sign, the terminating null byte, an
// optional prefix and, when comma is non-zero, one separator per three
// digits.  Returns 0 if base is outside 2..32.
uint int_as_str_size_formatted(uint base, const char *prefix, char comma) {
    if (!(base >= 2 && base <= 32)) {
        return 0;
    }

    // digit estimate derived from the bit width of fmt_int_t and floor(log2(base))
    uint digit_count = sizeof(fmt_int_t) * 8 / log_base2_floor[base] + 1;

    // +1 for sign, +1 for null byte
    uint total = digit_count + 2;

    if (comma) {
        total += digit_count / 3;
    }
    if (prefix) {
        total += strlen(prefix);
    }
    return total;
}
63103

104+
// This routine expects you to pass in a buffer and size (in *buf and buf_size).
105+
// If, for some reason, this buffer is too small, then it will allocate a
106+
// buffer and return the allocated buffer and size in *buf and *buf_size. It
107+
// is the callers responsibility to free this allocated buffer.
108+
//
109+
// The resulting formatted string will be returned from this function and the
110+
// formatted size will be in *fmt_size.
111+
char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in,
112+
int base, const char *prefix, char base_char, char comma) {
113+
if (!MP_OBJ_IS_INT(self_in)) {
114+
buf[0] = '\0';
115+
*fmt_size = 0;
116+
return *buf;
117+
}
118+
fmt_int_t num = mp_obj_get_int(self_in);
119+
char sign = '\0';
120+
if (num < 0) {
121+
num = -num;
122+
sign = '-';
123+
}
124+
125+
uint needed_size = int_as_str_size_formatted(base, prefix, comma);
126+
if (needed_size > *buf_size) {
127+
*buf = m_new(char, needed_size);
128+
*buf_size = needed_size;
129+
}
130+
char *str = *buf;
131+
132+
char *b = str + needed_size;
133+
*(--b) = '\0';
134+
char *last_comma = b;
135+
136+
if (num == 0) {
137+
*(--b) = '0';
138+
} else {
139+
do {
140+
int c = num % base;
141+
num /= base;
142+
if (c >= 10) {
143+
c += base_char - 10;
144+
} else {
145+
c += '0';
146+
}
147+
*(--b) = c;
148+
if (comma && num != 0 && b > str && (last_comma - b) == 3) {
149+
*(--b) = comma;
150+
last_comma = b;
151+
}
152+
}
153+
while (b > str && num != 0);
154+
}
155+
if (prefix) {
156+
size_t prefix_len = strlen(prefix);
157+
char *p = b - prefix_len;
158+
if (p > str) {
159+
b = p;
160+
while (*prefix) {
161+
*p++ = *prefix++;
162+
}
163+
}
164+
}
165+
if (sign && b > str) {
166+
*(--b) = sign;
167+
}
168+
*fmt_size = *buf + needed_size - b - 1;
169+
170+
return b;
171+
}
172+
173+
// Reports whether the integer value of self_in is non-negative
// (zero is treated as positive here).
bool mp_obj_int_is_positive(mp_obj_t self_in) {
    const bool is_negative = mp_obj_get_int(self_in) < 0;
    return !is_negative;
}
176+
#endif // LONGLONG or NONE
177+
178+
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
179+
64180
// This is called for operations on SMALL_INT that are not handled by mp_unary_op
65181
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) {
66182
return MP_OBJ_NULL;

py/objint.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ typedef struct _mp_obj_int_t {
88
} mp_obj_int_t;
99

1010
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind);
11+
char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in,
12+
int base, const char *prefix, char base_char, char comma);
13+
bool mp_obj_int_is_positive(mp_obj_t self_in);
1114
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in);
1215
mp_obj_t mp_obj_int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in);
1316
mp_obj_t mp_obj_int_binary_op_extra_cases(int op, mp_obj_t lhs_in, mp_obj_t rhs_in);

py/objint_longlong.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include <stdlib.h>
22
#include <stdint.h>
3+
#include <string.h>
34

45
#include "nlr.h"
56
#include "misc.h"
@@ -21,15 +22,6 @@
2122
#define SUFFIX ""
2223
#endif
2324

24-
// Prints self_in through the supplied print callback: small ints are printed
// with INT_FMT, any other int object is printed from its val field using
// "%lld" followed by SUFFIX.
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
25-
if (MP_OBJ_IS_SMALL_INT(self_in)) {
26-
print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in));
27-
} else {
28-
mp_obj_int_t *self = self_in;
29-
print(env, "%lld" SUFFIX, self->val);
30-
}
31-
}
32-
3325
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) {
3426
mp_obj_int_t *o = o_in;
3527
switch (op) {

py/objint_mpz.c

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,47 @@ STATIC mp_obj_int_t *mp_obj_int_new_mpz(void) {
2222
return o;
2323
}
2424

25-
void mp_obj_int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
25+
// This routine expects you to pass in a buffer and size (in *buf and *buf_size).
26+
// If, for some reason, this buffer is too small, then it will allocate a
27+
// buffer and return the allocated buffer and size in *buf and *buf_size. It
28+
// is the caller's responsibility to free this allocated buffer.
29+
//
30+
// The resulting formatted string will be returned from this function and the
31+
// formatted size will be in *fmt_size.
32+
char *mp_obj_int_formatted(char **buf, int *buf_size, int *fmt_size, mp_obj_t self_in,
33+
int base, const char *prefix, char base_char, char comma) {
34+
mpz_t small_mpz;
35+
mpz_t *mpz;
36+
// digit storage sized from the bit width of mp_small_int_t, rounded up to whole mpz digits
mpz_dig_t small_dig[(sizeof(mp_small_int_t) * 8 + MPZ_DIG_SIZE - 1) / MPZ_DIG_SIZE];
37+
2638
// small ints are wrapped in the local small_mpz; other ints use the object's own mpz
if (MP_OBJ_IS_SMALL_INT(self_in)) {
27-
print(env, INT_FMT, MP_OBJ_SMALL_INT_VALUE(self_in));
39+
mpz_init_fixed_from_int(&small_mpz, small_dig,
40+
sizeof(small_dig) / sizeof(small_dig[0]),
41+
MP_OBJ_SMALL_INT_VALUE(self_in));
42+
mpz = &small_mpz;
2843
} else {
29-
// TODO would rather not allocate memory to print...
3044
mp_obj_int_t *self = self_in;
31-
char *str = mpz_as_str(&self->mpz, 10);
32-
print(env, "%s", str);
33-
m_free(str, 0);
45+
mpz = &self->mpz;
46+
}
47+
48+
uint needed_size = mpz_as_str_size_formatted(mpz, base, prefix, comma);
49+
// NOTE(review): needed_size is unsigned and *buf_size is int, so this
// comparison is carried out in unsigned arithmetic.
if (needed_size > *buf_size) {
50+
*buf = m_new(char, needed_size);
51+
*buf_size = needed_size;
52+
}
53+
char *str = *buf;
54+
55+
// mpz_as_str_inpl returns the string length (not including the null byte)
*fmt_size = mpz_as_str_inpl(mpz, base, prefix, base_char, comma, str);
56+
57+
return str;
58+
}
59+
60+
// Reports whether self_in is non-negative (zero is treated as positive here).
bool mp_obj_int_is_positive(mp_obj_t self_in) {
    if (!MP_OBJ_IS_SMALL_INT(self_in)) {
        // mpz-backed int: the sign is taken from the mpz neg field
        mp_obj_int_t *big = self_in;
        return big->mpz.neg == 0;
    }
    return !(MP_OBJ_SMALL_INT_VALUE(self_in) < 0);
}
3667

3768
mp_obj_t mp_obj_int_unary_op(int op, mp_obj_t o_in) {

0 commit comments

Comments
 (0)