Skip to content

Commit bd16edd

Browse files
committed
Refactor to remove duplicated nan/inf parsing code in
pystrtod.c, floatobject.c and dtoa.c.
1 parent 4db6ff6 commit bd16edd

5 files changed

Lines changed: 85 additions & 125 deletions

File tree

Include/pystrtod.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
2121
int flags,
2222
int *type);
2323

24+
PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr);
25+
2426

2527
/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
2628
#define Py_DTSF_SIGN 0x01 /* always add the sign */

Lib/test/test_float.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,11 @@ def test_inf_from_str(self):
532532
self.assertRaises(ValueError, float, "-INFI")
533533
self.assertRaises(ValueError, float, "infinitys")
534534

535+
self.assertRaises(ValueError, float, "++Inf")
536+
self.assertRaises(ValueError, float, "-+inf")
537+
self.assertRaises(ValueError, float, "+-infinity")
538+
self.assertRaises(ValueError, float, "--Infinity")
539+
535540
def test_inf_as_str(self):
536541
self.assertEqual(repr(1e300 * 1e300), "inf")
537542
self.assertEqual(repr(-1e300 * 1e300), "-inf")
@@ -563,6 +568,11 @@ def test_nan_from_str(self):
563568
self.assertRaises(ValueError, float, "+na")
564569
self.assertRaises(ValueError, float, "-na")
565570

571+
self.assertRaises(ValueError, float, "++nan")
572+
self.assertRaises(ValueError, float, "-+NAN")
573+
self.assertRaises(ValueError, float, "+-NaN")
574+
self.assertRaises(ValueError, float, "--nAn")
575+
566576
def test_nan_as_str(self):
567577
self.assertEqual(repr(1e300 * 1e300 * 0), "nan")
568578
self.assertEqual(repr(-1e300 * 1e300 * 0), "nan")

Objects/floatobject.c

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,20 +1157,6 @@ Return a hexadecimal representation of a floating-point number.\n\
11571157
>>> 3.14159.hex()\n\
11581158
'0x1.921f9f01b866ep+1'");
11591159

1160-
/* Case-insensitive locale-independent string match used for nan and inf
1161-
detection. t should be lower-case and null-terminated. Return a nonzero
1162-
result if the first strlen(t) characters of s match t and 0 otherwise. */
1163-
1164-
static int
1165-
case_insensitive_match(const char *s, const char *t)
1166-
{
1167-
while(*t && Py_TOLOWER(*s) == *t) {
1168-
s++;
1169-
t++;
1170-
}
1171-
return *t ? 0 : 1;
1172-
}
1173-
11741160
/* Convert a hexadecimal string to a float. */
11751161

11761162
static PyObject *
@@ -1180,7 +1166,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
11801166
double x;
11811167
long exp, top_exp, lsb, key_digit;
11821168
char *s, *coeff_start, *s_store, *coeff_end, *exp_start, *s_end;
1183-
int half_eps, digit, round_up, sign=1;
1169+
int half_eps, digit, round_up, negate=0;
11841170
Py_ssize_t length, ndigits, fdigits, i;
11851171

11861172
/*
@@ -1237,33 +1223,24 @@ float_fromhex(PyObject *cls, PyObject *arg)
12371223
* Parse the string *
12381224
********************/
12391225

1240-
/* leading whitespace and optional sign */
1226+
/* leading whitespace */
12411227
while (Py_ISSPACE(*s))
12421228
s++;
1243-
if (*s == '-') {
1244-
s++;
1245-
sign = -1;
1246-
}
1247-
else if (*s == '+')
1248-
s++;
12491229

12501230
/* infinities and nans */
1251-
if (*s == 'i' || *s == 'I') {
1252-
if (!case_insensitive_match(s+1, "nf"))
1253-
goto parse_error;
1254-
s += 3;
1255-
x = Py_HUGE_VAL;
1256-
if (case_insensitive_match(s, "inity"))
1257-
s += 5;
1231+
x = _Py_parse_inf_or_nan(s, &coeff_end);
1232+
if (coeff_end != s) {
1233+
s = coeff_end;
12581234
goto finished;
12591235
}
1260-
if (*s == 'n' || *s == 'N') {
1261-
if (!case_insensitive_match(s+1, "an"))
1262-
goto parse_error;
1263-
s += 3;
1264-
x = Py_NAN;
1265-
goto finished;
1236+
1237+
/* optional sign */
1238+
if (*s == '-') {
1239+
s++;
1240+
negate = 1;
12661241
}
1242+
else if (*s == '+')
1243+
s++;
12671244

12681245
/* [0x] */
12691246
s_store = s;
@@ -1400,7 +1377,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
14001377
s++;
14011378
if (s != s_end)
14021379
goto parse_error;
1403-
result_as_float = Py_BuildValue("(d)", sign * x);
1380+
result_as_float = Py_BuildValue("(d)", negate ? -x : x);
14041381
if (result_as_float == NULL)
14051382
return NULL;
14061383
result = PyObject_CallObject(cls, result_as_float);

Python/dtoa.c

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -264,15 +264,6 @@ extern int strtod_diglim;
264264
#define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
265265
#define Big1 0xffffffff
266266

267-
#ifndef NAN_WORD0
268-
#define NAN_WORD0 0x7ff80000
269-
#endif
270-
271-
#ifndef NAN_WORD1
272-
#define NAN_WORD1 0
273-
#endif
274-
275-
276267
/* struct BCinfo is used to pass information from _Py_dg_strtod to bigcomp */
277268

278269
typedef struct BCinfo BCinfo;
@@ -1026,25 +1017,6 @@ static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
10261017
#define Scale_Bit 0x10
10271018
#define n_bigtens 5
10281019

1029-
/* case insensitive string match, for recognising 'inf[inity]' and
1030-
'nan' strings. */
1031-
1032-
static int
1033-
match(const char **sp, char *t)
1034-
{
1035-
int c, d;
1036-
const char *s = *sp;
1037-
1038-
while((d = *t++)) {
1039-
if ((c = *++s) >= 'A' && c <= 'Z')
1040-
c += 'a' - 'A';
1041-
if (c != d)
1042-
return 0;
1043-
}
1044-
*sp = s + 1;
1045-
return 1;
1046-
}
1047-
10481020
#define ULbits 32
10491021
#define kshift 5
10501022
#define kmask 31
@@ -1459,28 +1431,6 @@ _Py_dg_strtod(const char *s00, char **se)
14591431
}
14601432
if (!nd) {
14611433
if (!nz && !nz0) {
1462-
/* Check for Nan and Infinity */
1463-
if (!bc.dplen)
1464-
switch(c) {
1465-
case 'i':
1466-
case 'I':
1467-
if (match(&s,"nf")) {
1468-
--s;
1469-
if (!match(&s,"inity"))
1470-
++s;
1471-
word0(&rv) = 0x7ff00000;
1472-
word1(&rv) = 0;
1473-
goto ret;
1474-
}
1475-
break;
1476-
case 'n':
1477-
case 'N':
1478-
if (match(&s, "an")) {
1479-
word0(&rv) = NAN_WORD0;
1480-
word1(&rv) = NAN_WORD1;
1481-
goto ret;
1482-
}
1483-
}
14841434
ret0:
14851435
s = s00;
14861436
sign = 0;

Python/pystrtod.c

Lines changed: 60 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,57 @@
33
#include <Python.h>
44
#include <locale.h>
55

6+
/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
7+
"infinity", with an optional leading sign of "+" or "-". On success,
8+
return the NaN or Infinity as a double and set *endptr to point just beyond
9+
the successfully parsed portion of the string. On failure, return -1.0 and
10+
set *endptr to point to the start of the string. */
11+
12+
static int
13+
case_insensitive_match(const char *s, const char *t)
14+
{
15+
while(*t && Py_TOLOWER(*s) == *t) {
16+
s++;
17+
t++;
18+
}
19+
return *t ? 0 : 1;
20+
}
21+
22+
double
23+
_Py_parse_inf_or_nan(const char *p, char **endptr)
24+
{
25+
double retval;
26+
const char *s;
27+
int negate = 0;
28+
29+
s = p;
30+
if (*s == '-') {
31+
negate = 1;
32+
s++;
33+
}
34+
else if (*s == '+') {
35+
s++;
36+
}
37+
if (case_insensitive_match(s, "inf")) {
38+
s += 3;
39+
if (case_insensitive_match(s, "inity"))
40+
s += 5;
41+
retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
42+
}
43+
#ifdef Py_NAN
44+
else if (case_insensitive_match(s, "nan")) {
45+
s += 3;
46+
retval = negate ? -Py_NAN : Py_NAN;
47+
}
48+
#endif
49+
else {
50+
s = p;
51+
retval = -1.0;
52+
}
53+
*endptr = (char *)s;
54+
return retval;
55+
}
56+
657
/**
758
* PyOS_ascii_strtod:
859
* @nptr: the string to convert to a numeric value.
@@ -49,6 +100,10 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
49100
result = _Py_dg_strtod(nptr, endptr);
50101
_Py_SET_53BIT_PRECISION_END;
51102

103+
if (*endptr == nptr)
104+
/* string might represent an inf or nan */
105+
result = _Py_parse_inf_or_nan(nptr, endptr);
106+
52107
return result;
53108

54109
}
@@ -63,19 +118,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
63118
correctly rounded results.
64119
*/
65120

66-
/* Case-insensitive string match used for nan and inf detection; t should be
67-
lower-case. Returns 1 for a successful match, 0 otherwise. */
68-
69-
static int
70-
case_insensitive_match(const char *s, const char *t)
71-
{
72-
while(*t && Py_TOLOWER(*s) == *t) {
73-
s++;
74-
t++;
75-
}
76-
return *t ? 0 : 1;
77-
}
78-
79121
double
80122
_PyOS_ascii_strtod(const char *nptr, char **endptr)
81123
{
@@ -101,6 +143,11 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
101143

102144
decimal_point_pos = NULL;
103145

146+
/* Parse infinities and nans */
147+
val = _Py_parse_inf_or_nan(nptr, endptr);
148+
if (*endptr != nptr)
149+
return val;
150+
104151
/* Set errno to zero, so that we can distinguish zero results
105152
and underflows */
106153
errno = 0;
@@ -118,31 +165,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
118165
p++;
119166
}
120167

121-
/* Parse infinities and nans */
122-
if (*p == 'i' || *p == 'I') {
123-
if (case_insensitive_match(p+1, "nf")) {
124-
val = Py_HUGE_VAL;
125-
if (case_insensitive_match(p+3, "inity"))
126-
fail_pos = (char *)p+8;
127-
else
128-
fail_pos = (char *)p+3;
129-
goto got_val;
130-
}
131-
else
132-
goto invalid_string;
133-
}
134-
#ifdef Py_NAN
135-
if (*p == 'n' || *p == 'N') {
136-
if (case_insensitive_match(p+1, "an")) {
137-
val = Py_NAN;
138-
fail_pos = (char *)p+3;
139-
goto got_val;
140-
}
141-
else
142-
goto invalid_string;
143-
}
144-
#endif
145-
146168
/* Some platform strtods accept hex floats; Python shouldn't (at the
147169
moment), so we check explicitly for strings starting with '0x'. */
148170
if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
@@ -231,7 +253,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
231253
if (fail_pos == digits_pos)
232254
goto invalid_string;
233255

234-
got_val:
235256
if (negate && fail_pos != nptr)
236257
val = -val;
237258
*endptr = fail_pos;

0 commit comments

Comments
 (0)