Merged revisions 77410,77421,77450-77451 via svnmerge from

mdickinson · mdickinson · commit 81612e877870 · 2010-01-12T23:04:19.000Z
svn+ssh://pythondev@svn.python.org/python/trunk ........ r77410 | mark.dickinson | 2010-01-10 13:06:31 +0000 (Sun, 10 Jan 2010) | 1 line Remove unused BCinfo fields and an unused macro. ........ r77421 | mark.dickinson | 2010-01-11 17:15:13 +0000 (Mon, 11 Jan 2010) | 1 line Change a variable type to avoid signed overflow; replace repeated '19999' constant by a define. ........ r77450 | mark.dickinson | 2010-01-12 22:23:56 +0000 (Tue, 12 Jan 2010) | 4 lines Issue python#7632: Fix a problem with _Py_dg_strtod that could lead to crashes in debug builds, for certain long numeric strings corresponding to subnormal values. ........ r77451 | mark.dickinson | 2010-01-12 22:55:51 +0000 (Tue, 12 Jan 2010) | 2 lines Issue python#7632: Fix a bug in dtoa.c that could lead to incorrectly-rounded results. ........
diff --git a/Lib/test/floating_points.txt b/Lib/test/floating_points.txt
@@ -1019,3 +1019,10 @@
 +43723334984997307E-26
 +10182419849537963E-24
 -93501703572661982E-26
+
+# A value that caused a crash in debug builds for Python >= 2.7, 3.1
+# See http://bugs.python.org/issue7632
+2183167012312112312312.23538020374420446192e-370
+
+# Another value designed to test a corner case of Python's strtod code.
+0.99999999999999999999999999999999999999999e+23
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
@@ -7,6 +7,7 @@
 from math import isinf, isnan, copysign, ldexp
 import operator
 import random, fractions
+import re
 
 INF = float("inf")
 NAN = float("nan")
@@ -20,6 +21,74 @@
 test_dir = os.path.dirname(__file__) or os.curdir
 format_testfile = os.path.join(test_dir, 'formatfloat_testcases.txt')
 
+finite_decimal_parser = re.compile(r"""    # A numeric string consists of:
+    (?P<sign>[-+])?          # an optional sign, followed by
+    (?=\d|\.\d)              # a number with at least one digit
+    (?P<int>\d*)             # having a (possibly empty) integer part
+    (?:\.(?P<frac>\d*))?     # followed by an optional fractional part
+    (?:E(?P<exp>[-+]?\d+))?  # and an optional exponent
+    \Z
+""", re.VERBOSE | re.IGNORECASE | re.UNICODE).match
+
+# Pure Python version of correctly rounded string->float conversion.
+# Avoids any use of floating-point by returning the result as a hex string.
+def strtod(s, mant_dig=53, min_exp = -1021, max_exp = 1024):
+    """Convert a finite decimal string to a hex string representing an
+    IEEE 754 binary64 float.  Return 'inf' or '-inf' on overflow.
+    This function makes no use of floating-point arithmetic at any
+    stage."""
+
+    # parse string into a pair of integers 'a' and 'b' such that
+    # abs(decimal value) = a/b, and a boolean 'negative'.
+    m = finite_decimal_parser(s)
+    if m is None:
+        raise ValueError('invalid numeric string')
+    fraction = m.group('frac') or ''
+    intpart = int(m.group('int') + fraction)
+    exp = int(m.group('exp') or '0') - len(fraction)
+    negative = m.group('sign') == '-'
+    a, b = intpart*10**max(exp, 0), 10**max(0, -exp)
+
+    # quick return for zeros
+    if not a:
+        return '-0x0.0p+0' if negative else '0x0.0p+0'
+
+    # compute exponent e for result; may be one too small in the case
+    # that the rounded value of a/b lies in a different binade from a/b
+    d = a.bit_length() - b.bit_length()
+    d += (a >> d if d >= 0 else a << -d) >= b
+    e = max(d, min_exp) - mant_dig
+
+    # approximate a/b by number of the form q * 2**e; adjust e if necessary
+    a, b = a << max(-e, 0), b << max(e, 0)
+    q, r = divmod(a, b)
+    if 2*r > b or 2*r == b and q & 1:
+        q += 1
+        if q.bit_length() == mant_dig+1:
+            q //= 2
+            e += 1
+
+    # double check that (q, e) has the right form
+    assert q.bit_length() <= mant_dig and e >= min_exp - mant_dig
+    assert q.bit_length() == mant_dig or e == min_exp - mant_dig
+
+    # check for overflow and underflow
+    if e + q.bit_length() > max_exp:
+        return '-inf' if negative else 'inf'
+    if not q:
+        return '-0x0.0p+0' if negative else '0x0.0p+0'
+
+    # for hex representation, shift so # bits after point is a multiple of 4
+    hexdigs = 1 + (mant_dig-2)//4
+    shift = 3 - (mant_dig-2)%4
+    q, e = q << shift, e - shift
+    return '{}0x{:x}.{:0{}x}p{:+d}'.format(
+        '-' if negative else '',
+        q // 16**hexdigs,
+        q % 16**hexdigs,
+        hexdigs,
+        e + 4*hexdigs)
+
 class GeneralFloatCases(unittest.TestCase):
 
     def test_float(self):
@@ -1263,6 +1332,38 @@ def roundtrip(x):
             else:
                 self.identical(x, fromHex(toHex(x)))
 
+class StrtodTestCase(unittest.TestCase):
+    def check_string(self, s):
+        expected = strtod(s)
+        try:
+            fs = float(s)
+        except OverflowError:
+            got = '-inf' if s[0] == '-' else 'inf'
+        else:
+            got = fs.hex()
+        self.assertEqual(expected, got,
+                         "Incorrectly rounded str->float conversion for "
+                         "{}: expected {}, got {}".format(s, expected, got))
+
+    @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short',
+                         "applies only when using short float repr style")
+    def test_bug7632(self):
+        # check a few particular values that gave incorrectly rounded
+        # results with previous versions of dtoa.c
+        test_strings = [
+            '94393431193180696942841837085033647913224148539854e-358',
+            '12579816049008305546974391768996369464963024663104e-357',
+            '17489628565202117263145367596028389348922981857013e-357',
+            '18487398785991994634182916638542680759613590482273e-357',
+            '32002864200581033134358724675198044527469366773928e-358',
+            '73608278998966969345824653500136787876436005957953e-358',
+            '64774478836417299491718435234611299336288082136054e-358',
+            '13704940134126574534878641876947980878824688451169e-357',
+            '46697445774047060960624497964425416610480524760471e-358',
+            ]
+        for s in test_strings:
+            self.check_string(s)
+
 
 def test_main():
     support.run_unittest(
@@ -1275,6 +1376,7 @@ def test_main():
         RoundTestCase,
         InfNanTest,
         HexFloatTestCase,
+        StrtodTestCase,
         )
 
 if __name__ == '__main__':
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -12,6 +12,11 @@ What's New in Python 3.2 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds
+  when parsing certain long numeric strings corresponding to subnormal
+  values.  Also fix a number of bugs in dtoa.c that could lead to
+  incorrectly rounded results when converting strings to floats.
+
 - The __complex__ method is now looked up on the class of instances to make it
   consistent with other special methods.
 
diff --git a/Python/dtoa.c b/Python/dtoa.c
@@ -200,10 +200,11 @@ typedef union { double d; ULong L[2]; } U;
 #define STRTOD_DIGLIM 40
 #endif
 
-#ifdef DIGLIM_DEBUG
-extern int strtod_diglim;
-#else
-#define strtod_diglim STRTOD_DIGLIM
+/* maximum permitted exponent value for strtod; exponents larger than
+   MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP.  MAX_ABS_EXP
+   should fit into an int. */
+#ifndef MAX_ABS_EXP
+#define MAX_ABS_EXP 19999U
 #endif
 
 /* The following definition of Storeinc is appropriate for MIPS processors.
@@ -269,8 +270,7 @@ extern int strtod_diglim;
 typedef struct BCinfo BCinfo;
 struct
 BCinfo {
-    int dp0, dp1, dplen, dsign, e0, inexact;
-    int nd, nd0, rounding, scale, uflchk;
+    int dp0, dp1, dplen, dsign, e0, nd, nd0, scale;
 };
 
 #define FFFFFFFF 0xffffffffUL
@@ -1130,6 +1130,26 @@ quorem(Bigint *b, Bigint *S)
     return q;
 }
 
+/* version of ulp(x) that takes bc.scale into account.
+
+   Assuming that x is finite and nonzero, and x / 2^bc.scale is exactly
+   representable as a double, sulp(x) is equivalent to 2^bc.scale * ulp(x /
+   2^bc.scale). */
+
+static double
+sulp(U *x, BCinfo *bc)
+{
+    U u;
+
+    if (bc->scale && 2*P + 1 - ((word0(x) & Exp_mask) >> Exp_shift) > 0) {
+        /* rv/2^bc->scale is subnormal */
+        word0(&u) = (P+2)*Exp_msk1;
+        word1(&u) = 0;
+        return u.d;
+    }
+    else
+        return ulp(x);
+}
 
 /* return 0 on success, -1 on failure */
 
@@ -1142,7 +1162,7 @@ bigcomp(U *rv, const char *s0, BCinfo *bc)
     dsign = bc->dsign;
     nd = bc->nd;
     nd0 = bc->nd0;
-    p5 = nd + bc->e0 - 1;
+    p5 = nd + bc->e0;
     speccase = 0;
     if (rv->d == 0.) {  /* special case: value near underflow-to-zero */
         /* threshold was rounded to zero */
@@ -1227,17 +1247,21 @@ bigcomp(U *rv, const char *s0, BCinfo *bc)
         }
     }
 
-    /* Now b/d = exactly half-way between the two floating-point values */
-    /* on either side of the input string.  Compute first digit of b/d. */
-
-    if (!(dig = quorem(b,d))) {
-        b = multadd(b, 10, 0);  /* very unlikely */
-        if (b == NULL) {
-            Bfree(d);
-            return -1;
-        }
-        dig = quorem(b,d);
+    /* Now 10*b/d = exactly half-way between the two floating-point values
+       on either side of the input string.  If b >= d, round down. */
+    if (cmp(b, d) >= 0) {
+        dd = -1;
+        goto ret;
     }
+	
+    /* Compute first digit of 10*b/d. */
+    b = multadd(b, 10, 0);
+    if (b == NULL) {
+        Bfree(d);
+        return -1;
+    }
+    dig = quorem(b, d);
+    assert(dig < 10);
 
     /* Compare b/d with s0 */
 
@@ -1285,12 +1309,12 @@ bigcomp(U *rv, const char *s0, BCinfo *bc)
     else if (dd < 0) {
         if (!dsign)     /* does not happen for round-near */
           retlow1:
-            dval(rv) -= ulp(rv);
+            dval(rv) -= sulp(rv, bc);
     }
     else if (dd > 0) {
         if (dsign) {
           rethi1:
-            dval(rv) += ulp(rv);
+            dval(rv) += sulp(rv, bc);
         }
     }
     else {
@@ -1312,13 +1336,12 @@ _Py_dg_strtod(const char *s00, char **se)
     int esign, i, j, k, nd, nd0, nf, nz, nz0, sign;
     const char *s, *s0, *s1;
     double aadj, aadj1;
-    Long L;
     U aadj2, adj, rv, rv0;
-    ULong y, z;
+    ULong y, z, L;
     BCinfo bc;
     Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
 
-    sign = nz0 = nz = bc.dplen = bc.uflchk = 0;
+    sign = nz0 = nz = bc.dplen = 0;
     dval(&rv) = 0.;
     for(s = s00;;s++) switch(*s) {
         case '-':
@@ -1413,11 +1436,11 @@ _Py_dg_strtod(const char *s00, char **se)
                 s1 = s;
                 while((c = *++s) >= '0' && c <= '9')
                     L = 10*L + c - '0';
-                if (s - s1 > 8 || L > 19999)
+                if (s - s1 > 8 || L > MAX_ABS_EXP)
                     /* Avoid confusion from exponents
                      * so large that e might overflow.
                      */
-                    e = 19999; /* safe for 16 bit ints */
+                    e = (int)MAX_ABS_EXP; /* safe for 16 bit ints */
                 else
                     e = (int)L;
                 if (esign)
@@ -1555,11 +1578,11 @@ _Py_dg_strtod(const char *s00, char **se)
     /* Put digits into bd: true value = bd * 10^e */
 
     bc.nd = nd;
-    bc.nd0 = nd0;       /* Only needed if nd > strtod_diglim, but done here */
+    bc.nd0 = nd0;       /* Only needed if nd > STRTOD_DIGLIM, but done here */
                         /* to silence an erroneous warning about bc.nd0 */
                         /* possibly not being initialized. */
-    if (nd > strtod_diglim) {
-        /* ASSERT(strtod_diglim >= 18); 18 == one more than the */
+    if (nd > STRTOD_DIGLIM) {
+        /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */
         /* minimum number of decimal digits to distinguish double values */
         /* in IEEE arithmetic. */
         i = j = 18;
@@ -1767,10 +1790,8 @@ _Py_dg_strtod(const char *s00, char **se)
                             /* accept rv */
                             break;
                         /* rv = smallest denormal */
-                        if (bc.nd >nd) {
-                            bc.uflchk = 1;
+                        if (bc.nd >nd)
                             break;
-                        }
                         goto undfl;
                     }
                 }
@@ -1786,10 +1807,8 @@ _Py_dg_strtod(const char *s00, char **se)
             else {
                 dval(&rv) -= ulp(&rv);
                 if (!dval(&rv)) {
-                    if (bc.nd >nd) {
-                        bc.uflchk = 1;
+                    if (bc.nd >nd)
                         break;
-                    }
                     goto undfl;
                 }
             }
@@ -1801,10 +1820,8 @@ _Py_dg_strtod(const char *s00, char **se)
                 aadj = aadj1 = 1.;
             else if (word1(&rv) || word0(&rv) & Bndry_mask) {
                 if (word1(&rv) == Tiny1 && !word0(&rv)) {
-                    if (bc.nd >nd) {
-                        bc.uflchk = 1;
+                    if (bc.nd >nd)
                         break;
-                    }
                     goto undfl;
                 }
                 aadj = 1.;