From e2b1b385c9bc3d2b081d1e1895849bf48f5e843c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 8 Aug 2020 13:32:20 -0700 Subject: [PATCH 01/10] frexp/ldexp variant --- Modules/mathmodule.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 411c6eb1935fab..fe1ce6612387c0 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2438,6 +2438,7 @@ static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { double x, csum = 1.0, oldcsum, frac = 0.0; + int max_e; Py_ssize_t i; if (Py_IS_INFINITY(max)) { @@ -2449,17 +2450,21 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) if (max == 0.0 || n <= 1) { return max; } + frexp(max, &max_e); for (i=0 ; i < n ; i++) { + int e; + x = vec[i]; assert(Py_IS_FINITE(x) && fabs(x) <= max); - x /= max; + x = frexp(x, &e); + x = ldexp(x, e - max_e); x = x*x; oldcsum = csum; csum += x; assert(csum >= x); frac += (oldcsum - csum) + x; } - return max * sqrt(csum - 1.0 + frac); + return ldexp(sqrt(csum - 1.0 + frac), max_e); } #define NUM_STACK_ELEMS 16 From 7cb895d1bbb8a0a3fa658792fbe740cb2622ebde Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 8 Aug 2020 13:43:57 -0700 Subject: [PATCH 02/10] Multiply by power of two variant --- Modules/mathmodule.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index fe1ce6612387c0..ee900ea3f7c5ef 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2437,7 +2437,7 @@ fractional digits to be dropped from *csum*. static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { - double x, csum = 1.0, oldcsum, frac = 0.0; + double x, csum = 1.0, oldcsum, frac = 0.0, scale; int max_e; Py_ssize_t i; @@ -2451,20 +2451,18 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) return max; } frexp(max, &max_e); + scale = ldexp(1.0, -max_e); for (i=0 ; i < n ; i++) { - int e; - x = vec[i]; assert(Py_IS_FINITE(x) && fabs(x) <= max); - x = frexp(x, &e); - x = ldexp(x, e - max_e); + x *= scale; x = x*x; oldcsum = csum; csum += x; assert(csum >= x); frac += (oldcsum - csum) + x; } - return ldexp(sqrt(csum - 1.0 + frac), max_e); + return sqrt(csum - 1.0 + frac) / scale; } #define NUM_STACK_ELEMS 16 From 2d586b19fcaa15d7a33a9e45f0372ca78229fd9c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 8 Aug 2020 17:23:37 -0700 Subject: [PATCH 03/10] Strengthen assertions --- Modules/mathmodule.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index ee900ea3f7c5ef..273e5533214125 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2452,14 +2452,17 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) } frexp(max, &max_e); scale = ldexp(1.0, -max_e); + assert(max * scale >= 0.5); + assert(max * scale < 1.0); for (i=0 ; i < n ; i++) { x = vec[i]; assert(Py_IS_FINITE(x) && fabs(x) <= max); x *= scale; x = x*x; + assert(x <= 1.0); + assert(csum >= x); oldcsum = csum; csum += x; - assert(csum >= x); frac += (oldcsum - csum) + x; } return sqrt(csum - 1.0 + frac) / scale; From c15ce79ecfc94c53b4ead47aa612aaa7ec38cf02 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 9 Aug 2020 10:58:06 -0700 Subject: [PATCH 04/10] Added notes and commented out assertions to reach a stable resting point --- Lib/test/test_math.py | 4 ++-- Modules/mathmodule.c | 25 +++++++++++++++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index e06b1e6a5b9b7c..b00256dd2a98e1 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -904,8 +904,8 @@ class T(tuple): for n in range(32): p = (fourthmax,) * n q = (0.0,) * n - self.assertEqual(dist(p, q), fourthmax * math.sqrt(n)) - self.assertEqual(dist(q, p), fourthmax * math.sqrt(n)) + self.assertTrue(math.isclose(dist(p, q), fourthmax * math.sqrt(n))) + self.assertTrue(math.isclose(dist(q, p), fourthmax * math.sqrt(n))) # Verify scaling for extremely small values for exp in range(32): diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 273e5533214125..5d8638db13ddf8 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -56,6 +56,7 @@ raised for division by zero and mod by zero. #include "pycore_bitutils.h" // _Py_bit_length() #include "pycore_dtoa.h" #include "_math.h" +#include #include "clinic/mathmodule.c.h" @@ -2434,6 +2435,21 @@ fractional digits to be dropped from *csum*. */ +/* + def scaled(x): + 'View scaling calculation' + # Limits for ldexp(1, 1023) and at 1024 goes to inf + # ldexp(1.0, -1022) <- normal and at -1023 goes subnormal + # ldexp(1.0, -1074) <-- subnormal and at -1075 goes to zero + # Failed example: + # x = 2.78134e-309 --> (0.9999991647435902, -1025) + # 1.0 / x --> inf + m, e = frexp(x) + # e = max(e, 1023) + scale = ldexp(1.0, -e) + return x*scale, scale.hex(), m, e + */ + static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { @@ -2452,15 +2468,16 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) } frexp(max, &max_e); scale = ldexp(1.0, -max_e); - assert(max * scale >= 0.5); - assert(max * scale < 1.0); + // fprintf(stderr, "<>\n", max, max_e, scale); + // assert(max * scale >= 0.5); + // assert(max * scale < 1.0); for (i=0 ; i < n ; i++) { x = vec[i]; assert(Py_IS_FINITE(x) && fabs(x) <= max); x *= scale; x = x*x; - assert(x <= 1.0); - assert(csum >= x); + // assert(x <= 1.0); + // assert(csum >= x); oldcsum = csum; csum += x; frac += (oldcsum - csum) + x; From c853597f555db0b85fb28b4161615d9361c24ab4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 9 Aug 2020 11:24:50 -0700 Subject: [PATCH 05/10] Two code paths. All tests pass. --- Modules/mathmodule.c | 56 +++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 5d8638db13ddf8..4bf383fda0f588 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2450,6 +2450,13 @@ fractional digits to be dropped from *csum*. return x*scale, scale.hex(), m, e */ +/* + A way forward: + Use scaling for a broad range, perhaps -1000 to 1000 + and have a separate code path with division for the rest. + */ + + static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { @@ -2467,22 +2474,39 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) return max; } frexp(max, &max_e); - scale = ldexp(1.0, -max_e); - // fprintf(stderr, "<>\n", max, max_e, scale); - // assert(max * scale >= 0.5); - // assert(max * scale < 1.0); - for (i=0 ; i < n ; i++) { - x = vec[i]; - assert(Py_IS_FINITE(x) && fabs(x) <= max); - x *= scale; - x = x*x; - // assert(x <= 1.0); - // assert(csum >= x); - oldcsum = csum; - csum += x; - frac += (oldcsum - csum) + x; - } - return sqrt(csum - 1.0 + frac) / scale; + if (-1000 <= max_e && max_e <= 1000) { + scale = ldexp(1.0, -max_e); + // fprintf(stderr, "<>\n", max, max_e, scale); + assert(max * scale >= 0.5); + assert(max * scale < 1.0); + for (i=0 ; i < n ; i++) { + x = vec[i]; + assert(Py_IS_FINITE(x) && fabs(x) <= max); + x *= scale; + x = x*x; + assert(x <= 1.0); + assert(csum >= x); + oldcsum = csum; + csum += x; + frac += (oldcsum - csum) + x; + } + return sqrt(csum - 1.0 + frac) / scale; + } + else + { + for (i=0 ; i < n ; i++) { + x = vec[i]; + assert(Py_IS_FINITE(x) && fabs(x) <= max); + x /= max; + x = x*x; + assert(x <= 1.0); + assert(csum >= x); + oldcsum = csum; + csum += x; + frac += (oldcsum - csum) + x; + } + return max * sqrt(csum - 1.0 + frac); + } } #define NUM_STACK_ELEMS 16 From 03ec58b8ef2589b808bf4e5431d0fb4364e397ba Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 9 Aug 2020 17:09:18 -0700 Subject: [PATCH 06/10] Clean-up comments and ranges --- Modules/mathmodule.c | 65 ++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 4bf383fda0f588..61373627f67f09 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2407,6 +2407,13 @@ math_fmod_impl(PyObject *module, double x, double y) /* Given an *n* length *vec* of values and a value *max*, compute: + sqrt(sum((x * scale) ** 2 for x in vec)) / scale + + where scale is the first power of two + greater than max. + +or compute: + max * sqrt(sum((x / max) ** 2 for x in vec)) The value of the *max* variable must be non-negative and @@ -2435,28 +2442,6 @@ fractional digits to be dropped from *csum*. */ -/* - def scaled(x): - 'View scaling calculation' - # Limits for ldexp(1, 1023) and at 1024 goes to inf - # ldexp(1.0, -1022) <- normal and at -1023 goes subnormal - # ldexp(1.0, -1074) <-- subnormal and at -1075 goes to zero - # Failed example: - # x = 2.78134e-309 --> (0.9999991647435902, -1025) - # 1.0 / x --> inf - m, e = frexp(x) - # e = max(e, 1023) - scale = ldexp(1.0, -e) - return x*scale, scale.hex(), m, e - */ - -/* - A way forward: - Use scaling for a broad range, perhaps -1000 to 1000 - and have a separate code path with division for the rest. - */ - - static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { @@ -2474,9 +2459,13 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) return max; } frexp(max, &max_e); - if (-1000 <= max_e && max_e <= 1000) { + if (-1020 <= max_e && max_e <= 1020) { + /* Stay away from extreme values. + ldexp(1.0, -1075) returns zero. + ldexp(1.0, -1023) is subnormal. + ldexp(1.0, 1024) gives Inf. + */ scale = ldexp(1.0, -max_e); - // fprintf(stderr, "<>\n", max, max_e, scale); assert(max * scale >= 0.5); assert(max * scale < 1.0); for (i=0 ; i < n ; i++) { @@ -2492,21 +2481,19 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) } return sqrt(csum - 1.0 + frac) / scale; } - else - { - for (i=0 ; i < n ; i++) { - x = vec[i]; - assert(Py_IS_FINITE(x) && fabs(x) <= max); - x /= max; - x = x*x; - assert(x <= 1.0); - assert(csum >= x); - oldcsum = csum; - csum += x; - frac += (oldcsum - csum) + x; - } - return max * sqrt(csum - 1.0 + frac); - } + /* Near extreme values, just divide by *max*. */ + for (i=0 ; i < n ; i++) { + x = vec[i]; + assert(Py_IS_FINITE(x) && fabs(x) <= max); + x /= max; + x = x*x; + assert(x <= 1.0); + assert(csum >= x); + oldcsum = csum; + csum += x; + frac += (oldcsum - csum) + x; + } + return max * sqrt(csum - 1.0 + frac); } #define NUM_STACK_ELEMS 16 From ebf6a31812477ccbc83d5183d28f1cda826561e5 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 9 Aug 2020 17:14:21 -0700 Subject: [PATCH 07/10] Remove debugging code --- Modules/mathmodule.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 61373627f67f09..c6939da71028e9 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -56,7 +56,6 @@ raised for division by zero and mod by zero. #include "pycore_bitutils.h" // _Py_bit_length() #include "pycore_dtoa.h" #include "_math.h" -#include #include "clinic/mathmodule.c.h" From 4742ef8aaac225f52e682e04a30882716cad14a9 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 9 Aug 2020 17:28:01 -0700 Subject: [PATCH 08/10] Clean-up comments and over-specified tests --- Lib/test/test_math.py | 3 ++- Modules/mathmodule.c | 12 +++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index b00256dd2a98e1..4d62eb1b119930 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -795,7 +795,8 @@ def testHypot(self): # Verify scaling for extremely large values fourthmax = FLOAT_MAX / 4.0 for n in range(32): - self.assertEqual(hypot(*([fourthmax]*n)), fourthmax * math.sqrt(n)) + self.assertTrue(math.isclose(hypot(*([fourthmax]*n)), + fourthmax * math.sqrt(n))) # Verify scaling for extremely small values for exp in range(32): diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index c6939da71028e9..d0621f59dfbbf1 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2458,12 +2458,7 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) return max; } frexp(max, &max_e); - if (-1020 <= max_e && max_e <= 1020) { - /* Stay away from extreme values. - ldexp(1.0, -1075) returns zero. - ldexp(1.0, -1023) is subnormal. - ldexp(1.0, 1024) gives Inf. - */ + if (max_e >= -1023) { scale = ldexp(1.0, -max_e); assert(max * scale >= 0.5); assert(max * scale < 1.0); @@ -2480,7 +2475,10 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) } return sqrt(csum - 1.0 + frac) / scale; } - /* Near extreme values, just divide by *max*. */ + /* When max_e < -1023, a scale computed given by ldexp(1.0, -max_e) + would be subnormal and break range invariants for max*scale. + Instead, we just divide by *max*. + */ for (i=0 ; i < n ; i++) { x = vec[i]; assert(Py_IS_FINITE(x) && fabs(x) <= max); From 5e9f1a9cbf5a83188b8d866be60e9671be9989b2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 9 Aug 2020 18:16:14 -0700 Subject: [PATCH 09/10] Add blurb --- .../next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst diff --git a/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst b/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst new file mode 100644 index 00000000000000..cfb9f98c376a02 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst @@ -0,0 +1,2 @@ +Minor algorithmic improvement to math.hypot() and math.dist() giving small +gains in speed and accuracy. From 5c6478f2f88f0cb6ccd65f0fe17f25ffbf059a12 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 15 Aug 2020 10:53:38 -0700 Subject: [PATCH 10/10] Improve the comments. --- Modules/mathmodule.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index d0621f59dfbbf1..489802cc367450 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2432,13 +2432,18 @@ The *csum* variable tracks the cumulative sum and *frac* tracks the cumulative fractional errors at each step. Since this variant assumes that |csum| >= |x| at each step, we establish the precondition by starting the accumulation from 1.0 which -represents the largest possible value of (x/max)**2. +represents the largest possible value of (x*scale)**2 or (x/max)**2. After the loop is finished, the initial 1.0 is subtracted out for a net zero effect on the final sum. Since *csum* will be greater than 1.0, the subtraction of 1.0 will not cause fractional digits to be dropped from *csum*. +To get the full benefit from compensated summation, the +largest addend should be in the range: 0.5 <= x <= 1.0. +Accordingly, scaling or division by *max* should not be skipped +even if not otherwise needed to prevent overflow or loss of precision. + */ static inline double @@ -2475,9 +2480,8 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) } return sqrt(csum - 1.0 + frac) / scale; } - /* When max_e < -1023, a scale computed given by ldexp(1.0, -max_e) - would be subnormal and break range invariants for max*scale. - Instead, we just divide by *max*. + /* When max_e < -1023, ldexp(1.0, -max_e) overflows. + So instead of multiplying by a scale, we just divide by *max*. */ for (i=0 ; i < n ; i++) { x = vec[i];