Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Lib/test/test_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,8 @@ def testHypot(self):
# Verify scaling for extremely large values
fourthmax = FLOAT_MAX / 4.0
for n in range(32):
self.assertEqual(hypot(*([fourthmax]*n)), fourthmax * math.sqrt(n))
self.assertTrue(math.isclose(hypot(*([fourthmax]*n)),
Comment thread
rhettinger marked this conversation as resolved.
fourthmax * math.sqrt(n)))

# Verify scaling for extremely small values
for exp in range(32):
Expand Down Expand Up @@ -904,8 +905,8 @@ class T(tuple):
for n in range(32):
p = (fourthmax,) * n
q = (0.0,) * n
self.assertEqual(dist(p, q), fourthmax * math.sqrt(n))
self.assertEqual(dist(q, p), fourthmax * math.sqrt(n))
self.assertTrue(math.isclose(dist(p, q), fourthmax * math.sqrt(n)))
self.assertTrue(math.isclose(dist(q, p), fourthmax * math.sqrt(n)))

# Verify scaling for extremely small values
for exp in range(32):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Minor algorithmic improvement to math.hypot() and math.dist() giving small
gains in speed and accuracy.
35 changes: 33 additions & 2 deletions Modules/mathmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2406,6 +2406,13 @@ math_fmod_impl(PyObject *module, double x, double y)
/*
Given an *n* length *vec* of values and a value *max*, compute:

sqrt(sum((x * scale) ** 2 for x in vec)) / scale

where scale is the first power of two
greater than max.

or compute:

max * sqrt(sum((x / max) ** 2 for x in vec))

The value of the *max* variable must be non-negative and
Expand Down Expand Up @@ -2437,7 +2444,8 @@ fractional digits to be dropped from *csum*.
static inline double
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{
double x, csum = 1.0, oldcsum, frac = 0.0;
double x, csum = 1.0, oldcsum, frac = 0.0, scale;
int max_e;
Py_ssize_t i;

if (Py_IS_INFINITY(max)) {
Expand All @@ -2449,14 +2457,37 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
if (max == 0.0 || n <= 1) {
return max;
}
frexp(max, &max_e);
if (max_e >= -1023) {
Comment thread
rhettinger marked this conversation as resolved.
scale = ldexp(1.0, -max_e);
assert(max * scale >= 0.5);
assert(max * scale < 1.0);
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
x *= scale;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is faster, x *= scale or x = ldexp(x, -max_e)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • The test as written was over-specified. It should have been written this way from the start.

  • The x *= scale is faster than x = ldexp(x, -max_e). The former is a single, fast in-line instruction and the latter is an external library call.

Here's the generated code for the loop:

L284:
    movsd   (%r12,%rax,8), %xmm0
    addq    $1, %rax
    cmpq    %rax, %rbp
    mulsd   %xmm4, %xmm0           <-- x *= scale
    movapd  %xmm0, %xmm1
    mulsd   %xmm0, %xmm1           <-- x *= x
    movapd  %xmm2, %xmm0
    addsd   %xmm1, %xmm2            <-- csum += x
    subsd   %xmm2, %xmm0
    addsd   %xmm1, %xmm0
    addsd   %xmm0, %xmm3
    jg  L284
    subsd   %xmm

x = x*x;
assert(x <= 1.0);
assert(csum >= x);
oldcsum = csum;
csum += x;
frac += (oldcsum - csum) + x;
}
return sqrt(csum - 1.0 + frac) / scale;
}
/* When max_e < -1023, a scale computed given by ldexp(1.0, -max_e)
would be subnormal and break range invariants for max*scale.
Instead, we just divide by *max*.
*/
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
x /= max;
x = x*x;
assert(x <= 1.0);
assert(csum >= x);
oldcsum = csum;
csum += x;
assert(csum >= x);
frac += (oldcsum - csum) + x;
}
return max * sqrt(csum - 1.0 + frac);
Expand Down