-
-
Notifications
You must be signed in to change notification settings - Fork 34.5k
bpo-41513: Improve speed and accuracy of math.hypot() #21803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
e2b1b38
7cb895d
2d586b1
c15ce79
c853597
03ec58b
ebf6a31
4742ef8
5e9f1a9
5c6478f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Minor algorithmic improvement to math.hypot() and math.dist() giving small | ||
| gains in speed and accuracy. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2406,6 +2406,13 @@ math_fmod_impl(PyObject *module, double x, double y) | |
| /* | ||
| Given an *n* length *vec* of values and a value *max*, compute: | ||
|
|
||
| sqrt(sum((x * scale) ** 2 for x in vec)) / scale | ||
|
|
||
| where scale is the first power of two | ||
| greater than max. | ||
|
|
||
| or compute: | ||
|
|
||
| max * sqrt(sum((x / max) ** 2 for x in vec)) | ||
|
|
||
| The value of the *max* variable must be non-negative and | ||
|
|
@@ -2437,7 +2444,8 @@ fractional digits to be dropped from *csum*. | |
| static inline double | ||
| vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) | ||
| { | ||
| double x, csum = 1.0, oldcsum, frac = 0.0; | ||
| double x, csum = 1.0, oldcsum, frac = 0.0, scale; | ||
| int max_e; | ||
| Py_ssize_t i; | ||
|
|
||
| if (Py_IS_INFINITY(max)) { | ||
|
|
@@ -2449,14 +2457,37 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) | |
| if (max == 0.0 || n <= 1) { | ||
| return max; | ||
| } | ||
| frexp(max, &max_e); | ||
| if (max_e >= -1023) { | ||
|
rhettinger marked this conversation as resolved.
|
||
| scale = ldexp(1.0, -max_e); | ||
| assert(max * scale >= 0.5); | ||
| assert(max * scale < 1.0); | ||
| for (i=0 ; i < n ; i++) { | ||
| x = vec[i]; | ||
| assert(Py_IS_FINITE(x) && fabs(x) <= max); | ||
| x *= scale; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is faster,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Here's the generated code for the loop: |
||
| x = x*x; | ||
| assert(x <= 1.0); | ||
| assert(csum >= x); | ||
| oldcsum = csum; | ||
| csum += x; | ||
| frac += (oldcsum - csum) + x; | ||
| } | ||
| return sqrt(csum - 1.0 + frac) / scale; | ||
| } | ||
| /* When max_e < -1023, a scale computed given by ldexp(1.0, -max_e) | ||
| would be subnormal and break range invariants for max*scale. | ||
| Instead, we just divide by *max*. | ||
| */ | ||
| for (i=0 ; i < n ; i++) { | ||
| x = vec[i]; | ||
| assert(Py_IS_FINITE(x) && fabs(x) <= max); | ||
| x /= max; | ||
| x = x*x; | ||
| assert(x <= 1.0); | ||
| assert(csum >= x); | ||
| oldcsum = csum; | ||
| csum += x; | ||
| assert(csum >= x); | ||
| frac += (oldcsum - csum) + x; | ||
| } | ||
| return max * sqrt(csum - 1.0 + frac); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.