From e12470c525029f21864e183e6b081013a9924c89 Mon Sep 17 00:00:00 2001 From: colinleach Date: Wed, 3 Jan 2024 16:17:18 -0700 Subject: [PATCH 1/4] [Pythagorean Triplet]: Approaches Draft --- .../.approaches/config.json | 33 + .../.approaches/cubic/content.md | 20 + .../.approaches/cubic/snippet.txt | 8 + .../.approaches/introduction.md | 148 ++ .../.approaches/linear/content.md | 47 + .../.approaches/linear/snippet.txt | 8 + .../.approaches/quadratic/content.md | 45 + .../.approaches/quadratic/snippet.txt | 8 + .../pythagorean-triplet/.articles/config.json | 12 + .../.articles/performance/code/Benchmark.py | 124 + .../performance/code/create_plots.py | 42 + .../performance/code/fit_gradients.py | 38 + .../performance/code/run_times.feather | Bin 0 -> 5426 bytes .../performance/code/transposed_logs.feather | Bin 0 -> 4010 bytes .../.articles/performance/content.md | 104 + .../.articles/performance/slopes.svg | 1516 +++++++++++++ .../.articles/performance/snippet.md | 8 + .../.articles/performance/timeit_bar_plot.svg | 1986 +++++++++++++++++ 18 files changed, 4147 insertions(+) create mode 100644 exercises/practice/pythagorean-triplet/.approaches/config.json create mode 100644 exercises/practice/pythagorean-triplet/.approaches/cubic/content.md create mode 100644 exercises/practice/pythagorean-triplet/.approaches/cubic/snippet.txt create mode 100644 exercises/practice/pythagorean-triplet/.approaches/introduction.md create mode 100644 exercises/practice/pythagorean-triplet/.approaches/linear/content.md create mode 100644 exercises/practice/pythagorean-triplet/.approaches/linear/snippet.txt create mode 100644 exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md create mode 100644 exercises/practice/pythagorean-triplet/.approaches/quadratic/snippet.txt create mode 100644 exercises/practice/pythagorean-triplet/.articles/config.json create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/code/Benchmark.py create mode 100644 
exercises/practice/pythagorean-triplet/.articles/performance/code/create_plots.py create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/code/fit_gradients.py create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/code/run_times.feather create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/code/transposed_logs.feather create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/content.md create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/slopes.svg create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/snippet.md create mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg diff --git a/exercises/practice/pythagorean-triplet/.approaches/config.json b/exercises/practice/pythagorean-triplet/.approaches/config.json new file mode 100644 index 00000000000..b9514f490aa --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/config.json @@ -0,0 +1,33 @@ +{ + "introduction": { + "authors": ["colinleach", + "BethanyG"], + "contributors": [] + }, + "approaches": [ + { + "uuid": "4d4b4e6c-a026-4ed3-8e07-6b9edfabe713", + "slug": "cubic", + "title": "Cubic", + "blurb": "Cubic-time approach with loops nested 3 deep.", + "authors": ["colinleach", + "BethanyG"] + }, + { + "uuid": "385c0ace-117f-4480-8dfc-0632d8893e60", + "slug": "quadratic", + "title": "Quadratic", + "blurb": "Quadratic-time approaches with doubly-nested loops.", + "authors": ["colinleach", + "BethanyG"] + }, + { + "uuid": "1addb672-6064-4a07-acad-4a08f92d9e43", + "slug": "linear", + "title": "Linear Loop", + "blurb": "Linear-time approaches with no nesting of loops.", + "authors": ["colinleach", + "BethanyG"] + } + ] +} diff --git a/exercises/practice/pythagorean-triplet/.approaches/cubic/content.md b/exercises/practice/pythagorean-triplet/.approaches/cubic/content.md new file mode 100644 index 
00000000000..f0baaf7c8bc --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/cubic/content.md @@ -0,0 +1,20 @@ +# Cubic-time triply-nested loops + +```python +def triplets_with_sum(n): + triplets = [] + for a in range(1, n + 1): + for b in range(a + 1, n + 1): + for c in range(b + 1, n + 1): + if a**2 + b**2 == c**2 and a + b + c == n: + triplets.append([a, b, c]) + return triplets +``` + +The strategy in this case is to scan through all three variables and test them in the innermost loop. + +This gives code that is simple, clear, and so slow as to be useless for all but the smallest values of `n`. + +We could tighten up the bounds on loop variables: for example, `a` is the smallest integer of a triplet that sums to `n`, so inevitably `a < n //3`. + +However, this is not nearly enough to rescue an inappropriate algorithm. diff --git a/exercises/practice/pythagorean-triplet/.approaches/cubic/snippet.txt b/exercises/practice/pythagorean-triplet/.approaches/cubic/snippet.txt new file mode 100644 index 00000000000..eb2b55c687d --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/cubic/snippet.txt @@ -0,0 +1,8 @@ +def triplets_with_sum(n): + triplets = [] + for a in range(1, n + 1): + for b in range(a + 1, n + 1): + for c in range(b + 1, n + 1): + if a**2 + b**2 == c**2 and a + b + c == n: + triplets.append([a, b, c]) + return triplets \ No newline at end of file diff --git a/exercises/practice/pythagorean-triplet/.approaches/introduction.md b/exercises/practice/pythagorean-triplet/.approaches/introduction.md new file mode 100644 index 00000000000..8e6202a90a7 --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/introduction.md @@ -0,0 +1,148 @@ +# Introduction + +The main challenge in solving the Pythagorean Triplet exercise is to use a fast enough algorithm. +The problem can easily become very large, and simple solutions may time out on the test runner. 
+ +There are three reasonably common solutions to this problem +- a [cubic time][approaches-cubic] solution, which is fairly obvious +- a [quadratic time][approaches-quadratic] solution, which is reasonably easy to find +- a [linear time][approaches-linear] solution, requiring some deeper understanding of the mathematics + +If those terms are unclear to you, you might like to read about [time complexity][time-complexity]. + +The basic idea is to study how algorithms scale (in run time, memory usage, or whatever) as the input parameter becomes very large. +In this document we will focus on run time, which is critical for this exercise. + +## General guidance + +The goal of the Pythagorean Triplets exercise is to find combinations of three numbers `[a, b, c]` satisfying a set of conditions: +1. `a < b < c` +2. `a**2 + b**2 == c**2` +3. `a + b + c == n`, where `n` is supplied as a parameter to the function. + +For a given `n`, the solution should include all valid triplets as a list of lists. + +## Approach: Cubic time solution with 3-deep nested loops + +```python +def triplets_with_sum(n): + triplets = [] + for a in range(1, n + 1): + for b in range(a + 1, n + 1): + for c in range(b + 1, n + 1): + if a**2 + b**2 == c**2 and a + b + c == n: + triplets.append([a, b, c]) + return triplets +``` + +This is the most naive approach, scanning through all possible integers `<= n` that satisfy `a < b < c`. + +***Don't do this!*** + +It works for small values of `n`, but becomes impossibly slow as `n` grows larger. +The test suite will not complete within any reasonable time limit. + +## Approach: Quadratic time solution with 2-deep nested loops + +```python +def triplets_with_sum(n): + triplets = [] + for a in range(1, n + 1): + for b in range(a + 1, n + 1): + c = n - a - b + if a ** 2 + b ** 2 == c ** 2: + triplets.append([a, b, c]) + return triplets +``` + +Given the constraint that `a + b + c == n`, we can eliminate the innermost loop and calculate `c` directly. 
+This gives a substantial speed advantage, allowing the tests to run to completion in a reasonable time, locally. + +However, the Exercism test runner will still time out. + +Examining the code, it is clear that the upper bounds on loop variables are far too generous. + +The solution below tightens the bounds and pre-calculates `c * c` in the outer loop. +This gives about a 4-fold speedup, but still times out on the test runner. + +```python +def triplets_with_sum(n): + result = [] + for c in range(5, n - 1): + c_sq = c * c + for a in range(3, (n - c + 1) // 2): + b = n - a - c + if a < b < c and a * a + b * b == c_sq: + result.append([a, b, c]) + return result +``` + +If a quadratic-time algorithm were the best available option, there are other ways to squeeze out small performance gains. + +For bigger problems outside Exercism, there are third-party packages such as `numpy` or `numba` which replace Python +loops (versatile but relatively slow) with routines written in C/C++, perhaps with use of the GPU. +Runtime is still proportional to `n**2`, but the proportionality constant may be much smaller. + +Fortunately for the present discussion, mathematicians have been studying Pythagorean Triplets for centuries: see [Wikipedia][wiki-pythag], [Wolfram MathWorld][wolfram-pythag], or many other sources. + +There are much faster algorithms, at the expense of reduced readability. + +## Linear time solutions + +```python +from math import sqrt + +def triplets_with_sum(n): + N = float(n) + triplets = [] + for c in range(int(N / 2) - 1, int((sqrt(2) - 1) * N), -1): + D = sqrt(c ** 2 - N ** 2 + 2 * N * c) + if D == int(D): + triplets.append([int((N - c - D) / 2), int((N - c + D) / 2), c]) + return triplets +``` + +All clear? + +After some thoughtful mathematical analysis, there is now only a single loop. + +Run time is now much faster, especially for large `n`, but a reasonable person could find it difficult to understand what the code is doing.
+ +If you do things like this out in the real world ***please*** document your code carefully. +In a few weeks, the bare code will puzzle even you, and people seeing it for the first time are likely to struggle. + +The code above uses fairly mainstream syntax. Another submission used the same basic algorithm but in a more Pythonic way: + +```python +def triplets_with_sum(n): + def calculate_medium(small): + return (n ** 2 - 2 * n * small) / (2 * (n - small)) + + two_sides = ((int(medium), small) for small in range(3, n // 3) + if small < (medium := calculate_medium(small)) + and medium.is_integer()) + + return [[small, medium, (medium ** 2 + small ** 2) ** 0.5] + for medium, small in two_sides] +``` + +## Which approach to use? + +If we could be sure that the code only had to handle small values of `n`, a quadratic method would have the advantage of clarity. + +However, the test suite goes up to 30_000, and the online test runner times out. +We need to accept some less readable code and use a linear-time implementation. + +Full details of run-time benchmarking are given in the [Performance article][article-performance]. + +Overall, the results confirm the expectation that the linear-time methods are very much faster. +More surprisingly, the first example (with an explicit loop) proved slightly faster than the more Pythonic code.
+ +[approaches-cubic]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches/cubic +[approaches-quadratic]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches/quadratic +[approaches-linear]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches/linear +[time-complexity]: https://en.wikipedia.org/wiki/Time_complexity +[article-performance]:https://exercism.org/tracks/python/exercises/pythagorean-triplet/articles/performance +[wiki-pythag]: https://en.wikipedia.org/wiki/Pythagorean_triple +[wolfram-pythag]: https://mathworld.wolfram.com/PythagoreanTriple.html + diff --git a/exercises/practice/pythagorean-triplet/.approaches/linear/content.md b/exercises/practice/pythagorean-triplet/.approaches/linear/content.md new file mode 100644 index 00000000000..06b4b3f7d61 --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/linear/content.md @@ -0,0 +1,47 @@ +# Linear-time algorithms with no nested loops + +```python +from math import sqrt + +def triplets_with_sum(n): + N = float(n) + triplets = [] + for c in range(int(N / 2) - 1, int((sqrt(2) - 1) * N), -1): + D = sqrt(c ** 2 - N ** 2 + 2 * N * c) + if D == int(D): + triplets.append([int((N - c - D) / 2), int((N - c + D) / 2), c]) + return triplets +``` + +The key point with this approach is that we only loop over the variable `c`. +Some mathematical analysis (essentially, solving simultaneous equations) then allows us to find valid values of `a` and `b`. + +Other than that, the code syntax above is fairly mainstream. + +A related approach instead loops over `a`. +The code below has no explicit `for` loop, but the comprehensions do essentially the same thing in a more Pythonic way. 
+ +```python +def triplets_with_sum(n): + def calculate_medium(small): + return (n ** 2 - 2 * n * small) / (2 * (n - small)) + + two_sides = ((int(medium), small) for small in range(3, n // 3) + if small < (medium := calculate_medium(small)) + and medium.is_integer()) + + return [[small, medium, (medium ** 2 + small ** 2) ** 0.5] + for medium, small in two_sides] +``` + +Some implementation details to notice: +- Nested functions, with the inner function able to reference variables such as `n` in the outer function. +- The first comprehension creates `two_sides` as a lazily-evaluated iterator. +- The [`walrus operator`][walrus-operator] `:=` is new in Python 3.8. +- The `is_integer()` method replaces `if D == int(D)`. +- Using `** 0.5` to calculate the square roots avoids a `math` import. + +# _Bethany_, this is your submission +Is there anything else you want to say about it? + +[walrus-operator]: https://mathspp.com/blog/pydonts/assignment-expressions-and-the-walrus-operator diff --git a/exercises/practice/pythagorean-triplet/.approaches/linear/snippet.txt b/exercises/practice/pythagorean-triplet/.approaches/linear/snippet.txt new file mode 100644 index 00000000000..a8f26825c3c --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/linear/snippet.txt @@ -0,0 +1,8 @@ +def triplets_with_sum(n): + def calculate_medium(small): + return (n ** 2 - 2 * n * small) / (2 * (n - small)) + two_sides = ((int(medium), small) for small in range(3, n // 3) + if small < (medium := calculate_medium(small)) + and medium.is_integer()) + return [[small, medium, (medium ** 2 + small ** 2) ** 0.5] + for medium, small in two_sides] diff --git a/exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md b/exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md new file mode 100644 index 00000000000..73912fa4cf7 --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md @@ -0,0 +1,45 @@ +# Quadratic-time 
doubly-nested loops + +```python +def triplets_with_sum(n): + triplets = [] + for a in range(1, n + 1): + for b in range(a + 1, n + 1): + c = n - a - b + if a ** 2 + b ** 2 == c ** 2: + triplets.append([a, b, c]) + return triplets +``` + +Because `a + b + c == n`, we only loop over `a` and `b`. +The third variable `c` is then predictable. + +The above code loops over the full range of both variables. +We know enough about the problem to tighten this up. + +For example: +- The smallest Pythagorean triplet is (famously) `[3, 4, 5]`, so `a >= 3` +- `a + b == n - c` and `a <= b`, so `a <= (n - c) // 2` + +We can also avoid, to some extent, repeating the same multiplication many times. +This gets us to the code below. + +```python +def triplets_with_sum(n): + result = [] + for c in range(5, n - 1): + c_sq = c * c + for a in range(3, (n - c + 1) // 2): + b = n - a - c + if a < b < c and a * a + b * b == c_sq: + result.append([a, b, c]) + return result +``` + +We could have done a bit better. +Though not stated in the problem description, `a + b > c`, otherwise they could not form a triangle. + +This means that `c <= n // 2`, reducing the outer loop. + +However, these optimizations only reduce the run time by a small factor. +They do almost nothing to make the algorithm scale to large `n`.
diff --git a/exercises/practice/pythagorean-triplet/.approaches/quadratic/snippet.txt b/exercises/practice/pythagorean-triplet/.approaches/quadratic/snippet.txt new file mode 100644 index 00000000000..fcac4bee78e --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.approaches/quadratic/snippet.txt @@ -0,0 +1,8 @@ +def triplets_with_sum(n): + triplets = [] + for a in range(1, n + 1): + for b in range(a + 1, n + 1): + c = n - a - b + if a ** 2 + b ** 2 == c ** 2: + triplets.append([a, b, c]) + return triplets \ No newline at end of file diff --git a/exercises/practice/pythagorean-triplet/.articles/config.json b/exercises/practice/pythagorean-triplet/.articles/config.json new file mode 100644 index 00000000000..07a0789875e --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.articles/config.json @@ -0,0 +1,12 @@ +{ + "articles": [ + { + "uuid": "b6ae73d5-6ee9-472d-bb48-d8eac8a097cf", + "slug": "performance", + "title": "Performance", + "blurb": "Results and analysis of timing tests for the various approaches.", + "authors": ["colinleach", + "BethanyG"] + } + ] +} diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/code/Benchmark.py b/exercises/practice/pythagorean-triplet/.articles/performance/code/Benchmark.py new file mode 100644 index 00000000000..a00e3fc68dd --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.articles/performance/code/Benchmark.py @@ -0,0 +1,124 @@ +import timeit +import pandas as pd +import numpy as np + + +n_values = (12, 30, 100, 300, 1_000, 3_000, 10_000, 30_000, 100_000) +col_headers = [str(n) for n in n_values] +row_headers = ["cubic", "quad_loose", "quad_tight", "linear_loop", "linear_comp"] + +# empty dataframe will be filled in one cell at a time later +df = pd.DataFrame(np.nan, index=row_headers, columns=col_headers) + +# create a dictionary with all the solution codes + +code = { + 'cubic': """ +def triplets_with_sum(number): + triplets = [] + for a in range(1, number + 1): + for b in range(a 
+ 1, number + 1): + for c in range(b + 1, number + 1): + if a**2 + b**2 == c**2 and a + b + c == number: + triplets.append([a, b, c]) + return triplets +""", + + 'quad_loose': """ +def triplets_with_sum(number): + triplets = [] + for a in range(1, number + 1): + for b in range(a + 1, number + 1): + c = number - a - b + if a ** 2 + b ** 2 == c ** 2: + triplets.append([a, b, c]) + return triplets +""", + + 'quad_tight': """ +def triplets_with_sum(number): + result = [] + for c in range(5, number - 1): + c_sq = c * c + for a in range(3, (number - c + 1) // 2): + b = number - a - c + if a < b < c and a * a + b * b == c_sq: + result.append([a, b, c]) + return result +""", + + 'linear_loop': """ +from math import sqrt + +def triplets_with_sum(number): + N = float(number) + triplets = [] + for c in range(int(N / 2) - 1, int((sqrt(2) - 1) * N), -1): + D = sqrt(c ** 2 - N ** 2 + 2 * N * c) + if D == int(D): + triplets.append([int((N - c - D) / 2), int((N - c + D) / 2), c]) + return triplets +""", + + 'linear_comp': """ +def triplets_with_sum(number): + def calculate_medium(small): + return (number ** 2 - 2 * number * small) / (2 * (number - small)) + + two_sides = ((int(medium), small) for small in range(3, number // 3) + if small < (medium := calculate_medium(small)) + and medium.is_integer()) + + return [[small, medium, (medium ** 2 + small ** 2) ** 0.5] + for medium, small in two_sides] +""" +} + +# Workaround for needing to do fewer runs with slow code + +run_params = { + 'cubic': (5, n_values[:5]), + 'quad_loose': (0, n_values[:-1]), + 'quad_tight': (0, n_values[:-1]), + 'linear_loop': (1000, n_values), + 'linear_comp': (1000, n_values) +} + +# Run the timing tests - SLOW! 
+ +for descriptor in row_headers: + loops = run_params[descriptor][0] + for n in run_params[descriptor][1]: + # ugly hack for the quadratic runs + if descriptor.startswith('quad'): + loops = 10 if n <= 10_000 else 3 + + # including a string comprehension in the timed part of the run would + # normally be a bad idea. + # For the slow runs, the overhead is insignificant in this exercise. + function_call = f"triplets_with_sum({n})" + val = timeit.timeit(function_call, code[descriptor], number=loops) / loops + + print(f"{descriptor}, n = {n:6n}: {val:.2e}") + df.loc[descriptor, str(n)] = val + +# Save the data to avoid constantly regenerating it + +df.to_feather('run_times.feather') +print("\nDataframe saved to './run_times.feather'") + +# The next bit will be useful for `introduction.md` +pd.options.display.float_format = '{:,.2e}'.format +print('\nDataframe in Markdown format:\n') +print(df.to_markdown(floatfmt=".1e")) + + +# To plot and fit the slopes, the df needs to be log10-transformed and transposed + +pd.options.display.float_format = '{:,.2g}'.format +log_n_values = np.log10(n_values) +df[df == 0.0] = np.nan +transposed = np.log10(df).T +transposed = transposed.set_axis(log_n_values, axis=0) +transposed.to_feather('transposed_logs.feather') +print("\nDataframe saved to './transposed_logs.feather'") diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/code/create_plots.py b/exercises/practice/pythagorean-triplet/.articles/performance/code/create_plots.py new file mode 100644 index 00000000000..59fa8896e10 --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.articles/performance/code/create_plots.py @@ -0,0 +1,42 @@ +import matplotlib as mpl +import matplotlib.pyplot as plt +import pandas as pd + + +# These dataframes are slow to create, so they should be saved in Feather format + +try: + df = pd.read_feather('./run_times.feather') +except FileNotFoundError: + print("File './run_times.feather' not found!") + print("Please run 
'./Benchmark.py' to create it.") + exit(1) + +try: + transposed = pd.read_feather('./transposed_logs.feather') +except FileNotFoundError: + print("File './transposed_logs.feather' not found!") + print("Please run './Benchmark.py' to create it.") + exit(1) + +# Ready to start creating plots + +mpl.rcParams['axes.labelsize'] = 18 + +# bar plot of actual run times +ax = df.plot.bar(figsize=(10, 7), + logy=True, + ylabel="time (s)", + fontsize=14, + width=0.8, + rot=0) +plt.savefig('../timeit_bar_plot.svg') + +# log-log plot of times vs n, to see slopes +transposed.plot(figsize=(8, 6), + marker='.', + markersize=10, + ylabel="$log_{10}(time)$ (s)", + xlabel="$log_{10}(n)$", + fontsize=14) +plt.savefig('../slopes.svg') diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/code/fit_gradients.py b/exercises/practice/pythagorean-triplet/.articles/performance/code/fit_gradients.py new file mode 100644 index 00000000000..22ace93f819 --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.articles/performance/code/fit_gradients.py @@ -0,0 +1,38 @@ +import pandas as pd +import numpy as np +from numpy.linalg import lstsq + + +# These dataframes are slow to create, so they should be saved in Feather format + +try: + transposed = pd.read_feather('./transposed_logs.feather') +except FileNotFoundError: + print("File './transposed_logs.feather' not found!") + print("Please run './Benchmark.py' to create it.") + exit(1) + +n_values = (12, 30, 100, 300, 1_000, 3_000, 10_000, 30_000, 100_000) +log_n_values = np.log10(n_values) +row_headers = ["cubic", "quad_loose", "quad_tight", "linear_loop", "linear_comp"] + + +# Do a least-squares fit to get the slopes, working around missing values +# Apparently, it does need to be this complicated + +def find_slope(name): + log_times = transposed[name] + missing = np.isnan(log_times) + log_times = log_times[~missing] + valid_entries = len(log_times) + A = np.vstack([log_n_values[:valid_entries], np.ones(valid_entries)]).T + 
m, _ = lstsq(A, log_times, rcond=None)[0] + return m + + +# Print the slope results +slopes = [(name, find_slope(name)) for name in row_headers] +print('\nSlopes of log-log plots:') +for name, slope in slopes: + print(f'{name:>14} : {slope:.2f}') + diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/code/run_times.feather b/exercises/practice/pythagorean-triplet/.articles/performance/code/run_times.feather new file mode 100644 index 0000000000000000000000000000000000000000..6e93c9c3705b8846b936ce53a60118d5d2c7a26a GIT binary patch literal 5426 zcmeHLVQ5=b6uxcKHk4RXXr-NHb`O;aZI-mF%rf4~(1}y0tZs9-ZrN+{+NHj{#5Ac5 zAubk#{u?p}YjwrxkI7b8Q$a-bV?VTRE|oH!PMrz^#W}H?18u=s&$;j3G<|ugINu+k z7w)_7ob#RY-FtKIx$kCEXXn$;dI(v7@_r>DPJ*S1RFeu)PVDdorNoxgJ3zEC{cGj; z4rZvRx5E={cKWWzs(|;1tOewLLiPYYDFn5EGU)S%)p$tj7do1D3qGH&QM2HQALi0qcriT!S1LW zO<>-MB;;r`{334o0VUcW3~Q+Mw7T3bj{tdrv*#IGT^`qZAqBY>^Yw?2kcXjmL3Lwt zhB^byn@}TAy-+c4$NpMa0Ao4k{IyI64XJ@zkdvw1M*YThzcTczr+Rom!CbHZHiV2( zOewik1Pez2q3Z_PPZ-t)XAtzKATDBQTxfr1Sa%UDjtk>LLj;N`C0)y4F)m`CV%Xt4 zY%S<99+onw5SI)v+{7|Ch)Kp6?ott4DUC`SbPIlhvbI}M`-VgNi zw88_hh{ri6q?DfDyTFKj#^QiqMnAzFG$JXp5sc%+fw1BLjT0i{xsB)dAgmL_U<{md zUhr2({yMHOX(guf8f;wV_O!vyWo|!XuydK)>BmMs2G=(fZuc@7*Cm!+2Ky0%eZpYB zZm`>N!lCf^k1?6Y-^b*39Y&5YnTa`h9aEfT_RSmsE$-R;`9_mfG8;}V-apBFc^LUI zKVY#5TkH3Ehd@k^j*N_C?AOE|C{v$b5K(S8mk}qA&K~bkj*3_B_t?i1$3*I9;*a|8 z?prHOXR}$`+&qo*>)wmvSD*gz%+0d5#S=F!Z2fujWKn;h9eA!WB?0ez?H6tdWM4M_|GK2?pW;w{~96v2Ck6$y6<0-+V_62W%pY@h_77Rw&CQ#KXrdT?`-}+Os~cX zC^9dE%pI7{UVh;8-7`2pJMY;zDW#`6KD4wcl&-lntuM^JhFL(SZNe-`Q8b& z0qVU_`S#1bFuV(FH;6yp9rV8tmjgaE9PU@D$V4pIyFaD|HARlzMb1R@H@LKfBH(}p z()I$jcm_HkV`4UTfP{Gzv$YqVI3~=S_#2&tyA>9mFY@!SZoy|2^V>Gzud)BWlvIKMa1|4RP`vykWi literal 0 HcmV?d00001 diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/code/transposed_logs.feather b/exercises/practice/pythagorean-triplet/.articles/performance/code/transposed_logs.feather new file mode 100644 index 
0000000000000000000000000000000000000000..aacb2e1c55fe3425a528b60980a3bd4c3ab15631 GIT binary patch literal 4010 zcmeHKZ%kWN6u+YdI;dU89|1?SdoFQq$ObMr8A>r|SO%HEjM;E`Jla>*<@LQ3+NjCjprrFFKb6M07A#pUQ!E^4r4_YW$V%P^0 zZgSs!=bYa?=ib|MfA6%Su5S11MTE>k8%QR^POzmB8%ZI_!~$>7R+;DYRuIixpKr!@ zFhfVZ1)gZLuy@^83cULz)h)FUa>qnShu~G+a=Yl#l#owt5lS5mf+*@bMQ)W9ajPf_ z`wJX`j?hgq19k|i>f%yyx(bAB=Ls}NkFCa()!#4TiAycrzrqMMcwE*mmM$#@il7;Gnnp06VV)f#+zdtMm0$Lj$ ze5)L2@oFk+i;4@k78VJRm`3;*V{u_o;WnWYt~{^@Ar2sh8!(=M9)#|Mj(lMOt^w9C zA=hl+IvB`W=!>`jbSVsO@DdBkkgYlSJ#KZ9eOW=_M*#%>H zPRKEizxE(JyR3v<=6E~F0&Ywpsf;b{oKy94h}{p+Fk##RSj6L55|YH8;A-|=IM*yD zA@Amb9hut7yb_a{ps!vX&QMm+*cmhHn0uf_4Ek8j5c?lXc@-u`BitDb3F6MqTNlNJWps5H_m!@9 zXVaTw7pI4n40^6He{Af6mEQSzWA!9Srair*{rTh3k%sEWXY2*zBVotT`9SlHa!-9_ zS55Q%a@{>&#D9qY{h3pH)9>cc3ERorH#cX~q|Z*aP7K?r@cw>hWNBF^%tRH zfA$xL8m2}*-1pJTk^0X^wp?2`_)YC^@$na&EAH>ai*2@CEswgsq?>yG&ZV}j%Irf| za_DH)_2)1AnoUo2dY|4jl}USsYfW{w4C-oZk$;G;p&#rzU42-v(t~duyL$e;RLb_5 z!2j)F`jZYJkCqNp_{=}%&;v&=AO7jtN9ldf8{tpNvZ&x_vq>*y(6dJd``+1Sr|s5H zcXW4K>5L~(=)Yv4yX2C#sxosz|1Ut$sierq<45Z&C(9#$_W!bTa2kLAJ|~^|YHGu` zCg&QGX|a>cbUXROWOp|H;|}-TTkB+tC0Tsb>p0XQw_4$bBZF8$((kmHl!1ODbi5CE z2hJkdK1pNB + + + + + + + 2024-01-02T11:59:37.790181 + image/svg+xml + + + Matplotlib v3.8.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/snippet.md b/exercises/practice/pythagorean-triplet/.articles/performance/snippet.md new file mode 100644 index 00000000000..53d6247de60 --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.articles/performance/snippet.md @@ -0,0 +1,8 @@ +def triplets_with_sum(n): + def calculate_medium(small): + return (n ** 2 - 2 * n * small) / (2 * (n - small)) + two_sides = ((int(medium), small) for small in range(3, n // 3) + if small < (medium := calculate_medium(small)) + and medium.is_integer()) + return [[small, medium, (medium ** 2 + small ** 2) ** 0.5] + for medium, small in two_sides] \ No newline at end of file diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg b/exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg new file mode 100644 index 00000000000..11a2031b8fb --- /dev/null +++ b/exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg @@ -0,0 +1,1986 @@ + + + + + + + + 2024-01-02T11:59:37.617935 + image/svg+xml + + + Matplotlib v3.8.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 3e74e10ec18b190d0f7e20f4101a5d3aec435313 Mon Sep 17 00:00:00 2001 From: colinleach Date: Thu, 4 Jan 2024 16:44:08 -0700 Subject: [PATCH 2/4] [Leap]: Add `isleap` approach and improved benchmarks --- .../.articles/performance/timeit_bar_plot.svg | 1049 
+++++++++++++++++ 1 file changed, 1049 insertions(+) create mode 100644 exercises/practice/leap/.articles/performance/timeit_bar_plot.svg diff --git a/exercises/practice/leap/.articles/performance/timeit_bar_plot.svg b/exercises/practice/leap/.articles/performance/timeit_bar_plot.svg new file mode 100644 index 00000000000..1d11bd91fde --- /dev/null +++ b/exercises/practice/leap/.articles/performance/timeit_bar_plot.svg @@ -0,0 +1,1049 @@ + + + + + + + + 2024-01-04T15:50:15.840049 + image/svg+xml + + + Matplotlib v3.8.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 65b055a8e979ff93ea69aac8c2944e2a3a16e188 Mon Sep 17 00:00:00 2001 From: colinleach Date: Thu, 4 Jan 2024 16:51:47 -0700 Subject: [PATCH 3/4] Delete exercises/practice/leap/.articles/performance/timeit_bar_plot.svg Sorry, I committed this to the wrong branch. Embarrassingly amateurish! 
--- .../.articles/performance/timeit_bar_plot.svg | 1049 ----------------- 1 file changed, 1049 deletions(-) delete mode 100644 exercises/practice/leap/.articles/performance/timeit_bar_plot.svg diff --git a/exercises/practice/leap/.articles/performance/timeit_bar_plot.svg b/exercises/practice/leap/.articles/performance/timeit_bar_plot.svg deleted file mode 100644 index 1d11bd91fde..00000000000 --- a/exercises/practice/leap/.articles/performance/timeit_bar_plot.svg +++ /dev/null @@ -1,1049 +0,0 @@ - - - - - - - - 2024-01-04T15:50:15.840049 - image/svg+xml - - - Matplotlib v3.8.0, https://matplotlib.org/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From 6c71920de966e3173473affe031b027314494774 Mon Sep 17 00:00:00 2001 From: BethanyG Date: Wed, 17 Jan 2024 12:33:22 -0800 Subject: [PATCH 4/4] Suggestions, Edits, etc. Suggestions and edits. Because we need to PR to a different repo, the `.svg` images have been removed, and the text adjusted. 
--- .../.approaches/cubic/content.md | 6 +- .../.approaches/introduction.md | 118 +- .../.approaches/linear/content.md | 30 +- .../.approaches/quadratic/content.md | 8 +- .../.articles/performance/content.md | 26 +- .../.articles/performance/slopes.svg | 1516 ------------- .../.articles/performance/timeit_bar_plot.svg | 1986 ----------------- 7 files changed, 124 insertions(+), 3566 deletions(-) delete mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/slopes.svg delete mode 100644 exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg diff --git a/exercises/practice/pythagorean-triplet/.approaches/cubic/content.md b/exercises/practice/pythagorean-triplet/.approaches/cubic/content.md index f0baaf7c8bc..9611745c2ae 100644 --- a/exercises/practice/pythagorean-triplet/.approaches/cubic/content.md +++ b/exercises/practice/pythagorean-triplet/.approaches/cubic/content.md @@ -12,8 +12,12 @@ def triplets_with_sum(n): ``` The strategy in this case is to scan through all three variables and test them in the innermost loop. +But the innermost loop will test all variables _every time_ the enclosing loop iterates. +And the enclosing loop will iterate through all of its range _every time_ the outermost loop iterates. -This gives code that is simple, clear, and so slow as to be useless for all but the smallest values of `n`. +So the 'work' this code has to do for each additional value in `range(1, n+1)` is `n**3`. + +This gives code that is simple, clear, ...and so slow as to be useless for all but the smallest values of `n`. We could tighten up the bounds on loop variables: for example, `a` is the smallest integer of a triplet that sums to `n`, so inevitably `a < n //3`. 
diff --git a/exercises/practice/pythagorean-triplet/.approaches/introduction.md b/exercises/practice/pythagorean-triplet/.approaches/introduction.md index 8e6202a90a7..ab79ef1afde 100644 --- a/exercises/practice/pythagorean-triplet/.approaches/introduction.md +++ b/exercises/practice/pythagorean-triplet/.approaches/introduction.md @@ -1,27 +1,31 @@ # Introduction -The main challenge in solving the Pythagorean Triplet exercise is to use a fast enough algorithm. -The problem can easily become very large, and simple solutions may time out on the test runner. +The main challenge in solving the Pythagorean Triplet exercise is to come up with a 'fast enough' algorithm. +The problem space can easily become very large, and 'naive' or more 'brute force' solutions may time out on the test runner. -There are three reasonably common solutions to this problem -- a [cubic time][approaches-cubic] solution, which is fairly obvious -- a [quadratic time][approaches-quadratic] solution, which is reasonably easy to find -- a [linear time][approaches-linear] solution, requiring some deeper understanding of the mathematics +There are three reasonably common variations to this problem +1. A [cubic time][approaches-cubic] solution, which uses highly nested loops and is non-performant. +2. A [quadratic time][approaches-quadratic] solution, which uses one nested loop, and is reasonably easy to figure out. +3. A [linear time][approaches-linear] solution, requiring some deeper understanding of the mathematics of finding triplets. -If those terms are unclear to you, you might like to read about [time complexity][time-complexity]. -The basic idea is to study how algorithms scale (in run time, memory usage, or whatever) as the input parameter becomes very large. +If those terms are unclear to you, you might like to read about [time complexity][time-complexity], and how it is described by [asymptotic notation][asymptotic-notation]. 
+ +The basic idea is to study how algorithms scale (_in CPU/GPU run time, memory usage, or other resource_) as the input parameters grow toward infinity. In this document we will focus on run time, which is critical for this exercise. + ## General guidance -The goal of the Pythagorean Triplets exercise is to find combinations of three numbers `[a, b, c]` satisfying a set of conditions: +The goal of `Pythagorean Triplets` is to find combinations of three numbers `[a, b, c]` satisfying a set of conditions: + 1. `a < b < c` -2. `a**2 + b**2 == c**2` +2. `a**2 + b**2 == c**2` (_otherwise known as the [Pythagorean theorem][Pythagorean-theorem]_) 3. `a + b + c == n`, where `n` is supplied as a parameter to the function. For a given `n`, the solution should include all valid triplets as a list of lists. + ## Approach: Cubic time solution with 3-deep nested loops ```python @@ -35,12 +39,16 @@ def triplets_with_sum(n): return triplets ``` -This is the most naive approach, scanning through all possible integers `<= n` that satisfy `a < b < c`. +This is the most 'naive' or 'brute force' approach, scanning through all possible integers `<= n` that satisfy `a < b < c`. +This might be the first thing you think of when sketching out the algorithm on paper following the exercise instructions. +It is useful to see the steps of the solution and to look at the size of the problem space. + +***Don't implement this in code!*** -***Don't do this!*** +While it is a valid solution and it indeed works for small values of `n`, it becomes impossibly slow as `n` grows larger. +For any truly large values of `n`, this code might take over all the available processing power on your local computer and never complete. +For Exercism's online environment, the test suite will time out and fail. -It works for small values of `n`, but becomes impossibly slow as `n` grows larger. -The test suite will not complete within any reasonable time limit. 
## Approach: Quadratic time solution with 2-deep nested loops @@ -55,15 +63,17 @@ def triplets_with_sum(n): return triplets ``` -Given the constraint that `a + b + c == n`, we can eliminate the innermost loop and calculate `c` directly. -This gives a substantial speed advantage, allowing the tests to run to completion in a reasonable time, locally. +Given the constraint that `a + b + c == n`, we can eliminate the innermost loop from the cubic approach and calculate `c` directly. +This gives a substantial speed advantage, allowing the tests to run to completion in a reasonable time, _locally_. -However, the Exercism test runner will still time out. +However, the Exercism online test runner will still time out with this solution. -Examining the code. it is clear that the upper bounds on loop variables are far too generous. +Examining the code, it is clear that the upper bounds on the `loop` variables are far too generous, and too much work is being done. + + +The solution below tightens the bounds and pre-calculates `c * c` in the outer `loop`. +This gives about a 4-fold speedup, but still times out on the online test runner: -The solution below tightens the bounds and pre-calculates `c * c` in the outer loop. -This gives about a 4-fold speedup, but still times out on the test runner. ```python def triplets_with_sum(n): @@ -79,13 +89,14 @@ def triplets_with_sum(n): If a quadratic-time algorithm was the best available option, there are other ways to squeeze out small performance gains. -For bigger problems outside Exercism, there are third-party packages such as `numpy` or `numba` which replace Python -loops (versatile but relatively slow) with routines written in C/C++, perhaps with use of the GPU. -Runtime is still proportional to `n**2`, but the proportionality constant may be much smaller. 
+For bigger problems outside Exercism, there are third-party packages such as [`numpy`][numpy] or [`numba`][numba] which replace Python +loops (_versatile but relatively slow_) with routines written in C/C++, perhaps with use of the GPU. +The runtime is still proportional to `n**2`, but the proportionality constant (_which would be measured in C/C++ as opposed to Python_) may be much smaller. Fortunately for the present discussion, mathematicians have been studying Pythagorean Triplets for centuries: see [Wikipedia][wiki-pythag], [Wolfram MathWorld][wolfram-pythag], or many other sources. -There are much faster algorithms, at the expense of reduced readability. +So mathematically there are much faster algorithms, at the expense of reduced readability. + ## Linear time solutions @@ -102,16 +113,20 @@ def triplets_with_sum(n): return triplets ``` -All clear? +_All clear?_ 😉 + +After some thoughtful mathematical analysis, there is now only a single loop! -After some thoughtful mathematical analysis, there is now only a single loop. +Run time is now much faster, especially for large `n`, but a reasonable person could find it quite difficult to understand what the code is doing. -Run time is now much faster, especially for large `n`, but a reasonable person could find it difficult to understand what the code is doing. +If you do things like this out in the 'real world' ***please*** document your code carefully. +It might also be helpful to choose variable names that are more descriptive to help readers understand all of the values and operations. +In a few weeks, the bare code will puzzle your future self. +People reading it for the first time are likely to struggle even more than you will. -If you do things like this out in the real world ***please*** document your code carefully. -In a few weeks, the bare code will puzzle even yourself, and people seeing for the first time are likely to struggle. +The code above uses fairly 'generic' programming syntax. 
+Another submission used the same basic algorithm but in a more 'Pythonic' way: -The code above uses fairly mainstream syntax. Another submission used the same basic algorithm but in a more pythonic way: ```python def triplets_with_sum(n): @@ -126,23 +141,52 @@ def triplets_with_sum(n): for medium, small in two_sides] ``` +Although it is important to note that this solution could have chosen a better name for the `n` parameter, and clearer formatting for the `generator-expression` and the `list-comprehension`: + +```python +def triplets_with_sum(number): + def calculate_medium(small): + + # We have two numbers, but need the third. + return (number ** 2 - 2 * number * small) / (2 * (number - small)) + + two_sides = ( + (int(medium), small) for + small in range(3, number // 3) if + + #Calls calculate_medium and assigns return value to variable medium + small < (medium := calculate_medium(small)) and + medium.is_integer() + ) + + return [ + [small, medium, (medium ** 2 + small ** 2) ** 0.5] + for medium, small in two_sides + ] +``` + + ## Which approach to use? If we could be sure that the code only had to handle small values of `n`, a quadratic method would have the advantage of clarity. -However, the test suite goes up to 30_000, and the online test runner times out. -We need to accept some less readable code and use a linear-time implementation. +However, the test suite goes up to 30_000, and the online test runner quickly times out. +We therefore need to accept some less readable code and use a linear-time implementation. Full details of run-time benchmarking are given in the [Performance article][article-performance]. -Overall, the results confirm the expectation that the linear-time methods are very much faster. -More surprisingly, the first example (with an explicit loop) proved slightly faster than the more Pythonic code. +Overall, the results confirm the expectation that the linear-time methods are _very much_ faster. 
+More surprisingly, the first example of the linear implementation (_with an explicit loop_) proved slightly faster than the more 'Pythonic' code. +This is likely due to the overhead of creating and tracking the iterator for the `generator-expression`, calculating the 'expensive' `calculate_medium()` function call within that generator, and the additional 'expensive' conversions to `int()`. +[Pythagorean-theorem]: https://en.wikipedia.org/wiki/Pythagorean_theorem [approaches-cubic]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches/cubic -[approaches-quadratic]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches/quadratic [approaches-linear]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches/linear -[time-complexity]: https://en.wikipedia.org/wiki/Time_complexity +[approaches-quadratic]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches/quadratic [article-performance]:https://exercism.org/tracks/python/exercises/pythagorean-triplet/articles/performance +[asymptotic-notation]: https://www.khanacademy.org/computing/computer-science/algorithms/asymptotic-notation/a/asymptotic-notation +[numba]: https://numba.pydata.org/ +[numpy]: https://numpy.org/ +[time-complexity]: https://yourbasic.org/algorithms/time-complexity-explained/ [wiki-pythag]: https://en.wikipedia.org/wiki/Pythagorean_triple [wolfram-pythag]: https://mathworld.wolfram.com/PythagoreanTriple.html - diff --git a/exercises/practice/pythagorean-triplet/.approaches/linear/content.md b/exercises/practice/pythagorean-triplet/.approaches/linear/content.md index 06b4b3f7d61..93dfa8448cd 100644 --- a/exercises/practice/pythagorean-triplet/.approaches/linear/content.md +++ b/exercises/practice/pythagorean-triplet/.approaches/linear/content.md @@ -1,5 +1,12 @@ # Linear-time algorithms with no nested loops + +The key point with this approach is that we only loop over the variable `c` in a specific range. 
+Some mathematical analysis (_essentially, [solving simultaneous equations][simultaneous-equasions]_) then allows us to find valid values of `a` and `b`. + +Other than that, the code syntax below is fairly mainstream across programming languages. +A related approach instead loops over `a`. + ```python from math import sqrt @@ -13,13 +20,17 @@ def triplets_with_sum(n): return triplets ``` -The key point with this approach is that we only loop over the variable `c`. -Some mathematical analysis (essentially, solving simultaneous equations) then allows us to find valid values of `a` and `b`. -Other than that, the code syntax above is fairly mainstream. +This second code example has no explicit `for` loop (_in Python syntax_), but the `generator-expression` and the `list-comprehension` both translate to `FOR_ITER` at the bytecode level. + So this solution is essentially the same as the first, written in a more 'Pythonic' syntax -- but that syntax does incur a small overhead in performance. + The performance hit is likely due to the extra instructions in the bytecode used to manage the `generator-expression` (_pausing the loop, resuming the loop, yielding results_) and then calling or unpacking the generator in the `list comprehension`. + However, you would have to carefully profile the code to really determine the slowdown. + + With all that said, using a `generator` or `generator-expression` with or without a `list-comprehension` might be a better strategy if your code needs to process a very large number of triplets, as it avoids storing all the results in memory until they need to be returned. + Using a `generator` or `generator-expression` by itself can also nicely set up a scenario where results are "streamed" or emitted 'on demand' for another part of the program or application. + + For more details on what these two solutions look like at the byte code level, take a look at Python's [`dis`][dis] module. -A related approach instead loops over `a`. 
-The code below has no explicit `for` loop, but the comprehensions do essentially the same thing in a more Pythonic way. ```python def triplets_with_sum(n): @@ -34,14 +45,15 @@ def triplets_with_sum(n): for medium, small in two_sides] ``` + Some implementation details to notice: -- Nested functions, with the inner function able to reference variables such as `n` in the outer function. -- The first comprehension creates `two_sides` as a lazily-evaluated iterator. +- Nested functions, with the inner function able to reference variables such as `n` passed into the outer function. +- The generator expression creates `two_sides` as a lazily-evaluated iterator (_smaller memory footprint_) - The [`walrus operator`][walrus-operator] `:=` is new in Python 3.8. - The `is_integer()` method replaces `if D == int(D)`. - Using `** 0.5` to calculate the square roots avoids a `math` import. -# _Bethany_, this is your submission -Is there anything else you want to say about it? +[dis]: https://docs.python.org/3/library/dis.html +[simultaneous-equasions]: https://thirdspacelearning.com/gcse-maths/algebra/simultaneous-equations/ [walrus-operator]: https://mathspp.com/blog/pydonts/assignment-expressions-and-the-walrus-operator diff --git a/exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md b/exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md index 73912fa4cf7..15f2055f9ac 100644 --- a/exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md +++ b/exercises/practice/pythagorean-triplet/.approaches/quadratic/content.md @@ -15,14 +15,16 @@ Because `a + b + c == n`, we only loop over `a` and `b`. The third variable `c` is then predictable. The above code loops over the full range of both variables. -We know enough about the problems to tighten this up. +This means the 'work' this code has to do for each additional value in `range(1, n+1)` is `n**2`. +We know enough about the problems to tighten this up a bit. 
For example: - The smallest pythagorean is (famously) `[3, 4, 5]`, so `a >= 3` - `a + b == n - c` and `a <= b`, so `a <= (n - c) // 2` We can also avoid, to some extent, repeating the same multiplication many times. -This gets us to the code below. +This gets us to the code below: + ```python def triplets_with_sum(n): @@ -39,7 +41,7 @@ def triplets_with_sum(n): We could have done a bit better. Though not stated in the problem description, `a + b > c`, otherwise they could not form a triangle. -This means that `c <= n // 2`, reducing the outer loop. +This means that `c <= n // 2`, reducing the iterations in the outer loop. However, these optimizations only reduce the run time by a small factor. They do almost nothing to make the algorithm scale to large `n`. diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/content.md b/exercises/practice/pythagorean-triplet/.articles/performance/content.md index 469f49d7912..72e23c5d213 100644 --- a/exercises/practice/pythagorean-triplet/.articles/performance/content.md +++ b/exercises/practice/pythagorean-triplet/.articles/performance/content.md @@ -3,15 +3,17 @@ The [Approaches page][approaches-page] discusses three ways to approach this exercise, with very different performance. Adding in some ways to vary the coding details, we will discuss 5 implementations. + ## Cubic-time code We need to find sets of three variables meeting some criteria, so the most naive approach is to scan over the variables in nested loops, with a test for the criteria in the innermost loop. -This is simple but _very_ slow, and the run-time increases proportional to `n ** 3`. +This is simple and clear but _very_ slow, and the run-time increases proportional to `n ** 3`. When tested, `n = 1_000` took about 8 seconds to complete, and we can extrapolate that `n = 100_000` would take nearly 3 months. This is impractical! + ## Quadratic-time code For `cubic`, the loops were nested 3-deep. 
@@ -35,15 +37,17 @@ As a general principle: if run time varies as `a * n ** x`, this sort of coding The exponent `x` is ***very much*** more important, and only a better algorithm can change it. + ## Linear-time code The approaches discussed above have a strictly programming focus. To take the next step, we need to look at the problem as mathematicians — or as programmers who read what real mathematicians have already published. The [Approaches page][approaches-page] discusses two implementations with code that looks very different but uses a similar underlying algorithm. -These are shown in the analyses as `linear_loop` and `linear_comp` (the latter using list comprehensions). +These are shown in the analyses as `linear_loop` and `linear_comp` (the latter using generator expressions and list comprehensions). Performance is similarly impressive in both cases. + ## Measured timings The five code implementations were [benchmarked][benchmark-code], using appropriate values for the upper limit of `n` and number of runs too average over, to keep the total testing time reasonable. @@ -58,27 +62,21 @@ Numerical results are tabulated below. | linear_loop | 4.4e-07 | 5.7e-07 | 1.1e-06 | 3.4e-06 | 1.1e-05 | 3.1e-05 | 1.0e-04 | 3.1e-04 | 1.2e-03 | | linear_comp | 5.3e-07 | 1.1e-06 | 2.8e-06 | 9.0e-06 | 2.8e-05 | 8.3e-05 | 2.8e-04 | 8.1e-04 | 3.1e-03 | -Note the missing values, which also affect the graphical representation: - -![](timeit_bar_plot.svg) - -Also, note the logarithmic y-axis. +Note the missing values, which also affect the graphical representation. +Also, note the logarithmic y-axis in the graph output when running the Benchmark.py script. These run times vary over more than 7 orders of magnitude! + ## Testing algorithmic complexity -We have discussed these solutions as `cubic`, `quadratic` or `linear. +We have discussed these solutions as `cubic`, `quadratic` or `linear`. Do the experimental data support this? 
For a [power law][power-law] relationship, we have a run time `t` given by `t = a * n**x`, where `a` is a proportionality constant an `x` is the power. Taking logs of both sides, `log(t) = x * log(n) + constant.` -Plots of `log(t)` against `log(n)` will be a straight line with slope equal to the power `x`. - -This is how the graphs look: - -![](slopes.svg) +Plots of `log(t)` against `log(n)` will be a straight line with slope equal to the power `x`, which you can produce by running the Benchmark.py code. Encouragingly, these are all straight lines for larger values of `n`, as we expected. @@ -101,4 +99,4 @@ Removing these points and fitting only the linear portion would give a slope clo [approaches-page]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/approaches [benchmark-code]: https://exercism.org/tracks/python/exercises/pythagorean-triplet/ -[power-law]: https://en.wikipedia.org/wiki/Power_law \ No newline at end of file +[power-law]: https://en.wikipedia.org/wiki/Power_law diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/slopes.svg b/exercises/practice/pythagorean-triplet/.articles/performance/slopes.svg deleted file mode 100644 index 75422e9aa44..00000000000 --- a/exercises/practice/pythagorean-triplet/.articles/performance/slopes.svg +++ /dev/null @@ -1,1516 +0,0 @@ - - - - - - - - 2024-01-02T11:59:37.790181 - image/svg+xml - - - Matplotlib v3.8.0, https://matplotlib.org/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg b/exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg deleted file mode 100644 index 11a2031b8fb..00000000000 --- a/exercises/practice/pythagorean-triplet/.articles/performance/timeit_bar_plot.svg +++ /dev/null @@ -1,1986 +0,0 @@ - - - - - - - - 2024-01-02T11:59:37.617935 - image/svg+xml - - - Matplotlib v3.8.0, https://matplotlib.org/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -