diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 39daf3dd7f88..b865f88350ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.9 + rev: v0.15.15 hooks: - id: ruff-check - id: ruff-format @@ -32,7 +32,7 @@ repos: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: v2.21.0 + rev: v2.23.0 hooks: - id: pyproject-fmt diff --git a/DIRECTORY.md b/DIRECTORY.md index ca454bd5fd82..daf71bab8162 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -471,6 +471,8 @@ * [Geometry](geometry/geometry.py) * [Graham Scan](geometry/graham_scan.py) * [Jarvis March](geometry/jarvis_march.py) + * [Ramer Douglas Peucker](geometry/ramer_douglas_peucker.py) + * [Segment Intersection](geometry/segment_intersection.py) * Tests * [Test Graham Scan](geometry/tests/test_graham_scan.py) * [Test Jarvis March](geometry/tests/test_jarvis_march.py) @@ -523,6 +525,7 @@ * [Graphs Floyd Warshall](graphs/graphs_floyd_warshall.py) * [Greedy Best First](graphs/greedy_best_first.py) * [Greedy Min Vertex Cover](graphs/greedy_min_vertex_cover.py) + * [Johnson](graphs/johnson.py) * [Kahns Algorithm Long](graphs/kahns_algorithm_long.py) * [Kahns Algorithm Topo](graphs/kahns_algorithm_topo.py) * [Karger](graphs/karger.py) @@ -543,6 +546,7 @@ * [Strongly Connected Components](graphs/strongly_connected_components.py) * [Tarjans Scc](graphs/tarjans_scc.py) * Tests + * [Test Johnson](graphs/tests/test_johnson.py) * [Test Min Spanning Tree Kruskal](graphs/tests/test_min_spanning_tree_kruskal.py) * [Test Min Spanning Tree Prim](graphs/tests/test_min_spanning_tree_prim.py) diff --git a/geometry/ramer_douglas_peucker.py b/geometry/ramer_douglas_peucker.py new file mode 100644 index 000000000000..a03bbb2e5086 --- /dev/null +++ b/geometry/ramer_douglas_peucker.py @@ -0,0 +1,184 @@ +""" +Ramer-Douglas-Peucker polyline simplification algorithm. + +Given a sequence of 2-D points and a tolerance epsilon, the algorithm +reduces the number of points while preserving the overall shape of the curve. + +Time complexity: O(n log n) average, O(n²) worst case +Space complexity: O(n) + +References: + https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm +""" + +from __future__ import annotations + +import math + + +def _euclidean_distance( + point_a: tuple[float, float], + point_b: tuple[float, float], +) -> float: + """Return the Euclidean distance between two 2-D points. + + >>> _euclidean_distance((0.0, 0.0), (3.0, 4.0)) + 5.0 + >>> _euclidean_distance((1.0, 1.0), (1.0, 1.0)) + 0.0 + """ + return math.hypot(point_b[0] - point_a[0], point_b[1] - point_a[1]) + + +def _perpendicular_distance( + point: tuple[float, float], + line_start: tuple[float, float], + line_end: tuple[float, float], +) -> float: + """Return the distance from *point* to the line **segment** between + *line_start* and *line_end*. + + When the perpendicular projection of *point* onto the infinite line falls + within the segment, this equals the perpendicular distance to that line. + When the projection falls outside the segment, the distance to the nearest + endpoint is returned instead (projection clamped to [0, 1]). + + This is the correct distance measure for the Ramer-Douglas-Peucker + algorithm: using the infinite-line distance can incorrectly discard points + whose projection lies beyond a segment endpoint. + + >>> _perpendicular_distance((4.0, 0.0), (0.0, 0.0), (0.0, 3.0)) + 4.0 + >>> # order of line_start and line_end does not affect the result + >>> _perpendicular_distance((4.0, 0.0), (0.0, 3.0), (0.0, 0.0)) + 4.0 + >>> _perpendicular_distance((4.0, 1.0), (0.0, 1.0), (0.0, 4.0)) + 4.0 + >>> _perpendicular_distance((2.0, 1.0), (-2.0, 1.0), (-2.0, 4.0)) + 4.0 + >>> # projection falls outside the segment; distance to nearest endpoint + >>> round(_perpendicular_distance((0.0, 2.0), (1.0, 0.0), (3.0, 0.0)), 6) + 2.236068 + """ + px, py = point + ax, ay = line_start + bx, by = line_end + dx, dy = bx - ax, by - ay + seg_len_sq = dx * dx + dy * dy + if seg_len_sq == 0.0: + # line_start and line_end coincide; fall back to point-to-point distance + return _euclidean_distance(point, line_start) + # Project point onto the segment line, then clamp t to [0, 1] so the + # nearest point is always on the segment rather than the infinite line. + t = max(0.0, min(1.0, ((px - ax) * dx + (py - ay) * dy) / seg_len_sq)) + nearest_x = ax + t * dx + nearest_y = ay + t * dy + return math.hypot(px - nearest_x, py - nearest_y) + + +def ramer_douglas_peucker( + pts: list[tuple[float, float]], + epsilon: float, +) -> list[tuple[float, float]]: + """Simplify a polyline using the Ramer-Douglas-Peucker algorithm. + + Given a sequence of 2-D points and a maximum allowable deviation + *epsilon* (>= 0), returns a simplified list of points such that no + discarded point is farther than *epsilon* from the simplified polyline. + + Parameters + ---------- + pts: + Ordered sequence of ``(x, y)`` points describing the polyline. + epsilon: + Maximum allowable distance of any discarded point from the + simplified polyline. Must be non-negative. + + Returns + ------- + list[tuple[float, float]] + Simplified list of ``(x, y)`` points. The first and last points of + *pts* are always preserved. + + Raises + ------ + ValueError + If *epsilon* is negative. + + References + ---------- + https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm + + Examples + -------- + >>> ramer_douglas_peucker([], epsilon=1.0) + [] + >>> ramer_douglas_peucker([(0.0, 0.0)], epsilon=1.0) + [(0.0, 0.0)] + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.0)], epsilon=1.0) + [(0.0, 0.0), (1.0, 0.0)] + >>> # middle point is within epsilon - it is discarded + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.1), (2.0, 0.0)], epsilon=0.5) + [(0.0, 0.0), (2.0, 0.0)] + >>> # middle point exceeds epsilon - it is kept + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)], epsilon=0.5) + [(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)] + >>> ramer_douglas_peucker([(0.0, 0.0), (1.0, 0.5), (2.0, 0.0)], epsilon=-1.0) + Traceback (most recent call last): + ... + ValueError: epsilon must be non-negative, got -1.0 + """ + if epsilon < 0: + msg = f"epsilon must be non-negative, got {epsilon!r}" + raise ValueError(msg) + + if len(pts) < 3: + return list(pts) + + # --------------------------------------------------------------------------- + # Iterative, stack-based implementation. + # + # The naive recursive approach copies sublists at every level via slicing + # (pts[:max_index+1] / pts[max_index:]), which is O(n) per call and makes + # the overall algorithm O(n²) in memory even for well-balanced splits. An + # explicit stack operating on index ranges avoids all copying and also + # eliminates the risk of hitting Python's recursion limit for long polylines. + # --------------------------------------------------------------------------- + n = len(pts) + + # keep[i] is True when pts[i] must appear in the output. + keep: list[bool] = [False] * n + keep[0] = True + keep[-1] = True + + # Stack of (start_index, end_index) pairs still to be examined. + stack: list[tuple[int, int]] = [(0, n - 1)] + + while stack: + start, end = stack.pop() + if end - start < 2: + # Only one interior candidate at most; nothing to split further. + continue + + # Find the interior point with the greatest distance to the segment. + max_dist = 0.0 + max_index = start + for i in range(start + 1, end): + dist = _perpendicular_distance(pts[i], pts[start], pts[end]) + if dist > max_dist: + max_dist = dist + max_index = i + + if max_dist > epsilon: + keep[max_index] = True + stack.append((start, max_index)) + stack.append((max_index, end)) + # else: all interior points are within epsilon; discard them all. + + return [pts[i] for i in range(n) if keep[i]] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/graphs/johnson.py b/graphs/johnson.py new file mode 100644 index 000000000000..6306ab5f8654 --- /dev/null +++ b/graphs/johnson.py @@ -0,0 +1,118 @@ +import heapq +from collections.abc import Hashable + +Node = Hashable +edge = tuple[Node, Node, float] +adjacency = dict[Node, list[tuple[Node, float]]] + + +def _collect_nodes_and_edges(graph: adjacency) -> tuple[list[Node], list[edge]]: + nodes = set() + edges: list[edge] = [] + for u, neighbors in graph.items(): + nodes.add(u) + for v, w in neighbors: + nodes.add(v) + edges.append((u, v, w)) + return list(nodes), edges + + +def _bellman_ford(nodes: list[Node], edges: list[edge]) -> dict[Node, float]: + """ + Bellman-Ford relaxation to compute potentials h[v] for all vertices. + Raises ValueError if a negative weight cycle exists. + """ + dist: dict[Node, float] = dict.fromkeys(nodes, 0.0) + n = len(nodes) + + for _ in range(n - 1): + updated = False + for u, v, w in edges: + if dist[u] + w < dist[v]: + dist[v] = dist[u] + w + updated = True + if not updated: + break + else: + for u, v, w in edges: + if dist[u] + w < dist[v]: + raise ValueError("Negative weight cycle detected") + return dist + + +def _dijkstra( + start: Node, + nodes: list[Node], + graph: adjacency, + potentials: dict[Node, float], +) -> dict[Node, float]: + """ + Dijkstra over reweighted graph, using potentials h to make weights non-negative. + Returns distances from start in the reweighted space. + """ + inf = float("inf") + dist: dict[Node, float] = dict.fromkeys(nodes, inf) + dist[start] = 0.0 + heap: list[tuple[float, Node]] = [(0.0, start)] + + while heap: + d_u, u = heapq.heappop(heap) + if d_u > dist[u]: + continue + for v, w in graph.get(u, []): + w_prime = w + potentials[u] - potentials[v] + if w_prime < 0: + raise ValueError( + "Negative edge weight after reweighting: numeric error" + ) + new_dist = d_u + w_prime + if new_dist < dist[v]: + dist[v] = new_dist + heapq.heappush(heap, (new_dist, v)) + return dist + + +def johnson(graph: adjacency) -> dict[Node, dict[Node, float]]: + """ + Compute all-pairs shortest paths using Johnson's algorithm. + + Reference: + https://en.wikipedia.org/wiki/Johnson%27s_algorithm + + Args: + graph: adjacency list {u: [(v, weight), ...], ...} + + Returns: + dict of dicts: dist[u][v] = shortest distance from u to v + + Raises: + ValueError: if a negative weight cycle is detected + + Example: + >>> g = { + ... 0: [(1, 3), (2, 8), (4, -4)], + ... 1: [(3, 1), (4, 7)], + ... 2: [(1, 4)], + ... 3: [(0, 2), (2, -5)], + ... 4: [(3, 6)], + ... } + >>> round(johnson(g)[0][3], 2) + 2.0 + """ + nodes, edges = _collect_nodes_and_edges(graph) + potentials = _bellman_ford(nodes, edges) + + all_pairs: dict[Node, dict[Node, float]] = {} + inf = float("inf") + for s in nodes: + dist_reweighted = _dijkstra(s, nodes, graph, potentials) + dists_orig: dict[Node, float] = {} + for v in nodes: + d_prime = dist_reweighted[v] + if d_prime < inf: + dists_orig[v] = d_prime - potentials[s] + potentials[v] + else: + dists_orig[v] = inf + all_pairs[s] = dists_orig + + return all_pairs diff --git a/graphs/tests/test_johnson.py b/graphs/tests/test_johnson.py new file mode 100644 index 000000000000..e149aac85d0f --- /dev/null +++ b/graphs/tests/test_johnson.py @@ -0,0 +1,24 @@ +import math + +import pytest + +from graphs.johnson import johnson + + +def test_johnson_basic(): + g = { + 0: [(1, 3), (2, 8), (4, -4)], + 1: [(3, 1), (4, 7)], + 2: [(1, 4)], + 3: [(0, 2), (2, -5)], + 4: [(3, 6)], + } + dist = johnson(g) + assert math.isclose(dist[0][3], 2.0, abs_tol=1e-9) + assert math.isclose(dist[3][2], -5.0, abs_tol=1e-9) + + +def test_johnson_negative_cycle(): + g2 = {0: [(1, 1)], 1: [(0, -3)]} + with pytest.raises(ValueError): + johnson(g2) diff --git a/pyproject.toml b/pyproject.toml index 34e099a46435..0b0c3f5cfda4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,14 +169,14 @@ skip = """\ python_version = "3.14" [tool.pytest] -ini_options.markers = [ - "mat_ops: mark a test as utilizing matrix operations.", -] ini_options.addopts = [ "--durations=10", "--doctest-modules", "--showlocals", ] +ini_options.markers = [ + "mat_ops: mark a test as utilizing matrix operations.", +] [tool.coverage] report.omit = [