diff --git a/commit-reach.c b/commit-reach.c index d3a9b3ed6fe561..1b2ffcc7b68a3e 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -49,6 +49,46 @@ static int queue_has_nonstale(struct prio_queue *queue) return 0; } +/* + * Two conditions must both hold for it to be safe to stop early: + * + * 1. No merge-base candidate is currently waiting in the queue to be + * recorded (pending_merges_count == 0). A candidate is a commit + * that has both PARENT1 and PARENT2 set, is not STALE, and has not + * yet had RESULT set on it. + * + * 2. At least one side has zero exclusive (non-STALE) commits left in + * the queue. An "exclusive" commit carries exactly one of PARENT1 + * or PARENT2 (and not STALE). New merge-base candidates are born + * only when an exclusive commit on one side propagates its flag + * onto a commit that already carries the other side's flag; if one + * side has run out of exclusive territory, no further such transition + * can occur. Propagation through STALE commits cannot create new + * candidates, because STALE commits already carry both PARENT1 and + * PARENT2. + * + * The callers maintain p1_exclusive_count, p2_exclusive_count, and + * pending_merges_count incrementally so this check is O(1). Each + * enqueue of a commit in a counted state contributes +1, balanced by + * either the flag transition that takes the commit out of that state or + * by its eventual dequeue, so the counts stay >= 0. The + * `(p->object.flags & flags) == flags` check on parent visits prevents + * enqueueing a second copy of a commit in the same flag state, which + * keeps the accounting straight even through diamond merges. Transitions + * out of (PARENT1|PARENT2) -- when a descendant merge-base propagates + * STALE up to a still-pending candidate -- are also counted, so that + * the candidate is not double-counted when it later dequeues with STALE + * set (and thus fails the dequeue-time decrement check). + */ +static int no_more_merge_bases_possible(int p1_exclusive_count, + int p2_exclusive_count, + int pending_merges_count) +{ + if (pending_merges_count) + return 0; + return !p1_exclusive_count || !p2_exclusive_count; +} + /* all input commits in one and twos[] must have been parsed! */ static int paint_down_to_common(struct repository *r, struct commit *one, int n, @@ -59,24 +99,75 @@ static int paint_down_to_common(struct repository *r, { struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; int i; + int found_result = 0; + int use_early_termination; + int p1_has_infinity = 0, p2_has_infinity = 0; + int p1_exclusive_count = 0, p2_exclusive_count = 0; + int pending_merges_count = 0; timestamp_t last_gen = GENERATION_NUMBER_INFINITY; struct commit_list **tail = result; if (!min_generation && !corrected_commit_dates_enabled(r)) queue.compare = compare_commits_by_commit_date; + /* + * The early termination optimization in no_more_merge_bases_possible() + * relies on the priority queue processing commits in an order where + * parents are never dequeued before their children. This is guaranteed + * by corrected commit dates (which ensure child_gen > parent_gen) for + * commits covered by the commit graph. Commits NOT in the graph get + * GENERATION_NUMBER_INFINITY and sort among themselves by commit date, + * which may violate monotonicity if timestamps are out of order. + * + * We handle this by tracking whether both sides have ever touched any + * INFINITY-generation commit; if so, we disable early termination + * because the dequeue order among INFINITY commits is not trustworthy + * and counter updates from later cross-side transitions can desync from + * the queue state. When only one side has INFINITY commits, the other + * side's territory is entirely in graph space (commit-graph writes are + * ancestry-closed) and no monotonicity violation can occur between them, + * so the optimization remains safe. + * + * Detection happens at push time -- both at the initial pushes of `one` + * and `twos[]` below, and when a parent is (re-)enqueued during the + * walk. The flag is sticky. + */ + use_early_termination = corrected_commit_dates_enabled(r); + one->object.flags |= PARENT1; if (!n) { commit_list_append(one, result); return 0; } prio_queue_put(&queue, one); + if (use_early_termination) { + p1_exclusive_count++; + if (commit_graph_generation(one) == GENERATION_NUMBER_INFINITY) + p1_has_infinity = 1; + } for (i = 0; i < n; i++) { + /* + * Skip duplicates within twos[]. A second push of an already-queued + * commit would inflate p2_exclusive_count, leaving a phantom that + * could delay the early-termination check. (Callers separately + * ensure twos[i] != one, so this is the only duplicate case we must + * handle here.) + */ + if (twos[i]->object.flags & PARENT2) + continue; twos[i]->object.flags |= PARENT2; prio_queue_put(&queue, twos[i]); + if (use_early_termination) { + p2_exclusive_count++; + if (commit_graph_generation(twos[i]) == GENERATION_NUMBER_INFINITY) + p2_has_infinity = 1; + } } + if (p1_has_infinity && p2_has_infinity) + use_early_termination = 0; + while (queue_has_nonstale(&queue)) { struct commit *commit = prio_queue_get(&queue); struct commit_list *parents; @@ -93,6 +184,29 @@ static int paint_down_to_common(struct repository *r, break; flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); + + /* + * Decrement queue counters for the dequeued commit. Only maintained + * while early termination is active, since otherwise the increments + * in the parent-visit block below are also skipped. The counts must + * stay >= 0 in graph-space: each enqueue of a commit in a counted + * state is balanced by exactly one dequeue (or by a flag transition + * that moves it out of that state while still queued). + */ + if (use_early_termination) { + if (flags == PARENT1) { + if (p1_exclusive_count-- <= 0) + BUG("p1_exclusive_count went negative"); + } else if (flags == PARENT2) { + if (p2_exclusive_count-- <= 0) + BUG("p2_exclusive_count went negative"); + } else if (flags == (PARENT1 | PARENT2) && + !(commit->object.flags & RESULT)) { + if (pending_merges_count-- <= 0) + BUG("pending_merges_count went negative"); + } + } + if (flags == (PARENT1 | PARENT2)) { if (!(commit->object.flags & RESULT)) { commit->object.flags |= RESULT; @@ -100,7 +214,9 @@ static int paint_down_to_common(struct repository *r, } /* Mark parents of a found merge stale */ flags |= STALE; + found_result = 1; } + parents = commit->parents; while (parents) { struct commit *p = parents->item; @@ -123,9 +239,82 @@ static int paint_down_to_common(struct repository *r, return error(_("could not parse commit %s"), oid_to_hex(&p->object.oid)); } + /* + * Adjust counts for the flag transition on this parent. With + * corrected commit dates, parents are never dequeued before + * their children, so the `old_flags` we read here reflects + * the current state of every queue entry of p. + * + * The `(p->object.flags & flags) == flags` check above ensures + * we never enqueue a second copy of p with the same flag state, + * so the count of queued entries in each exclusive (non-STALE) + * state is exact: +1 on the transition that brings p into that + * state, -1 on the transition that takes it out, and -1 on + * dequeue if it is still in that state. In particular, the + * counts cannot drift negative. + */ + if (use_early_termination) { + unsigned old_flags = p->object.flags & + (PARENT1 | PARENT2 | STALE); + unsigned new_flags = (p->object.flags | flags) & + (PARENT1 | PARENT2 | STALE); + + /* + * Detect INFINITY exposure on this push. If both sides have + * touched any INFINITY commit, monotonicity among those + * commits is no longer guaranteed and counters can desync; + * disable the optimization before mutating them. In normal + * usage (ancestry-closed commit graphs) this is reached only + * when at least one side's tip is itself INFINITY, which is + * already detected at the initial pushes above; the check + * here is a safety net for unusual graph layouts. + */ + if (commit_graph_generation(p) == GENERATION_NUMBER_INFINITY) { + if (new_flags & PARENT1) + p1_has_infinity = 1; + if (new_flags & PARENT2) + p2_has_infinity = 1; + if (p1_has_infinity && p2_has_infinity) + use_early_termination = 0; + } + + if (use_early_termination && old_flags != new_flags) { + if (old_flags == PARENT1) { + if (p1_exclusive_count-- <= 0) + BUG("p1_exclusive_count went negative"); + } else if (old_flags == PARENT2) { + if (p2_exclusive_count-- <= 0) + BUG("p2_exclusive_count went negative"); + } else if (old_flags == (PARENT1 | PARENT2) && + !(p->object.flags & RESULT)) { + /* + * p was a pending merge-base candidate; STALE is being + * propagated to it from a descendant that has already + * been recorded. When p later dequeues with STALE + * set, the dequeue path will not match its + * (PARENT1|PARENT2)&&!RESULT check, so we must + * decrement here to keep pending_merges_count exact. + */ + if (pending_merges_count-- <= 0) + BUG("pending_merges_count went negative"); + } + if (new_flags == PARENT1) + p1_exclusive_count++; + else if (new_flags == PARENT2) + p2_exclusive_count++; + else if (new_flags == (PARENT1 | PARENT2)) + pending_merges_count++; + } + } p->object.flags |= flags; prio_queue_put(&queue, p); } + + if (found_result && use_early_termination && + no_more_merge_bases_possible(p1_exclusive_count, + p2_exclusive_count, + pending_merges_count)) + break; } clear_prio_queue(&queue); @@ -544,6 +733,15 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, if (repo_parse_commit(r, reference[i])) return ignore_missing_commits ? 0 : -1; + /* + * Short-circuit when commit is itself one of the references: a commit + * is trivially reachable from itself. This also avoids tripping the + * counter invariants in paint_down_to_common(), which assumes the + * initial pushes of `one` and `twos[]` paint disjoint commits. + */ + if (commit == reference[i]) + return 1; + generation = commit_graph_generation(reference[i]); if (generation > max_generation) max_generation = generation; diff --git a/t/perf/p6012-merge-base-deep-side-branch.sh b/t/perf/p6012-merge-base-deep-side-branch.sh new file mode 100755 index 00000000000000..66c90b0ff3fc8a --- /dev/null +++ b/t/perf/p6012-merge-base-deep-side-branch.sh @@ -0,0 +1,119 @@ +#!/bin/sh + +test_description='Performance of merge-base with a deep side branch + +Synthetic repository that triggers slow merge-base when a merge commit +introduces a side branch rooted far back in history. The pathological +case forces paint_down_to_common() to chase STALE flags through many +intermediate commits after the merge-base is already found. + +Topology (N = $NUM_COMMITS commits on main before the merge, F is +the BRANCH_POINT-th of them, S = $SIDE_COMMITS commits on the side): + +# F -- s1 -- ... -- sS (side, dates set old) +# / \ +# 1 -- 2 -- ... -- N-1 -- N -- M (main) +# | +# P (pr branch) + +merge-base(M, P) = N. Without early termination, the algorithm +walks from N back through roughly N - BRANCH_POINT commits to F +to exhaust STALE processing. +' + +. ./perf-lib.sh + +NUM_COMMITS=500000 +BRANCH_POINT=10000 +SIDE_COMMITS=5 + +test_expect_success 'setup synthetic repo' ' + git init --bare repo.git && + awk -v N=$NUM_COMMITS -v BP=$BRANCH_POINT -v S=$SIDE_COMMITS '\'' + BEGIN { + # Shared blob + print "blob" + print "mark :1" + print "data 8" + print "content" + print "" + + # Main branch commits: mark :2 is the 1st main + # commit, mark :(N+1) is the Nth. + for (i = 2; i <= N + 1; i++) { + print "commit refs/heads/main" + print "mark :" i + print "committer C " (1000000 + i) " +0000" + print "data 2" + print "x" + if (i > 2) + print "from :" (i - 1) + print "M 100644 :1 file" + print "" + } + + # Side branch forks from the BP-th main commit + # (mark :(BP+1)), with old dates. + side_start = BP + 1 + side_mark_base = N + 2 + for (j = 0; j < S; j++) { + mark = side_mark_base + j + if (j == 0) + from_mark = side_start + else + from_mark = mark - 1 + print "commit refs/heads/side" + print "mark :" mark + print "committer C " (500000 + j) " +0000" + print "data 2" + print "x" + print "from :" from_mark + print "" + } + + # Merge side into main + main_tip = N + 1 + side_tip = side_mark_base + S - 1 + merge_mark = side_tip + 1 + print "commit refs/heads/main" + print "mark :" merge_mark + print "committer C " (1000000 + N + 2) " +0000" + print "data 2" + print "x" + print "from :" main_tip + print "merge :" side_tip + print "" + + # PR branch forked from main tip (just before merge) + pr_mark = merge_mark + 1 + print "commit refs/heads/pr" + print "mark :" pr_mark + print "committer C " (1000000 + N + 3) " +0000" + print "data 2" + print "x" + print "from :" main_tip + print "" + } + '\'' expect +' + +test_expect_success 'merge-base result is correct' ' + git -C repo.git merge-base --all main pr >actual && + test_cmp expect actual +' + +test_perf 'merge-base: commit introducing old side branch' ' + git -C repo.git merge-base --all main pr +' + +test_perf 'merge-base: no side branch (baseline)' ' + git -C repo.git merge-base --all main~1 pr +' + +test_done diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index dc0421ed2f3726..af4d3198608521 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -49,6 +49,65 @@ test_expect_success 'setup' ' git tag -a -m "$x-$i" tag-$x-$i commit-$x-$i || return 1 done done && + + # Build a small side topology to exercise the (PARENT1|PARENT2) -> + # (PARENT1|PARENT2|STALE) transition in paint_down_to_common(); the + # 10x10 grid above does not exercise it because no merge-base candidate + # there is a descendant of another, so STALE never reaches a + # still-pending candidate. + # + # ps-X + # /|\ + # / | \ + # ps-Z ps-B ps-W + # | / \ | + # | / \ | + # |/ \| + # ps-T1 ps-T2 + # + # where ps-T1=merge(ps-Z,ps-B), ps-T2=merge(ps-W,ps-B), so + # merge-base(ps-T1,ps-T2) = ps-B. During the walk, ps-X transitions + # to (PARENT1|PARENT2) via ps-Z and ps-W before ps-B is dequeued; + # then the STALE-walk from ps-B transitions ps-X to + # (PARENT1|PARENT2|STALE). + git checkout --orphan ps-orphan && + test_commit ps-X && + git checkout -b ps-B-br ps-X && test_commit ps-B && + git checkout -b ps-Z-br ps-X && test_commit ps-Z && + git checkout -b ps-W-br ps-X && test_commit ps-W && + git checkout -b ps-T1 ps-Z && + git merge --no-ff -m ps-T1 ps-B && + git checkout -b ps-T2 ps-W && + git merge --no-ff -m ps-T2 ps-B && + + # Build a side topology that lives entirely outside the half + # commit-graph and has non-monotonic commit dates, to exercise the + # push-time INFINITY-gate in paint_down_to_common. With both tips + # outside the graph, their generation is INFINITY and the priority + # queue falls back to commit-date order, which here is non-monotonic + # between parent and child. If the optimization were not disabled, + # a cross-side flag transition on an already-dequeued INFINITY parent + # would underflow the exclusive counts. + # + # pi-X (date 500, PARENT1 tip) --> pi-P, pi-D + # pi-D (date 480) --> pi-C + # pi-C (date 200) --> pi-B + # pi-B (date 100, PARENT2 tip) --> pi-P + # pi-P (date 450, root) + # + # merge-base(pi-X, pi-B) = pi-B (it is an ancestor of pi-X and is + # itself one of the queried tips). + git checkout --orphan pi-orphan && + test_commit --date "@450 +0000" pi-P && + test_commit --date "@100 +0000" pi-B && + test_commit --date "@200 +0000" pi-C && + test_commit --date "@480 +0000" pi-D && + GIT_AUTHOR_DATE="@500 +0000" GIT_COMMITTER_DATE="@500 +0000" \ + git commit-tree -p pi-D -p pi-P -m pi-X pi-D^{tree} >pi-X-oid && + pi_x="$(cat pi-X-oid)" && + git branch -f pi-X-br "$pi_x" && + git tag pi-X "$pi_x" && + git commit-graph write --reachable && mv .git/objects/info/commit-graph commit-graph-full && chmod u+w commit-graph-full && @@ -146,6 +205,16 @@ test_expect_success 'in_merge_bases_many:miss-heuristic' ' test_all_modes in_merge_bases_many ' +test_expect_success 'in_merge_bases_many:self' ' + cat >input <<-\EOF && + A:commit-6-8 + X:commit-5-9 + X:commit-6-8 + EOF + echo "in_merge_bases_many(A,X):1" >expect && + test_all_modes in_merge_bases_many +' + test_expect_success 'is_descendant_of:hit' ' cat >input <<-\EOF && A:commit-5-7 @@ -183,6 +252,51 @@ test_expect_success 'get_merge_bases_many' ' test_all_modes get_merge_bases_many ' +test_expect_success 'get_merge_bases_many:duplicate-twos' ' + cat >input <<-\EOF && + A:commit-5-7 + X:commit-4-8 + X:commit-4-8 + X:commit-6-6 + X:commit-6-6 + X:commit-8-3 + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse commit-5-6 \ + commit-4-7 | sort + } >expect && + test_all_modes get_merge_bases_many +' + +test_expect_success 'get_merge_bases_many:pending-stale' ' + # Exercises the (PARENT1|PARENT2) -> (...|STALE) transition path in + # paint_down_to_common(). See the topology comment in the setup test. + cat >input <<-\EOF && + A:ps-T1 + X:ps-T2 + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse ps-B + } >expect && + test_all_modes get_merge_bases_many +' + +test_expect_success 'get_merge_bases_many:infinity-both-sides' ' + # Exercises the push-time INFINITY-gate in paint_down_to_common(). See + # the pi-* topology comment in the setup test. + cat >input <<-\EOF && + A:pi-X + X:pi-B + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse pi-B + } >expect && + test_all_modes get_merge_bases_many +' + test_expect_success 'reduce_heads' ' cat >input <<-\EOF && X:commit-1-10