From 4811c143e441afbd303b2a0682c31f1babb36a52 Mon Sep 17 00:00:00 2001
From: Divyanshu-hash
Date: Tue, 23 Jun 2026 01:05:37 +0530
Subject: [PATCH] Fix Boyer-Moore bug bad character shift logic

---
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Blank context lines and leading whitespace
were inferred from the hunk line counts (9/12 and 23/33) and the diffstat
(19 insertions, 6 deletions); verify them against the target file before
applying. Both hunks end with two blank context lines that must be kept.
NOTE(review): O(n/m) is usually quoted as the best case of the bad
character rule; confirm the "average" wording in the docstrings.

 strings/boyer_moore_search.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/strings/boyer_moore_search.py b/strings/boyer_moore_search.py
index ad14a504f792..3783c4bfe35c 100644
--- a/strings/boyer_moore_search.py
+++ b/strings/boyer_moore_search.py
@@ -13,9 +13,12 @@
 
 If there is no mismatch then the pattern matches with text block.
 
-Time Complexity : O(n/m)
+Time Complexity : O(n/m) average case with bad character heuristic
     n=length of main string
     m=length of pattern string
+
+Note: The bad character shift requires a while loop so positions are
+      actually skipped. A for loop ignores loop-variable reassignment.
 """
 
 
@@ -78,23 +81,33 @@ def mismatch_in_text(self, current_pos: int) -> int:
 
     def bad_character_heuristic(self) -> list[int]:
         """
-        Finds the positions of the pattern location.
+        Finds the positions of the pattern in text using the bad character
+        heuristic. A while loop is used so the shift actually skips
+        positions, achieving O(n/m) average performance instead of the
+        O(nm) brute-force that a for loop would produce.
 
         >>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
         >>> bms.bad_character_heuristic()
         [0, 3]
+        >>> bms2 = BoyerMooreSearch(text="AAAAAA", pattern="AA")
+        >>> bms2.bad_character_heuristic()
+        [0, 1, 2, 3, 4]
+        >>> bms3 = BoyerMooreSearch(text="ABCDEF", pattern="XY")
+        >>> bms3.bad_character_heuristic()
+        []
         """
 
         positions = []
-        for i in range(self.textLen - self.patLen + 1):
+        i = 0
+        while i <= self.textLen - self.patLen:
             mismatch_index = self.mismatch_in_text(i)
             if mismatch_index == -1:
                 positions.append(i)
+                i += 1
             else:
                 match_index = self.match_in_pattern(self.text[mismatch_index])
-                i = (
-                    mismatch_index - match_index
-                )  # shifting index lgtm [py/multiple-definition]
+                # Use max to prevent shifting backwards
+                i = max(i + 1, mismatch_index - match_index)
         return positions
 
 