Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions strings/boyer_moore_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@

If there is no mismatch then the pattern matches with text block.

Time Complexity : O(n/m)
Time Complexity : O(n/m) average case with bad character heuristic
n=length of main string
m=length of pattern string

Note: The bad character shift requires a while loop so positions are
actually skipped. A for loop ignores loop-variable reassignment.
"""


Expand Down Expand Up @@ -78,23 +81,33 @@ def mismatch_in_text(self, current_pos: int) -> int:

def bad_character_heuristic(self) -> list[int]:
"""
Finds the positions of the pattern location.
Finds the positions of the pattern in text using the bad character
heuristic. A while loop is used so the shift actually skips
positions, achieving O(n/m) average performance instead of the
O(nm) brute-force that a for loop would produce.

>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.bad_character_heuristic()
[0, 3]
>>> bms2 = BoyerMooreSearch(text="AAAAAA", pattern="AA")
>>> bms2.bad_character_heuristic()
[0, 1, 2, 3, 4]
>>> bms3 = BoyerMooreSearch(text="ABCDEF", pattern="XY")
>>> bms3.bad_character_heuristic()
[]
"""

positions = []
for i in range(self.textLen - self.patLen + 1):
i = 0
while i <= self.textLen - self.patLen:
mismatch_index = self.mismatch_in_text(i)
if mismatch_index == -1:
positions.append(i)
i += 1
else:
match_index = self.match_in_pattern(self.text[mismatch_index])
i = (
mismatch_index - match_index
) # shifting index lgtm [py/multiple-definition]
# Use max to prevent shifting backwards
i = max(i + 1, mismatch_index - match_index)
return positions


Expand Down