KMP algorithm improves pattern searching by avoiding unnecessary re-comparisons after a mismatch using a preprocessing table called Longest Prefix Suffix (LPS).
def compute_lps(pattern):
lps = [0] * len(pattern)
length = 0 # length of previous longest prefix suffix
i = 1
while i < len(pattern):
if pattern[i] == pattern[length]:
length += 1
lps[i] = length
i += 1
else:
if length != 0:
length = lps[length - 1]
else:
lps[i] = 0
i += 1
return lps
def kmp_search(text, pattern):
lps = compute_lps(pattern)
i = j = 0 # index for text[], pattern[]
occurrences = []
while i < len(text):
if pattern[j] == text[i]:
i += 1
j += 1
if j == len(pattern):
occurrences.append(i - j)
j = lps[j - 1]
else:
if j != 0:
j = lps[j - 1]
else:
i += 1
return occurrences
# Usage example
text = "abxabcabcaby"
pattern = "abcaby"
print("Pattern found at indices:", kmp_search(text, pattern))
Rabin-Karp uses hashing to find any one of a set of pattern strings in a text. It calculates hash values of the pattern and substrings of the text and compares the hash values to detect potential matches.
def rabin_karp(text, pattern, prime=101):
n, m = len(text), len(pattern)
d = 256 # number of chars in input alphabet
h = pow(d, m-1) % prime
p = 0 # hash value for pattern
t = 0 # hash value for text window
result = []
for i in range(m):
p = (d * p + ord(pattern[i])) % prime
t = (d * t + ord(text[i])) % prime
for s in range(n - m + 1):
if p == t:
if text[s:s+m] == pattern:
result.append(s)
if s < n - m:
t = (d*(t - ord(text[s]) * h) + ord(text[s+m])) % prime
if t < 0:
t += prime
return result
# Usage example
text = "abxabcabcaby"
pattern = "abcaby"
print("Pattern found at indices:", rabin_karp(text, pattern))