- 思路
- example
- haystack = 'abcabcabd', size = n
- needle = 'abcabd', size = m, 模式(基準(zhǔn))串
- output: 3
- 暴力法:
-
(haystack)
-
(needle: 失敗1)
-
(needle: 失敗2)
-
(needle: 失敗3)
-
(needle: 成功)
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
n, m = len(haystack), len(needle)
for i in range(n-m+1):
if haystack[i:i+m] == needle:
return i
return -1
- KMP
-
雙指針. i: haystack index; cur: needle (模式串)index
- 暴力法中的失敗1和失敗2嘗試可以跳過(guò)。以失敗1為例:
- abcabcabd (haystack)
-
abcabd (雖然模式串中第一個(gè)字符a就匹配失敗,但從另一個(gè)角度模式串黑體字符前面的位置與haystack是不匹配的)
-
abcab (失敗1發(fā)生時(shí)前面的子串, 成功的匹配需要needle中的字符去match haystack中的當(dāng)前字符c -- 假設(shè)指標(biāo)為
)
- 考慮模式串(因?yàn)槭∏懊娴奈恢脙蓚€(gè)串有相同部分),利用對(duì)稱性 (前綴 = 后綴 的最大長(zhǎng)度)
- 前綴: x, ..., i (以ith 結(jié)尾,x > 0) 對(duì)應(yīng)的子串
- 后綴:0, ..., y (以0th開(kāi)頭, y < i) 對(duì)應(yīng)的子串
- 上面的例子,abcab,最長(zhǎng)“公共”前后綴長(zhǎng)度 = 2 (模式串中下一個(gè)待匹配位置為2)
- 從而馬上進(jìn)入到(needle: 成功)的情況。
- 關(guān)鍵:建立模式串的next 數(shù)組(模式串的最長(zhǎng)公共前后綴長(zhǎng)度數(shù)組). 這樣當(dāng)模式串的ith位置與haystack[z]匹配失敗的時(shí)候,調(diào)用next[i-1]可得到模式串中準(zhǔn)備與haystack[z]比較的位置。(必然有next[i-1] < i) (回退)。
- abcabd 模式串
- 000120 next (回退)數(shù)組
-
next數(shù)組 暴力版,
def getNext(s): # s: 模式串
nxt = [0]*len(s)
for i in range(1, len(s)):
for k in range(i, 0, -1):
if s[:k] == s[i-k+1:i+1]:
break
nxt[i] = k
return nxt
- 優(yōu)化版next數(shù)組見(jiàn)下面代碼。
- 思想與strStr主體函數(shù)一樣,但是在getNext中是模式子串自己與自己的匹配!
- 復(fù)雜度. 時(shí)間:O(n+m), 空間: O(m)
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
def getNext(s):
nxt = [0]*len(s)
cur, i = 0, 1
while i < len(s):
if s[cur] == s[i]:
nxt[i] = cur + 1
cur += 1
i += 1
elif cur != 0:
cur = nxt[cur-1]
# i remain unchanged
else: # cur = 0
nxt[i] = 0
i += 1
# cur remain unchanged, = 0
return nxt
n, m = len(haystack), len(needle)
nxt = getNext(needle)
i, cur = 0, 0 # i: haystack中待比較位置,cur: needle中待比較位置
while i < n:
if haystack[i] == needle[cur]:
i += 1
cur += 1
elif cur != 0:
cur = nxt[cur-1]
# i remain unchanged
else: # cur = 0
i += 1
# cur remain unchanged
if cur == m:
return i - m
return -1
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
def getNext(needle):
m = len(needle)
nxt = [0 for _ in range(m)]
j, i = 0, 1
while i < m:
if needle[i] == needle[j]:
nxt[i] = j + 1
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return nxt
n, m = len(haystack), len(needle)
j = 0 # index in needle
i = 0 # index in haystack
nxt = getNext(needle)
while i < n:
if haystack[i] == needle[j]:
i += 1
j += 1
if j == m:
return i - m
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return -1
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
def compute_next(s):
n = len(s)
nxt = [0 for _ in range(n)]
i = 1
j = 0
while i < n:
if s[i] == s[j]:
nxt[i] = j + 1
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
nxt[i] = 0
i += 1
return nxt
m, n = len(haystack), len(needle)
nxt = compute_next(needle)
i, j = 0, 0
while i < m:
if haystack[i] == needle[j]:
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
if j == n:
return i - n
return -1
- 思路
- example
- 暴力法: 窮舉子串的長(zhǎng)度
- s[j] vs s[j-i] for j in range(i, n)
- 復(fù)雜度. 時(shí)間:
, 空間:
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
n = len(s)
for i in range(1, n//2+1): # i: length of substring
if n % i == 0:
flag = False
for j in range(i, n):
if s[j] != s[j-i]:
flag = True
break
if flag == False:
return True
return False
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
n = len(s)
for k in range(1, n//2+1):
if n % k != 0:
continue
i = k
while i < n:
if s[i-k:i] != s[i:i+k]:
break
i += k
if i == n:
return True
return False
- 利用KMP
- 假設(shè)s = s's', 那到s + s = s's's's', 去掉s+s的第一個(gè)和最后一個(gè)字符。
- 假設(shè)s' = ab, 那到s+s = abababab,
- 去掉s+s的第一個(gè)和最后一個(gè)字符: t = bababa, 我們可以在t中找到一個(gè)s子串。
- 下面的版本還可以接著優(yōu)化(TBA).
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
def getNext(s): # pattern string
nxt = [0]*len(s)
cur = 0 #
i = 1 #
while i < len(s):
if s[i] == s[cur]:
nxt[i] = cur + 1
i += 1
cur += 1
elif cur != 0:
cur = nxt[cur-1]
else:
nxt[i] = 0
i += 1
return nxt
#
nxt = getNext(s)
ss = s + s
ss = ss[1:len(ss)-1]
n, m = len(ss), len(s)
i, cur = 0, 0 # i: ss index; cur: s index
while i < n:
if ss[i] == s[cur]:
i += 1
cur += 1
elif cur != 0:
cur = nxt[cur-1]
else:
i += 1
if cur == m:
return True
return False
class Solution:
def repeatedSubstringPattern(self, s: str) -> bool:
def getNext(s):
n = len(s)
nxt = [0 for _ in range(n)]
j, i = 0, 1
while i < n:
if s[i] == s[j]:
nxt[i] = j + 1
i += 1
j += 1
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return nxt
ss = s + s
ss = ss[1:len(ss)-1]
n, m = len(ss), len(s)
j, i = 0, 0
nxt = getNext(s)
while i < n:
if ss[i] == s[j]:
i += 1
j += 1
if j == m:
return True
else:
if j > 0:
j = nxt[j-1]
else:
i += 1
j = 0
return False