0%

30. Substring with Concatenation of All Words

sliding window O(n * l) time O(m * l) space
这道题主要思路是把每个单词当成单个字母来处理,用sliding window找出所有符合要求的结果,即对于s[0:10)来说,假设words的每个单词长度为3,那么第一次处理s[0:3) s[3:6) s[6:9) s[9:10),第二次处理s[1:4) s[4:7) s[7:10),第三次处理s[2:5) s[5:8) s[8:10)
76. Minimum Window Substring思路接近
438. Find All Anagrams in a String解法一样

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
if not s or not words: return []
d = defaultdict(int)
for w in words:
d[w] += 1
n, m, l = len(s), len(words), len(words[0])
total, res = m * l, []
for i in range(l):
td, cnt = d.copy(), m
for j in range(i, n, l): # 应该是j + l <= n但是python里字符串切片越界也不影响
t = s[j:j + l]
td[t] -= 1
if td[t] >= 0:
cnt -= 1
if j - total >= i:
t = s[j - total:j - total + l]
td[t] += 1
if td[t] > 0:
cnt += 1
if cnt == 0:
res.append(j - total + l)
return res
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
if (words.empty() || s.empty()) return {};
int n = s.length(), m = words.size(), l = words[0].length(), total = m * l;
unordered_map<string_view, int> hm;
for (const auto &w : words) {
++hm[w];
}
string_view sv{s};
vector<int> res;
for (int i = 0; i < l; ++i) {
auto t = hm;
int cnt = m;
for (int j = i; j + l <= n; j += l) {
if (--t[sv.substr(j, l)] >= 0) { // 从0减小成非0不更新cnt
--cnt;
}
if (j - total >= i && ++t[sv.substr(j - total, l)] > 0) { // cnt只有在大于等于0以上更新才有用,从非0变成0不贡献频数所以不更新cnt
++cnt;
}
if (cnt == 0) {
res.push_back(j - total + l);
}
}
}
return res;
}
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
if (words.empty() || s.empty()) return {};
int n = s.length(), m = words.size(), l = words[0].length(), total = m * l;
unordered_map<string, int> hm;
for (const auto &w : words) {
++hm[w];
}
vector<int> res;
for (int i = 0; i < l; ++i) {
auto t = hm;
int cnt = m;
for (int j = i; j + l <= n; j += l) {
auto w = s.substr(j, l);
if (t.count(w) && t[w]-- > 0) {
--cnt;
}
if (j >= total) {
w = s.substr(j - total, l);
if (t.count(w) && ++t[w] > 0) {
++cnt;
}
}
if (cnt == 0) {
res.push_back(j - total + l);
}
}
}
return res;
}
};

一点优化:如果当前统计的单词不存在则sliding window直接跳过该单词开始新的统计

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
if (words.empty() || s.empty()) return {};
int n = s.length(), m = words.size(), len = words[0].size();
unordered_map<string, int> hm; // 先统计单词频数
for (const auto &w : words) {
++hm[w];
}
vector<int> res;
for (int i = 0; i < len; ++i) { // trick在扫描方式上,假设单词长度为3,那么第一次扫0369第二次147第三次258
unordered_map<string, queue<int>> t; // 每个queue放每个有效单词的出现位置
int cnt = 0;
for (int j = i, b = j; j + len <= n; j += len) {
auto w = s.substr(j, len);
if (hm.count(w) == 0) { // 如果当前位置单词不存在,之前统计全部清空
t.clear();
cnt = 0;
b = j + len;
} else if (t[w].size() < hm[w]) { // 如果当前位置单词存在且个数未超过上限
t[w].push(j);
++cnt;
} else { // 如果当前位置单词存在但是个数已达上限,清除当前位置单词在窗口内最早出现位置之前的所有单词
b = t[w].front() + len;
for (auto &&p : t) {
while (!p.second.empty() && p.second.front() < b) {
--cnt;
p.second.pop();
}
}
t[w].push(j);
++cnt;
}
if (cnt == m) { // 如果找到符合要求的单词组合
res.push_back(b);
--cnt;
t[s.substr(b, len)].pop();
b += len;
}
}
}
return res;
}
};