30. Substring with Concatenation of All Words

sliding window O(n * l) time O(m * l) space
这道题主要思路是把每个单词当成单个字母来处理，用sliding window找出所有符合要求的结果，即对于s[0:10)来说，假设words的每个单词长度为3，那么第一次处理s[0:3) s[3:6) s[6:9) s[9:10)，第二次处理s[1:4) s[4:7) s[7:10)，第三次处理s[2:5) s[5:8) s[8:10)
跟76. Minimum Window Substring思路接近
跟438. Find All Anagrams in a String解法一样

class Solution:
    def findSubstring(self, s: str, words: List[str]) -> List[int]:
        if not s or not words: return []
        d = defaultdict(int)
        for w in words:
            d[w] += 1
        n, m, l = len(s), len(words), len(words[0])
        total, res = m * l, []
        for i in range(l):
            td, cnt = d.copy(), m
            for j in range(i, n, l): # 应该是j + l <= n但是python里字符串切片越界也不影响
                t = s[j:j + l]
                td[t] -= 1
                if td[t] >= 0:
                    cnt -= 1
                if j - total >= i:
                    t = s[j - total:j - total + l]
                    td[t] += 1
                    if td[t] > 0:
                        cnt += 1
                if cnt == 0:
                    res.append(j - total + l)
        return res

class Solution {
public:
    vector<int> findSubstring(string s, vector<string>& words) {
        if (words.empty() || s.empty()) return {};
        int n = s.length(), m = words.size(), l = words[0].length(), total = m * l;
        unordered_map<string_view, int> hm;
        for (const auto &w : words) {
            ++hm[w];
        }
        string_view sv{s};
        vector<int> res;
        for (int i = 0; i < l; ++i) {
            auto t = hm;
            int cnt = m;
            for (int j = i; j + l <= n; j += l) {
                if (--t[sv.substr(j, l)] >= 0) { // 从0减小成非0不更新cnt
                    --cnt;
                }
                if (j - total >= i && ++t[sv.substr(j - total, l)] > 0) { // cnt只有在大于等于0以上更新才有用，从非0变成0不贡献频数所以不更新cnt
                    ++cnt;
                }
                if (cnt == 0) {
                    res.push_back(j - total + l);
                }
            }
        }
        return res;
    }
};

class Solution {
public:
    vector<int> findSubstring(string s, vector<string>& words) {
        if (words.empty() || s.empty()) return {};
        int n = s.length(), m = words.size(), l = words[0].length(), total = m * l;
        unordered_map<string, int> hm;
        for (const auto &w : words) {
            ++hm[w];
        }
        vector<int> res;
        for (int i = 0; i < l; ++i) {
            auto t = hm;
            int cnt = m;
            for (int j = i; j + l <= n; j += l) {
                auto w = s.substr(j, l);
                if (t.count(w) && t[w]-- > 0) {
                    --cnt;
                }
                if (j >= total) {
                    w = s.substr(j - total, l);
                    if (t.count(w) && ++t[w] > 0) {
                        ++cnt;
                    }
                }
                if (cnt == 0) {
                    res.push_back(j - total + l);
                }
            }
        }
        return res;
    }
};

一点优化：如果当前统计的单词不存在则sliding window直接跳过该单词开始新的统计

class Solution {
public:
    vector<int> findSubstring(string s, vector<string>& words) {
        if (words.empty() || s.empty()) return {};
        int n = s.length(), m = words.size(), len = words[0].size();
        unordered_map<string, int> hm; // 先统计单词频数
        for (const auto &w : words) {
            ++hm[w];
        }
        vector<int> res;
        for (int i = 0; i < len; ++i) { // trick在扫描方式上，假设单词长度为3，那么第一次扫0369第二次147第三次258
            unordered_map<string, queue<int>> t; // 每个queue放每个有效单词的出现位置
            int cnt = 0;
            for (int j = i, b = j; j + len <= n; j += len) {
                auto w = s.substr(j, len);
                if (hm.count(w) == 0) { // 如果当前位置单词不存在，之前统计全部清空
                    t.clear();
                    cnt = 0;
                    b = j + len;
                } else if (t[w].size() < hm[w]) { // 如果当前位置单词存在且个数未超过上限
                    t[w].push(j);
                    ++cnt;
                } else { // 如果当前位置单词存在但是个数已达上限，清除当前位置单词在窗口内最早出现位置之前的所有单词
                    b = t[w].front() + len;
                    for (auto &&p : t) {
                        while (!p.second.empty() && p.second.front() < b) {
                            --cnt;
                            p.second.pop();
                        }
                    }
                    t[w].push(j);
                    ++cnt;
                }
                if (cnt == m) { // 如果找到符合要求的单词组合
                    res.push_back(b);
                    --cnt;
                    t[s.substr(b, len)].pop();
                    b += len;
                }
            }
        }
        return res;
    }
};