程式師世界 >> 編程語言 >> C語言 >> C++ >> C++入門知識 >> LeetCode:Substring with Concatenation of All Words （summarize）

LeetCode:Substring with Concatenation of All Words （summarize）

編輯：C++入門知識

You are given a string, S, and a list of words, L, that are all of the same length. Find all starting indices of substring(s) in S that is a concatenation of each word in L exactly once and without any intervening characters. For example, given: S: "barfoothefoobarman" L: ["foo", "bar"] You should return the indices: [0,9]. (order does not matter). 算法1 暴力解法，從字符串s的每個位置都判斷一次（如果從當前位置開始的子串長度小於L中所有單詞長度，不用判斷），從當前位置開始的子串的前段部分能不能由集合L裡面的單詞拼接而成。從某一個位置 i 判斷時，依次判斷單詞s[i,i+2], s[i+3,i+5], s[i+6, i+8]…是否在集合中，如果單詞在集合中，就從集合中刪除該單詞。我們用一個hash map來保存單詞，這樣可以在O(1)時間內判斷單詞是否在集合中算法的時間復雜度是O（n*(l*k)）n是字符串的長度，l是單詞的個數，k是單詞的長度遞歸代碼如下： class Solution { private: int wordLen; public: vector<int> findSubstring(string S, vector<string> &L) { unordered_map<string, int>wordTimes; for(int i = 0; i < L.size(); i++) if(wordTimes.count(L[i]) == 0) wordTimes.insert(make_pair(L[i], 1)); else wordTimes[L[i]]++; wordLen = L[0].size(); vector<int> res; for(int i = 0; i <= (int)(S.size()-L.size()*wordLen); i++) if(helper(S, i, wordTimes, L.size())) res.push_back(i); return res; } //判斷子串s[index...]的前段是否能由L中的單詞組合而成 bool helper(string &s, const int index, unordered_map<string, int>&wordTimes, const int wordNum) { if(wordNum == 0)return true; string firstWord = s.substr(index, wordLen); unordered_map<string, int>::iterator ite = wordTimes.find(firstWord); if(ite != wordTimes.end() && ite->second > 0) { (ite->second)--; bool res = helper(s, index+wordLen, wordTimes, wordNum-1); (ite->second)++;//恢復hash map的狀態 return res; } else return false; } }; 非遞歸代碼如下： class Solution { private: int wordLen; public: vector<int> findSubstring(string S, vector<string> &L) { unordered_map<string, int>wordTimes; for(int i = 0; i < L.size(); i++) if(wordTimes.count(L[i]) == 0) wordTimes.insert(make_pair(L[i], 1)); else wordTimes[L[i]]++; wordLen = L[0].size(); vector<int> res; for(int i = 0; i <= (int)(S.size()-L.size()*wordLen); i++) if(helper(S, i, wordTimes, L.size())) res.push_back(i); return res; } //判斷子串s[index...]的前段是否能由L中的單詞組合而成 bool helper(const string &s, int index, unordered_map<string, int>wordTimes, int wordNum) { for(int i = index; wordNum != 0 && i <= (int)s.size()-wordLen; i+=wordLen) { string word = s.substr(i, wordLen); unordered_map<string, int>::iterator ite = wordTimes.find(word); if(ite != wordTimes.end() && ite->second > 0) {ite->second--; wordNum--;} else return false; } if(wordNum == 0)return true; else return false; } }; OJ遞歸的時間小於非遞歸時間，因為非遞歸的helper函數中，hash map參數是傳值的方式，每次調用都要拷貝一次hash map，遞歸代碼中一直只存在一個hash map對象算法2 回想前面的題目：LeetCode:Longest Substring Without Repeating Characters 和 LeetCode:Minimum Window Substring ，都用了一種滑動窗口的方法。這一題也可以利用相同的思想。比如s = “a1b2c3a1d4”L={“a1”,“b2”,“c3”，“d4”} 窗口最開始為空， a1在L中，加入窗口【a1】b2c3a1d4 本文地址 b2在L中，加入窗口【a1b2】c3a1d4 c3在L中，加入窗口【a1b2c3】a1d4 a1在L中了，但是前面a1已經算了一次，此時只需要把窗口向右移動一個單詞a1【b2c3a1】d4 d4在L中，加入窗口a1【b2c3a1d4】找到了一個匹配如果把s改為“a1b2c3kka1d4”，那麼在第四步中會碰到單詞kk，kk不在L中，此時窗口起始位置移動到kk後面a1b2c3kk【a1d4 class Solution { public: vector<int> findSubstring(string S, vector<string> &L) { unordered_map<string, int>wordTimes;//L中單詞出現的次數 for(int i = 0; i < L.size(); i++) if(wordTimes.count(L[i]) == 0) wordTimes.insert(make_pair(L[i], 1)); else wordTimes[L[i]]++; int wordLen = L[0].size(); vector<int> res; for(int i = 0; i < wordLen; i++) {//為了不遺漏從s的每一個位置開始的子串，第一層循環為單詞的長度 unordered_map<string, int>wordTimes2;//當前窗口中單詞出現的次數 int winStart = i, cnt = 0;//winStart為窗口起始位置,cnt為當前窗口中的單詞數目 for(int winEnd = i; winEnd <= (int)S.size()-wordLen; winEnd+=wordLen) {//窗口為[winStart,winEnd) string word = S.substr(winEnd, wordLen); if(wordTimes.find(word) != wordTimes.end()) { if(wordTimes2.find(word) == wordTimes2.end()) wordTimes2[word] = 1; else wordTimes2[word]++; if(wordTimes2[word] <= wordTimes[word]) cnt++; else {//當前的單詞在L中，但是它已經在窗口中出現了相應的次數，不應該加入窗口 //此時，應該把窗口起始位置想左移動到，該單詞第一次出現的位置的下一個單詞位置 for(int k = winStart; ; k += wordLen) { string tmpstr = S.substr(k, wordLen); wordTimes2[tmpstr]--; if(tmpstr == word) { winStart = k + wordLen; break; } cnt--; } } if(cnt == L.size()) res.push_back(winStart); } else {//發現不在L中的單詞 winStart = winEnd + wordLen; wordTimes2.clear(); cnt = 0; } } } return res; } }; 算法時間復雜度為O（n*k)）n是字符串的長度，k是單詞的長度