O(s_length*word_length) C++ solution 36 ms


  • 5
    D
    1. Assign id to each word and count repeating words. Store counts of words into baseline_counts array.
    2. Split the string into chunks of word length and thus convert the string into a vector of word id's. After that move a sliding window of size N, maintain count for each word id in this window and recalculate the distance between the current count vector and the baseline count vector. The distance is calculated as Sum{i=1..N}(abs(count[i] - baseline_count[i])) and is updated in constant time on each step.
    3. The procedure 2 is repeated (word_length-1) times by moving staring position in the string.
    
    class Solution {
    public:
        vector<int> findSubstring(string S, vector<string> &L) {
            vector<int> res;
    
            if (L.empty() || L.size()*L[0].size()>S.length())
                return res;
    
            int l = S.length();             // string length
            long long int n = L.size();     // number of words
            int wl = L[0].size();           // word length
            
            // give 1..n id's to words
            unordered_map<string, int> dict;
            dict.reserve(n);
            vector<int> dc(n+1,0);          // baseline counts (needed for repeating words)
            for (int i = 0; i < n; ++i) {
                if (dict[L[i]] == 0)
                    dict[L[i]] = i+1;
                dc[dict[L[i]]]++;
            }
            
            vector<int> cc;
            cc.reserve(l/wl);
                
            // shift start position from 0 to wl-1
            for (int sh = 0; sh < wl; ++sh) {
                cc.clear();
                // convert words to their id's
                for (int p = sh; p + wl <= l; p += wl) {
                    auto di = dict.find(S.substr(p, wl));
                    if (di != dict.end())
                        cc.push_back(di->second);   // replace known word with id
                    else
                        cc.push_back(-1);           // replace unknown word with -1
                }
                
                // Fill the sliding window with first n-1 elements
                // we keep the count of each word and calculate error as the distance
                // between the current counts vector and the baseline count vector
                vector<int> cnt(n+1,0);
                long long int err = n;
                for (int k = 0; k < n-1; ++k) {
                    int id = cc[k];
                    if (id > 0) {
                        int cp = cnt[id];
                        err -= abs(cp - dc[id]);
                        err += abs(cp + 1 -dc[id]);
                        cnt[id]++;
                    }
                }
                
                // move the window of size n maintaining the counts
                for (int j = 0; j + n <= cc.size(); ++j) {
                    // add next element
                    int id = cc[j+n-1];
                    if (id > 0) {
                        int cp = cnt[id];
                        err -= abs(cp - dc[id]);
                        err += abs(cp+1 -dc[id]);
                        cnt[id]++;
                    }
                    
                    // if counts match
                    if (err == 0)
                        res.push_back(sh + j*wl);
                    
                    // remove first element
                    id = cc[j];
                    if (id > 0) {
                        int cp = cnt[id];
                        err -= abs(cp - dc[id]);
                        err += abs(cp-1 - dc[id]);
                        cnt[id]--;
                    }
                }
            }
            return res;
        }
    };
    

  • 2
    F

    Cleaned up davenger's code for better readability

    vector<int> findSubstring(string S, vector<string> &words) {
    	vector<int> res;
    
    	if (words.empty() || words.size()*words[0].size()>S.length())
    		return res;
    
    	int lens = S.length();             // string length
    	int lenw = words.size();     // number of words
    	int wordLen = words[0].size();           // word length
    
    	// give 1..lenw id's to words, this maps each word with an unique id
    	unordered_map<string, int> dict;
    	dict.reserve(lenw);
    	vector<int> toBeFound(lenw + 1, 0);          // baseline counts (needed for repeating words)
    	for (int i = 0; i < lenw; ++i) {
    		if (dict[words[i]] == 0)
    			dict[words[i]] = i + 1;
    		toBeFound[dict[words[i]]]++;
    	}
    
    	vector<int> curIdArray;
    	curIdArray.reserve(lens / wordLen);
    
    	// try start position from 0 to wordLen-1
    	for (int startPos = 0; startPos < wordLen; startPos++) {
    		curIdArray.clear();
    		// from this specific start position, convert every substr of wordLen to their id's (if the word is valid)
    		// or -1, if the word is not in the words
    		for (int i = startPos; i + wordLen <= lens; i += wordLen) {
    			auto di = dict.find(S.substr(i, wordLen));
    			if (di != dict.end())
    				curIdArray.push_back(di->second);   // replace known word with id
    			else
    				curIdArray.push_back(-1);           // replace unknown word with -1
    		}
    
    		// Fill the sliding window with first lenw-1 elements
    		// we keep the count of each word and calculate error as the distance
    		// between the current counts vector and the baseline count vector
    		vector<int> hasFound(lenw + 1, 0);
    		int err = lenw;
    		for (int i = 0; i < lenw - 1; i++) {
    			int id = curIdArray[i];
    			if (id > 0) {
    				err -= abs(hasFound[id] - toBeFound[id]);
    				err += abs(hasFound[id] + 1 - toBeFound[id]);
    				hasFound[id]++;
    			}
    		}
    
    		// move the window of size lenw maintaining the counts
    		for (int i = 0; i + lenw <= curIdArray.size(); i++) {
    			// add next element
    			int id = curIdArray[i + lenw - 1];
    			if (id > 0) {
    				err -= abs(hasFound[id] - toBeFound[id]);
    				err += abs(hasFound[id] + 1 - toBeFound[id]);
    				hasFound[id]++;
    			}
    
    			// if counts match
    			if (err == 0)
    				res.push_back(startPos + i*wordLen);
    
    			// remove first element
    			id = curIdArray[i];
    			if (id > 0) {
    				err -= abs(hasFound[id] - toBeFound[id]);
    				err += abs(hasFound[id] - 1 - toBeFound[id]);
    				hasFound[id]--;
    			}
    		}
    	}
    	return res;
    }

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.