Better solution than brute-force?

• Not sure if there exists better solutions than a brute-force one. Here is my Python code.

``````from collections import defaultdict
import unittest

def subconcat (S, L):
"""Substring with Concatenation of All Words.
http://oj.leetcode.com/problems/substring-with-concatenation-of-all-words/
"""
ret = []
len0 = len(L[0])
lenL = len(L)
cntL = defaultdict(int)
for x in L:
cntL[x] += 1
for i in xrange(len(S) - lenL * len0 + 1):
cntLtmp = cntL.copy()
for j in xrange(lenL):
s = S[i+j*len0: i+(j+1)*len0]
if s not in cntLtmp or cntLtmp[s] == 0:
break
cntLtmp[s] -= 1
else:  # i.e., no break
ret.append(i)
return ret

class Test (unittest.TestCase):
"""Accepted."""
def test (self):
tcases = {
('barfoothefoobarman', '["foo", "bar"]'): [0, 9],  # barfoo, foobar
('axy01bb01xy', '["01", "xy"]'): [1, 7],  # xy01,01xy
('axy01bb01xy', '["0", "1", "x", "y"]'): [1, 7],  # xy01,01xy
('axy01bb01xy', '["0"]'): [3, 7],
}
for inp, out in tcases.iteritems():
self.assertListEqual(subconcat(inp[0], eval(inp[1])), out)

if __name__ == '__main__':
unittest.main()``````

• This post is deleted!

• Hi chentao169, as old discuss will close later, could you please share your own solution or some solution in old discuss which you think is nice, rather then a link? Thanks!

• Note the above Brute force solution takes O(n* l *w) time and O( l * w) auxiliary space.
Here note 'n' is number of characters in the string ,'l' is the number of words in the list. and 'w' is the words length
A better solution is done by taking advantage of the fact that the continuous words will be just the same for two valid sub-strings that are distant just by w characters

The brute force approach stops at each character and checks for all the words are in the list each time constructing the dictionary again and again O(n) times.

The words are all of the same width, Hence the dictionary will be the same for overlapping valid substrings (Ex: "foobarisbarfoobar" ["foo","bar"])
in which the "barfoobar" has overlapping valid substrings "barfoo" and "foobar" and both have same dictionary.Thus we need not construct the dictionary
again, instead reuse the dictionary with some proper cases.

Secondly instead of traversing the string character by character we can traverse it in the form of words by words( w increments) fot the above purpose.
then do the same for the a character shifted to find out the valid strings there.

Ex: the same above one will be traversed as "foo" "bar" "isb" "arf" "oob" "ar" on the first iteration and then "oob" "ari" "sba" "rfo" "oba" "r"
and next "oba" "ris" "bar" "foo" "bar" next.

This will reduce the need for construction of Dictionaries O(n) times instead we do it O(w) times.
and we will be making a total of O(n) iterations with O(w) string comparision operations each time,
hence a time complexity of O(nw) and an auxiliaty space of O(lw)

The algorithm works like this

``````    a) initially store the number of occurences of each word in Hash table "cn".
b) For i = 0 to w
start = i;
For w : all words at intervals i, i+w, i+2w, .. n
i) check if word w is  valid, if not set start to next word as the substring uptil now will be invalid.

ii) if word is valid, check the number of occurences of this word in the substring  is less than availbale words
increment  count of the word in the hash table cntL

iii) if the word is valid , but already occured maximum number of times, then consider the substring starting after the first occurence of this word.set start.

if (all the dictionary words are used then store the start as the valid substring start.
``````

Code:

``````class Solution {
private:
vector<int> res;
map<string,int> cntL;
map<string,int> cn;
int n ;
public:
vector<int> findSubstring(string S, vector<string> &L)
{   res.clear();
cntL.clear();
cn.clear();

n = S.length();
int e = L.size();
int t = L[0].length();
int k = 0;

for(int i = 0; i < e ; i++)
{   if(cn.count(L[i]) == 0)
{ cn[L[i]] = 1;
k++;
}
else
{ cn[L[i]] += 1;
k++;
}
}

string tr ,du;
int r = 0;
int st = 0;

for(int j = 0 ; j < t ; j++)
{ r = 0; st = j;
for(int i = j; i < n; i += t)
{     tr = S.substr(i,t);
if( cn.count(tr) == 0 || cn[tr] == 0 )
{ cntL.clear();
r =  0;
st = i+t;
}
else if(cntL[tr] < cn[tr])
{ cntL[tr] += 1;
r++;
}
else
{  du = S.substr(st,t);
while(du != tr)
{   cntL[du]--;
r--;
st += t;
du = S.substr(st,t);
}
st += t;
}
if(r == k)
{   res.push_back(st);
du = S.substr(st,t);
cntL[du]--;
r--;
st += t;
}

}
cntL.clear();
}
sort(res.begin(),res.end());
return res ;
}
};
``````

• Could you please format your code? Since the last `};` is out of your code box.

• Another brute-force idea, need to iterate O(N) times and each time compare O(M) times, M = L.length * L[0].length(). Use Tier tree.

``````public class Solution {

public static class Tier {
private static int hashcodeCount = 0;
private int hashcode;
public final char letter;
public final Map<Character, Tier> next = new HashMap<Character, Tier>();
public int count = 0;
public Tier (char l) {
letter = l;
++hashcodeCount;
hashcode = hashcodeCount;
}

public int hashCode() {
return hashcode;
}

public boolean equals (Object obj) {
if (obj == null || !(obj instanceof Tier)) {
return false;
}
Tier that = (Tier) obj;
return this.next == that.next && this.count == that.count && count == that.count;
}
}

public ArrayList<Integer> findSubstring(String S, String[] L) {
if (L.length == 0 || L[0].isEmpty()) {
return new ArrayList<Integer>();
}

Map<Character, Tier> roots = new HashMap<Character, Tier>();
for (String word : L) {
Map<Character, Tier> curr = roots;
Tier node = null;
for (int i = 0; i < word.length(); ++i) {
char ch = word.charAt(i);
node = curr.get(ch);
if (node == null) {
node = new Tier(ch);
curr.put(ch, node);
}
curr = node.next;
}
++node.count;
}
ArrayList<Integer> ret = new ArrayList<Integer>();
int len = L.length * L[0].length();
Map<Tier, Integer> count = new HashMap<Tier, Integer>();
for (int i = 0; i < S.length(); ++i) {
Map<Character, Tier> curr = roots;
int hitCount = 0;
count.clear();
for (int j = i; i + len <= S.length() && j < S.length() && j - i < len; ++j) {
char ch = S.charAt(j);
Tier node = curr.get(ch);
if (node == null) {
hitCount = -1;
break;
} else if (node.count == 0) {
curr = node.next;
} else {
Integer times = count.get(node);
times = times == null ? 1 : times + 1;
count.put(node, times);
if (times <= node.count) {
++hitCount;
} else {
hitCount = -1;
break;
}
curr = roots;
}
}

if (hitCount == L.length) {
}
}

return ret;
}
}``````

• The best known answer is O(nw) time complexity and O(Lw) extra memory, here n is the S length, w is the word length, and L is the total word number. But to real implement it without retrieve need hashtable. Also, given the condition that words could have multiple copies, an array and an Index are needed to point at the earliest appearance of the location record.

``````vector<int> findSubstring(string S, vector<string> &L) {
vector<int> res;
if (L.size()==0) return res;
if (S.size() < L.size()*L[0].size()) return res;

int wl = L[0].size();
unordered_map<string, vector<int> > hashLocation;
unordered_map<string, vector<int> >::iterator hashIt;
unordered_map<string, int> hashIndex;
vector<int> empty;
for (int i=0; i<wl; ++i) {
hashLocation.clear();
for (int k=0; k<L.size(); ++k) {
if (hashLocation.find(L[k]) == hashLocation.end()) {
hashLocation[L[k]] = empty;
}
hashLocation[L[k]].push_back(-1);
hashIndex[L[k]] = 0;
}
while (curr<S.size()) {
string a = S.substr(curr, wl);
hashIt = hashLocation.find(a);
if (hashIt==hashLocation.end()) {
hitCnt = 0;
continue;
}
int earliestCur = hashIt->second[hashIndex[a]];
hitCnt++;
if (hitCnt == L.size()) {
hitCnt--;
}
} else {
}
hashIt->second[hashIndex[a]] = curr;
hashIndex[a] = (hashIndex[a]+1)%hashIt->second.size();
curr += wl;
}
}
return res;
}
``````

• You solution is brilliant, and i have only one suggestion: give the variables longer and more meaningful names to make the solution more readable.

• This post is deleted!

• what is the time complexity of s.substr(), I think it is not O(1)........

• Best explanation so far of the O(nw) algo. Thanks dude!

• Python implementation:

``````   class Solution:

# @param S, a string
# @param L, a list of string
# @return a list of integer
def findSubstring(self, S, L):
step = len(L[0])
n = len(L)
counters = {}
results = []
for x in L:
counters[x] = counters.get(x, 0) + 1
for i in xrange(step):
_counters = {}
_n = 0
left = i
for j in xrange(i, len(S) - step + 1, step):
word = S[j:j+step]
if word in counters:
# is a valid word, update counter
_counters[word] = _counters.get(word, 0) + 1
_n += 1
# saw repeated word,
# move start pointer to the first appearance of the repeating word,
# and clear counters for words before and include that word
while _counters[word] > counters[word]:
tmp = S[left:left+step]
_counters[tmp] -= 1
_n -= 1
left += step
# reached enough object, move right one step
if _n == n:
results.append(left)
tmp = S[left:left+step]
_counters[tmp] -= 1
_n -= 1
left += step
else:
_n = 0
_counters = {}
left = j + step
return results
``````

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.