c++ code KMP+boolean array 26ms, Hashing + intervals 145ms, Trie + boolean array 160ms.


  • 4
    A

    Let string s.size() = m, dict.size() = k, longest word length in dict is n. The question has two parts. The first is to find occurrences of words in s and the second is how to tag given these occurrences.

    • The first solution is to use KMP algorithm. For each word in dict, we search all occurrences of word in s, and use boolean array to tag. The time complexity is O(k(m + n)). The answer accepted in around 26ms.
    class Solution {
    public:
        vector<int> preprocessKMP(string& p, string& s) {
            vector<int> ret(p.size());//ret[i] means the longest proper prefix that is also a suffix in p[0...i]
            for (int i=1; i<p.size(); i++) {
                int len = ret[i-1];
                while ((len > 0) && (p[i] != p[len])) {
                    len = ret[len - 1];
                }  
                if (p[i] == p[len]) {
                    ret[i] = len + 1;
                }//else ret[i] == 0
            }
            return ret;
        }
        
        string addBoldTag(string s, vector<string>& dict) {
            if (s.size() == 0) {
                return s;
            }
            vector<bool> bold(s.size());
            for (auto word : dict) {
                //Do the kmp search for the pattern word in s
                vector<int> kmp = preprocessKMP(word, s);
                int idx = 0;
                int last = -1;
                for (int i=0; i<s.size(); i++) {
                    if (word[idx] == s[i]) {
                        idx++;
                    } else {
                        while ((idx > 0) && (s[i] != word[idx])) {
                            idx = kmp[idx - 1];
                        }
                        if (word[idx] == s[i]) {
                            idx++;
                        }                    
                    }
                    if (idx == kmp.size()) {
                        int start = max(last + 1, i - (int)kmp.size() + 1);
                        fill(bold.begin() + start, bold.begin() + i + 1, true);
                        last = i;
                        idx = kmp[idx - 1];
                    }
                }
            }
            //Tag string s using boolean array bold
            string ret;
            bool state = true;
            for (int i=0; i<s.size(); i++) {
                if (state && bold[i]) {
                    ret += "<b>";
                    state = false;
                } else if (!state && !bold[i]) {
                    ret += "</b>";
                    state = true;
                }
                ret.push_back(s[i]);
            }
            if (!state) {
                ret += "</b>";
            }
            return ret;        
        }
    };
    
    • The second solution is to use hashing + interval merging. Simply enumerate all possible substrings (double for-loops) and to check if it is in dict by hashing, then merge it into pos arrays, which stores the intervals need to be bold. Time complexity in average is O(m^2 + kn), first term for substrings enumeration, second term for hash map building. Accepted in around 1100ms.
      However, we can improve it a little bit. When enumerating the substrings starting at s[i], we only have to enumerate the ending char from s[i] + (maxLength of word in dict) down to s[i+1]. The time complexity is now O(mn + kn). Accepted in around 145ms.
    class Solution {
    public:
        string addBoldTag(string s, vector<string>& dict) {
            if (s.size() == 0) {
                return s;
            }
            unordered_set<string> hash;
            vector<pair<int, int>> pos;
            int maxLen = 0;
            //Build hash map
            for (auto word : dict) {
                hash.insert(word);
                maxLen = max(maxLen, (int)word.size());
            }
            //Enumerate substrings and record tag intervals
            for (int i = 0; i < s.size(); i++) {
                int end = min(i + maxLen - 1, (int)s.size() - 1);
                string t = string(s.begin() + i, s.begin() + end + 1);
                int start = (pos.size() > 0) ? pos.back().second + 1 : -1;
                start = max(i, start);
                //Enumerate substrings starting at s[i], ending char is in s[start...end]
                for (int j = end; j >= start; j--) {
                    if (hash.find(t) != hash.end()) {
                        pair<int, int> p = {i, j};
                        if ((pos.size() == 0) || (i > pos.back().second + 1)) {
                            pos.push_back(p);
                        } else {
                            pos.back().second = j;
                        }
                        break;
                    }
                    t.pop_back();
                }
            }
            //Tagging intervals
            if (pos.size() == 0) {
                return s;
            }
            string ret = (pos[0].first > 0) ? string(s.begin(), s.begin() + pos[0].first) : "";
            for (int i=0; i<pos.size(); i++) {
                ret += "<b>";
                ret += string(s.begin() + pos[i].first, s.begin() + pos[i].second + 1);
                ret += "</b>";
                if (i < pos.size() - 1) {
                    ret += string(s.begin() + pos[i].second + 1, s.begin() + pos[i+1].first);
                }
            }
            if (pos.back().second < s.size() - 1) {
                ret += string(s.begin() + pos.back().second + 1, s.end());
            }
            return ret;
        }
    };
    
    • The third solution using trie + boolean array solution. Build up a trie on the words in dict. Then for each index i in s, find the longest substring that matches words in trie and mark all the chars in this substring so that we know how to add the bold tag. The time complexity is O(kn + mn). The first term is the time to build a trie, the second term is the time for searching substrings of s in the trie. The solution is accepted in around 160ms.
    struct Node {
       bool isEnd;
       unordered_map<char, Node*> children;   
       Node() : isEnd() {}
    };
    
    class Trie {
       Node* root;
    public:
       Trie() {
           root = new Node();
       }
       
       void add(string& s) {
           Node* node = root;
           int idx = 0;
           while (idx < s.size()) {
               if (node->children.find(s[idx]) == node->children.end()) {
                   Node* newNode = new Node();
                   node->children[s[idx]] = newNode;
               }
               node = node->children[s[idx]];
               idx++;
           }
           node->isEnd = true;
       }
       
       int search(int idx, string& s) {
           Node* node = root;
           int ret = -1;
           while (idx < s.size()) {
               if (node->children.find(s[idx]) == node->children.end()) {
                   return ret;
               }
               node = node->children[s[idx]];
               if (node->isEnd) {
                   ret = idx;
               }            
               idx++;
           }
           return ret;
       }
    };
    
    class Solution {
    public:
       string addBoldTag(string s, vector<string>& dict) {
           if (s.size() == 0) {
               return s;
           }
           vector<bool> bold(s.size());
           //Build a trie
           Trie trie;
           for (auto word : dict) {
               trie.add(word);
           }
           //Search in s, mark bold array
           for (int i=0; i<s.size(); i++) {
               int idx = trie.search(i, s);
               for (int j=i; j<=idx; j++) {
                   bold[j] = true;
               }
           }
           //Tag using boolean array
           bool state = true;
           string ret;
           for (int i=0; i<s.size(); i++) {
               if (state && bold[i]) {
                   ret += "<b>";
                   state = false;
               } else if (!state && !bold[i]) {
                   ret += "</b>";
                   state = true;
               }
               ret.push_back(s[i]);
           }
           if (!state) {
               ret += "</b>";
           }
           return ret;
       }
    };
    

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.