Evolve from intuitive solution to optimal - a review of all solutions


  • 11

    There are three challenges. Remove minimum parenthesis, the result is valid and without duplicates. All BFS and DFS solutions look like O(n2^n), but the performance is different depending on how they solve the challenges.

    BFS Solutions

    1. BFS guarantees shortest path. Since the problem asks to remove minimum parenthesis, it is natural think of BFS. A straightforward approach is to remove a parenthesis from the current string until we get a valid string. It generates both duplicate and invalid strings. We can use a hash table to remove duplicates and check each string for validity. The idea is from @jeantimex.
        vector<string> removeInvalidParentheses(string s) {
            queue<string> q;
            unordered_set<string> ht;
            q.push(s);
            vector<string> res;
            while(!q.empty()) {
                string ss = q.front();
                q.pop();
                if(ht.count(ss)) continue;
                ht.insert(ss);
                if(isValid(ss)) res.push_back(ss);
                else if (res.empty()) 
                    for(int i=0;i<ss.size();i++) 
                        if(ss[i]==')'|| ss[i]=='(') q.push(ss.substr(0,i)+ss.substr(i+1));
            }
            return res;
        }
        bool isValid(string &s) {
            int count=0;
            for(auto c:s) {
                if(c=='(') count++;
                if(c==')')
                    if(count>0) count--;
                    else return false;
            }
            return !count;
        }
    
    1. We can speed up and get rid of the hash table by generating unique strings only. There are two types of duplicates. First is due to removing the same set of characters in different order. For example, "(()(()", remove 0th then 3rd or remove 3rd then 0th both generates "()()". So we can enforce an order by keeping the last removal index and remove after it only. The other is handling consecutive same chars, say, "(()". We get the same string by removing either the 0th or 1st '('. We can just remove the 0th. The idea is from @dietpepsi.
        vector<string> removeInvalidParentheses(string s) {
            queue<pair<string,int>> q;
            q.push(make_pair(s,0));
            vector<string> res;
            while(!q.empty()) {
                auto p=q.front();
                q.pop();
                string ss=p.first;
                if(isValid(ss)) res.push_back(ss);
                else if (res.empty()) 
                    for(int i=p.second;i<ss.size();i++) 
                        if((ss[i]==')'|| ss[i]=='(') && (i==p.second || ss[i]!=ss[i-1])) 
                            q.push(make_pair(ss.substr(0,i)+ss.substr(i+1),i));
            }
            return res;
        }
    
    1. The optimal BFS should not generate invalid strings either. Again, the idea is from @dietpepsi.
        vector<string> removeInvalidParentheses(string s) {
            queue<tuple<string,int,int,char>> q;
            q.push(make_tuple(s,0,0,'('));
            vector<string> res;
            while(!q.empty()) {
                auto t=q.front();
                q.pop();
                string str=get<0>(t);
                int start =get<1>(t), lastRm=get<2>(t), count = 0;
                char l = get<3>(t), r = l=='('?')':'(';
                for(int i=start; i<str.size();i++) {
                    if(str[i] == l) count++;
                    else if(str[i]==r) count--;
                    if(count>=0) continue;
                    for(int j=lastRm;j<=i;j++)
                        if(str[j]==r && (j==lastRm || str[j-1]!=r))
                            q.push(make_tuple(str.substr(0,j)+str.substr(j+1),i,j,l));
                    break;
                }
                if(count < 0) continue;
                reverse(str.begin(),str.end());
                if(l=='(') q.push(make_tuple(str,0,0,')'));
                else res.push_back(str);
            }
            return res;
        }
    

    DFS solutions

    1. A naive DFS is to generate all the 2^n substr. We use hash table to remove duplicates. and then return the longest ones. It is less efficient than BFS because DFS does not guarantee shortest path. So we cannot stop after the first valid strings as in BFS.
        vector<string> removeInvalidParentheses(string s) {
            unordered_set<string> ht;
            string cur;
            dfs(0,cur,s,ht);
            size_t ml = 0;
            for(auto& str:ht) ml = max(ml,str.size());
            vector<string> res;
            for(auto& str:ht) if(str.size()==ml) res.push_back(str);
            return res;
        }
        void dfs(int p, string& cur, string& s, unordered_set<string>& res) {
            if(p==s.size()) {
                if(isValid(cur)) res.insert(cur);
                return;
            }
            cur+=s[p];
            dfs(p+1,cur,s,res);
            cur.pop_back();
            if(s[p]=='('||s[p]==')') dfs(p+1,cur,s,res); 
        }
    
    1. The naive DFS searches from a lot of duplicate states. We can use a hash table to avoid it.
        vector<string> removeInvalidParentheses(string s) {
            unordered_map<string,int> vstd;
            string cur;
            unordered_set<string> ht;
            dfs(0,cur,s, ht, vstd);
            size_t ml = 0;
            for(auto& str:ht) ml = max(ml,str.size());
            vector<string> res;
            for(auto& str:ht) if(str.size()==ml) res.push_back(str);
            return res;
        }
        void dfs(int p, string& cur, string& s, unordered_set<string>& res, unordered_map<string,int>& vstd) {
            auto it = vstd.find(cur);
            if(it!=vstd.end() && it->second <= p) return;
            if(p==s.size()) {
                if(isValid(cur)) res.insert(cur);
                return;
            }
            cur+=s[p];
            dfs(p+1,cur,s,res,vstd);
            cur.pop_back();
            if(s[p]=='('||s[p]==')') dfs(p+1,cur,s,res,vstd);
            if(it == vstd.end()) vstd[cur] = p;
            else it->second = min(it->second,p);
        }
    
    1. The above DFS approaches are not efficient at all since they generate strings of all lengths. BFS only removes the minimum number of strings so we should be able to do the same thing in DFS. Since the problem asks to remove minimum number of parenthesis, it is natural to compute the min number first and then use it to guide the DFS. This guarantees we only generate valid strings. The other challenge is to handle duplicate strings. We still use hash table to remove duplicates. The disadvantage is that duplicates are generated so the search space is not minimal. The idea is from @yavinci,
        vector<string> removeInvalidParentheses(string s) {
            int rmL=0,rmR=0;
            for(int i=0;i<s.size();i++) {
                if(s[i]=='(') rmL++;
                if(s[i]==')') {
                    if(rmL>0) rmL--;
                    else rmR++;
                } 
            }
            unordered_set<string> res; 
            string one;
            dfs(s, 0, rmL, rmR, 0, one, res);
            return vector<string> (res.begin(),res.end());
        }
        void dfs(string &s, int i, int rmL, int rmR, int pl, string &one, unordered_set<string> &res) {
            if(i == s.size() || rmL<0 || rmR<0 ||pl<0) {
                if(rmL==0 && rmR==0 && pl==0) res.insert(one);
                return;
            }
            if(s[i]=='(') {
                dfs(s,i+1,rmL-1,rmR,pl,one,res);
                one+='(';
                dfs(s,i+1,rmL,rmR,pl+1,one,res);
            } else if(s[i]==')') {
                dfs(s,i+1,rmL,rmR-1,pl,one,res);
                one+=')';
                dfs(s,i+1,rmL,rmR,pl-1,one,res);
            } else {
                one+=s[i];
                dfs(s,i+1,rmL,rmR,pl,one,res);
            }
            one.pop_back();
        } 
    
    1. DFS of unique strings from @cqnkxy . Search space is not optimal because it expands from invalid strings.
    2. DFS of unique and valid strings from @dietpepsi This is the optimal solution and source of all the ideas.
        vector<string> removeInvalidParentheses(string s) {
            vector<string> res;
            dfs(0,0,"()",s,res);
            return res;
        }
        void dfs(int p, int lastRm, char dir[], string s, vector<string>& res) {
            for(int i=p, count=0;i<s.size();i++) {
                if(s[i]==dir[0]) count++;
                else if(s[i]==dir[1]) count--;
                if(count>=0) continue;
                for(int j=lastRm;j<=i;j++)
                    if(s[j]==dir[1] && (j==lastRm || s[j-1]!=dir[1]))
                        dfs(i,j,dir,s.substr(0,j)+s.substr(j+1),res);
                return;        
            }
            reverse(s.begin(),s.end());
            if(dir[0]=='(') dfs(0,0,")(",s,res);
            else res.push_back(s);
        }
    

  • 0
    L

    @yu6 Thank you! This is the best summary for this problem.


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.