HashMap + Trie => O(nL) solution


  • 7
    M

    The basic idea is to group all conflicted words, and then resolve the conflicts using Trie. The time complexity will be O(nL) for building trie, O(nL) to resolve conflicts, O(n) to group words. So the time complexity will be O(n(2L + 1). n is the number of words, and L is the average length of each words.

    I added the comments in code, so you can directly see the code. Please correct me if I make some mistakes and welcome to make my code concise.

    public class Solution {
        
        public List<String> wordsAbbreviation(List<String> dict) {
            Map<String, List<Integer>> abbrMap = new HashMap<>();
            // 1) create result set
            List<String> res = new ArrayList<>(Collections.nCopies(dict.size(), null));
            // 2) Group all words with the same shortest abbreviation. For example:
            // "internal", "interval" => grouped by "i6l"
            // "intension", "intrusion" => grouped by "i7n"
            // "god" => grouped by "god"
            // we can notice that only words with the same length and the same start
            // and end letter could be grouped together
            for (int i = 0; i < dict.size(); i ++) {
                String word = dict.get(i);
                String st = getShortestAbbr(word);
                List<Integer> pos = abbrMap.get(st);
                if (pos == null) {
                    pos = new ArrayList<>();
                    abbrMap.put(st, pos);
                }
                pos.add(i);
            }
            // 3) Resolve conflicts in each group
            for (Map.Entry<String, List<Integer>> entry : abbrMap.entrySet()) {
                String abbr = entry.getKey();
                List<Integer> pos = entry.getValue();
                resolve(dict, res, abbr, pos);
            }
            return res;
        }
        
        /**
         * To resolve conflicts in a group, we could build a trie for all the words
         * in the group. The trie node will contain the count of words that has the
         * same prefix. Then we could use this trie to determine when we could resolve
         * a conflict by identifying that the count of words in that trie node will only
         * have one word has the prefix.
         */
        private void resolve(List<String> dict, List<String> res, String abbr, List<Integer> pos) {
            if (pos.size() == 1) {
                res.set(pos.get(0), abbr);
            } else {
                Trie trie = buildTrie(dict, pos);
                for (int p : pos) {
                    String w = dict.get(p);
                    Trie cur = trie;
                    int i = 0;
                    int n = w.length();
                    // while loop to find the trie node which only has the 1 word which has
                    // the prefix. That means in that position, only current word has that
                    // specific character.
                    while (i < n && cur.next.get(w.charAt(i)).cnt > 1) {
                        cur = cur.next.get(w.charAt(i));
                        i ++;
                    }
                    if (i >= n - 3) {
                        res.set(p, w);
                    } else {
                        String pre = w.substring(0, i+1);
                        String st = pre + (n - i - 2) + "" + w.charAt(n - 1);
                        res.set(p, st);
                    }
                }
            }
        }
        
        /**
         * Get the shortest abbreviation for a word
         */ 
        private String getShortestAbbr(String s) {
            if (s.length() <= 3) {
                return s;
            } else {
                return s.charAt(0) + "" + (s.length() - 2) + "" + s.charAt(s.length() - 1);
            }
        }
        
        /**
         * Standard way to build the trie, but we record each trie node with the information
         * of the count of words with the same prefix.
         */
        private Trie buildTrie(List<String> dict, List<Integer> pos) {
            Trie root = new Trie();
            for (int p : pos) {
                String w = dict.get(p);
                Trie cur = root;
                for (int i = 0; i < w.length(); i ++) {
                    char c = w.charAt(i);
                    if (cur.next.containsKey(c)) {
                        cur = cur.next.get(c);
                    } else {
                        Trie next = new Trie();
                        cur.next.put(c, next);
                        cur = next;
                    }
                    cur.cnt ++;
                }
            }
            return root;
        }
        
        private class Trie {
            int cnt = 0;
            Map<Character, Trie> next = new HashMap<>();
        }
    }
    

  • 2

    Same idea, but got MLE in C++ :(

    class Solution {
    private:
        struct TrieNode {
            int count;
            unordered_map<char, TrieNode*> nexts;
            TrieNode() : count(0) {}
        };
    
        void insert(TrieNode* root, string& s) {
            TrieNode* cur = root;
            for (char& c : s) {
                if (!cur->nexts.count(c-'a')) cur->nexts[c-'a'] = new TrieNode();
                cur = cur->nexts[c-'a'];
                ++cur->count;
            }
        }
    
        int query(TrieNode* root, string& s) {
            int n = s.size();
            TrieNode* cur = root;
            for (int i = 0; i < n; ++i) {
                cur = cur->nexts[s[i]-'a'];
                if (cur->count == 1) return i;
            }
            return n;
        }
    public:
        vector<string> wordsAbbreviation(vector<string>& dict) {
            int n = dict.size();
            vector<string> res(n);
            unordered_map<string, vector<int>> m;
            for (int i = 0; i < n; ++i) {
                string& s = dict[i];
                int len = s.size();
                if (len <= 3) res[i] = s;
                else {
                    string abbr = s[0] + to_string(len-2) + s[len-1];
                    m[abbr].push_back(i);
                    res[i] = abbr;
                }
            }
            
            for (auto& p : m) {
                if (p.second.size() <= 1) continue;
                TrieNode* root = new TrieNode();
                for (int i : p.second) insert(root, dict[i]);
                for (int i : p.second) {
                    int len = dict[i].size();
                    int j = query(root, dict[i]);
                    if (j >= len-3) res[i] = dict[i];
                    else res[i] = dict[i].substr(0, j+1) + to_string(len-2-j) + dict[i][len-1];
                }
            }
            return res;
        }
    };
    

  • 0
    M

    @vesion Interesting. The trie for each group consumes memory. In Java, I think GC will collect it after we finish processing a group, but for C++, should you free the memory when it becomes useless? I am not familiar with C++.

    By the way, since you are using a unorder_map in trie, why should you use "c-'a'" as the key rather than directly use "c"?


  • 1
    J

    The above C++ solution would got MLE because of 2 major points:

    1. C++ dosen't performe GC, so we need to release every trie by ourselves
    2. Use vector, not unordered_map to store "nexts", as we only handle lowcase letters

    Here is the accepted version, where I only did a few modifications to the original code:

    class Solution {
    private:
        struct TrieNode {
            int count;
            vector<TrieNode*> nexts;
            TrieNode() : count(0) {nexts.resize(26, NULL);}
        };
    
        void insert(TrieNode* root, string& s) {
            TrieNode* cur = root;
            for (char& c : s) {
                if (!(cur->nexts)[c-'a']) cur->nexts[c-'a'] = new TrieNode();
                cur = cur->nexts[c-'a'];
                ++cur->count;
            }
        }
    
        int query(TrieNode* root, string& s) {
            int n = s.size();
            TrieNode* cur = root;
            for (int i = 0; i < n; ++i) {
                cur = cur->nexts[s[i]-'a'];
                if (cur->count == 1) return i;
            }
            return n;
        }
        
        void releaseTrie(TrieNode* root){
            if(!root) return;
            for(auto &p : root->nexts) releaseTrie(p);
            delete(root);
        }
        
    public:
        vector<string> wordsAbbreviation(vector<string>& dict) {
            int n = dict.size();
            vector<string> res(n);
            unordered_map<string, vector<int>> m;
            for (int i = 0; i < n; ++i) {
                string& s = dict[i];
                int len = s.size();
                if (len <= 3) res[i] = s;
                else {
                    string abbr = s[0] + to_string(len-2) + s[len-1];
                    m[abbr].push_back(i);
                    res[i] = abbr;
                }
            }
            
            for (auto& p : m) {
                if (p.second.size() <= 1) continue;
                TrieNode* root = new TrieNode();
                for (int i : p.second) insert(root, dict[i]);
                for (int i : p.second) {
                    int len = dict[i].size();
                    int j = query(root, dict[i]);
                    if (j >= len-3) res[i] = dict[i];
                    else res[i] = dict[i].substr(0, j+1) + to_string(len-2-j) + dict[i][len-1];
                }
                releaseTrie(root);
            }
            return res;
        }
    };
    

  • 0

    @JadenPan Thank you for reminding, I'm always forgetting to do GC manually. But I think unordered_map uses less memory than a vector with fixed 26 pointers because it only stores the real characters we meet in a word, especially for the last case with only character 'a' and 'b'.


  • 0
    K

    @JadenPan
    I use the same method but also got MLE.

    public:
    class TrieNode
    {
    public:
        int cnt;
        TrieNode* next[26];
        TrieNode()
        {
            memset(next,0,sizeof(next));
            cnt=0;
        }
    };
    void insert(TrieNode* root,string& word)
    {
        //Time O(length of word)
        TrieNode* p=root;
        for(char c:word)
        {
            if(p->next[c-'a']==NULL)
                p->next[c-'a']=new TrieNode();
            p=p->next[c-'a'];
            p->cnt++;
        }
    }
    void releaseTrie(TrieNode* root)
    {
        if(root==NULL) return;
        for(auto &p:root->next)
            releaseTrie(p);
        delete root;
    }
        vector<string> wordsAbbreviation(vector<string>& dict) {
            unordered_map<string,vector<int>> mp;
            vector<string> res(dict.size());
            //Time O(n)
            for(int i=0;i<dict.size();++i)
            {
                if(dict[i].size()<=3)
                    res[i]=dict[i];
                else
                {
                    string abb=dict[i][0]+to_string(dict[i].size()-2)+dict[i].back();
                    mp[abb].push_back(i);
                }
            }
            for(auto p:mp)
            {
                if(p.second.size()==1)
                    res[p.second[0]]=p.first;
                else
                {
                    TrieNode* root=new TrieNode();
                    //O(n*L)
                    for(auto i:p.second)
                        insert(root,dict[i]);
                    //compute abbreviation
                    //O(n*L)
                    for(auto i:p.second)
                    {
                        TrieNode* node=root;
                        int pre=0;
                        for(char c:dict[i])
                        {
                            pre++;
                            if(node->next[c-'a']->cnt==1)
                                break;
                            node=node->next[c-'a'];
                        }
                        string abb=dict[i].substr(0,pre)+to_string(dict[i].size()-pre-1)+dict[i].back();
                        if(abb.size()>=dict[i].size()) abb=dict[i];
                        res[i]=abb;
                    }
                    releaseTrie(root);
                }
            }
            return res;
        }
    };

  • 1
    F

    @vesion Same here, very frustrated, almost every time I used a trie, I got MLE. A more ironical example can be found here:
    https://discuss.leetcode.com/topic/89844/20-line-c-169-ms-beats-100-why-i-think-this-problem-is-not-properly-judged


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.