[Java/C++] Union Find


  • 22
    1. The key task here is to connect those emails, and this is a perfect use case for union find.
    2. to group these emails, each group need to have a representative, or parent.
    3. At the beginning, set each email as its own representative.
    4. Emails in each account naturally belong to a same group, and should be joined by assigning to the same parent (let's use the parent of first email in that list);

    Simple Example:

    a b c // now b, c have parent a
    d e f // now e, f have parent d
    g a d // now abc, def all merged to group g
    
    parents populated after parsing 1st account: a b c
    a->a
    b->a
    c->a
    
    parents populated after parsing 2nd account: d e f
    d->d
    e->d
    f->d
    
    parents populated after parsing 3rd account: g a d
    g->g
    a->g
    d->g
    
    

    Java

    class Solution {
        public List<List<String>> accountsMerge(List<List<String>> acts) {
            Map<String, String> owner = new HashMap<>();
            Map<String, String> parents = new HashMap<>();
            Map<String, TreeSet<String>> unions = new HashMap<>();
            for (List<String> a : acts) {
                for (int i = 1; i < a.size(); i++) {
                    parents.put(a.get(i), a.get(i));
                    owner.put(a.get(i), a.get(0));
                }
            }
            for (List<String> a : acts) {
                String p = find(a.get(1), parents);
                for (int i = 2; i < a.size(); i++)
                    parents.put(find(a.get(i), parents), p);
            }
            for(List<String> a : acts) {
                String p = find(a.get(1), parents);
                if (!unions.containsKey(p)) unions.put(p, new TreeSet<>());
                for (int i = 1; i < a.size(); i++)
                    unions.get(p).add(a.get(i));
            }
            List<List<String>> res = new ArrayList<>();
            for (String p : unions.keySet()) {
                List<String> emails = new ArrayList(unions.get(p));
                emails.add(0, owner.get(p));
                res.add(emails);
            }
            return res;
        }
        private String find(String s, Map<String, String> p) {
            return p.get(s) == s ? s : find(p.get(s), p);
        }
    }
    

    C++

    class Solution {
    public:
        vector<vector<string>> accountsMerge(vector<vector<string>>& acts) {
            map<string, string> owner;
            map<string, string> parents;
            map<string, set<string>> unions;
            for (int i = 0; i < acts.size(); i++) {
                for (int j = 1; j < acts[i].size(); j++) {
                    parents[acts[i][j]] = acts[i][j];
                    owner[acts[i][j]] = acts[i][0];
                }
            }
            for (int i = 0; i < acts.size(); i++) {
                string p = find(acts[i][1], parents);
                for (int j = 2; j < acts[i].size(); j++)
                    parents[find(acts[i][j], parents)] = p;
            }
            for (int i = 0; i < acts.size(); i++)
                for (int j = 1; j < acts[i].size(); j++)
                    unions[find(acts[i][j], parents)].insert(acts[i][j]);
    
            vector<vector<string>> res;
            for (pair<string, set<string>> p : unions) {
                vector<string> emails(p.second.begin(), p.second.end());
                emails.insert(emails.begin(), owner[p.first]);
                res.push_back(emails);
            }
            return res;
        }
    private:
        string find(string s, map<string, string>& p) {
            return p[s] == s ? s : find(p[s], p);
        }
    };
    

    C++ Lambda

    class Solution {
    public:
        vector<vector<string>> accountsMerge(vector<vector<string>>& acts) {
            map<string, string> owner;
            map<string, string> parents;
            function<string(string)> find = [&](string s) {return parents[s] == s ? s : find(parents[s]); };
            for (int i = 0; i < acts.size(); i++) {
                for (int j = 1; j < acts[i].size(); j++) {
                    parents[acts[i][j]] = acts[i][j];
                    owner[acts[i][j]] = acts[i][0];
                }
            }
            for (int i = 0; i < acts.size(); i++) {
                string p = find(acts[i][1]);
                for (int j = 2; j < acts[i].size(); j++) {
                    parents[find(acts[i][j])] = p;
                }
            }
            map<string, set<string>> unions;
            for (int i = 0; i < acts.size(); i++) {
                for (int j = 1; j < acts[i].size(); j++) {
                    unions[find(acts[i][j])].insert(acts[i][j]);
                }
            }
            vector<vector<string>> merged;
            for (pair<string, set<string>> p : unions) {
                vector<string> emails(p.second.begin(), p.second.end());
                emails.insert(emails.begin(), owner[p.first]);
                merged.push_back(emails);
            }
            return merged;
        }
    };
    

  • 0

    @alexander Nice solution.


  • 2
    L

    Adding a python version... :)

    def accountsMerge(self, accounts):
        parent = {}
        email_to_name = {}
    
        def find(i):
            if parent[i] != i:
                parent[i] = find(parent[i])
            return parent[i]
    
        def union(i, j):
            parent[find(i)] = find(j)
    
        for account in accounts:
            name = account[0]
            for email in account[1:]:
                if email not in parent:
                    parent[email] = email
                email_to_name[email] = name
                union(email, account[1])  # account[1]: the first email
    
        trees = defaultdict(list)
        for email in parent.keys():
            trees[find(email)].append(email)
    
        return [[email_to_name[root]] + sorted(emails) for (root, emails) in trees.items()]
    

  • 3
    H

    Thanks for your very simple algorithm! No need to use map though, unordered_map would do just as fine.

    And we can do path compression at the same time of find(), saves some recursions.

        string find(string s, unordered_map<string, string> &parent) {//在findRoot的同时做路径压缩
            if (p[s] != s) {
                p[s] = find(p[s], p);
            }
            return p[s];            
        }
    

  • 0
    This post is deleted!

  • 0
    C

    What's the time compexity


  • 0
    B
    This post is deleted!

  • 0
    T

    can somebody explain,whats the purpose of parents[acts[i][j]] = acts[i][j];

    since both key and value are same here


  • 0

    @terminator123456 In union find, initially, parent of every node should be set to itself.


  • 0
    T

    ok thanks :)


  • 1
    J

    @alexander Clean code and good explanation, Thank you!

    An minor change to the third loop that improves performance:

        for(List<String> a : accounts){ 
            String p = find(a.get(1), parents);
            if(!unions.containsKey(p)) unions.put(p, new TreeSet<>());
            for(int i = 1; i < a.size(); i++){
                unions.get(p).add(a.get(i));
            }
        }

  • 0

    @Jingshi Yes, you are right, there is no need to check for every email in the same account, because at this point they already have the same parent for sure. Thanks. Updated
    from:

            for (List<String> a : acts) {
                for (int i = 1; i < a.size(); i++) {
                    String p = find(a.get(i), parents);
                    if (!unions.containsKey(p)) unions.put(p, new TreeSet<>());
                    unions.get(p).add(a.get(i));
                }
            }
    

    to

            for (List<String> a : acts) {
                String p = find(a.get(1), parents);
                if (!unions.containsKey(p)) unions.put(p, new TreeSet<>());
                for (int i = 1; i < a.size(); i++)
                    unions.get(p).add(a.get(i));
            }
    

  • 0
    X

    Hi @alexander

    Very nice solution!

    In your C++ implementation, we could do 3 optimizations and improve the performance by 6x.

    1. update parent in find. which has been explained.
    2. use unordered_map instead of map, since map has to keep elements ordered, it takes more time in insertion.
    3. use call by reference.
     for (pair<string, set<string>> p : unions) {
                vector<string> emails(p.second.begin(), p.second.end());
                emails.insert(emails.begin(), owner[p.first]);
                res.push_back(emails);
     }
    

    In this loop, when every p has to be copied, every element in its set are also copied one by one, which is unnecessary. If you do not wish to change its value, just add a const.

     for (const auto& p : unions) {
                vector<string> emails(p.second.begin(), p.second.end());
                emails.insert(emails.begin(), owner[p.first]);
                res.push_back(emails);
     }
    

  • 1
    T

    whats the complexity of this ?


  • 0
    J

    Path compression in Java: (iteration)

    public String find(String p, Map<String, String> parent) {
            while(p != parent.get(p)) {
                //path compression
                parent.put(p, parent.get(parent.get(p)));
                p = parent.get(p);
            }
            
            return p;
        }
    

    I also want to see if my understanding is correct: the reason we do not use "equals" is we are actually comparing the string's references.


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.