10-Line C# solution O(N) by building your own hash code -- Clear and Straightforward


  • 3
    L
    public IList<string> FindRepeatedDnaSequences(string s){
        HashSet<int> hist = new HashSet<int>();
        HashSet<string> resultSet = new HashSet<string>();
        for (int i = 0, hash = 0, mask = (1 << 18) - 1; i < s.Length; i++){   
            if(i > 9) hash &= mask;
            hash = (hash << 2) | (s[i] == 'A' ? 0 : s[i] == 'C' ? 1 : s[i] == 'G' ? 2 : 3);
            if(i < 9) continue;
            if (!hist.Contains(hash)) hist.Add(hash);
            else resultSet.Add(s.Substring(i - 9, 10));
        }
        return resultSet.ToList();
    }
    

    A straightforward solution easier to understand but not optimized.

    public IList<string> FindRepeatedDnaSequences(string s){
        HashSet<int> hist = new HashSet<int>();
        HashSet<string> resultSet = new HashSet<string>();
        for (int i = 0; i < s.Length - 9; i++){
            int hash = getMyHash(s.Substring(i, 10));
            if (!hist.Contains(hash)) hist.Add(hash);
            else resultSet.Add(s.Substring(i, 10));
        }
        return resultSet.ToList();
    }
    // You don't need write a brand new hash algorithm for all chars.
    // Need only for 4 DNA chars.
    private int getMyHash(string str){ //A 0, C 1, G 2, T 3
        int hash = 0;
        foreach (var s in str)
            hash = (hash << 2) | (s == 'A' ? 0 : s == 'C' ? 1 : s == 'G' ? 2 : 3);
        return hash;
    }

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.