C++ solution using bit manipulation O(n) - 120 ms (Can be better with trie?)


  • -1
    P
    class Solution {
    public:
        vector<string> findRepeatedDnaSequences(string s) {
            unordered_map<int, int> start_pos_map;
            unordered_set<int> done_set;
            vector<string> result;
            int n = s.size();
            int num = 0;
            for (int i = 0; i < n; ++i) {
                if (start_pos_map.find(num) != start_pos_map.end() && done_set.find(num) == done_set.end()) {
                    result.push_back(string(s, start_pos_map[num], 10));
                    done_set.insert(num);
                }
                if (i > 9) {
                    start_pos_map[num] = i-10;
                    num = num&(~(~0<<18)); // mask all bits except the ones from last 9 letters
                }
                // shift, then or the bits of the next char
                num = ((num<<2) | getBits(s[i]));
            }
            
            // chck for the last num
            if (start_pos_map.find(num) != start_pos_map.end() && done_set.find(num) == done_set.end()) {
              result.push_back(string(s, start_pos_map[num], 10));  
            }
            return result;
        }
        
        // 2 bits for each letter
        inline int getBits(char c) {
            switch(c) {
                case 'A' : return 0; // 00
                case 'C' : return 1; // 01
                case 'G' : return 2; // 10
                case 'T' : return 3; // 11
            }
        }
    };

  • 6
    X

    Why not just using bitset? My code here using 19ms.

    class Solution {
    public:
    vector<string> findRepeatedDnaSequences(string s) {
        if (s.size()<10) return vector<string>();
        bitset<1048576> cou;
        set<string> temp;
        int index = 0;
        for (int i=0; i<10; ++i){
            index = index * 4 + decode(s[i]); 
        }
        cou.set(index);
        for(int i=10; i<s.size();++i){
            index = index % 262144;
            index = index * 4 + decode(s[i]);
            if (cou.test(index))
                temp.insert(s.substr(i-9, 10));
            else
                cou.set(index);
        }
        return vector<string>(temp.begin(), temp.end());
    }
    
    int decode(char c){
        switch (c){
            case 'A': return 0;
            case 'C': return 1;
            case 'G': return 2;
            case 'T': return 3;
        }
    }
    

    };


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.