My C++ solution with array, lookup table and pointers: 10ms


  • 1
    C
    vector<string> findRepeatedDnaSequences(string s) {
        vector<string> vs;
        int const N = s.size();
        if(N <= 10){
            return vs;
        }
    
        int lut['Z'];
        lut['A'] = 0;
        lut['C'] = 1;
        lut['G'] = 2;
        lut['T'] = 3;
    
        vector<char> m(1<<20); // 10-letter-long => 20bits
        int sig = 0;
        char *p = &s[0], *e = p + N;
        for(int i = 0; i < 10; ++i, ++p){
            sig = ((sig << 2) | lut[*p]);
        }
        m[sig] = 1;
        while(p != e){
            sig = (((sig << 2) | lut[*p++]) & 0xFFFFF);
            char &c = m[sig];
            if(c < 2){
                if(++c == 2){
                    char t = *p;
                    *p = 0; // truncate
                    vs.push_back(p-10);
                    *p = t; // recover
                }
            }
        }
        return vs;
    }

  • 0
    P

    this would break if the same 10 letter string repeats more than 2^8 times since you are using char to store how many times the string is repeated. Correct me if I am wrong?


  • 0
    C

    The "if(c < 2){..." check limits each element in 0,1,2 only.


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.