Share my C solution, easy to understand


  • 2
    H

    There is only 4 possible character so I simply encode each substring in 20 bits.

    I don't want to implement a hash map so I simply sort the encoded values and then check if any value is equal to its adjacent value.

    After a repeated substring is found, restore it from the encoded value.

    #define CODE(a) ('A' == (a) ? 0 : 'C' == (a) ? 1 : 'G' == (a) ? 2 : 3)
    #define CHAR(a) (0 == (a) ? 'A' : 1 == (a) ? 'C' : 2 == (a) ? 'G' : 'T')
    int comp(const void* p1, const void* p2) {
        int a = *((int*) p1);
        int b = *((int*) p2);
        return a == b ? 0 : a > b ? 1 : -1;
    }
    char** findRepeatedDnaSequences(char* s, int* returnSize) {
        int len = s && *s ? (int) strlen(s) - 9 : 0, *encode;
        int i, j, code = 0, mask = (1 << 20) - 1;
        char** result;
        if (1 >= len) {
            *returnSize = 0;
            return NULL;
        }
        encode = malloc(sizeof(int) * len);
        for (i = 0, j = 0; s[i]; ++i) {
            code = ((code << 2) | CODE(s[i])) & mask;
            9 <= i ? (encode[j++] = code) : 0;
        }
        result = malloc(sizeof(char*) * len);
        qsort(encode, len, sizeof(int), comp);
        code = 0;
        for (i = 1; i < len; ++i) {
            if (encode[i] == encode[i - 1] && 
                (i == len - 1 || encode[i] != encode[i + 1])) {
                result[code] = malloc(11);
                for (j = 0; j < 10; ++j) {
                    result[code][j] = CHAR((encode[i] >> ((9 - j) << 1)) & 3);
                }
                result[code][j] = '\0';
                code++;
            }
        }
        free(encode);
        *returnSize = code;
        return result;
    }

  • 0
    J

    I just wonder how long it runs?


  • 0
    H

    The result shows 28 ms.


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.