Surprised that my solution(12ms) should beat 100.00% C submissions


  • 0
    U

    to be honest, i did not even come up with the solution all by myself. i had known it is a variant of the longest-substring-without-repetition problem, however, i could not come up with a clear solution at the moment.

    afte referring to the discussion forum and with the help of the explanation in this question, i started to write my own solution in C, then came the solution, and the result runtime really surprised myself:

    // for sort the |words|
    int strcmp1(const void* p1, const void* p2) {
        return strcmp(*(char**)p1, *(char**)p2);
    }
    
    // to handle duplicate words in |words|
    struct word_t {
        char* w;
        int count;
        int* pos;
        int cur;
    };
    
    int word_t_cmp(const void* p1, const void* p2) {
        return strcmp(((const struct word_t*)p1)->w, ((const struct word_t*)p2)->w);
    }
    
    /**
     * Return an array of size *returnSize.
     * Note: The returned array must be malloced, assume caller calls free().
     * 
     * It is a variant of the longest-substring-without-repetition problem
     */
    int* findSubstring(char* s, char** words, int wordsSize, int* returnSize) {
        if (!s || wordsSize == 0 || strlen(s) < wordsSize*strlen(words[0])) {
            *returnSize = 0;
            return NULL;
        }
        if (strlen(s) == 0 && strlen(words[0]) == 0) {
            *returnSize = 1;
            int* ret = (int*) malloc(sizeof(int));
            ret[0] = 0;
            return ret;
        }
    
        int n = strlen(s);
        int k = strlen(words[0]);
        // sort |words| first, prepare for binary search
        qsort(words, wordsSize, sizeof(char*), strcmp1);
        // construct array of word_t
        // @note please note that after construction, |wts| is already sorted
        struct word_t* wts = (struct word_t*) malloc(wordsSize * sizeof(struct word_t));
        int wtsSize = 0;
        for (int i = 0; i < wordsSize;) {
            char* w = words[i];
            int count = 1;
            while (++i < wordsSize && !strcmp(w, words[i]))
                count++;
            struct word_t* wt_ptr = wts + wtsSize++;
            wt_ptr->w = w;
            wt_ptr->count = count;
            wt_ptr->pos = (int*) malloc(count * sizeof(int));
            wt_ptr->cur = 0;
        }
        // store one word
        struct word_t wt;
        wt.w = (char*) malloc((k+1) * sizeof(char));
        // return value
        int cap = 10;
        int* ret = (int*) malloc(cap * sizeof(int));
        int size = 0;
        for (int offset = 0; offset < k; offset++) {
            // init word_t array
            for (int i = 0; i < wtsSize; i++) {
                struct word_t* wt_ptr = wts + i;
                for (int j = 0; j < wt_ptr->count; j++)
                    wt_ptr->pos[j] = -1;
                wt_ptr->cur = 0;
            }
            int start = offset; // start pos of current substring
            int len = 0; // number of words encountered
    
            for (int i = offset; i <= n - k; i += k) {
                strncpy(wt.w, s+i, k);
                wt.w[k] = '\0';
                struct word_t* p = (struct word_t*) bsearch(&wt, wts, wtsSize, sizeof(struct word_t), word_t_cmp);
                if (!p) {
                    start = i + k;
                    len = 0;
                    continue;
                }
                // @note the following five lines covers extra occurrence of
                // (possible duplicate) word, you can draw some special case
                // on a paper if it's hard to understand why it could cover
                // all boundary conditions
                // |p->cur| and |p->pos| implement a simple rounded queue,
                // and |p->cur| always point to the smallest index position
                int pos = p->pos[p->cur];
                p->pos[p->cur++] = i;
                if (p->cur >= p->count)
                    p->cur -= p->count;
                if (pos < start) { // valid occurrence of this word in current substring started at |start|
                    len++;
                    if (len == wordsSize) { // all words encounterd, add to result set
                        if (size == cap) { // extend the array
                            cap = 2*cap;
                            int* tmp = (int*) malloc(cap * sizeof(int));
                            memcpy(tmp, ret, size*sizeof(int));
                            free(ret);
                            ret = tmp;
                        }
                        ret[size++] = start;
                        // move substring forward by one word length
                        start += k;
                        len--;
                    }
                } else { // extra occurence of (possible duplicat) word encountered, update |start| and |len|
                    start = pos + k;
                    len = (i - start)/k + 1;
                }
            }
        }
    
        // cleanup
        for (int i = 0; i < wtsSize; i++)
            free(wts[i].pos);
        free(wts);
    
        *returnSize = size;
        if (size == 0) {
            free(ret);
            ret = NULL;
        }
        return ret;
    }
    

    i did not take much speed considerations when coding, though, the result should beat 100.00% C submissions. interesting :-)


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.