My c++ code that implements Boyer-Moore string search got accepted in 12ms.


  • 11
    L

    I implements Boyer-Moore string search algorithm and it turns out to be very efficient (accepted in 12ms). Boyer-Moore uses information gained by preprocessing the pattern string to skip as many alignments as possible. A shift is calculated by applying both rules: the bad character rule and the good suffix rule. The actual shifting offset is the maximum of the shifts calculated by these rules.

    char *strStr(char *haystack, char *needle) {
    	if(NULL==haystack||NULL==needle)
    		return NULL;
    	int plen = strlen(needle);
    	int slen = strlen(haystack);
    	if(0==plen)
    		return haystack;
    	else if(plen>slen)
    		return NULL;
    	int badChar[256];
    	int np = 0;
    	int i,j,k;
    
    	for(i=0;i<256;i++)
    		badChar[i] = plen;
    
    	while(np<plen)
    	{
    		badChar[*(needle+np)] = plen-np-1;
    		np++;
    	}
    
    	int* goodSuffix = new int[plen];
    
    	int prefix_index = plen;
    	for(i=plen-1;i>=0;i--)
    	{
    		goodSuffix[i] = prefix_index;
    		if(*(needle+i)==*(needle+plen-1-i)&&prefix_index==i+1)
    			prefix_index = i;
    	}
    
    	for(i=0;i<plen-1;i++)
    	{
    		j = plen-1, k = 0;
    		while(k<i&&*(needle+j)==*(needle+i-k))
    		{j--;k++;}
    		if(*(needle+plen-1)==*(needle+i))
    			goodSuffix[j] = plen-1-i;
    	}
    	goodSuffix[plen-1] = 0;
    
    	int sp = 0;
    	while(sp<slen)
    	{
    		i = plen-1;
    		while(i>=0&&*(haystack+sp+i)==*(needle+i))
    			i--;
    		if(i<0)
    			return haystack+sp;
    		int bj = badChar[*(haystack+sp+i)] - plen + i + 1;
    		sp += (bj>goodSuffix[i]?bj:goodSuffix[i]);
    	}
    
    	delete goodSuffix;
    	return NULL;
    }

  • 0
    Y

    My Morris-Pratt implementation costs 48 ms, seems like Boyer-Moore is better for this set of tests


  • 0
    J

    The timer is not accurate. You should use more test cases to test your function. ;-)


  • 0
    Z

    My C++ KMP solution taking only 6ms.

    int * prep_kmp_table(char * needle,int m){
            int i,j,k;
            int *T=new int[m];
            T[0]=-1;
            T[1]=0;
            k=0;
            //cout<<needle;
            for(i=2;i<m;){
                if(needle[i-1]==needle[k]){
                    k=k+1;
                    T[i]=k;
                    i++;
                }
                else if(k>0&&needle[i-1]!=needle[k])
                    k=T[k];
                else{
                    T[i]=0;
                    i=i+1;
                }
            }
            return T;
        }
        
        class Solution {
        public:
            int strStr(char *haystack, char *needle) {
                int n=strlen(haystack);
                int m=strlen(needle);
                //cout<<haystack<<endl<<needle;
                if(m ==0) return 0;
                int *T=prep_kmp_table(needle,m);
                int i,j,k;
                for(i=0,j=0;i+j<n;){
                    if(haystack[i+j]==needle[j]){
                        j++;
                        if(j==m)
                            return i;
                    }
                    else{
                        if(T[j]>-1){
                            i=i+j-T[j];
                            j=T[j];
                        }
                        else {
                            j=0;
                            i=i+1;
                        }
                    }
                }
                return -1;
            }
        };

Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.