MSD sort on suffix array, no hash involved


  • 0
    X
    public class Solution {
    public List<String> findRepeatedDnaSequences(String s) {
    	List<String> result = new ArrayList<String>();
    	int N = s.length(), R = 4;
    	if (N < 11) return result;
    	int[] sa = new int[N - 9], aux = new int[N - 9];
    	for (int i = 0; i <= N - 10; i++) sa[i] = i;	
    
    	for (int i = 0; i < 10; i++) {
    		int[] count = new int[R + 1];
    		for (int j : sa) count[getInt(s, j + i) + 1]++;
    		for (int r = 0; r < R; r++) count[r + 1] += count[r];
    		for (int j : sa) aux[count[getInt(s, j + i)]++] = j;
    		for (int j = 0; j < sa.length; j++) sa[j] = aux[j];
    	}
    
    	Set<String> set = new HashSet<String>();
    	for (int i = 1; i < sa.length; i++) {
    		if (s.substring(sa[i], sa[i] + 10).equals(s.substring(sa[i - 1], sa[i - 1] + 10))){ 
    		    set.add(s.substring(sa[i], sa[i] + 10));
    		    i++;
    		}
    	}
    	result.addAll(set);
    	return result;
    }
    
    private int getInt(String str, int i) {
    	char c = str.charAt(i);
    	if (c == 'A') return 0;
    	else if (c == 'C') return 1;
    	else if (c == 'G') return 2;
    	else return 3;
    }
    

    }


Log in to reply
 

Looks like your connection to LeetCode Discuss was lost, please wait while we try to reconnect.