Sunday, December 6, 2015

Longest Repeated Substring using Suffix Arrays - Java

Suffix arrays can be used to find the longest repeated subtring of a string.

This code finds the longest repeated substring (can be overlapping) for a given string using suffix arrays.

Source:

import java.util.Arrays;

public class LongestRepeatedSubstring {
    public static String lrs(String s){
        int N = s.length();
        String[] suffixes = new String[N];
        
        System.out.println("Suffixes: ");
        for(int i=0;i<suffixes.length;++i){
            suffixes[i] = s.substring(i);
            System.out.println(suffixes[i]);
        }
        
        System.out.println();
        Arrays.sort(suffixes);
        System.out.println("Sorted Suffixes: ");
        for(String sss:suffixes)System.out.println(sss);
        
        String lrs = "";
        
        for(int i=0;i<N-1;++i){
            int len = lcp(suffixes[i], suffixes[i+1]);
            if(len>=0 && len>lrs.length()){
                lrs = suffixes[i].substring(0, len+1);
            }
        }
        return lrs;        
    }
    
    public static int lcp(String s, String ss){
        int i=0;
        for(;i<s.length();++i){
            if(s.charAt(i)!=ss.charAt(i))break;
        }
        return i-1;
    }
    
    public static void main(String[] args){
        String s = "bababababababaaa";
        
        //Include overlapping substrings
        System.out.println("\n\nLongest Repeated Substring -> "+lrs(s));
    }
}

Output:

Suffixes: 
bababababababaaa
ababababababaaa
babababababaaa
abababababaaa
bababababaaa
ababababaaa
babababaaa
abababaaa
bababaaa
ababaaa
babaaa
abaaa
baaa
aaa
aa
a

Sorted Suffixes: 
a
aa
aaa
abaaa
ababaaa
abababaaa
ababababaaa
abababababaaa
ababababababaaa
baaa
babaaa
bababaaa
babababaaa
bababababaaa
babababababaaa
bababababababaaa


Longest Repeated Substring -> abababababa


No comments:

Post a Comment