Sunday, December 13, 2015

Simluating NFA for Pattern matching using Regular Expressions - Java

Worst Case Performance : MN

M -> The M character pattern to be matched
N  -> Number of characters in the text

(Algorithms Part II - coursera.org)

Source:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Stack;


public class NFA {
    private char[] re; //match transitions
    private ArrayList<ArrayList<Integer>> G; //epsilon transition digraph
    private int M; //number of states
    private boolean[] marked;
    
    public NFA(String regexp){
        M = regexp.length();
        re = regexp.toCharArray();
        G = buildEpsilonTransitionDigraph();
    }
    
    public boolean recognizes(String txt){
        ArrayList<Integer> pc = new ArrayList<>();
        marked = new boolean[M+1];
        dfs(0);
        for(int v=0;v<=M;++v) if(marked[v])pc.add(v); 
        
        for(int i=0;i<txt.length();++i){
            ArrayList<Integer> match = new ArrayList<>();
            for(int v:pc){
                if(v==M)continue; //Input still there
                if((re[v] == txt.charAt(i))||re[v]=='.')
                    match.add(v+1);
            } 
        
            Arrays.fill(marked, false);
            pc.clear();
            for(int ii:match)dfs(ii);
            for(int ii=0;ii<=M;++ii)if(marked[ii])pc.add(ii);
        }
        
        for(int v:pc)if(v==M)return true;
        return false; 
    }
    
    private ArrayList<ArrayList<Integer>> buildEpsilonTransitionDigraph(){
        ArrayList<ArrayList<Integer>> G = new ArrayList<>(M+1);
        for(int i=0;i<=M;++i)G.add(new ArrayList<>());
        
        Stack<Integer> st = new Stack<>();
        
        for(int i=0;i<M;++i){
            int lp = i;
            
            if(re[i]=='(' || re[i] == '|')st.push(i);
            else if(re[i] == ')'){
                int or = st.pop();
                if(re[or]=='|'){
                    lp = st.pop();
                    G.get(lp).add(or+1);
                    G.get(or).add(i);
                }else lp = or;
            }
            
            if(i<M-1 && re[i+1] == '*'){
                G.get(lp).add(i+1);
                G.get(i+1).add(lp);
            }
            
            if(re[i] == '(' || re[i] == '*' || re[i] == ')')
                G.get(i).add(i+1);
        }
        
        return G; 
    }
    
    public void dfs(int i){
        if(marked[i])return;
        marked[i] = true;
        for(int ii:G.get(i)){
            dfs(ii);
        }
    } 
    
    public static void main(String[] args){
        NFA nfa = new NFA("(a|b)*.*bz");
        
        /*
            (a|b)* will match abababaaaabbbbbababab 
            .* will match zoqoibzqoizbq 
            bz will match bz
        */
        String text = "abababaaaabbbbbabababzoqoibzqoizbqbz";
        System.out.println(nfa.recognizes(text));
        
        //fails
        System.out.println(nfa.recognizes(text+"X"));
    }
    
}

Output:

true
false

No comments:

Post a Comment