Java實(shí)現(xiàn)正則式轉(zhuǎn)MFA

//要求正則式中不含有空集或空串,程序不檢查符號(hào)是否合法
//NFA初始開始狀態(tài)記為0,終止?fàn)顟B(tài)記為1,新增的狀態(tài)用2,3,...表示,存儲(chǔ)結(jié)構(gòu)為(起點(diǎn)狀態(tài),終點(diǎn)狀態(tài),正則式)三元組的列表
//假設(shè)沒有不可達(dá)狀態(tài)和死狀態(tài)
package compiler;

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Scanner;
import java.util.Set;

class triple {
    int start, end;
    String regex;

    public triple(int i, int j, String init) {
        start = i;
        end = j;
        regex = init;
    }

    @Override
    public String toString() {
        return start + "," + regex + "->" + end;
    }

    @Override
    public boolean equals(Object arg0) {
        if(arg0 == null || arg0.getClass() != this.getClass()) return false;
        triple t = (triple)arg0;
        return start==t.start&&end==t.end&&regex.equals(t.regex);
    }
}

public class RegexToMFA {

    static String regex;
    static LinkedList<triple> nfa;
    static LinkedList<Integer> finalStates;
    static LinkedList<triple> dfa;
    static Set<Character> chars;
    static int mfaStart;
    static LinkedList<Integer> mfaFinal;
    static LinkedList<triple> mfa;

    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        regex = sc.nextLine().trim();
        sc.close();
        if (regex.length() == 1) {
            System.out.println("MFA:初態(tài)0,終態(tài)1,(0," + regex + ")->1");
            return;
        }
        regexToNFA();
        System.out.println("NFA:初態(tài)0,終態(tài)1");
        for (triple t : nfa)
            System.out.println(t);
        NFAtoDFA();
        System.out.println("DFA:初態(tài)0,終態(tài)" + finalStates);
        for (triple t : dfa)
            System.out.println(t);
        DFAtoMFA();
        System.out.println("MFA:初態(tài)"+mfaStart+",終態(tài)"+mfaFinal);
        for (triple t : mfa)
            System.out.println(t);
    }

    static void regexToNFA() {
        nfa = new LinkedList<>();
        nfa.add(new triple(0, 1, regex));
        int state = 2;
        boolean loop = true;// 每輪迭代對(duì)nfa中的一個(gè)映射應(yīng)用規(guī)則拆分,直到不能再拆分為止
        while (loop) {
            loop = false;
            // 遍歷映射,若拆分,loop=true,跳出
            // pattern1,2,3檢查不可調(diào)換順序
            for (triple t : nfa) {
                int s = t.start, e = t.end;
                String reg = t.regex;
                if (reg.length() <= 1)
                    continue;// 不能拆分
                // 去括號(hào)
                if (reg.charAt(0) == '(') {
                    int bracketCnt = 1;
                    for (int i = 1; i < reg.length(); ++i) {
                        if (reg.charAt(i) == '(')
                            bracketCnt++;
                        if (reg.charAt(i) == ')') {
                            bracketCnt--;
                            if (bracketCnt == 0) {
                                if (i == reg.length() - 1)
                                    t.regex = reg.substring(1, i);
                                break;
                            }
                        }
                    }
                }
                reg = t.regex;
                int i;
                if ((i = isPattern1(reg)) != -1) {
                    nfa.remove(t);
                    nfa.add(new triple(s, e, reg.substring(0, i)));
                    nfa.add(new triple(s, e, reg.substring(i + 1)));
                    loop = true;
                    break;
                }
                if ((i = isPattern2(reg)) != -1) {
                    nfa.remove(t);
                    nfa.add(new triple(s, state, reg.substring(0, i)));
                    nfa.add(new triple(state, e, reg.substring(i)));
                    state++;
                    loop = true;
                    break;
                }
                if (isPattern3(reg)) {
                    nfa.remove(t);
                    nfa.add(new triple(s, state, ""));
                    nfa.add(new triple(state, e, ""));
                    nfa.add(new triple(state, state, reg.substring(0, reg.length() - 1)));
                    state++;
                    loop = true;
                    break;
                }
            }
        }
    }

    static int isPattern1(String regex)// 若符合規(guī)則,返回|的下標(biāo),否則返回-1
    {
        int bracketCnt = 0;
        for (int i = 0; i < regex.length(); ++i) {
            if (regex.charAt(i) == '(')
                bracketCnt++;
            if (regex.charAt(i) == ')')
                bracketCnt--;
            if (regex.charAt(i) == '|' && bracketCnt == 0)
                return i;
        }
        return -1;
    }

    static int isPattern2(String reg)// 若符合規(guī)則,返回e2開始處的下標(biāo),否則返回-1
    {
        if (reg.charAt(0) != '(')// 字符開頭
        {
            if (reg.charAt(1) == '*')
                return reg.length() == 2 ? -1 : 2;
            return 1;
        } else// 左括號(hào)開頭
        {
            int bracketCnt = 1;
            for (int i = 1; i < reg.length(); ++i) {
                if (reg.charAt(i) == '(')
                    bracketCnt++;
                if (reg.charAt(i) == ')') {
                    bracketCnt--;
                    if (bracketCnt == 0) {
                        if (reg.charAt(i + 1) == '*')
                            return reg.length() == i + 2 ? -1 : i + 2;
                        return i + 1;
                    }
                }
            }
        }
        return -1;
    }

    static boolean isPattern3(String reg) {
        if (reg.length() == 2 && reg.charAt(1) == '*')// a*
            return true;
        // 相匹配的(...)* 匹配條件:左括號(hào)開始計(jì)數(shù)1,往后左加右減,若減到0時(shí)是最后一個(gè)右括號(hào),成功
        if (reg.charAt(0) == '(' && reg.endsWith(")*")) {
            int bracketCnt = 1;
            for (int i = 1; i < reg.length() - 1; ++i) {
                if (reg.charAt(i) == '(')
                    bracketCnt++;
                if (reg.charAt(i) == ')') {
                    bracketCnt--;
                    if (bracketCnt == 0)
                        return i == reg.length() - 2;
                }
            }
        }
        return false;
    }

    static void NFAtoDFA() {
        finalStates = new LinkedList<>();
        dfa = new LinkedList<>();
        Integer[] I = getClosure(new Integer[] { 0 });
        getChars();
        LinkedList<Integer[]> newStates = new LinkedList<>();
        newStates.add(I);
        for (Integer i : I)
            if (i == 1) {
                finalStates.add(0);
                break;
            }
        LinkedList<Integer[]> toCalc = new LinkedList<>(newStates);
        while (!toCalc.isEmpty()) {
            Integer[] arr = toCalc.poll();
            for (Character c : chars) {
                Integer[] tmp = getReachStates(arr, c);
                Integer[] reachClosure = getClosure(tmp);
                if (!have(newStates, reachClosure)) {
                    newStates.add(reachClosure);
                    toCalc.add(reachClosure);
                    for (Integer i : reachClosure)
                        if (i == 1) {
                            finalStates.add(newStates.size() - 1);
                            break;
                        }
                }
                int s = getIndex(newStates, arr);
                int e = getIndex(newStates, reachClosure);
                triple t = new triple(s, e, c + "");
                dfa.add(t);
            }
        }
    }

    static Integer[] getClosure(Integer[] arr) {
        Set<Integer> closure = new HashSet<>();
        for (Integer start : arr)
            closure.add(start);
        LinkedList<Integer> tocheck = new LinkedList<>(closure);
        while (!tocheck.isEmpty()) {
            int start = tocheck.poll();
            for (triple t : nfa)
                if (t.start == start && t.regex.equals("")) {
                    closure.add(t.end);
                    tocheck.add(t.end);
                    break;
                }
        }
        return closure.toArray(new Integer[closure.size()]);
    }

    static void getChars() {
        chars = new HashSet<>();
        for (int i = 0; i < regex.length(); ++i) {
            char c = regex.charAt(i);
            if (c != '(' && c != ')' && c != '|' && c != '*')
                chars.add(c);
        }
    }

    static Integer[] getReachStates(Integer[] arr, Character c) {
        Set<Integer> reachStates = new HashSet<>();
        for (Integer i : arr)
            for (triple t : nfa)
                if (t.start == i && !t.regex.isEmpty() && t.regex.charAt(0) == c) {
                    reachStates.add(t.end);
                    break;
                }
        return reachStates.toArray(new Integer[reachStates.size()]);
    }

    static int getIndex(LinkedList<Integer[]> newStates, Integer[] arr) {
        for (int i = 0; i < newStates.size(); ++i)
            if (Arrays.equals(newStates.get(i), arr))
                return i;
        return -1;
    }

    static boolean have(LinkedList<Integer[]> newStates, Integer[] arr) {
        for (int i = 0; i < newStates.size(); ++i)
            if (Arrays.equals(newStates.get(i), arr))
                return true;
        return false;
    }
    
    static void DFAtoMFA()
    {
        mfaStart = 0;
        mfaFinal = finalStates;
        mfa = new LinkedList<>(dfa);
        Set<Integer> allStates = new HashSet<>();
        allStates.add(0);
        for(triple t: dfa)
            allStates.add(t.end);
        int n = allStates.size();
        allStates.removeAll(finalStates);
        LinkedList<Integer> nonFinal = new LinkedList<>(allStates);
        boolean[][] table = new boolean[n][n];
        for(int i=0; i<n; ++i)
            for(int j=0; j<n; ++j)
                table[i][j] = true;
        for(int i=0; i<n; ++i)
            for(int j=0; j<n; ++j)
                if(j!=i && ((finalStates.contains(i)&&nonFinal.contains(j)) || (nonFinal.contains(i)&&finalStates.contains(j))))
                    table[i][j] = false;
        boolean flag = true;
        while(flag)
        {
            flag = false;
            for(int i=0; i<n; ++i)
                for(int j=0; j<n; ++j)
                    if(j!=i && table[i][j])
                        for(char c: chars)
                        {
                            boolean ihasTrans = false, jhasTrans = false;
                            int iTrans = 0, jTrans = 0;
                            for(triple t: dfa)
                                if(t.start==i && t.regex==c+"")
                                {
                                    ihasTrans = true;
                                    iTrans = t.end;
                                    break;
                                }
                            for(triple t: dfa)
                                if(t.start==j && t.regex==c+"")
                                {
                                    jhasTrans = true;
                                    jTrans = t.end;
                                    break;
                                }
                            if((ihasTrans&&!jhasTrans) || (!ihasTrans&&jhasTrans) || (ihasTrans&&jhasTrans&&!table[iTrans][jTrans]))
                            {
                                table[i][j] = false;
                                flag = true;
                                break;
                            }
                        }
        }
        for(int i=0; i<n; ++i)
            for(int j=0; j<n; ++j)
                if(j!=i && table[i][j])
                {
                    for(triple t: dfa)
                        if(t.start==j || t.end==j) 
                        {
                            mfa.remove(t);
                            if(t.start == j) t.start = i;
                            if(t.end == j) t.end = i;
                            if(!mfa.contains(t))
                                mfa.add(t);
                        }
                    if(mfaStart==j) mfaStart=i;
                    mfaFinal.remove(j);
                    table[i][j] = false;
                    table[j][i] = false;
                }
    }   
}
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容