//要求正則式中不含有空集或空串,程序不檢查符號(hào)是否合法
//NFA初始開始狀態(tài)記為0,終止?fàn)顟B(tài)記為1,新增的狀態(tài)用2,3,...表示,存儲(chǔ)結(jié)構(gòu)為(起點(diǎn)狀態(tài),終點(diǎn)狀態(tài),正則式)三元組的列表
//假設(shè)沒有不可達(dá)狀態(tài)和死狀態(tài)
package compiler;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Scanner;
import java.util.Set;
class triple {
int start, end;
String regex;
public triple(int i, int j, String init) {
start = i;
end = j;
regex = init;
}
@Override
public String toString() {
return start + "," + regex + "->" + end;
}
@Override
public boolean equals(Object arg0) {
if(arg0 == null || arg0.getClass() != this.getClass()) return false;
triple t = (triple)arg0;
return start==t.start&&end==t.end&®ex.equals(t.regex);
}
}
public class RegexToMFA {
static String regex;
static LinkedList<triple> nfa;
static LinkedList<Integer> finalStates;
static LinkedList<triple> dfa;
static Set<Character> chars;
static int mfaStart;
static LinkedList<Integer> mfaFinal;
static LinkedList<triple> mfa;
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
regex = sc.nextLine().trim();
sc.close();
if (regex.length() == 1) {
System.out.println("MFA:初態(tài)0,終態(tài)1,(0," + regex + ")->1");
return;
}
regexToNFA();
System.out.println("NFA:初態(tài)0,終態(tài)1");
for (triple t : nfa)
System.out.println(t);
NFAtoDFA();
System.out.println("DFA:初態(tài)0,終態(tài)" + finalStates);
for (triple t : dfa)
System.out.println(t);
DFAtoMFA();
System.out.println("MFA:初態(tài)"+mfaStart+",終態(tài)"+mfaFinal);
for (triple t : mfa)
System.out.println(t);
}
static void regexToNFA() {
nfa = new LinkedList<>();
nfa.add(new triple(0, 1, regex));
int state = 2;
boolean loop = true;// 每輪迭代對(duì)nfa中的一個(gè)映射應(yīng)用規(guī)則拆分,直到不能再拆分為止
while (loop) {
loop = false;
// 遍歷映射,若拆分,loop=true,跳出
// pattern1,2,3檢查不可調(diào)換順序
for (triple t : nfa) {
int s = t.start, e = t.end;
String reg = t.regex;
if (reg.length() <= 1)
continue;// 不能拆分
// 去括號(hào)
if (reg.charAt(0) == '(') {
int bracketCnt = 1;
for (int i = 1; i < reg.length(); ++i) {
if (reg.charAt(i) == '(')
bracketCnt++;
if (reg.charAt(i) == ')') {
bracketCnt--;
if (bracketCnt == 0) {
if (i == reg.length() - 1)
t.regex = reg.substring(1, i);
break;
}
}
}
}
reg = t.regex;
int i;
if ((i = isPattern1(reg)) != -1) {
nfa.remove(t);
nfa.add(new triple(s, e, reg.substring(0, i)));
nfa.add(new triple(s, e, reg.substring(i + 1)));
loop = true;
break;
}
if ((i = isPattern2(reg)) != -1) {
nfa.remove(t);
nfa.add(new triple(s, state, reg.substring(0, i)));
nfa.add(new triple(state, e, reg.substring(i)));
state++;
loop = true;
break;
}
if (isPattern3(reg)) {
nfa.remove(t);
nfa.add(new triple(s, state, ""));
nfa.add(new triple(state, e, ""));
nfa.add(new triple(state, state, reg.substring(0, reg.length() - 1)));
state++;
loop = true;
break;
}
}
}
}
static int isPattern1(String regex)// 若符合規(guī)則,返回|的下標(biāo),否則返回-1
{
int bracketCnt = 0;
for (int i = 0; i < regex.length(); ++i) {
if (regex.charAt(i) == '(')
bracketCnt++;
if (regex.charAt(i) == ')')
bracketCnt--;
if (regex.charAt(i) == '|' && bracketCnt == 0)
return i;
}
return -1;
}
static int isPattern2(String reg)// 若符合規(guī)則,返回e2開始處的下標(biāo),否則返回-1
{
if (reg.charAt(0) != '(')// 字符開頭
{
if (reg.charAt(1) == '*')
return reg.length() == 2 ? -1 : 2;
return 1;
} else// 左括號(hào)開頭
{
int bracketCnt = 1;
for (int i = 1; i < reg.length(); ++i) {
if (reg.charAt(i) == '(')
bracketCnt++;
if (reg.charAt(i) == ')') {
bracketCnt--;
if (bracketCnt == 0) {
if (reg.charAt(i + 1) == '*')
return reg.length() == i + 2 ? -1 : i + 2;
return i + 1;
}
}
}
}
return -1;
}
static boolean isPattern3(String reg) {
if (reg.length() == 2 && reg.charAt(1) == '*')// a*
return true;
// 相匹配的(...)* 匹配條件:左括號(hào)開始計(jì)數(shù)1,往后左加右減,若減到0時(shí)是最后一個(gè)右括號(hào),成功
if (reg.charAt(0) == '(' && reg.endsWith(")*")) {
int bracketCnt = 1;
for (int i = 1; i < reg.length() - 1; ++i) {
if (reg.charAt(i) == '(')
bracketCnt++;
if (reg.charAt(i) == ')') {
bracketCnt--;
if (bracketCnt == 0)
return i == reg.length() - 2;
}
}
}
return false;
}
static void NFAtoDFA() {
finalStates = new LinkedList<>();
dfa = new LinkedList<>();
Integer[] I = getClosure(new Integer[] { 0 });
getChars();
LinkedList<Integer[]> newStates = new LinkedList<>();
newStates.add(I);
for (Integer i : I)
if (i == 1) {
finalStates.add(0);
break;
}
LinkedList<Integer[]> toCalc = new LinkedList<>(newStates);
while (!toCalc.isEmpty()) {
Integer[] arr = toCalc.poll();
for (Character c : chars) {
Integer[] tmp = getReachStates(arr, c);
Integer[] reachClosure = getClosure(tmp);
if (!have(newStates, reachClosure)) {
newStates.add(reachClosure);
toCalc.add(reachClosure);
for (Integer i : reachClosure)
if (i == 1) {
finalStates.add(newStates.size() - 1);
break;
}
}
int s = getIndex(newStates, arr);
int e = getIndex(newStates, reachClosure);
triple t = new triple(s, e, c + "");
dfa.add(t);
}
}
}
static Integer[] getClosure(Integer[] arr) {
Set<Integer> closure = new HashSet<>();
for (Integer start : arr)
closure.add(start);
LinkedList<Integer> tocheck = new LinkedList<>(closure);
while (!tocheck.isEmpty()) {
int start = tocheck.poll();
for (triple t : nfa)
if (t.start == start && t.regex.equals("")) {
closure.add(t.end);
tocheck.add(t.end);
break;
}
}
return closure.toArray(new Integer[closure.size()]);
}
static void getChars() {
chars = new HashSet<>();
for (int i = 0; i < regex.length(); ++i) {
char c = regex.charAt(i);
if (c != '(' && c != ')' && c != '|' && c != '*')
chars.add(c);
}
}
static Integer[] getReachStates(Integer[] arr, Character c) {
Set<Integer> reachStates = new HashSet<>();
for (Integer i : arr)
for (triple t : nfa)
if (t.start == i && !t.regex.isEmpty() && t.regex.charAt(0) == c) {
reachStates.add(t.end);
break;
}
return reachStates.toArray(new Integer[reachStates.size()]);
}
static int getIndex(LinkedList<Integer[]> newStates, Integer[] arr) {
for (int i = 0; i < newStates.size(); ++i)
if (Arrays.equals(newStates.get(i), arr))
return i;
return -1;
}
static boolean have(LinkedList<Integer[]> newStates, Integer[] arr) {
for (int i = 0; i < newStates.size(); ++i)
if (Arrays.equals(newStates.get(i), arr))
return true;
return false;
}
static void DFAtoMFA()
{
mfaStart = 0;
mfaFinal = finalStates;
mfa = new LinkedList<>(dfa);
Set<Integer> allStates = new HashSet<>();
allStates.add(0);
for(triple t: dfa)
allStates.add(t.end);
int n = allStates.size();
allStates.removeAll(finalStates);
LinkedList<Integer> nonFinal = new LinkedList<>(allStates);
boolean[][] table = new boolean[n][n];
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
table[i][j] = true;
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
if(j!=i && ((finalStates.contains(i)&&nonFinal.contains(j)) || (nonFinal.contains(i)&&finalStates.contains(j))))
table[i][j] = false;
boolean flag = true;
while(flag)
{
flag = false;
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
if(j!=i && table[i][j])
for(char c: chars)
{
boolean ihasTrans = false, jhasTrans = false;
int iTrans = 0, jTrans = 0;
for(triple t: dfa)
if(t.start==i && t.regex==c+"")
{
ihasTrans = true;
iTrans = t.end;
break;
}
for(triple t: dfa)
if(t.start==j && t.regex==c+"")
{
jhasTrans = true;
jTrans = t.end;
break;
}
if((ihasTrans&&!jhasTrans) || (!ihasTrans&&jhasTrans) || (ihasTrans&&jhasTrans&&!table[iTrans][jTrans]))
{
table[i][j] = false;
flag = true;
break;
}
}
}
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
if(j!=i && table[i][j])
{
for(triple t: dfa)
if(t.start==j || t.end==j)
{
mfa.remove(t);
if(t.start == j) t.start = i;
if(t.end == j) t.end = i;
if(!mfa.contains(t))
mfa.add(t);
}
if(mfaStart==j) mfaStart=i;
mfaFinal.remove(j);
table[i][j] = false;
table[j][i] = false;
}
}
}
Java實(shí)現(xiàn)正則式轉(zhuǎn)MFA
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。
相關(guān)閱讀更多精彩內(nèi)容
- title: 用Java實(shí)現(xiàn)網(wǎng)絡(luò)爬蟲二之Java正則表達(dá)式tags: Java 網(wǎng)絡(luò)爬蟲 Spider Crawl...
- 實(shí)現(xiàn)一個(gè)正則表達(dá)式需要幾步? 就三步: 分析正則表達(dá)式并構(gòu)建出NFA 根據(jù)NFA得出DFA 根據(jù)DFA匹配字符串當(dāng)...
- 1:使用場(chǎng)景 比如我們?cè)谧约簩?shí)現(xiàn)的登錄功能既能支持“手機(jī)號(hào)碼”和“郵箱號(hào)碼”加用戶的密碼登錄。這時(shí)我們就可以使用j...
- 晚上上課回來已經(jīng)十點(diǎn)多了,可群里的同學(xué)還在交流今天的學(xué)習(xí)內(nèi)容,我也是興奮的在腦海里回憶今天學(xué)習(xí)的內(nèi)容!每次去新陽光...