編譯原理實(shí)驗(yàn)一 詞法分析設(shè)計(jì)

一、實(shí)驗(yàn)?zāi)康?/h2>

通過(guò)本實(shí)驗(yàn)的編程實(shí)踐,使學(xué)生了解詞法分析的任務(wù),掌握詞法分析程序設(shè)計(jì)的原理和構(gòu)造方法,使學(xué)生對(duì)編譯的基本概念、原理和方法有完整的和清楚的理解,并能正確地、熟練地運(yùn)用。

二、實(shí)驗(yàn)內(nèi)容

用 VC++/VB/JAVA 語(yǔ)言實(shí)現(xiàn)對(duì) C 語(yǔ)言子集的源程序進(jìn)行詞法分析。通過(guò)輸入源程序從左到右對(duì)字符串進(jìn)行掃描和分解,依次輸出各個(gè)單詞的內(nèi)部編碼及單詞符號(hào)自身值;若遇到錯(cuò)誤則顯示“Error”,然后跳過(guò)錯(cuò)誤部分繼續(xù)顯示 ;同時(shí)進(jìn)行標(biāo)識(shí)符登記符號(hào)表的管理。

三、程序描述

程序達(dá)到的狀態(tài)轉(zhuǎn)換圖如下:


狀態(tài)轉(zhuǎn)換圖
package exp1;

import java.util.*;

import java.io.IOException;
import java.io.StringReader;
import static java.lang.Character.isDigit;
import static java.lang.Character.isLetter;
import static java.lang.Character.isLetterOrDigit;
import static java.lang.Character.isWhitespace;

/**
 * 詞法分析器
 *
 * @version 2020-05-18
 */
public class Tokenizer {
    /** 讀入的字符序列 */
    private final StringReader src;
    /** Tokenizer的狀態(tài)是否已經(jīng)分析過(guò) */
    private boolean hasAnalysised;
    /** 表示當(dāng)前是否到達(dá)結(jié)尾 */
    private boolean eof;
    /** 字符當(dāng)前所處的行數(shù) */
    private int line;
    /** 字符當(dāng)前行所處的位置 */
    private int character;
    /** 當(dāng)前字符在字符序列中的位置,實(shí)際應(yīng)和StringReader的私有屬性next相同 */
    private int index;
    /** 當(dāng)前字符的前一個(gè)字符 */
    private char previous;
    /** 標(biāo)志當(dāng)前是不是已經(jīng)使用了前一個(gè)字符 */
    private boolean usePrevious;
    /** 上一行的長(zhǎng)度 */
    private int characterPreviousLine;
    /** 關(guān)鍵詞表 */
    final String[] keywords = new String[] { "do", "end", "for", "if", "printf", "scanf", "then", "while" };
    /** 關(guān)鍵詞集合,為了快速判斷 */
    final Set<String> keywordsSet = new HashSet<>(Arrays.asList(keywords));
    /** 分隔符起始字符的集合 */
    final Set<Character> delimitersSet = new HashSet<>(Arrays.asList(',', ';', '(', ')', '[', ']'));
    /** 算數(shù)運(yùn)算符起始字符的集合 */
    final Set<Character> operatorsASet = new HashSet<>(Arrays.asList('+', '-', '*', '/'));
    /** 常量列表 */
    final List<Number> constants = new ArrayList<>();
    /** 標(biāo)識(shí)符列表 */
    final Set<String> identifiers = new LinkedHashSet<>();

    /**
     * 詞法分析器的構(gòu)造器
     *
     * @param src 源代碼
     */
    public Tokenizer(String src) {
        this.src = new StringReader(src);
        this.hasAnalysised = false;
        this.line = this.character = 1;
        this.index = this.characterPreviousLine = 0;
        this.previous = 0;
        this.eof = this.usePrevious = false;
    }

    /**
     * 檢查是否已經(jīng)分析過(guò)
     *
     * @return 如果已經(jīng)分析過(guò),返回true
     */
    public boolean hasAnalysised() {
        return hasAnalysised;
    }

    /**
     * 讀取出一個(gè)字符串
     *
     * @return 讀取出的字符串
     * @throws IOException
     */
    private String nextString() throws IOException {
        StringBuilder sb = new StringBuilder();
        char c;
        for (;;) {
            c = this.next();
            /*
             * 以字母開頭不是關(guān)鍵詞就是標(biāo)識(shí)符,該方法讀出一個(gè)符合正規(guī)式/[A-Za-z]([A-Za-z0-9]*)/的字符串
             * 即以字母開頭,字母和數(shù)字匹配零次或多次的字符串
             */
            if (isLetterOrDigit(c)) {
                sb.append(c);
            } else if (c == 0) {
                return sb.toString();
            } else {
                this.back();
                return sb.toString();
            }
        }
    }

    /**
     * 讀取出一個(gè)數(shù)字字符串
     *
     * @return 讀取出的數(shù)字字符串
     * @throws IOException
     */
    private String nextNumberString() throws IOException {
        StringBuilder sb = new StringBuilder();
        char c;
        for (;;) {
            c = this.next();
            /*
             * 以數(shù)字開頭可能為常量,或者為出錯(cuò)的單詞,此方法讀取出一個(gè)符合正規(guī)式/([0-9])([0-9.]*)/,即以0-9開頭,0-9和.匹配零次或多次的字符串
             * 對(duì)于末尾是.的或者是出現(xiàn)多次.的,表示出現(xiàn)錯(cuò)誤,交由numberAnalysis方法處理
             */
            if (isDigit(c)) {
                sb.append(c);
            } else if (c == '.') {
                sb.append(c);
            } else if (c == 0) {
                return sb.toString();
            } else {
                this.back();
                return sb.toString();
            }
        }
    }

    /**
     * 解析出字符串表示的數(shù)字
     *
     * @param numString 數(shù)字字符串
     * @return 數(shù)字字符串代表的值
     */
    private Number parseNumber(String numString) {
        /* 傳進(jìn)來(lái)的字符串,不是標(biāo)準(zhǔn)的整數(shù),就是標(biāo)準(zhǔn)的浮點(diǎn)數(shù),不會(huì)發(fā)生NumberFormatException */
        try {
            return new Integer(numString);
        } catch (NumberFormatException e) {
            return new Double(numString);
        }
    }

    /**
     * 獲取下一個(gè)非空白字符
     *
     * @return 到達(dá)末尾后返回0,下一個(gè)非空白字符
     * @throws IOException
     */
    private char nextClean() throws IOException {
        for (;;) {
            char c = this.next();
            if (c == 0 || !isWhitespace(c)) {
                return c;
            }
        }
    }

    /**
     * 獲取源代碼的下一個(gè)字符
     *
     * @return 到達(dá)末尾后返回0,否則返回源代碼的下一個(gè)字符
     * @throws IOException
     */
    private char next() throws IOException {
        int c;
        /* 如果回退過(guò),返回回退的字符 */
        if (this.usePrevious) {
            this.usePrevious = false;
            c = this.previous;
        } else {
            /* 否則,讀取新的字符 */
            c = this.src.read();
        }
        /* 判斷是不是到達(dá)結(jié)尾 */
        if (c <= 0) { // 到達(dá)末尾
            this.eof = true;
            return 0;
        }
        this.incrementIndexes(c);
        this.previous = (char) c;
        return this.previous;
    }

    /**
     * 增加{@link #index}并處理?yè)Q行,此方法僅應(yīng)當(dāng)在{@link #next()}中調(diào)用
     *
     * @param c 讀入的字符
     */
    private void incrementIndexes(int c) {
        if (c > 0) {
            this.index++;
            /* 因?yàn)檫\(yùn)行系統(tǒng)不同,行尾序列會(huì)有'\n' (*inx, Mac OS 10+), '\r\n' (Windows), '\r' (Mac OS 9-),需做處理*/
            if (c == '\r') {
                this.line++;
                this.characterPreviousLine = this.character;
                this.character = 0;
            } else if (c == '\n') {
                if (this.previous != '\r') {
                    this.line++;
                    this.characterPreviousLine = this.character;
                }
                this.character = 0;
            } else {
                this.character++;
            }
        }
    }

    /**
     * 回退一個(gè)字符,用于實(shí)現(xiàn)超前掃描。
     *
     * @throws IOException 如果已經(jīng)在字符串起始,或者已經(jīng)回退過(guò)一次
     */
    public void back() throws IOException {
        if (this.usePrevious) {
            throw new IOException("回退兩次不允許");
        } else if (this.index <= 0) {
            throw new IOException("已經(jīng)在開頭位置,無(wú)法回退");
        }
        this.decrementIndexes();
        this.usePrevious = true;
        this.eof = false;
    }

    /**
     * 回退{(lán)@link #index}并處理?yè)Q行,此方法僅應(yīng)當(dāng)在{@link #back()}中調(diào)用
     */
    private void decrementIndexes() {
        this.index--;
        if (this.previous == '\r' || this.previous == '\n') {
            this.line--;
            this.character = this.characterPreviousLine;
        } else if (this.character > 0) {
            this.character--;
        }
    }

    /**
     * 進(jìn)行詞法分析,返回分析列表
     *
     * @return 單詞分析結(jié)果的列表
     * @throws IOException
     */
    public List<WordExp> analysis() throws IOException {
        this.hasAnalysised = true;
        List<WordExp> res = new ArrayList<>();
        int c;
        /* c為0表示已經(jīng)分析到結(jié)尾,采用 nextClean 方法略過(guò)空白字符 */
        while ((c = nextClean()) != 0) {
            if (isLetter(c)) {
                this.back();
                this.analysisString(res); /* 處理字母開頭 */
            } else if (isDigit(c)) {
                this.back();
                this.analysisNumber(res); /* 處理數(shù)字開頭 */
            } else {
                this.back();
                this.analysisOther(res); /* 處理其他字符開頭 */
            }
        }
        return res;
    }

    /**
     * 分析以字母開頭的符號(hào)串
     *
     * @param res 分析結(jié)果列表
     * @throws IOException
     */
    private void analysisString(List<WordExp> res) throws IOException {
        /* 讀出一個(gè)以字母開頭,之后含有字母和數(shù)字零次或多次的字符串 */
        String cur = this.nextString();
        /* 如果讀出的字符串為關(guān)鍵詞 */
        if (keywordsSet.contains(cur)) {
            res.add(new WordExp(cur, WordType.KEYWORD, this.line, this.character));
        } else if (identifiers.contains(cur)) {
            /* 如果讀出的字符串已經(jīng)存在在標(biāo)識(shí)符列表中 */
            res.add(new WordExp(cur, WordType.IDENTIFIER, this.line, this.character));
        } else {
            /* 如果不存在在標(biāo)識(shí)符列表中,假如標(biāo)識(shí)符列表 */
            identifiers.add(cur);
            res.add(new WordExp(cur, WordType.IDENTIFIER, this.line, this.character));
        }
    }

    /**
     * 分析以數(shù)字開頭的符號(hào)串
     *
     * @param res 分析結(jié)果列表
     * @throws IOException
     */
    private void analysisNumber(List<WordExp> res) throws IOException {
        /* 讀出一個(gè)以數(shù)字開頭,之后含有數(shù)字和.零次或多次的字符串 */
        String numString = this.nextNumberString();
        /* 超前掃描下一個(gè)字符 */
        char c = this.next();
        if (!isLetter(c)) {
            if (c != 0)
                this.back();
            /* 如果以.結(jié)尾,表示出現(xiàn)了123.這種錯(cuò)誤 */
            if (numString.endsWith(".")) {
                res.add(new WordExp(numString, WordType.ERROR, this.line, this.character));
                return;
            }
            /* 如果字符串含有兩個(gè)及以上的.,說(shuō)明出現(xiàn)了類似123.123.123這種錯(cuò)誤 */
            else if (numString.indexOf('.') != numString.lastIndexOf('.')) {
                res.add(new WordExp(numString, WordType.ERROR, this.line, this.character));
                return;
            }
            /* 否則,是正常的常數(shù)串 */
            constants.add(parseNumber(numString));
            res.add(new WordExp(numString, WordType.CONSTANT, this.line, this.character));
            return;
        }
        /* 如果下一個(gè)字符是字母的話,說(shuō)明會(huì)出現(xiàn)形如123abc這種類似的錯(cuò)誤*/
        this.back();
        /* 讀取出后面的那個(gè)部分字符串 */
        String cur = this.nextString();
        cur = numString + cur;
        res.add(new WordExp(cur, WordType.ERROR, this.line, this.character));
    }

    /**
     * 分析以非字母和數(shù)字開頭的符號(hào)串
     *
     * @param res 分析結(jié)果列表
     * @throws IOException
     */
    private void analysisOther(List<WordExp> res) throws IOException {
        char c = this.next();
        char c2;
        if (c == '/') {
            /* 如果以/開頭,表示可能是注釋,除法符號(hào) */
            c2 = this.next();
            /* 如果下一個(gè)字符是/,表示為注釋,而且注釋到下一個(gè)回車或者結(jié)束,不用記錄 */
            if (c2 == '/') {
                for (;;) {
                    c2 = this.next();
                    if (c2 == '\r' || c2 == '\n' || c2 == '0')
                        return;
                }
                /* 如果下一個(gè)字符是*,表示為注釋,注釋到下一個(gè)*和/,不用記錄 */
            } else if (c2 == '*') {
                StringBuilder con = new StringBuilder("/*");
                for (;;) {
                    c2 = this.next();
                    con.append(c2);
                    /* 一直讀取,直到*和/ */
                    if (c2 == '*') {
                        c2 = this.next();
                        con.append(c2);
                        if (c2 == '/')
                            return;
                        else
                            continue;
                        /* 但是如果未找到匹配的*和/就達(dá)到了結(jié)尾,表示注釋出錯(cuò),要添加記錄 */
                    } else if (c2 == 0) {
                        res.add(new WordExp(con.toString(), WordType.ERROR, this.line, this.character));
                        return;
                    }
                }
                /* 如果不是/和*,表示是除法符號(hào) */
            } else {
                res.add(new WordExp("/", WordType.OPERATOR_A, this.line, this.character));
                return;
            }
            /* 如果讀取到的符號(hào)是分隔符 */
        } else if (delimitersSet.contains(c)) {
            res.add(new WordExp(c + "", WordType.DELIMITER, this.line, this.character));
            return;
        }
        /* 如果讀取到的符號(hào)是算數(shù)運(yùn)算符 */
        else if (operatorsASet.contains(c)) {
            res.add(new WordExp(c + "", WordType.OPERATOR_A, this.line, this.character));
            return;
            /* 如果是邏輯運(yùn)算符,因?yàn)檫壿嬤\(yùn)算符包含<=和>=以及<>,也要超前搜索 */
        } else if (c == '<') {
            c2 = this.next();
            /* 如果是<> */
            if (c2 == '>') {
                res.add(new WordExp("<>", WordType.OPERATOR_L, this.line, this.character));
                return;
                /* 如果是<= */
            } else if (c2 == '=') {
                res.add(new WordExp("<=", WordType.OPERATOR_L, this.line, this.character));
                return;
            } else {
                /* 如果是< */
                this.back();
                res.add(new WordExp(c + "", WordType.OPERATOR_L, this.line, this.character));
                return;
            }
        } else if (c == '>') {
            c2 = this.next();
            /* 如果是>= */
            if (c2 == '=') {
                res.add(new WordExp(">=", WordType.OPERATOR_L, this.line, this.character));
                return;
                /** 如果是> */
            } else {
                this.back();
                res.add(new WordExp(c + "", WordType.OPERATOR_L, this.line, this.character));
                return;
            }
            /* 如果是= */
        } else if (c == '=') {
            res.add(new WordExp(c + "", WordType.OPERATOR_L, this.line, this.character));
            return;
            /* 如果什么也不是,出錯(cuò) */
        } else {
            res.add(new WordExp(c + "", WordType.ERROR, this.line, this.character));
            return;
        }
    }

    public static void main(String[] args) throws IOException {
        Scanner scanner = new Scanner(System.in);
        scanner.useDelimiter("(\r\n\r\n)|(\n\n)|(\r\r)");
        String src = scanner.next();
        Tokenizer tokenizer = new Tokenizer(src);
        List<WordExp> wordExps = tokenizer.analysis();
        // System.out.println(wordExps);
        System.out.printf("%-16s%-16s%-16s%-16s", "單詞", "二元序列", "類 型", "位置(行,列)");
        System.out.println();
        System.out.printf("%-16s(單詞種別,單詞屬性)", "");
        System.out.println();
        for (WordExp word : wordExps) {
            if (word.type != WordType.ERROR) {
                System.out.printf("%-16s%-16s%-16s%-16s", word.word, "(" + word.type.id + ", " + word.word + ")",
                        word.type.type, "(" + word.line + ", " + word.column + ")");
                System.out.println();
            } else {
                System.out.printf("%-16s%-16s%-16s%-16s", word.word, "Error", "Error",
                        "(" + word.line + ", " + word.column + ")");
                System.out.println();
            }
        }
        scanner.close();
    }

    @Override
    public String toString() {
        return " at " + this.index + " [row " + this.line + " column " + this.character + "]";
    }
}

enum WordType {
    /** 關(guān)鍵詞 */
    KEYWORD(1, "關(guān)鍵詞"),
    /** 分隔符 */
    DELIMITER(2, "分隔符"),
    /** 算數(shù)運(yùn)算符 */
    OPERATOR_A(3, "算術(shù)運(yùn)算符"),
    /** 關(guān)系運(yùn)算符 */
    OPERATOR_L(4, "關(guān)系運(yùn)算符"),
    /** 常量 */
    CONSTANT(5, "常量"),
    /** 標(biāo)識(shí)符 */
    IDENTIFIER(6, "標(biāo)識(shí)符"),
    /** 錯(cuò)誤 */
    ERROR(7, "錯(cuò)誤");

    public int id;
    public String type;

    private WordType(int id, String type) {
        this.id = id;
        this.type = type;
    }

    @Override
    public String toString() {
        return this.type;
    }
};

final class WordExp {
    /** 單詞 */
    final String word;
    /** 單詞類型 */
    final WordType type;
    /** 單詞所處的行 */
    final int line;
    /** 單詞所處的列 */
    final int column;

    public WordExp(String word, WordType type, int line, int column) {
        this.word = word;
        this.type = type;
        this.line = line;
        this.column = column;
    }

    @Override
    public String toString() {
        return type.toString() + word + ",出現(xiàn)在 " + " (" + this.line + "," + this.column + ")";
    }
}

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

友情鏈接更多精彩內(nèi)容