語法分析
先實現一個簡單的語法分析,用BNF表示如下:
expression -> equality;
equality -> comparison ( ( "!=" | "==" ) comparison )*;
comparison -> term ( (">" | ">=" | "<" | "<=") term)*;
term -> factor ( ("-" | "+") factor)*;
factor -> unary (( "/" | "*") unary)*;
unary -> ( "!" | "-") unary | primary;
primary -> NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")";
我們用遞歸下降解析來實現,最後用語法樹表示。
參考維基百科中遞歸下降解析器的說明, 遞歸下降是一種自上而下的解析器,由一組相互遞歸的程序(或等價的非遞歸程序)構建而成,其中每個程序都實現了文法中的一個非終結符。因此,這些程序的結構密切反映了它所識別的文法結構。
例如文法
S->cAd
A->ab|a
用下面的方式來解析:
// Skeleton of a recursive-descent parser: one method per non-terminal of the
// example grammar (compilerS for S, compilerA for A). All bodies are elided.
class compilerEngile {
// input: presumably the text to be parsed; how it is stored is elided here.
constructor(input) {
// ...
}
// Handles the non-terminal S; body elided in this example.
compilerS() {
// ...
}
// Handles the non-terminal A; body elided in this example.
compilerA() {
// ...
}
// Entry point of the parse.
run() {
this.compilerS(); // begin with the rule for S
}
}
語法解析
語法解析結(jié)果我們用語法樹表示。 通過上述的BNF可以看到,這里存在遞歸引用。我們用Expr類作為基類表示。 其他的都是Expr的子類。如下圖所示:
/**
 * Base type of the syntax-tree nodes. Each concrete node kind is a static
 * nested subclass with immutable fields.
 */
class Expr {

    /** A binary operation: a left operand, an operator token, and a right operand. */
    static class Binary extends Expr {
        final Expr left;
        final Token operator;
        final Expr right;

        Binary(Expr left, Token operator, Expr right) {
            this.left = left;
            this.operator = operator;
            this.right = right;
        }
    }

    /** A literal value; {@code value} holds whatever object the constructor was given. */
    static class Literal extends Expr {
        final Object value;

        Literal(Object value) {
            this.value = value;
        }
    }
}
然後我們對詞法解析完的token list進行解析,按照上述BNF解析完後,得到一個以Expr為root節點的語法樹。其中的節點是Expr的各個子類。
解析
/**
 * Recursive-descent parser that turns a token list into an {@link Expr} tree.
 *
 * <p>Each private method below handles one precedence level and delegates to
 * the next-tighter level: expression, equality, comparison, term, factor,
 * unary, primary.
 *
 * <p>NOTE(review): the cursor only stops advancing at a token whose type is
 * {@code TokenType.EOF}; the parser therefore assumes the token list ends with
 * such a token — confirm against the scanner.
 */
public class Parser {
    private List<Token> tokens;
    private int position = 0;

    /**
     * Parses {@code tokens} from the beginning and returns the root of the
     * resulting expression tree.
     *
     * @param tokens the token list to parse
     * @return the root expression node
     */
    public Expr parse(List<Token> tokens) {
        this.tokens = tokens;
        // Reset the cursor so the same Parser instance can be reused; without
        // this a second call would resume from where the previous parse stopped.
        this.position = 0;
        return expression();
    }

    /** expression -> equality */
    private Expr expression() {
        return equality();
    }

    /**
     * equality -> comparison ( ( "!=" | "==" ) comparison )*
     *
     * <p>"!=" and "==" are left-associative: each new operator wraps the tree
     * built so far as its left operand.
     */
    private Expr equality() {
        Expr expr = comparison();
        while (match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL)) {
            Token operator = previous();
            Expr right = comparison();
            expr = new Expr.Binary(expr, operator, right);
        }
        return expr;
    }

    /** comparison -> term ( ( ">" | ">=" | "<" | "<=" ) term )* */
    private Expr comparison() {
        Expr expr = term();
        while (match(TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.LESS, TokenType.LESS_EQUAL)) {
            Token operator = previous();
            Expr right = term();
            expr = new Expr.Binary(expr, operator, right);
        }
        return expr;
    }

    /** term -> factor ( ( "-" | "+" ) factor )* */
    private Expr term() {
        Expr expr = factor();
        while (match(TokenType.MINUS, TokenType.PLUS)) {
            Token operator = previous();
            Expr right = factor();
            expr = new Expr.Binary(expr, operator, right);
        }
        return expr;
    }

    /** factor -> unary ( ( "/" | "*" ) unary )* */
    private Expr factor() {
        Expr expr = unary();
        while (match(TokenType.SLASH, TokenType.STAR)) {
            Token operator = previous();
            Expr right = unary();
            expr = new Expr.Binary(expr, operator, right);
        }
        return expr;
    }

    /** unary -> ( "!" | "-" ) unary | primary */
    private Expr unary() {
        if (match(TokenType.BANG, TokenType.MINUS)) {
            Token operator = previous();
            Expr right = unary();
            return new Expr.Unary(operator, right);
        }
        return primary();
    }

    /**
     * primary -> NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")"
     *
     * @throws Error if the current token cannot start a primary expression
     */
    private Expr primary() {
        Token cToken = current();
        // Debug trace of the cursor; note it is printed without a trailing newline.
        System.out.print(String.format("Current Token %s, position %d", cToken, this.position));
        // NUMBER and STRING both carry their literal in the token's value field.
        if (match(TokenType.NUMBER, TokenType.STRING)) {
            return new Expr.Literal(previous().value);
        }
        if (match(TokenType.TRUE)) {
            return new Expr.Literal(true);
        }
        if (match(TokenType.FALSE)) {
            return new Expr.Literal(false);
        }
        if (match(TokenType.NIL)) {
            return new Expr.Literal(null);
        }
        if (match(TokenType.LEFT_PAREN)) {
            Expr expr = expression();
            consume(TokenType.RIGHT_PAREN, "Expect ')' ");
            return new Expr.Grouping(expr);
        }
        throw new Error("Parse error");
    }

    /**
     * If the current token has one of the given types, advances past it and
     * returns {@code true}; otherwise leaves the cursor alone and returns
     * {@code false}. The matched token is then available via {@link #previous()}.
     */
    private boolean match(TokenType ...types) {
        Token token = current();
        for (TokenType type : types) {
            if (token.tokenType == type) {
                this.advance();
                return true;
            }
        }
        return false;
    }

    /** Returns the token at the cursor without consuming it. */
    private Token current() {
        return this.tokens.get(this.position);
    }

    /** Moves the cursor forward one token, unless it is already on the EOF token. */
    private void advance() {
        if (!isEnd()) {
            this.position++;
        }
    }

    /** Returns the token just before the cursor, i.e. the one most recently matched. */
    private Token previous() {
        return this.tokens.get(this.position - 1);
    }

    /**
     * Requires the current token to be of {@code tokenType} and steps past it;
     * otherwise reports {@code errmsg} through {@code Runner.error}.
     * NOTE(review): whether Runner.error aborts the parse is not visible here —
     * if it returns normally, parsing continues after the report.
     */
    private void consume(TokenType tokenType, String errmsg) {
        if (!match(tokenType)) {
            Runner.error(errmsg);
        }
    }

    /** Returns whether the cursor is on the EOF token. */
    private boolean isEnd() {
        return current().tokenType == TokenType.EOF;
    }
}
Expr的定義如下
/**
 * Abstract base type of the syntax-tree nodes. Each concrete node kind is a
 * static nested subclass with immutable fields.
 */
abstract class Expr {

    /** A binary operation: a left operand, an operator token, and a right operand. */
    static class Binary extends Expr {
        final Expr left;
        final Token operator;
        final Expr right;

        Binary(Expr left, Token operator, Expr right) {
            this.left = left;
            this.operator = operator;
            this.right = right;
        }
    }

    /** A prefix operation: an operator token applied to the operand stored in {@code unary}. */
    static class Unary extends Expr {
        final Token operator;
        final Expr unary;

        Unary(Token operator, Expr unary) {
            this.operator = operator;
            this.unary = unary;
        }
    }

    /** A literal value; {@code value} holds whatever object the constructor was given. */
    static class Literal extends Expr {
        final Object value;

        Literal(Object value) {
            this.value = value;
        }
    }

    /** A wrapper around a single inner expression. */
    static class Grouping extends Expr {
        final Expr expr;

        Grouping(Expr expr) {
            this.expr = expr;
        }
    }
}
這個時候可以開始解析
調用Parser.parse(tokens),最終會返回一個以Expr為root的語法樹。這裡,為了方便查看,我們把語法樹輸出來,這個就涉及到對語法樹的遍歷處理。一般用visitor模式來遍歷處理。 這裡用visitor模式,不是因為visitor的名字暗示的這樣,方便查看遍歷,而是對AST的處理有很多種,比方說,打印,檢查類型,執行等。用visitor模式,可以在不修改Expr類的情況下,只擴展新的操作類就可以。
我們把原來的Expr改成如下所示:
/**
 * Abstract base type of the syntax-tree nodes, with visitor support: every
 * node implements {@link #accept(Visitor)} by calling the visitor method that
 * corresponds to its own type.
 */
abstract class Expr {

    /**
     * Operation over expression nodes, with one method per node type.
     *
     * @param <R> the result type produced by each visit method
     */
    interface Visitor<R> {
        R visitBinaryExpr(Binary expr);

        R visitUnaryExpr(Unary expr);

        R visitLiteralExpr(Literal expr);

        R visitGroupingExpr(Grouping expr);
    }

    /**
     * Dispatches to the visitor method matching this node's concrete type.
     *
     * @param visitor the operation to apply
     * @param <R> the visitor's result type
     * @return whatever the visitor method returns
     */
    abstract <R> R accept(Visitor<R> visitor);

    /** A binary operation: a left operand, an operator token, and a right operand. */
    static class Binary extends Expr {
        final Expr left;
        final Token operator;
        final Expr right;

        Binary(Expr left, Token operator, Expr right) {
            this.left = left;
            this.operator = operator;
            this.right = right;
        }

        @Override
        <R> R accept(Visitor<R> visitor) {
            return visitor.visitBinaryExpr(this);
        }
    }

    /** A prefix operation: an operator token applied to the operand stored in {@code unary}. */
    static class Unary extends Expr {
        final Token operator;
        final Expr unary;

        Unary(Token operator, Expr unary) {
            this.operator = operator;
            this.unary = unary;
        }

        @Override
        <R> R accept(Visitor<R> visitor) {
            return visitor.visitUnaryExpr(this);
        }
    }

    /** A literal value; {@code value} holds whatever object the constructor was given. */
    static class Literal extends Expr {
        final Object value;

        Literal(Object value) {
            this.value = value;
        }

        @Override
        <R> R accept(Visitor<R> visitor) {
            return visitor.visitLiteralExpr(this);
        }
    }

    /** A wrapper around a single inner expression. */
    static class Grouping extends Expr {
        final Expr expr;

        Grouping(Expr expr) {
            this.expr = expr;
        }

        @Override
        <R> R accept(Visitor<R> visitor) {
            return visitor.visitGroupingExpr(this);
        }
    }
}
創建AstPrinter類, 按照想要的輸出格式,處理每個節點
/**
 * Visitor that renders an expression tree as a parenthesized prefix string,
 * e.g. {@code (name child child)}.
 */
public class AstPrinter implements Expr.Visitor<String> {

    /**
     * Renders {@code expr} by dispatching this printer through the node's
     * {@code accept} method.
     *
     * @param expr the root of the tree to render
     * @return the rendered text
     */
    String print(Expr expr) {
        return expr.accept(this);
    }

    /** Renders a binary node as {@code (operator-name left right)}. */
    @Override
    public String visitBinaryExpr(Binary expr) {
        return parenthesize(expr.operator.name, expr.left, expr.right);
    }

    /** Renders a unary node as {@code (operator-name operand)}. */
    @Override
    public String visitUnaryExpr(Unary expr) {
        return parenthesize(expr.operator.name, expr.unary);
    }

    /** Renders a literal via its {@code toString()}, or as {@code nil} when the value is null. */
    @Override
    public String visitLiteralExpr(Literal expr) {
        return expr.value == null ? "nil" : expr.value.toString();
    }

    /** Renders a grouping node as {@code (group inner)}. */
    @Override
    public String visitGroupingExpr(Grouping expr) {
        return parenthesize("group", expr.expr);
    }

    /**
     * Builds {@code (name e1 e2 ...)}, rendering each sub-expression with
     * this same printer.
     */
    private String parenthesize(String name, Expr ...exprs) {
        StringBuilder out = new StringBuilder();
        out.append("(");
        out.append(name);
        for (int i = 0; i < exprs.length; i++) {
            String rendered = exprs[i].accept(this);
            out.append(" ").append(rendered);
        }
        return out.append(")").toString();
    }
}
從文件輸入測試語法,試試
// Pipeline: source text -> tokens -> expression tree -> printed form.
String source = readTextFile();

// Scan the source text into a token list.
List<Token> scanned = new Scanner().scan(source);

// Parse the tokens into an expression tree.
Expr root = new Parser().parse(scanned);

// Render the tree with AstPrinter and print it.
String rendered = new AstPrinter().print(root);
System.out.println("Result " + rendered);
最終輸出
(+ (* 3.0 5.0) 23.0)