オリジナル言語のLLVMフロントエンドをKonohaScriptで実装する その3

 LLVMのチュートリアルページ(http://llvm.org/docs/tutorial/LangImpl1.html)の2章まで追っていきます。ここまではチュートリアルで記述されているものとほぼ一緒です。LLVM IRを作成し、KonohaScriptの関数として実行する際には若干手を加えている部分があります。
 今回は文字列を受け取る所から、ASTを作成する所までです。

  • Lexer

 文字列を標準入力から受け取って、トークンへと分解していきます。
KonohaScriptではSystem.readLine()関数で行単位での文字列の入力を受けとることが出来ます。

  • Parser

 Lexerから受け取ったトークン列を元に、Abstract Syntax Tree (AST, 抽象構文木) を作ります。

ここまでで以下のような動作が確認出来ます。

$ konoha kaleidoscope.k
ready>def foo(x y) x+foo(y, 4.0);
ready>(handleDefinition:330) Parsed a function definition
ready>def foo(x y) x+y;
ready>(handleDefinition:330) Parsed a function definition
ready>y;
ready>(handleTopLevelExpression:339) Parsed a top-level expr

ソースコード

 370行程です。5章まで実装している段階で700行を超えているのですが、こちらに直接書き込むのは既に限界のようです。
GitHubの利用を検討してみます。

 コピペしても動くはずです。

using konoha.lang.*;

static Map<String, int> binopPrecedence = {};

int tok_eof = -1; int tok_def = -2; int tok_extern = -3;
int tok_identifier = -4; int tok_number = -5;

static int numVal = 0;
static String identifierStr = "";

static boolean isspace (String s)
{
    int c = ((Bytes)s[0])[0];
    return (c == ' ');
}

static boolean isalpha (String s)
{
    int c = ((Bytes)s[0])[0];
    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));
}

static boolean isdigit (String s)
{
    int c = ((Bytes)s[0])[0];
    return (c >= '0' && c <= '9');
}

static boolean isalnum (String s)
{
    return (isalpha(s) || isdigit(s));
}

static String str = null;
static int cur = 0;

String getchar ()
{
    String res;
    while (str == null || cur == |str| + 1) {
        str = readLine();
        cur = 0;
    }
    if (cur == |str|) {
        res = " ";
    } else {
        res = str[cur];
    }
    cur++;
    return res;

}
String lastChar = " ";
static int gettok ()
{
    while (isspace(lastChar)) {
        lastChar = getchar();
    }

    if (isalpha(lastChar)) {
        identifierStr = "";
        identifierStr = lastChar;
        while (isalnum((lastChar = getchar()))) {
            identifierStr += lastChar;
        }
        if (identifierStr == "def") return tok_def;
        if (identifierStr == "extern") return tok_extern;
        if (identifierStr == "exit" ||
            identifierStr == "quit" ||
            identifierStr == "bye"  ||
            identifierStr == "q") {
            print "bye";
            return tok_eof;
        }
        return tok_identifier;
    }

    if (isdigit(lastChar) || lastChar == ".") {
        String numStr = "";
        do {
            numStr += lastChar;
            lastChar = getchar();
        } while (isdigit(lastChar) || lastChar == ".");
        numVal = (int)numStr;
        return tok_number;
    }

    if (lastChar == "#") {
        do {
            lastChar = getchar();
        } while (lastChar != "\n");
        return gettok();
    }

    String thisChar = lastChar;
    lastChar = getchar();
    return ((Bytes)thisChar[0])[0];
}

int curTok;

static int getNextToken () {
    curTok = gettok ();
    return curTok;
}


class ExprAST {
    ExprAST () {}
}

class NumberExprAST extends ExprAST {
    int val;
    NumberExprAST (int val) {
        this.val = val;
    }
}

ExprAST error (String str) {
    print "Error: ", str;
    ExprAST res = null;
    return res;
}

PrototypeAST errorP (String str) { 
    error (str); 
    PrototypeAST res = null;
    return res;
}
FunctionAST errorF (String str) {
    error (str);
    FunctionAST res = null;
    return res;
}

class VariableExprAST extends ExprAST {
    String name;
    VariableExprAST (String name) {
        this.name = name;
    }
}

class BinaryExprAST extends ExprAST {
    int op;
    ExprAST lhs, rhs;
    BinaryExprAST (int op, lhs, rhs) {
        this.lhs = lhs;
        this.rhs = rhs;
        this.op = op;
    }
}

class CallExprAST extends ExprAST {
    String callee;
    ExprAST[] args;
    CallExprAST (String callee, ExprAST[] args) {
        this.callee = callee;
        this.args = args;
    }
}

class PrototypeAST {
    String name;
    String[] args;
    PrototypeAST (String name, String[] args) {
        this.name = name;
        this.args = args;
    }
    Function codeGen (LLVMGenerator gen) {
    }
}

class FunctionAST {
    PrototypeAST proto;
    ExprAST body;
    FunctionAST (PrototypeAST proto, ExprAST body) {
        this.proto = proto;
        this.body = body;
    }
    Function codeGen (LLVMGenerator gen) {
    }
}


static ExprAST parsePrimary ();
static ExprAST parseBinOpRHS (int exprPrec, ExprAST lhs);

static ExprAST parseExpression () {
    ExprAST lhs = parsePrimary ();
    if (lhs == null) return null;
    return parseBinOpRHS (0, lhs);
}

static ExprAST parseParenExpr () {
    getNextToken();
    ExprAST res = ParseExpression();
    if (res == null) return null;
    if (curTok != ')') return error ("expected ')'");
    getNextToken();
    return res;
}

static ExprAST parseNumberExpr () {
    getNextToken ();
    ExprAST res = new NumberExprAST (numVal);
    return res;
}

static ExprAST parseIdentifierExpr () {
    String idName = identifierStr;
    getNextToken ();
    if (curTok != '(') {
        return new VariableExprAST (idName);
    }
    getNextToken ();
    ExprAST[] args = [];
    if (curTok != ')') {
        while (true) {
            ExprAST arg = ParseExpression ();
            if (arg == null) return null;
            args.add (arg);
            if (curTok == ')') break;
            if (curTok != ',') {
                print (char)curTok;
                return error("Expected ')' or ',' in argument list");
            }
            getNextToken ();
        }
    }
    getNextToken ();
    return new CallExprAST (idName, args);
}

static ExprAST parsePrimary () {
    if (curTok == tok_identifier) {
        return parseIdentifierExpr ();
    } else if (curTok == tok_number) {
        return parseNumberExpr ();
    } else if (curTok == '(') {
        return parseParenExpr ();
    } else {
        return error ("unknown token when expecting an expression");
    }
}

static int getTokPrecedence () {
    int tokPrec = binopPrecedence[(String)curTok];
    if (tokPrec <= 0) return -1;
    return tokPrec;
}

static ExprAST parseBinOpRHS (int exprPrec, ExprAST lhs) {
    while (true) {
        int tokPrec = getTokPrecedence ();
        if (tokPrec < exprPrec) {
            return lhs;
        }
        int binOp = curTok;
        getNextToken ();
        ExprAST rhs = parsePrimary ();
        if (rhs == null) return null;
        int nextPrec = getTokPrecedence ();
        if (tokPrec < nextPrec) {
            rhs = parseBinOpRHS (tokPrec + 1, rhs);
            if (rhs == null) return null;
        }
        lhs = new BinaryExprAST (binOp, lhs, rhs);
    }
    print "error";
    return null;
}

static PrototypeAST parsePrototype () {
    if (curTok != tok_identifier) {
        return errorP ("Expected function name in prototype");
    }
    String fnName = identifierStr;
    getNextToken ();
    if (curTok != 40) {
        return errorP("Expected '(' in prototype");
    }
    String[] argNames = [];
    while (getnextToken () == tok_identifier) {
        argNames.add (identifierStr);
    }
    if (curTok != 41) {
        return errorP ("Expected ')' in prototype");
    }
    getNextToken ();
    return new PrototypeAST (fnName, argNames);
}

static FunctionAST parseDefinition () {
    getNextToken ();
    PrototypeAST proto = parsePrototype ();
    if (proto == null) return null;
    ExprAST e = ParseExpression ();
    if (e != null) {
        return new FuctionAST (proto, e);
    }
    return null;
}

static PrototypeAST parseExtern () {
    getNextToken ();
    return parsePrototype ();
}

static ExprAST parseTopLevelExpr () {
    ExprAST e = ParseExpression ();
    if (e != null) {
        return e;
    }
    return null;
}

static void handleDefinition () {
    FunctionAST f = parseDefinition ();
    if (f != null) {
            print "Parsed a function definition";
    } else {
        getNextToken ();
    }
}

static void handleTopLevelExpression () {
    ExprAST f = parseTopLevelExpr ();
    if (f != null) {
        print "Parsed a top-level expr";
    } else {
        getNextToken ();
    }
}

static void handleExtern () {
    PrototypeAST p = parseExtern ();
    if (p != null) {
        print "Parsed an extern";
    } else {
        getNextToken ();
    }
}


static void mainLoop () {
    while (true) {
        System.out.print("ready>");
        OUT.flush ();
        switch (curTok) {
            case ';': { getNextToken (); break;}
            case -2:  { handleDefinition (); break;}
            case -3:  { handleExtern (); break;}
            case -1:  return;
            default:  { handleTopLevelExpression (); break;}
        }
    }
}

void main (String[] args)
{
    binopPrecedence["60"] = 10; // <
    binopPrecedence["62"] = 10; // >
    binopPrecedence["43"] = 20; // +
    binopPrecedence["45"] = 20; // -
    binopPrecedence["42"] = 40; // *
    OUT << "ready>";
    OUT.flush ();
    getNextToken ();
    MainLoop ();
    return;
}