オリジナル言語のLLVMフロントエンドをKonohaScriptで実装する その3
LLVMのチュートリアルページ(http://llvm.org/docs/tutorial/LangImpl1.html)の2章まで追っていきます。ここまではチュートリアルで記述されているものとほぼ一緒です。LLVM IRを作成し、KonohaScriptの関数として実行する際には若干手を加えている部分があります。
今回は文字列を受け取る所から、ASTを作成する所までです。
- Lexer
文字列を標準入力から受け取って、トークンへと分解していきます。
KonohaScriptではSystem.readLine()関数で行単位での文字列の入力を受けとることが出来ます。
- Parser
Lexerから受け取ったトークン列を元に、Abstract Syntax Tree (AST, 抽象構文木) を作ります。
ここまでで以下のような動作が確認出来ます。
$ konoha kaleidoscope.k
ready>def foo(x y) x+foo(y, 4.0);
ready>(handleDefinition:330) Parsed a function definition
ready>def foo(x y) x+y;
ready>(handleDefinition:330) Parsed a function definition
ready>y;
ready>(handleTopLevelExpression:339) Parsed a top-level expr
ソースコード
370行程です。5章まで実装している段階で700行を超えているのですが、こちらに直接書き込むのは既に限界のようです。
GitHubの利用を検討してみます。
コピペしても動くはずです。
using konoha.lang.*; static Map<String, int> binopPrecedence = {}; int tok_eof = -1; int tok_def = -2; int tok_extern = -3; int tok_identifier = -4; int tok_number = -5; static int numVal = 0; static String identifierStr = ""; static boolean isspace (String s) { int c = ((Bytes)s[0])[0]; return (c == ' '); } static boolean isalpha (String s) { int c = ((Bytes)s[0])[0]; return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); } static boolean isdigit (String s) { int c = ((Bytes)s[0])[0]; return (c >= '0' && c <= '9'); } static boolean isalnum (String s) { return (isalpha(s) || isdigit(s)); } static String str = null; static int cur = 0; String getchar () { String res; while (str == null || cur == |str| + 1) { str = readLine(); cur = 0; } if (cur == |str|) { res = " "; } else { res = str[cur]; } cur++; return res; } String lastChar = " "; static int gettok () { while (isspace(lastChar)) { lastChar = getchar(); } if (isalpha(lastChar)) { identifierStr = ""; identifierStr = lastChar; while (isalnum((lastChar = getchar()))) { identifierStr += lastChar; } if (identifierStr == "def") return tok_def; if (identifierStr == "extern") return tok_extern; if (identifierStr == "exit" || identifierStr == "quit" || identifierStr == "bye" || identifierStr == "q") { print "bye"; return tok_eof; } return tok_identifier; } if (isdigit(lastChar) || lastChar == ".") { String numStr = ""; do { numStr += lastChar; lastChar = getchar(); } while (isdigit(lastChar) || lastChar == "."); numVal = (int)numStr; return tok_number; } if (lastChar == "#") { do { lastChar = getchar(); } while (lastChar != "\n"); return gettok(); } String thisChar = lastChar; lastChar = getchar(); return ((Bytes)thisChar[0])[0]; } int curTok; static int getNextToken () { curTok = gettok (); return curTok; } class ExprAST { ExprAST () {} } class NumberExprAST extends ExprAST { int val; NumberExprAST (int val) { this.val = val; } } ExprAST error (String str) { print "Error: ", str; ExprAST res = null; return res; } PrototypeAST errorP (String str) { error (str); PrototypeAST res = null; return res; } FunctionAST errorF (String str) { error (str); FunctionAST res = null; return res; } class VariableExprAST extends ExprAST { String name; VariableExprAST (String name) { this.name = name; } } class BinaryExprAST extends ExprAST { int op; ExprAST lhs, rhs; BinaryExprAST (int op, lhs, rhs) { this.lhs = lhs; this.rhs = rhs; this.op = op; } } class CallExprAST extends ExprAST { String callee; ExprAST[] args; CallExprAST (String callee, ExprAST[] args) { this.callee = callee; this.args = args; } } class PrototypeAST { String name; String[] args; PrototypeAST (String name, String[] args) { this.name = name; this.args = args; } Function codeGen (LLVMGenerator gen) { } } class FunctionAST { PrototypeAST proto; ExprAST body; FunctionAST (PrototypeAST proto, ExprAST body) { this.proto = proto; this.body = body; } Function codeGen (LLVMGenerator gen) { } } static ExprAST parsePrimary (); static ExprAST parseBinOpRHS (int exprPrec, ExprAST lhs); static ExprAST parseExpression () { ExprAST lhs = parsePrimary (); if (lhs == null) return null; return parseBinOpRHS (0, lhs); } static ExprAST parseParenExpr () { getNextToken(); ExprAST res = ParseExpression(); if (res == null) return null; if (curTok != ')') return error ("expected ')'"); getNextToken(); return res; } static ExprAST parseNumberExpr () { getNextToken (); ExprAST res = new NumberExprAST (numVal); return res; } static ExprAST parseIdentifierExpr () { String idName = identifierStr; getNextToken (); if (curTok != '(') { return new VariableExprAST (idName); } getNextToken (); ExprAST[] args = []; if (curTok != ')') { while (true) { ExprAST arg = ParseExpression (); if (arg == null) return null; args.add (arg); if (curTok == ')') break; if (curTok != ',') { print (char)curTok; return error("Expected ')' or ',' in argument list"); } getNextToken (); } } getNextToken (); return new CallExprAST (idName, args); } static ExprAST parsePrimary () { if (curTok == tok_identifier) { return parseIdentifierExpr (); } else if (curTok == tok_number) { return parseNumberExpr (); } else if (curTok == '(') { return parseParenExpr (); } else { return error ("unknown token when expecting an expression"); } } static int getTokPrecedence () { int tokPrec = binopPrecedence[(String)curTok]; if (tokPrec <= 0) return -1; return tokPrec; } static ExprAST parseBinOpRHS (int exprPrec, ExprAST lhs) { while (true) { int tokPrec = getTokPrecedence (); if (tokPrec < exprPrec) { return lhs; } int binOp = curTok; getNextToken (); ExprAST rhs = parsePrimary (); if (rhs == null) return null; int nextPrec = getTokPrecedence (); if (tokPrec < nextPrec) { rhs = parseBinOpRHS (tokPrec + 1, rhs); if (rhs == null) return null; } lhs = new BinaryExprAST (binOp, lhs, rhs); } print "error"; return null; } static PrototypeAST parsePrototype () { if (curTok != tok_identifier) { return errorP ("Expected function name in prototype"); } String fnName = identifierStr; getNextToken (); if (curTok != 40) { return errorP("Expected '(' in prototype"); } String[] argNames = []; while (getnextToken () == tok_identifier) { argNames.add (identifierStr); } if (curTok != 41) { return errorP ("Expected ')' in prototype"); } getNextToken (); return new PrototypeAST (fnName, argNames); } static FunctionAST parseDefinition () { getNextToken (); PrototypeAST proto = parsePrototype (); if (proto == null) return null; ExprAST e = ParseExpression (); if (e != null) { return new FuctionAST (proto, e); } return null; } static PrototypeAST parseExtern () { getNextToken (); return parsePrototype (); } static ExprAST parseTopLevelExpr () { ExprAST e = ParseExpression (); if (e != null) { return e; } return null; } static void handleDefinition () { FunctionAST f = parseDefinition (); if (f != null) { print "Parsed a function definition"; } else { getNextToken (); } } static void handleTopLevelExpression () { ExprAST f = parseTopLevelExpr (); if (f != null) { print "Parsed a top-level expr"; } else { getNextToken (); } } static void handleExtern () { PrototypeAST p = parseExtern (); if (p != null) { print "Parsed an extern"; } else { getNextToken (); } } static void mainLoop () { while (true) { System.out.print("ready>"); OUT.flush (); switch (curTok) { case ';': { getNextToken (); break;} case -2: { handleDefinition (); break;} case -3: { handleExtern (); break;} case -1: return; default: { handleTopLevelExpression (); break;} } } } void main (String[] args) { binopPrecedence["60"] = 10; // < binopPrecedence["62"] = 10; // > binopPrecedence["43"] = 20; // + binopPrecedence["45"] = 20; // - binopPrecedence["42"] = 40; // * OUT << "ready>"; OUT.flush (); getNextToken (); MainLoop (); return; }