var typ = require("./representation.js");
var tool = require("./tools.js");

// Tokenization

// Keyword patterns end with a negative lookahead so a keyword is only
// recognised when it is not glued to further identifier characters
// (otherwise "define" would lex as `def` followed by `ine`). The comma is
// treated as a boundary too, because tokmatch tries the comma pattern before
// the identifier pattern, so "true,false" still lexes as three tokens.
var left_paren = /^\(/;
var right_paren = /^\)/;
var left_brace = /^\{/;
var right_brace = /^\}/;
var def = /^def(?![^\s\.\(\)\{\}\[\]\",])/;
var left_square = /^\[/;
var right_square = /^\]/;
var comma = /^,/;
var truelit = /^true(?![^\s\.\(\)\{\}\[\]\",])/;
var falselit = /^false(?![^\s\.\(\)\{\}\[\]\",])/;
var stringlit = /^\"[^\"]*\"/;
var number = /^(\+|-)?\d+(\.\d+)?/;
var ifexp = /^if(?![^\s\.\(\)\{\}\[\]\",])/;
var thenexp = /^then(?![^\s\.\(\)\{\}\[\]\",])/;
var elsexp = /^else(?![^\s\.\(\)\{\}\[\]\",])/;
var identifier = /^[^\s\.\(\)\{\}\[\]\""]+/;
var lambda = /^lambda(?![^\s\.\(\)\{\}\[\]\",])/;
var arrow = /^->/;

// Token patterns in the order they are tried; the first match wins.
// "number" is a placeholder that tokmatch refines to "float" or "integer".
var tokenPatterns = [
  ["left_paren", left_paren],
  ["right_paren", right_paren],
  ["left_brace", left_brace],
  ["right_brace", right_brace],
  ["left_square", left_square],
  ["right_square", right_square],
  ["def", def],
  ["lambda", lambda],
  ["arrow", arrow],
  ["comma", comma],
  ["truelit", truelit],
  ["falselit", falselit],
  ["stringlit", stringlit],
  ["number", number],
  ["ifexp", ifexp],
  ["thenexp", thenexp],
  ["elsexp", elsexp],
  ["identifier", identifier]
];

/**
 * Matches one token at the very start of `t`.
 *
 * @param {string} t Remaining source text (the caller strips leading whitespace).
 * @returns {Array|undefined} `[[type, text], rest]` where `rest` is the input
 *   after the token, or undefined (after logging) when no pattern matches.
 */
function tokmatch(t) {
  for (var i = 0; i < tokenPatterns.length; i++) {
    var m = t.match(tokenPatterns[i][1]);
    if (m) {
      var ttype = tokenPatterns[i][0];
      if (ttype === "number") {
        // A decimal point distinguishes floats from integers.
        ttype = m[0].indexOf(".") !== -1 ? "float" : "integer";
      }
      return [[ttype, m[0]], m.input.slice(m[0].length)];
    }
  }
  console.log("Error: unmatched string: " + t);
  return undefined;
}

/**
 * Splits source text into `[type, text]` tokens, skipping whitespace.
 * Stops early (returning the tokens found so far) when tokmatch fails.
 *
 * @param {string} exp Source text.
 * @returns {Array} Tokens in source order.
 */
function tokenize(exp) {
  var rest = exp;
  var tokens = [];
  while (rest !== "") {
    if (/\s/.test(rest[0])) {
      rest = rest.slice(1); // skip whitespace
      continue;
    }
    var current = tokmatch(rest);
    if (!current) break;
    rest = current[1];
    tokens.push(current[0]);
  }
  return tokens;
}

// The parser works on a REVERSED token array, so the next token to consume
// is the last element and consuming it is a cheap pop().

/** Returns the next token (last array element), or undefined if none. */
function fst(ts) {
  return ts[ts.length - 1];
}

/** Returns the token after the next one, or undefined if none. */
function snd(ts) {
  return ts[ts.length - 2];
}

/** Type of the next token for error messages, or a marker at end of input. */
function describeNext(tokens) {
  var next = fst(tokens);
  return next ? next[0] : "end of source";
}

/**
 * Looks up operator info for `name` using an own-property check, so that
 * identifiers such as "toString" or "constructor" are not mistaken for
 * operators through the prototype chain.
 * NOTE(review): assumes typ.OPInfo is a plain object keyed by operator text.
 *
 * @returns {*} The OPInfo entry, or undefined when `name` is not an operator.
 */
function operatorInfo(name) {
  if (Object.prototype.hasOwnProperty.call(typ.OPInfo, name)) {
    return typ.OPInfo[name];
  }
  return undefined;
}

/**
 * Checks that the type of the next token is NOT one of `checks`.
 *
 * @param {Array} tokens Reversed token array.
 * @param {Array} checks Token types that are not allowed next.
 * @returns {boolean|undefined} true when the next token type is allowed,
 *   false when it is one of `checks`, and undefined when fewer than two
 *   tokens remain (an error is logged if exactly one remains; with none
 *   left the caller reports the end of input).
 */
function notFollowedBy(tokens, checks) {
  var next = fst(tokens);
  if (!next) return undefined;
  if (!snd(tokens)) {
    console.log("Error: " + next[0] + " must be followed by something");
    return undefined;
  }
  return checks.indexOf(next[0]) === -1;
}

/**
 * Returns a predicate that tests membership in `props`.
 * A falsy argument is returned unchanged rather than converted to false.
 */
function makeChecker(props) {
  return function (x) {
    return x && props.indexOf(x) !== -1;
  };
}

/**
 * Parses a run of expressions for as long as the token-type prediction
 * `valid` holds and the parsed nodes satisfy `exprType`.
 *
 * @param {Function} exprType Predicate on a parsed node's `exprType`.
 * @param {Function} valid Predicate on a token type.
 * @param {Array} tokens Reversed token array (consumed in place).
 * @returns {Array|undefined} The parsed nodes, or undefined (after logging)
 *   when the first token is missing or not accepted by `valid`.
 */
function parseMany(exprType, valid, tokens) {
  var head = fst(tokens);
  if (!head) {
    console.log("Unexpected end of source");
    return undefined;
  }
  if (!valid(head[0])) {
    console.log("Error: unexpected token " + head);
    return undefined;
  }
  var results = [parse(tokens)];
  // make sure there are at least 2 tokens to parse
  if (tokens.length > 1 && valid(fst(tokens)[0])) {
    while (valid(snd(tokens)[0])) {
      results.push(parse(tokens));
      if (!exprType(fst(results).exprType)) break;
      // parse() may have consumed several tokens; stop before snd() runs dry
      if (tokens.length <= 1) break;
    }
  }
  // same validity check once more for the final element, if any is left
  if (fst(tokens) && valid(fst(tokens)[0])) results.push(parse(tokens));
  return results;
}

/**
 * Parses the remainder of a definition; the `def` token is already consumed.
 * @returns {Object|undefined} A typ.Def node, or undefined after logging.
 */
function parseDef(tokens) {
  if (notFollowedBy(tokens, ["identifier"])) {
    console.log("Error: def must be followed by identifier, not " + describeNext(tokens));
    return undefined;
  }
  var ident = parse(tokens);
  var value = parse(tokens);
  return new typ.Def(ident, value);
}

/**
 * Parses the remainder of a conditional; the `if` token is already consumed.
 * The else branch is optional.
 * @returns {Object|undefined} A typ.If node, or undefined after logging.
 */
function parseIf(tokens) {
  if (!notFollowedBy(tokens, ["def", "comma", "lambda"])) {
    console.log("Error: ``if'' cannot be followed by " + describeNext(tokens));
    return undefined;
  }
  var ifC = parse(tokens);
  if (!fst(tokens) || fst(tokens)[0] !== "thenexp") {
    console.log("Error: if must be followed by then, not " + describeNext(tokens));
    return undefined;
  }
  tokens.pop(); // consume `then`
  var thenC = parse(tokens);
  if (fst(tokens) && fst(tokens)[0] === "elsexp") {
    tokens.pop(); // consume `else`
    var elseC = parse(tokens);
    return new typ.If(ifC, thenC, elseC);
  }
  return new typ.If(ifC, thenC);
}

var validFormPar = makeChecker(["identifier"]);
var validName = makeChecker(["Name"]);

/**
 * Parses the remainder of a lambda; the `lambda` token is already consumed.
 * Expects parameters, an arrow, then the body.
 * @returns {Object|undefined} A typ.FuncT node, or undefined after logging.
 */
function parseLambda(tokens) {
  var parameters = parseMany(validName, validFormPar, tokens);
  if (!fst(tokens) || fst(tokens)[0] !== "arrow") {
    console.log("Error: arrow must follow parameters in lambda, not " + describeNext(tokens));
    return undefined;
  }
  tokens.pop(); // consume `->`
  var body = parse(tokens);
  return new typ.FuncT(parameters, body);
}

var invalidArguments = ["def", "comma", "right_paren", "right_square", "right_brace", "left_brace"];
// NOTE(review): assumes tool.compose(f, g) applies g first and then f - confirm in tools.js.
var validArgument = tool.compose(tool.not, makeChecker(invalidArguments));
var validArgTypes = tool.compose(tool.not, makeChecker(["Definition"]));
var validOperator = makeChecker(["identifier"]);

/**
 * Parses one expression from the front of the token stream.
 * Terminates the process with exit code 1 on end of input or on a token
 * that cannot start an expression.
 *
 * @param {Array} tokens Reversed token array (consumed in place).
 * @returns {Object} The parsed node.
 */
function parse(tokens) {
  var head = fst(tokens);
  if (!head) {
    console.log("Unexpected end of source");
    process.exit(1);
  }
  var toktype = head[0];
  var token = head[1];
  tokens.pop();
  switch (toktype) {
    case "stringlit":
      return new typ.StrT(token);
    case "lambda":
      return checkParse(parseLambda(tokens));
    case "integer":
      return new typ.IntT(token);
    case "float":
      return new typ.FloatT(token);
    case "identifier":
      return new typ.Name(token);
    case "truelit":
    case "falselit":
      return new typ.BoolT(token);
    case "def":
      return checkParse(parseDef(tokens));
    case "ifexp":
      return checkParse(parseIf(tokens));
    case "left_paren":
      return computeApp(tokens);
    default:
      console.log("Unexpected token: " + toktype);
      process.exit(1);
  }
}

/** Returns `p` unchanged, or terminates the process when `p` is undefined. */
function checkParse(p) {
  if (p === undefined) {
    console.log("Quitting, could not finish parsing!");
    process.exit(1);
  }
  return p;
}

/**
 * Parses a parenthesised application (infix or prefix); the opening
 * parenthesis is already consumed. Terminates the process on end of input
 * or when the closing parenthesis is missing.
 */
function computeApp(tokens) {
  var lhs = parse(tokens);
  var next = fst(tokens);
  if (!next) {
    console.log("Unexpected end of source");
    process.exit(1);
  }
  var isInfix = Boolean(operatorInfo(next[1]));
  // Infix yields a finished node; prefix yields the argument list.
  var parsed = isInfix
    ? parseInfix(tokens, 1, lhs)
    : parseMany(validArgTypes, validArgument, tokens);
  if (!fst(tokens) || fst(tokens)[0] !== "right_paren") {
    console.log("Error: mismatched parentheses");
    process.exit(1);
  }
  tokens.pop(); // consume `)`
  return isInfix ? parsed : typ.makeApp(lhs, parsed);
}

/*
 * Parses infix expressions by precedence climbing.
 * See this for more info and an implementation in Python:
 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
 *
 * tokens  - reversed token array (consumed in place)
 * minPrec - lowest operator precedence this call may consume
 * lhs     - optional already-parsed left operand; parsed here when omitted
 */
function parseInfix(tokens, minPrec, lhs) {
  var left = lhs ? lhs : parse(tokens);
  while (true) {
    var cur = fst(tokens);
    if (!cur) {
      console.log("Unexpected end of source");
      process.exit(1);
    }
    var opinfo = operatorInfo(cur[1]);
    if (!opinfo || opinfo[0] < minPrec) break;
    var op = new typ.Name(cur[1]);
    var prec = opinfo[0];
    var assoc = opinfo[1];
    // Left-associative operators must not re-consume their own precedence level.
    var nextMinPrec = assoc === "Left" ? prec + 1 : prec;
    tokens.pop(); // remove the operator token
    var rhs = parseInfix(tokens, nextMinPrec);
    left = typ.makeApp(op, [left, rhs]);
  }
  return left;
}

// Pretty printing

/** Prints a Name node; agrees with the "Name" branch of pprint. */
function pprintName(ident) {
  return ident.val;
}

/** Prints a Function node whose `p` is either a single Name or an array. */
function pprintFunc(func) {
  if (func.p.exprType === "Name") {
    return "\\ " + pprint(func.p) + " -> " + pprint(func.body);
  }
  return "\\ " + func.p.map(pprint).join(" ") + " -> " + pprint(func.body);
}

/** Prints an Application node, with or without an argument in `p`. */
function pprintApp(app) {
  if (!app.p) return "(" + pprint(app.func) + ")";
  return "((" + pprint(app.func) + ") " + pprint(app.p) + ")";
}

/** Prints a Definition node as `let <ident> = <val>`. */
function pprintDef(def) {
  return "let " + pprint(def.ident) + " = " + pprint(def.val);
}

/** Prints an If node; the else part is emitted only when present. */
function pprintIf(ifexp) {
  var text = "if " + pprint(ifexp.condition) + " then " + pprint(ifexp.thenexp);
  if (ifexp.elseexp) text += " else " + pprint(ifexp.elseexp);
  return text;
}

/**
 * Renders a parsed node as text, dispatching on `expr.exprType`.
 * Returns undefined for an unrecognised exprType.
 */
function pprint(expr) {
  switch (expr.exprType) {
    case "Name":
    case "Integer":
    case "Float":
    case "String":
      return expr.val;
    case "Bool":
      return expr.val ? "True" : "False";
    case "Application":
      return pprintApp(expr);
    case "Definition":
      return pprintDef(expr);
    case "If":
      return pprintIf(expr);
    case "Function":
      return pprintFunc(expr);
    default:
      return undefined;
  }
}

// Entry point: the command-line arguments form the source text.
var input = process.argv.slice(2).join(" ");
// Reverse so the parser can consume tokens with pop() (see fst/snd).
var tokenized = tokenize(input).reverse();
console.log(pprint(parse(tokenized)));