commit 058719a884dd4dd7eb1e5c55ecb970657604e58d
Author: Wesley Kerfoot
Date:   Tue Jun 18 12:06:13 2013 -0400

    adding stuff

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/parse.js b/parse.js
new file mode 100644
index 0000000..1597f85
--- /dev/null
+++ b/parse.js
@@ -0,0 +1,337 @@
+var typ = require("./representation.js");
+var tool = require("./tools.js");
+
+// Tokenization
+//
+// Every pattern is anchored with ^ so that it only matches at the start of
+// the remaining input.
+
+var left_paren = /^\(/;
+var right_paren = /^\)/;
+
+var left_brace = /^\{/;
+var right_brace = /^\}/;
+
+var left_square = /^\[/;
+var right_square = /^\]/;
+var comma = /^,/;
+
+var stringlit = /^\"[^\"]*\"/;
+
+var number = /^(\+|-)?\d+(\.\d+)?/;
+
+var arrow = /^->/;
+
+var identifier = /^[^\s\.\(\)\{\}\[\]\"]+/;
+
+// Negative lookahead appended to every reserved word: the word only counts
+// as a keyword when it is not immediately followed by another identifier
+// character. Without it, names such as "define" or "iffy" are split into a
+// keyword plus leftovers. A comma still ends a keyword, so "true," keeps
+// lexing as truelit followed by comma.
+var keywordEnd = "(?![^\\s\\.,\\(\\)\\{\\}\\[\\]\"])";
+
+//Builds the anchored pattern for the reserved word ``word''
+function keyword(word) {
+  return new RegExp("^" + word + keywordEnd);
+}
+
+var def = keyword("def");
+var lambda = keyword("lambda");
+var truelit = keyword("true");
+var falselit = keyword("false");
+var ifexp = keyword("if");
+var thenexp = keyword("then");
+var elsexp = keyword("else");
+
+// Token types paired with their patterns. They are tried in this order and
+// the first match wins, so the keywords have to come before ``identifier''.
+var tokenTable = [
+  ["left_paren", left_paren],
+  ["right_paren", right_paren],
+  ["left_brace", left_brace],
+  ["right_brace", right_brace],
+  ["left_square", left_square],
+  ["right_square", right_square],
+  ["def", def],
+  ["lambda", lambda],
+  ["arrow", arrow],
+  ["comma", comma],
+  ["truelit", truelit],
+  ["falselit", falselit],
+  ["stringlit", stringlit],
+  ["number", number],
+  ["ifexp", ifexp],
+  ["thenexp", thenexp],
+  ["elsexp", elsexp],
+  ["identifier", identifier]
+];
+
+//Matches a single token at the start of ``t''.
+//Returns [[type, text], rest] where ``rest'' is ``t'' without the matched
+//text. Logs an error and returns undefined when no pattern matches.
+//A number is reported as "float" when its text contains a "." and as
+//"integer" otherwise.
+function tokmatch(t) {
+  var i, m, ttype;
+  for (i = 0; i < tokenTable.length; i++) {
+    m = t.match(tokenTable[i][1]);
+    if (m) {
+      ttype = tokenTable[i][0];
+      if (ttype === "number")
+        ttype = m[0].indexOf(".") !== -1 ? "float" : "integer";
+      return [[ttype, m[0]], t.slice(m[0].length)];
+    }
+  }
+  console.log("Error: unmatched string: " + t);
+  return;
+}
+
+//Splits ``exp'' into an array of [type, text] tokens in source order.
+//Whitespace is skipped. If some text cannot be matched, tokmatch reports it
+//and the tokens collected so far are returned.
+function tokenize(exp) {
+  var current;
+  var tokens = [];
+  while (exp !== '') {
+    if (/\s/.test(exp[0])) {
+      // skip whitespace
+      exp = exp.slice(1);
+    }
+    else {
+      current = tokmatch(exp);
+      if (!current)
+        break;
+      exp = current[1];
+      tokens.push(current[0]);
+    }
+  }
+  return tokens;
+}
+
+//The parser works on a *reversed* token array and consumes it with pop()
+//(see the bottom of this file), so the last element is the next token.
+function fst(ts) {
+  return ts[ts.length-1];
+}
+
+//The token after the next one (second to last element)
+function snd(ts) {
+  return ts[ts.length-2];
+}
+
+//Type of the next token, or undefined when no tokens are left
+function nextType(tokens) {
+  var next = fst(tokens);
+  return next ? next[0] : undefined;
+}
+
+//Checks that the type of the next token is none of ``checks''.
+//Returns true/false, or undefined (after logging) when there is no next
+//token or nothing follows it; callers treat undefined as falsy.
+function notFollowedBy(tokens, checks) {
+  var next = fst(tokens);
+  if (!next) {
+    console.log("Unexpected end of source");
+    return;
+  }
+  if (!snd(tokens)) {
+    console.log("Error: "+ next[0] +" must be followed by something");
+    return;
+  }
+  return !checks.some(function (x) {return x === next[0];});
+}
+
+//returns a function that takes a parameter and
+//checks if it is in the array ``props''
+function makeChecker(props) {
+  return function(x) {
+    return x && props.some(function (y) {return y === x;});
+  };
+}
+
+/*Parses expressions for as long as the predicate ``valid'' accepts the type
+  of the next token, and stops early once a parsed expression has an
+  exprType that the predicate ``exprType'' rejects.
+  Returns the parsed expressions as an array, or logs an error and returns
+  undefined when not even the first token is acceptable.
+  The next token is re-checked before every parse; checking only the token
+  after it (as the previous two-token lookahead did) could hand an invalid
+  token to parse() after a multi-token expression.*/
+function parseMany(exprType, valid, tokens) {
+  var results = [];
+  var parsed;
+
+  if (!valid(nextType(tokens))) {
+    console.log("Error: unexpected token "+fst(tokens));
+    return;
+  }
+  while (valid(nextType(tokens))) {
+    //parse() exits by itself when the tokens run out
+    parsed = checkParse(parse(tokens));
+    results.push(parsed);
+    if (!exprType(parsed.exprType))
+      break;
+  }
+  return results;
+}
+
+//Parses the rest of ``def name expression''; the def token itself has
+//already been consumed by parse(). Returns a typ.Def, or undefined after
+//logging when the next token is not an identifier.
+function parseDef(tokens) {
+  if (notFollowedBy(tokens, ["identifier"])) {
+    console.log("Error: def must be followed by identifier, not "+nextType(tokens));
+    return undefined;
+  }
+  var name = parse(tokens);
+  var value = parse(tokens);
+  return new typ.Def(name, value);
+}
+
+
+//Parses the rest of ``if condition then expression [else expression]'';
+//the if token has already been consumed by parse(). Returns a typ.If, or
+//undefined after logging an error.
+function parseIf(tokens) {
+  if (!notFollowedBy(tokens, ["def","comma","lambda"])) {
+    console.log("Error: ``if'' cannot be followed by "+nextType(tokens));
+    return;
+  }
+  var ifC = parse(tokens);
+  if (nextType(tokens) !== "thenexp") {
+    //report the token that was actually found in place of ``then''
+    console.log("Error: if condition must be followed by then, not "+nextType(tokens));
+    return;
+  }
+  tokens.pop();
+  var thenC = parse(tokens);
+
+  if (nextType(tokens) === "elsexp") {
+    tokens.pop();
+    var elseC = parse(tokens);
+    return new typ.If(ifC, thenC, elseC);
+  }
+  return new typ.If(ifC, thenC);
+}
+
+
+var validFormPar = makeChecker(["identifier"]);
+var validName = makeChecker(["Name"]);
+
+//Parses the rest of ``lambda parameters -> body''; the lambda token has
+//already been consumed by parse(). Returns a typ.FuncT, or undefined after
+//logging when the parameters are not followed by an arrow.
+function parseLambda(tokens) {
+  var parameters = parseMany(validName, validFormPar, tokens);
+
+  if (nextType(tokens) !== "arrow") {
+    console.log("Error: arrow must follow parameters in lambda, not "+nextType(tokens));
+    return;
+  }
+  tokens.pop();
+  var body = parse(tokens);
+  return new typ.FuncT(parameters, body);
+}
+
+var invalidArguments = ["def", "comma", "right_paren", "right_square", "right_brace", "left_brace"];
+var validArgument = tool.compose(tool.not, makeChecker(invalidArguments));
+var validArgTypes = tool.compose(tool.not, makeChecker(["Definition"]));
+var validOperator = makeChecker(["identifier"]);
+
+//Parses one expression, consuming its tokens from the end of ``tokens''.
+//Exits the process when no tokens are left; logs an error and returns
+//undefined for a token type that cannot start an expression.
+function parse(tokens) {
+  var next = fst(tokens);
+  if (!next) {
+    console.log("Unexpected end of source");
+    process.exit(1);
+  }
+  var toktype = next[0];
+  var token = next[1];
+  tokens.pop();
+  if (toktype === "stringlit")
+    return new typ.StrT(token);
+  else if (toktype === "lambda")
+    return checkParse(parseLambda(tokens));
+  else if (toktype === "integer")
+    return new typ.IntT(token);
+  else if (toktype === "float")
+    return new typ.FloatT(token);
+  else if (toktype === "identifier")
+    return new typ.Name(token);
+  else if (toktype === "truelit" || toktype === "falselit")
+    return new typ.BoolT(token);
+  else if (toktype === "def")
+    return checkParse(parseDef(tokens));
+  else if (toktype === "ifexp")
+    return checkParse(parseIf(tokens));
+  else if (toktype === "left_paren")
+    return computeApp(tokens);
+  else {
+    console.log("Error: unexpected token "+next);
+    return;
+  }
+}
+
+//Returns ``p'' unchanged, or exits the process when ``p'' is undefined
+function checkParse(p) {
+  if (p === undefined) {
+    console.log("Quitting, could not finish parsing!");
+    process.exit(1);
+  }
+  else
+    return p;
+}
+
+//Returns the [precedence, associativity] entry for an operator token.
+//Only own properties of typ.OPInfo count, so that a name like "toString"
+//is not mistaken for an operator through Object.prototype.
+function operatorInfo(token) {
+  if (Object.prototype.hasOwnProperty.call(typ.OPInfo, token[1]))
+    return typ.OPInfo[token[1]];
+}
+
+//Parses function application (either infix or prefix)
+//The opening parenthesis has already been consumed by parse(); the closing
+//one is consumed here, so that whatever follows a nested application is
+//still seen by the enclosing expression.
+function computeApp(tokens) {
+  var lhs = parse(tokens);
+  var next = fst(tokens);
+  var result;
+  if (!next) {
+    console.log("Unexpected end of source");
+    process.exit(1);
+  }
+  if (operatorInfo(next)) {
+    //it's an infix expression
+    result = parseInfix(tokens, 1, lhs);
+  }
+  else if (next[0] === "right_paren") {
+    //it's an application without arguments
+    result = typ.makeApp(lhs);
+  }
+  else {
+    //it's a prefix application
+    result = typ.makeApp(lhs, parseMany(validArgTypes, validArgument, tokens));
+  }
+  if (nextType(tokens) !== "right_paren") {
+    console.log("Error: mismatched parentheses");
+    process.exit(1);
+  }
+  tokens.pop();
+  return result;
+}
+
+//Parses infix expressions by precedence climbing
+//``minPrec'' is the lowest operator precedence this call may consume;
+//``lhs'' is an already parsed left operand (parsed here when omitted).
+function parseInfix(tokens, minPrec, lhs) {
+  if (!lhs)
+    lhs = parse(tokens);
+  while (true) {
+    var cur = fst(tokens);
+    if (!cur) {
+      console.log("Unexpected end of source");
+      process.exit(1);
+    }
+    var opinfo = operatorInfo(cur);
+
+    if (!opinfo || opinfo[0] < minPrec)
+      break;
+
+    var op = cur[1];
+    var prec = opinfo[0];
+    var assoc = opinfo[1];
+    //a left-associative operator must not consume itself again on the right
+    var nextMinPrec = assoc === "Left" ? prec + 1 : prec;
+    //remove the operator token
+    tokens.pop();
+    var rhs = parseInfix(tokens, nextMinPrec);
+    lhs = typ.makeApp(op, [lhs, rhs]);
+  }
+  return lhs;
+}
+
+
+//The command line arguments are joined into one source string, tokenized,
+//and reversed so that the parser can consume tokens with pop().
+var input = process.argv.slice(2).join(" ");
+var wat = tokenize(input).reverse();
+//console.log(tool.pprint(parse(wat)));
+console.log(parse(wat));
\ No newline at end of file
diff --git a/representation.js b/representation.js
new file mode 100644
index 0000000..86156a4
--- /dev/null
+++ b/representation.js
@@ -0,0 +1,162 @@
+//Shared prototype of the typed expression nodes below. Every node carries
+//an ``exprType'' string and a ``val''.
+var Expression = {
+  //"<exprType> <val>" as a string
+  display :
+    function() {
+      return this.exprType + " " + this.val;
+    },
+  type :
+    function () {
+      return this.exprType;
+    },
+  //Returns the common exprType when ``t'' has the same exprType as this
+  //node; otherwise logs a message and returns undefined.
+  unify :
+    function (t) {
+      if (this.exprType === t.exprType) {
+        return t.exprType;
+      }
+      else {
+        console.log("Could not unify " + this.exprType + " with " + t.exprType);
+      }
+    }
+};
+
+
+//Integer literal; ``v'' is the token text
+function IntT(v) {
+  this.exprType = "Integer";
+  this.val = parseInt(v, 10);
+  return this;
+}
+IntT.prototype = Expression;
+
+//Float literal; ``v'' is the token text (parseFloat takes no radix)
+function FloatT(v) {
+  this.exprType = "Float";
+  this.val = parseFloat(v);
+  return this;
+}
+
+FloatT.prototype = Expression;
+
+//String literal; ``v'' is stored as given
+function StrT(v) {
+  this.exprType = "String";
+  this.val = v;
+  return this;
+}
+
+StrT.prototype = Expression;
+
+//Boolean literal; val is true only for the text "true"
+function BoolT(b) {
+  if (b === "true") {
+    this.val = true;
+  }
+  else {
+    this.val = false;
+  }
+  this.exprType = "Bool";
+  return this;
+}
+
+BoolT.prototype = Expression;
+
+//List cell made of a head ``x'' and the rest ``xs''
+function ListT(x, xs) {
+  this.x = x;
+  this.rest = xs;
+  this.val = [x,xs];
+  this.exprType = "List";
+  return this;
+}
+
+ListT.prototype = Expression;
+
+//Function with parameters ``p'' and body ``body''
+function FuncT(p, body) {
+  this.p = p;
+  this.body = body;
+  this.val = [p, body];
+  this.exprType = "Function";
+  return this;
+}
+
+FuncT.prototype = Expression;
+
+//Wrapper for function objects
+function OpT(operator) {
+  this.op = operator;
+  this.val = this.op;
+  this.exprType = "Function";
+  return this;
+}
+
+OpT.prototype = Expression;
+
+// Applications separate from other types
+// ``func'' is what gets applied; ``p'' is its single argument and is only
+// stored when it is truthy.
+function App(func, p) {
+  this.func = func;
+  this.exprType = "Application";
+  if (p)
+    this.p = p;
+  return this;
+}
+
+// Names are not types
+function Name(identifier) {
+  this.ident = identifier;
+  this.val = this.ident;
+  this.exprType = "Name";
+  return this;
+}
+
+// Definition binding ``ident'' to the expression ``exp'' (stored as val)
+function Def(ident, exp) {
+  this.ident = ident;
+  this.val = exp;
+  this.exprType = "Definition";
+  return this;
+}
+
+// Conditional; ``elseexp'' is only stored when it is truthy
+function If(condition, thenexp, elseexp) {
+  this.condition = condition;
+  this.thenexp = thenexp;
+  if (elseexp)
+    this.elseexp = elseexp;
+  this.exprType = "If";
+  return this;
+}
+
+//convenience function to construct binary operators
+//assumes that the identifier refers to the name of a primitive
+//operation
+//NOTE(review): the arguments are passed to App as the raw strings "a" and
+//"b" rather than as Name nodes, and each FuncT gets a single Name as its
+//parameters -- confirm that both are intended.
+function makeBin(ident) {
+  return new OpT(new FuncT (new Name("a"), new FuncT(new Name("b"), new App(new App(ident, "a"), "b"))));
+}
+
+//Applies the function ``name'' to the list of parameters
+//Applications take one argument each, so makeApp(f, [a, b]) builds
+//App(App(f, a), b). Without ``parameters'' the result is App(name).
+function makeApp(name, parameters) {
+  if (parameters) {
+    return parameters.slice(1).reduce(function(f, ident) {
+      return new App(f, ident);
+    }, new App(name, parameters[0]));
+  }
+  else {
+    return new App(name);
+  }
+
+}
+
+//Declared with var so that loading this module does not create globals
+var OPTable = {"+" : makeBin("+")};
+
+//operator -> [precedence, associativity]
+var OPInfo = {"+" : [1, "Left"],
+              "-" : [1, "Left"],
+              "*" : [2, "Left"],
+              "/" : [2, "Left"],
+              "^" : [3, "Right"]};
+
+module.exports =
+   { IntT : IntT,
+     FloatT : FloatT,
+     StrT : StrT,
+     BoolT : BoolT,
+     ListT : ListT,
+     FuncT : FuncT,
+     App : App,
+     Name : Name,
+     Def : Def,
+     OpT : OpT,
+     OPInfo : OPInfo,
+     makeApp : makeApp,
+     If : If};
diff --git a/tools.js b/tools.js
new file mode 100644
index 0000000..8535232
--- /dev/null
+++ b/tools.js
@@ -0,0 +1,33 @@
+//Returns the function x -> f(g(x))
+function compose(f, g) {
+  return function(x) {
+    return f(g(x));
+  };
+}
+
+//Logical negation of ``x''
+function not(x) {
+  return !x;
+}
+
+//Returns the function (x, y) -> g(f(x), f(y))
+function on(g, f) {
+  return function(x,y) {
+    return g(f(x), f(y));
+  };
+}
+
+//pretty prints expressions!
+//An application is rendered as "(func arg)", or "(func)" when it has no
+//argument. Definitions, conditionals and functions are rendered with the
+//def / if-then-else / lambda keywords; any other node is rendered as its
+//``val''.
+function pprint(exp) {
+  var params;
+  //a missing node prints as nothing instead of throwing
+  if (exp === undefined || exp === null)
+    return "";
+  //raw values (for instance an operator given to makeApp as a plain
+  //string) have no fields to inspect and are printed as they are
+  if (typeof exp !== "object")
+    return String(exp);
+  if (exp.func) {
+    if (exp.p)
+      return "(" + pprint(exp.func) + " " + pprint(exp.p) + ")";
+    return "(" + pprint(exp.func) + ")";
+  }
+  if (exp.exprType === "Definition")
+    return "def " + pprint(exp.ident) + " " + pprint(exp.val);
+  if (exp.exprType === "If")
+    return "if " + pprint(exp.condition) + " then " + pprint(exp.thenexp) +
+           (exp.elseexp ? " else " + pprint(exp.elseexp) : "");
+  if (exp.exprType === "Function" && exp.body) {
+    //``p'' may be an array of parameters, a single parameter, or absent
+    params = [].concat(exp.p || []).map(function (p) {
+      return pprint(p);
+    }).join(" ");
+    return "lambda " + (params ? params + " " : "") + "-> " + pprint(exp.body);
+  }
+  return exp.val;
+}
+
+module.exports = {compose : compose,
+                  not : not,
+                  on : on,
+                  pprint : pprint};
\ No newline at end of file