diff --git a/cexps.js b/cexps.js new file mode 100644 index 0000000..f996a65 --- /dev/null +++ b/cexps.js @@ -0,0 +1,135 @@ +/* + * Defines the data structures associated with Continuation Expressions (cexps) + * The object here is to have an intermediate representation that allows a code generator backend + * to easily create sequential code. In other words we are taking an algebraic/applicative language + * and translating it to something more similar to the "one word at a time" architecture of the von-Neumann + * architecture + */ + +var cexp = { + type : "cexp" +}; + +function record(values_accesspaths, + w, + next_cexp) { + this.values_accesspaths = values_accesspaths; + this.w = w; + this.next_cexp = next_cexp; + return this; +} + +record.prototype = cexp; + +function select(i, v, w, next_cexp) { + this.i = i; + this.v = v; + this.w = w; + this.next_cexp = next_cexp; + return this; +} +select.prototype = cexp; + +function offset(i, v, w, next_cexp) { + this.i = i; + this.v = v; + this.w = w; + this.next_cexp = next_cexp; + return this; +} +offset.prototype = cexp; + +function app(k, vs) { + this.k = k; + this.vs = vs; + return this; +} +app.prototype = cexp; + +function fix(fs, next_cexp) { + this.fs = fs; + this.next_cexp = next_cexp; + return this; +} +fix.prototype = cexp; + +function switchl(v, cexps) { + this.v = v; + this.cexps = cexps; + return this; +} +switchl.prototype = cexp; + +function primop(op, vals, vars, next_cexp) { + this.op = op; + this.vals = vals; + this.vars = vars; + this.next_cexp = next_cexp; + return this; +} +primop.prototype = cexp; + +function accessPath(offp, selp) { + this.offp = offp; + this.selp = selp; + return this; +} + +var primoptype = { + type : "primop", + equal : function(pOp) { + return this.name === pOp.name; + } +}; + +function Primop(name) { + function ptype() { + this.name = name; + return this; + } + ptype.prototype = primoptype; + return new ptype(); +} + +var times = Primop("*"); /* multiplication; equal() compares names, so this must differ from plus */ +var plus = Primop("+"); +var
div = Primop("div"); +var tilde = Primop("~"); +var ieql = Primop("ieql"); +var ineq = Primop("ineq"); +var lessthan = Primop("<"); +var lessoreq = Primop("<="); +var greatthan = Primop(">"); +var greatoreq = Primop(">="); +var bang = Primop("!"); +var subscript = Primop("subscript"); +var ordof = Primop("ordof"); +var assign = Primop(":="); +var unboxedassign = Primop("unboxedassign"); +var update = Primop("update"); +var unboxedupdate = Primop("unboxedupdate"); +var store = Primop("store"); +var makeref = Primop("makeref"); +var makerefunboxed = Primop("makerefunboxed"); +var alength = Primop("alength"); +var slength = Primop("slength"); +var gethdlr = Primop("gethdlr"); +var sethdlr = Primop("sethdlr"); +var boxed = Primop("boxed"); +var fadd = Primop("fadd"); +var fsub = Primop("fsub"); +var fdiv = Primop("fdiv"); +var fmul = Primop("fmul"); +var feql = Primop("feql"); +var fneq = Primop("fneq"); +var fge = Primop("fge"); +var fgt = Primop("fgt"); +var fle = Primop("fle"); +var flt = Primop("flt"); +var rshift = Primop("rshift"); +var lshift = Primop("lshift"); +var orb = Primop("orb"); +var andb = Primop("andb"); +var xorb = Primop("xorb"); +var notb = Primop("notb"); + diff --git a/closure_conversion.js b/closure_conversion.js new file mode 100644 index 0000000..34a26d4 --- /dev/null +++ b/closure_conversion.js @@ -0,0 +1,153 @@ +/* Takes an AST and converts all of the functions into closures. + * A closure is a triple of: + * the bound variables in a function or let + * the free variables in a function or let + * a function body or let body and bound values + * The closure has the property that all of the free variables of the function or let + * are in the environment, or an exception is raised because the variable is not bound + * in the current environment. 
+ * A free variable is simply those that are not in the list of formal parameters or bound variables if it is a let + * + * Therefore in order to call a closure one must first extract the actual function and then + * call the function with the environment associated with it. + * For the purposes of type checking it does not matter how the function gets called, the environment + * is only used for looking up the types of names. Formal parameters are given type variables. + */ + +var rep = require("./representation.js"); +var env = require("./environments.js"); +var errors = require("./errors.js"); +var parser = require("./parse.js"); +var pprint = require("./pprint.js"); +var $ = require("./tools.js"); +var _ = require("underscore"); + +var notEmpty = _.compose($.not, _.partial(_.equal, [])); + +function fvs(stx) { + switch (stx.exprType) { + case "Integer": + return []; + case "Float": + return []; + case "String": + return []; + case "Function": + return []; + case "Nil": + return []; + case "Bool": + return []; + case "Let": + return []; + case "Unary": + return _.flatten([stx.op.ident, fvs(stx.val)]); + case "Definition": + return _.flatten(fvs(stx.val)); + case "Application": + var vs = _.flatten(fvs(stx.p)); + var f_fvs = _.flatten(fvs(stx.func)); + return _.flatten([vs, f_fvs]); + case "If": + if (stx.elseexp) { + var cond_fvs = fvs(stx.condition); + var then_fvs = fvs(stx.thenexp); + var else_fvs = fvs(stx.elseexp); + return _.flatten([cond_fvs, then_fvs, else_fvs]); + } + else { + return _.flatten([fvs(stx.condition), fvs(stx.thenexp)]); + } + break; + case "Name": + return [stx.ident]; + } +} + +function closure_convert(stx) { + /* Takes a stx object that is either + * a lambda + * a let + * and returns a closure wrapped around that stx object + */ + if (stx.exprType !== "Function" && + stx.exprType !== "Let") { + throw errors.JInternalError( + ["Tried to calculate the free variables of", + "something that was not a function or let.\n", + "That something 
was a: " + stx.exprType +"\n"].reduce( + function (a,b) { + return a+" "+b; + }, "")); + } + var variables, free_variables, bound_vars, stx_type; + + switch (stx.exprType) { + case "Let": + bound_vars = stx.pairs.map( + function (stx) { + return stx.ident.ident; + }); + var let_fvs = stx.pairs.map(fvs); + var body_fvs = fvs(stx.body); + variables = _.flatten(let_fvs); + $.extend(variables, _.flatten(body_fvs)); + break; + case "Function": + bound_vars = [stx.p.ident,]; + variables = fvs(stx.body); + break; + } + free_variables = _.difference(_.uniq(variables), bound_vars); + return new rep.Closure(bound_vars, free_variables, stx, []); +} + +function closure_convert_all(stx) { + var closure; + switch (stx.exprType) { + case "Let": + closure = closure_convert(stx); + closure.body.pairs = closure.body.pairs.map(closure_convert_all); + closure.body.body = closure_convert_all(closure.body.body); /* keep the Let node (and its converted pairs) as the closure body, as the Function case does */ + return closure; + case "Function": + closure = closure_convert(stx); + closure.body.body = closure_convert_all(closure.body.body); + return closure; + case "Unary": + stx.val = closure_convert_all(stx.val); + return stx; + case "Application": + stx.func = closure_convert_all(stx.func); + stx.p = closure_convert_all(stx.p); + return stx; + case "If": + if (stx.elseexp) { + stx.condition = closure_convert_all(stx.condition); + stx.thenexp = closure_convert_all(stx.thenexp); + stx.elseexp = closure_convert_all(stx.elseexp); + return stx; + } + else { + stx.condition = closure_convert_all(stx.condition); + stx.thenexp = closure_convert_all(stx.thenexp); + return stx; + } + case "Definition": + stx.val = closure_convert_all(stx.val); + return stx; + default: + return stx; + } +} + + +function test(src) { + var ast = parser.parse(src)[0]; + console.log(JSON.stringify(closure_convert_all(ast), null, 4)); +} + +module.exports = { + test : test, + closureConvert : closure_convert_all +}; diff --git a/desugar.js b/desugar.js index 0f3fdd8..cf79a35 100644 --- a/desugar.js +++ b/desugar.js @@
-5,6 +5,7 @@ */ var typ = require("./representation.js"); +var _ = require("underscore"); // Lists get desugared to nested function calls // i.e. (cons (cons (cons ...))) @@ -20,14 +21,29 @@ function desugarList(lst) { } function desugarDefFunc(def) { - return new typ.Def(def.ident, new typ.FuncT(desugar(def.params), desugar(def.body))); + return new typ.Def(def.ident, + curryFunc(def.params, + def.body)); +} + +function curryFunc(ps, body) { + if (_.isEmpty(ps)) { + return desugar(body); + } + else { + return new typ.FuncT(desugar(_.first(ps)), + curryFunc(_.rest(ps), body)); + } } -//function desugarString(str) { +function desugarLet(stx) { + var values = stx.pairs.map(desugar); + return new typ.LetExp(values, desugar(stx.body)); +} function desugar(stx) { - switch (stx.exprType) { + switch (stx.exprType) { case "If": if (stx.elseexp) return new typ.If(desugar(stx.condition), desugar(stx.thenexp), desugar(stx.elseexp)); @@ -48,7 +64,7 @@ function desugar(stx) { return new typ.App(desugar(stx.func), desugar(stx.p)); return new typ.App(stx.func); case "Function": - return new typ.FuncT(stx.p, desugar(stx.body)); + return curryFunc(stx.p, stx.body); case "List": return desugarList(stx); case "Bool": @@ -59,6 +75,8 @@ function desugar(stx) { return stx; case "Integer": return stx; + case "Let": + return desugarLet(stx); default: return stx; } diff --git a/environments.js b/environments.js new file mode 100644 index 0000000..51ee39e --- /dev/null +++ b/environments.js @@ -0,0 +1,55 @@ +/* + * An environment is just an object that maps identifiers to JLambda expressions + * with a few built-in (a standard Prelude environment) + */ + +// returns the new environment after mutating it +// values = [(identifier, JLambda expression)] + +var errors = require("./errors.js"); +var rep = require("./representation.js"); + + +function extend(env, values) { + var new_env = {}; + var env_keys = Object.keys(env); + for (var i = 0; i < env_keys.length; i++) { + new_env[env_keys[i]] = 
env[env_keys[i]]; + } + for (i = 0; i < values.length; i++) { + new_env[values[i][0].val] = values[i][1]; + } + return new_env; +} + +// creates a new environment initialized with the pairs in values +function makeEnv(name, values) { + var env = {}; + env.name = name; + for (var i = 0; i < values.length; i++) { + name = values[i][0].val; + var val = values[i][1]; + env[name] = val; + } + return env; +} + +function lookup(name, env) { + var value = env[name]; + if (!value) { + throw errors.UnboundError(name, env.name); + } + return value; +} + +var prelude = makeEnv("prelude", [[new rep.Name("e"), new rep.FloatT(Math.E)], + [new rep.Name("pi"), new rep.FloatT(Math.PI)]]); + +var prelude_types = makeEnv("prelude_types", + [[new rep.Name("e"), new rep.TypeOp("Float", [], false)], + [new rep.Name("pi"), new rep.TypeOp("Float", [], false)]]); + +module.exports = { prelude : prelude, + prelude_types : prelude_types, + lookup : lookup, + extend : extend }; diff --git a/errors.js b/errors.js new file mode 100644 index 0000000..b7855f0 --- /dev/null +++ b/errors.js @@ -0,0 +1,39 @@ +/* + * This file defines common error objects + * for reporting on syntax errors, type errors, + * and perhaps runtime exceptions although I have + * not thought about how that will work much + */ + +function JSyntaxError(linenum, charnum, message) { + this.linenum = linenum; + this.charnum = charnum; + this.errormessage = message; + this.stxerror = function() { + console.log("Syntax Error\n", + "Line #", this.linenum,"\n", + "Near character #", this.charnum, "\n", + this.errormessage); + }; + return this; +} + +function JTypeError(linenum, charnum, token, message) { + this.linenum = linenum; + this.charnum = charnum; + this.errormessage = message; + this.token = token; + return this; +} + +function JInternalError(message) { + this.errormessage = message; + console.log(message); + return this; +} + +module.exports = + {JSyntaxError : JSyntaxError, + JTypeError : JTypeError, + JInternalError : 
JInternalError + }; diff --git a/example.jl b/example.jl new file mode 100644 index 0000000..9854467 --- /dev/null +++ b/example.jl @@ -0,0 +1,68 @@ +defop 2 Left (a ## b) + (a - b) + +def (f a b) + (a ++ b) + +def (add a b) + (a + b) + +def (catstrs strs) + (foldr f + (head strs) + (tail strs)) + +def strs ["aa", "bb"] + +def (mymap f xs) + if ((length xs) == 0) + then + xs + else + ((f (head xs)) + : (mymap f (tail xs))) + +def empty [] + +def getFile + (readFile "./parse.js") + +def fileLines + (getFile >>= + ((mapM_ putStrLn) . lines)) + +def (testUnary n) + ((-n) + n) + +def (splitHelp acc xs ys) + if (null xs) + then ((reverse acc), ys) + else if (null (tail xs)) + then ((reverse acc), ys) + else + (splitHelp ((head ys) : acc) + (tail (tail xs)) + (tail ys)) + +def (splitxs xs) + (splitHelp [] xs xs) + +def r def + { + a = 4 + } + a + +def main + let { + unary = (print (testUnary 6)) + splitted = def { + xs = (fst (splitxs [12,3,4,56])) + } (xs ++ [0,9]) + } + if False + then undefined + else + (unary >> + fileLines >> + (print splitted)) diff --git a/optimize.js b/optimize.js deleted file mode 100644 index 56b58dd..0000000 --- a/optimize.js +++ /dev/null @@ -1,11 +0,0 @@ -var typ = require("./representation.js"); - -/*function simplify(stx) { - switch (stx.exprType) { - case "Application": - - } -}*/ - -//function simplifyUnary(stx) { - diff --git a/parse.js b/parse.js index abeaff1..301515c 100755 --- a/parse.js +++ b/parse.js @@ -2,10 +2,12 @@ var fs = require("fs"); var typ = require("./representation.js"); -var tool = require("./tools.js"); +var $ = require("./tools.js"); +var _ = require("underscore"); var tokenizer = require("./tokenize.js"); var desugarer = require("./desugar.js"); var pprint = require("./pprint.js"); +var error = require("./errors.js"); var print = console.log; @@ -17,17 +19,22 @@ function snd(ts) { return ts[ts.length-2]; } -//Checks if the next token is not followed by any of ``checks'' -function notFollowedBy(tokens, checks) { - 
var nextT = fst(tokens)[0]; - if (checks.some(function (x) {return x === nextT;})) +/*Checks if the next token is not followed by any of ``checks'' */ +function notFollowedBy(tokens, checks, linenum, charnum) { + if (!fst(tokens)) { + throw error.JSyntaxError(0,0,"unexpected end of source"); + } + var nextT = fst(tokens)[0]; + if (checks.some(function (x) { + return x === nextT; + })) return false; else return true; } -//returns a function that takes a parameter and -//checks if it is in the array ``props'' +/* returns a function that takes a parameter and + checks if it is in the array ``props''*/ function makeChecker(props) { return function(x) { return x && props.some(function (y) {return y === x;}); @@ -36,38 +43,47 @@ function makeChecker(props) { /*Tries to parse until the prediction ``valid'' fails or the wrong type is parsed Collects the results into an array and returns it*/ -function parseMany(exprType, valid, tokens) { +function parseMany(parse, exprType, valid, tokens, charnum, linenum) { + if (!fst(tokens)) { + throw error.JSyntaxError(charnum, + linenum, + "Unexpected end of source"); + } var current = fst(tokens)[0]; var results = []; var parsed; if (valid(fst(tokens)[0])) { parsed = parse(tokens); - //console.log(parsed.exprType); } else { - console.log("Error: unexpected token "+fst(tokens)); - console.log("in parseMany," + ", " + tokens); - return; + throw error.JSyntaxError(linenum, + charnum, + "Unexpected token: ``"+fst(tokens)[0]+"''"); } results.push(parsed); //make sure there are at least 2 tokens to parse - if (tokens.length > 1 && valid(fst(tokens)[0])) { + if (tokens.length > 1 && fst(tokens) && valid(fst(tokens)[0])) { while (valid(snd(tokens)[0])) { if (!(valid(fst(tokens)[0]))) break; - //print(valid(fst(tokens)[0]), tokens); results.push(parse(tokens)); if (!exprType(fst(results).exprType)) break; - //console.log(results); - current = fst(tokens)[0] + if (fst(tokens)) + current = fst(tokens)[0]; + else + throw 
error.JSyntaxError(charnum, linenum, "Unexpected end of source"); if (tokens.length <= 1) break; } } //do the same validity check as before and in the loop + if (!fst(tokens)) + throw error.JSyntaxError(linenum, + charnum, + "unexpected end of source"); if (valid(fst(tokens)[0])) results.push(parse(tokens)); return results; @@ -77,11 +93,10 @@ function parseMany(exprType, valid, tokens) { /* Tries to parse exprType separated by the token between * e.g. ,,... */ -function parseBetween(exprType, between, tokens) { +function parseBetween(exprType, between, tokens, charnum, linenum) { var first = parse(tokens); if (!exprType(first)) { - console.log("Error, unexpected token:"+fst(tokens)[0]); - return; + throw error.JSyntaxError(charnum, linenum, "Unexpected token: ``"+fst(tokens)[0]+"''"); } var items = [first]; var parsed; @@ -89,6 +104,10 @@ function parseBetween(exprType, between, tokens) { while (fst(tokens)[0] === between) { tokens.pop(); parsed = parse(tokens); + if (!fst(tokens)) + throw error.JSyntaxError(linenum, + charnum, + "Missing terminator: "+between); items.push(parsed); } return items; @@ -97,19 +116,21 @@ function parseBetween(exprType, between, tokens) { } function parseList(tokens) { + var xs; if (fst(tokens)[0] === "right_square") { - var xs = []; + xs = []; } else if (fst(tokens)[0] === "comma") { tokens.pop(); - var xs = []; + xs = []; } else { - var xs = parseBetween(function (x) { return true; }, "comma", tokens); + xs = parseBetween(function (x) { return true; }, "comma", tokens, fst(tokens)[3], fst(tokens)[2]); } - if (fst(tokens)[0] !== "right_square") { - console.log("Error, list must be terminated by ]"); - return undefined; + if (!fst(tokens) || fst(tokens)[0] !== "right_square") { + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "list must be terminated by ]"); } tokens.pop(); return new typ.ListT(xs); @@ -118,57 +139,272 @@ function parseList(tokens) { function parseDefFunction(tokens) { var fname = parse(tokens); - if 
(!fname.exprType === "identifier") { - console.log("Error, expected an identifier in function definition"); - return undefined; + var parameters; + if (fname.exprType != "Name") { + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "Expected an identifier in function definition"); } if (fst(tokens)[0] === "right_paren") { - var parameters = []; + parameters = []; } else { - var parameters = parseMany(validName, validFormPar, tokens); + parameters = parseMany(parse, + validName, + validFormPar, + tokens, + fst(tokens)[2], + fst(tokens)[3]); } if ((fst(tokens)[0]) !== "right_paren") { - console.log("Error, formal parameters must be followed by )"); - return undefined; + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "Formal parameters must be followed by )"); } tokens.pop(); var body = parse(tokens); return new typ.DefFunc(fname, parameters, body); } +var validLet = makeChecker(["Definition", "FunctionDefinition"]); +var letEnd = _.compose($.not, makeChecker(["right_brace"])); + +function parseLetForm(tokens, linenum, charnum) { + if (!fst(tokens)) { + throw error.JSyntaxError(linenum, + charnum, + "Unexpected end of source"); + } + var pairs = parseMany(parseLetItem, + validLet, + letEnd, + tokens, + charnum, /* parseMany takes (charnum, linenum), in that order */ + linenum); + if (fst(tokens) && fst(tokens)[0] !== "right_brace") { + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "let/def form must have a closing }"); + } + if (!fst(tokens)) { + throw error.JSyntaxError(linenum, + charnum, + "Unexpected end of source"); + } + linenum = fst(tokens)[3]; + charnum = fst(tokens)[2]; + tokens.pop(); + if (tokens.length <= 0) { + throw error.JSyntaxError(linenum, + charnum, + "let/def form must have a body"); + } + var body = parse(tokens); + if (body.exprType === "Definition" || + body.exprType === "FunctionDefinition") { + throw error.JSyntaxError(linenum, + charnum, + "Body of a let/def expression cannot be a definition"); + } + return new typ.LetExp(pairs, body); + +} + +function
parseLetFunction(tokens, linenum, charnum) { + var fname = parse(tokens); + var parameters; + + if (fname.exprType != "Name") { + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "Expected an identifier in function definition"); + } + if (fst(tokens)[0] === "right_paren") { + parameters = []; + } + else { + parameters = parseMany(parse, + validName, + validFormPar, + tokens, + fst(tokens)[2], + fst(tokens)[3]); + } + if ((fst(tokens)[0]) !== "right_paren") { + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "Formal parameters must be followed by )"); + } + tokens.pop(); + if (fst(tokens)[0] !== "arrow") { + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "Function parameters in let/def form must be followed by ->"); + } + tokens.pop(); + var body = parse(tokens); + return new typ.DefFunc(fname, parameters, body); +} +function parseLetBinding(tokens, linenum, charnum) { + var name = parse(tokens); + if (name.exprType != "Name") { + throw error.JSyntaxError(linenum, + charnum, + "Expected an identifier in let/def binding"); + } + if (!fst(tokens) || fst(tokens)[1] !== "=") { + throw error.JSyntaxError(linenum, + charnum, + "An identifier in a let/def binding must be followed by ``=''"); + } + tokens.pop(); + if (!notFollowedBy(tokens, + ["comma", "arrow", "right_brace", "right_square"], + linenum, + charnum)) { + throw error.JSyntaxError(linenum, + charnum, + "The binding of " + name.val + " must not be followed by " + fst(tokens)[0]); + } + var bound = parse(tokens); + if (bound.exprType === "Definition" || + bound.exprType === "FunctionDefinition") { + throw error.JSyntaxError(linenum, + charnum, + "A definition cannot be the value of a binding"); + } + return new typ.Def(name, bound); +} -function parseDef(tokens) { +function parseLetItem(tokens) { + if (fst(tokens) && fst(tokens)[0] === "left_paren") { + tokens.pop(); + return parseLetFunction(tokens, + fst(tokens)[3], + fst(tokens)[2]); + } + else { + return
parseLetBinding(tokens, + fst(tokens)[3], + fst(tokens)[2]); + } +} + +function parseDef(tokens, linenum, charnum) { + if (tokens.length < 2) + throw error.JSyntaxError(linenum, + charnum, + "Unexpected end of source"); if (fst(tokens)[0] === "left_paren") { - // It's a function definition + /* It's a function definition */ + tokens.pop(); + return parseDefFunction(tokens, linenum, charnum); + } + + if (fst(tokens)[0] === "left_brace") { + /* It's a let/def form */ tokens.pop(); - return parseDefFunction(tokens); + return parseLetForm(tokens, + fst(tokens)[3], + fst(tokens)[2]); } - if (notFollowedBy(tokens, ["identifier"])) { - console.log("Error: def must be followed by identifier, not "+fst(tokens)[0]); - return undefined; + + if (notFollowedBy(tokens, ["identifier"], linenum, charnum)) { + throw error.JSyntaxError(linenum, + charnum, + "def must be followed by identifier, not "+fst(tokens)[0]); } else { var identifier = parse(tokens); - if (!notFollowedBy(tokens, ["def", "comma", "arrow", "right_brace", "right_square"])) { - console.log("Error: def " + identifier.val + " must not be followed by " + fst(tokens)[0]); - return; + if (!fst(tokens)) + throw error.JSyntaxError(linenum, + charnum, + "Unexpected end of source"); + linenum = fst(tokens)[3]; + charnum = fst(tokens)[2]; + if (!notFollowedBy(tokens, + ["comma", "arrow", "right_brace", "right_square"], + linenum, + charnum)) { + throw error.JSyntaxError(linenum, + charnum, + "def " + identifier.val + " must not be followed by " + fst(tokens)[0]); } - return new typ.Def(identifier, parse(tokens)); + var bound = parse(tokens); + if (bound.exprType === "Definition" || + bound.exprType === "FunctionDefinition") { + throw error.JSyntaxError(linenum, + charnum, + "A definition cannot be the value of a binding"); + } + return new typ.Def(identifier, bound); } } +function parseDefOp(tokens, linenum, charnum) { + if (fst(tokens)[0] !== "integer" || + fst(tokens)[1] < 1) { + throw error.JSyntaxError(linenum, + 
charnum, + "defop must be followed by integer precedence >= 1"); + } + tokens.pop(); + + if (fst(tokens)[1] !== "Left" && fst(tokens)[1] !== "Right") { + throw error.JSyntaxError(linenum, + charnum, + "defop must be followed by precedence and then either Left or Right"); + } + tokens.pop(); + if (fst(tokens)[0] !== "left_paren") { + throw error.JSyntaxError(linenum, + charnum, + "defop arguments must start with ("); + } + tokens.pop(); + if (!(tokens.slice(tokens.length-3, + tokens.length).every(function(x) { + return x[0] === "identifier"; + }))) { + throw error.JSyntaxError(linenum, + charnum, + "defop must be surrounded by exactly 3 identifiers"); + } + var pattern = tokens.slice(tokens.length-3, + tokens.length); + tokens.pop(); tokens.pop(); tokens.pop(); + if (fst(tokens)[0] !== "right_paren") { + throw error.JSyntaxError(linenum, + charnum, + "defop pattern must be terminated with )"); + } + tokens.pop(); + return new typ.DefFunc(new typ.Name(pattern[1][1]), + [new typ.Name(pattern[0][1]), + new typ.Name(pattern[2][1])], + parse(tokens)); +} + + function parseIf(tokens) { - if (!notFollowedBy(tokens, ["def","comma","lambda"])) { - console.log("Error: ``if'' cannot be followed by "+fst(tokens)[0]) - return; + var linenum = fst(tokens)[3]; + var charnum = fst(tokens)[2]; + if (!notFollowedBy(tokens, + ["def","comma","lambda"], + linenum, + charnum)) { + throw error.JSyntaxError(linenum, + charnum, + "``if'' cannot be followed by "+fst(tokens)[0]) ; } else { var ifC = parse(tokens); if (!fst(tokens) || fst(tokens)[0] !== "thenexp") - console.log("Error: if must be folowed by exp, not "+snd(tokens)[0]); + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "if ``exp'' must be folowed by ``then'' exp, not "+snd(tokens)[0]); else { tokens.pop(); var thenC = parse(tokens); @@ -191,65 +427,74 @@ var validFormPar = makeChecker(["identifier"]); var validName = makeChecker(["Name"]); function parseLambda(tokens) { - var parameters = 
parseMany(validName,validFormPar, tokens); + var parameters = parseMany(parse, + validName, + validFormPar, + tokens, + fst(tokens)[2], + fst(tokens)[3]); if (fst(tokens)[0] !== "arrow") { - console.log("Error: arrow must follow parameters in lambda, not "+fst(tokens)[0]) - return; + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "arrow must follow parameters in lambda, not "+fst(tokens)[0]); } - tokens.pop() + tokens.pop(); var body = parse(tokens); return new typ.FuncT(parameters, body); } var invalidArguments = ["def", "comma", "right_paren", "right_square", "right_brace", "left_brace", "right_brace"]; -var validArgument = tool.compose(tool.not, makeChecker(invalidArguments)); -var validArgTypes = tool.compose(tool.not, makeChecker(["Definition"])); +var validArgument = _.compose($.not, makeChecker(invalidArguments)); +var validArgTypes = _.compose($.not, makeChecker(["Definition"])); var validOperator = makeChecker(["identifier"]); -function checkParse(p) { - if (p === undefined) { - console.log("Quitting, could not finish parsing!"); - process.exit(code=1); - } - else - return p; -} - -//Parses function application (either infix or prefix) -function computeApp(tokens) { +/* Parses function application (either infix or prefix) */ +function computeApp(tokens, charnum, linenum) { var lhs = parse(tokens); - //console.log(lhs); - if (fst(tokens)) - var next = fst(tokens); + var next; + var result; + if (fst(tokens)) { + next = fst(tokens); + } else { - console.log("Unexpected end of source"); - process.exit(code=1); + throw error.JSyntaxError(linenum, + charnum, + "Unexpected end of source"); } if (typ.OPInfo[next[1]]) { - //it's an infix expression - var result = parseInfix(tokens, 1, lhs); - if (fst(tokens)[0] !== "right_paren") { - console.log("Error: mismatched parentheses"); - process.exit(code=1); + /* it's an infix expression */ + result = parseInfix(tokens, 1, lhs, linenum, charnum); + if (!fst(tokens) || fst(tokens)[0] !== "right_paren") { + 
throw error.JSyntaxError(linenum, + charnum, + "Mismatched parentheses or missing parenthesis on right-hand side"); } else { - //return the result tokens.pop(); return result; } } else { - //it's a prefix application - - var parameters = parseMany(validArgTypes, validArgument, tokens); - //console.log(parameters); + /* it's a prefix application */ + var parameters; if (fst(tokens)[0] !== "right_paren") { - console.log("Error: mismatched parentheses"); - process.exit(code=1); + parameters = parseMany(parse, + validArgTypes, + validArgument, + tokens, + charnum, + linenum); + } + else { + parameters = []; + } + if ((!fst(tokens)) || fst(tokens)[0] !== "right_paren") { + throw error.JSyntaxError(linenum, + charnum, + "Mismatched parentheses or missing parenthesis on right-hand side"); } else { - //return the result tokens.pop(); return typ.makeApp(lhs, parameters); } @@ -260,15 +505,16 @@ function computeApp(tokens) { See this for more info and an implementation in python http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/ */ -function parseInfix(tokens, minPrec, lhs) { +function parseInfix(tokens, minPrec, lhs, linenum, charnum) { if (!lhs) { - var lhs = parse(tokens); + lhs = parse(tokens); } while (true) { var cur = fst(tokens); if (!cur) { - console.log("Unexpected end of source") - process.exit(code=1); + throw error.JSyntaxError(linenum, + charnum, + "Unexpected end of source"); } var opinfo = typ.OPInfo[cur[1]]; @@ -280,7 +526,7 @@ function parseInfix(tokens, minPrec, lhs) { var assoc = opinfo[1]; var nextMinPrec = assoc === "Left" ? 
prec + 1 : prec; tokens.pop(); - //remove the operator token + /*remove the operator token*/ var rhs = parseInfix(tokens, nextMinPrec); lhs = typ.makeApp(op, [lhs, rhs]); } @@ -288,10 +534,13 @@ function parseInfix(tokens, minPrec, lhs) { } function parse(tokens) { - if (fst(tokens)) - var toktype = fst(tokens)[0]; + var charnum = fst(tokens)[2]; + var linenum = fst(tokens)[3]; + var toktype; + if (fst(tokens)) { + toktype = fst(tokens)[0]; + } else { - //console.log("Unexpected end of source") process.exit(code=1); } var token = fst(tokens)[1]; @@ -301,47 +550,56 @@ function parse(tokens) { else if (toktype === "left_square") return parseList(tokens); else if (toktype === "lambda") - return checkParse(parseLambda(tokens)); + return parseLambda(tokens); else if (toktype === "integer") return new typ.IntT(token); else if (toktype === "float") return new typ.FloatT(token); else if (toktype === "identifier") - return new typ.Name(token); + return new typ.Name(token); else if (toktype === "truelit" || toktype === "falselit") return new typ.BoolT(token); - else if (toktype === "def") - return checkParse(parseDef(tokens)); + else if (toktype === "def" || + toktype === "let") + return parseDef(tokens, fst(tokens)[3], fst(tokens)[2]); + else if (toktype === "defop") + return parseDefOp(tokens, fst(tokens)[3], fst(tokens)[2]); else if (toktype === "ifexp") - return checkParse(parseIf(tokens)); + return parseIf(tokens); else if (toktype === "left_paren") { if (fst(tokens)[0] === "lambda") { tokens.pop(); - var parsed = checkParse(parseLambda(tokens)); + var parsed = parseLambda(tokens); tokens.pop(); return parsed; } else - return computeApp(tokens); + return computeApp(tokens, charnum, linenum); } else { - console.log("Unexpected token: " + toktype); - process.exit(code=1); + throw error.JSyntaxError(fst(tokens)[3], + fst(tokens)[2], + "Unexpected token: ``" + toktype+"''"); } } -var istr = fs.readFileSync('/dev/stdin').toString(); + function parseFull(tokenized) { - var 
ast = new Array(); - while (tokenized.length > 0) { - var parsed = desugarer.desugar(parse(tokenized)); - ast.push(parsed); + var ast = []; + try { + while (tokenized.length > 0) { + var parsed = desugarer.desugar(parse(tokenized)); + ast.push(parsed); + } + return ast; + } catch (e) { + e.stxerror(); + process.exit(1); } - return ast; } -console.log(parseFull(tokenizer.tokenize(istr)).map(pprint.pprint).join("\n")); - -//console.log(tokenizer.tokenize(istr)); -//console.log(parseFull(tokenizer.tokenize(istr))); -//module.exports = {parse : tool.compose(parseFull, tokenizer.tokenize) }; +module.exports = { parse : function(str) { + return parseFull(tokenizer.tokenize(str)); + } + }; +//var istr = fs.readFileSync('/dev/stdin').toString(); diff --git a/pprint.js b/pprint.js index a5cdf36..023b1f8 100644 --- a/pprint.js +++ b/pprint.js @@ -55,6 +55,11 @@ function pprint(expr) { return "[]"; else if (expr.exprType === "Unary") return "("+expr.op.ident+" "+pprint(expr.val)+")"; + else if (expr.exprType === "Let") + return "let {" + expr.pairs.map( + function (v) { + return pprint(v); + }).join(" ; ") + "} in " + pprint(expr.body); } module.exports = {pprint : pprint}; diff --git a/representation.js b/representation.js index 0676bc7..ea4ba57 100644 --- a/representation.js +++ b/representation.js @@ -18,12 +18,37 @@ var Expression = { } }; +function Closure(bound_vars, free_vars, body, env) { + this.bound_vars = bound_vars; + this.free_vars = free_vars; + this.body = body; + this.env = env; + this.exprType = "Closure"; + return this; +} + +function LetExp(pairs, body) { + if (!pairs.every(function(x) { + return (x.exprType === "Definition" || + x.exprType === "FunctionDefinition"); + })) { + throw "let can only be used to bind things to names or functions"; + } + this.exprType = "Let"; + this.val = [pairs, body]; + this.pairs = pairs; + this.body = body; + return this; +} +LetExp.prototype = Expression; + function UnaryOp(op, v) { this.exprType = "Unary"; this.val = v; 
this.op = op; return this; } +UnaryOp.prototype = Expression; function IntT(v) { this.exprType = "Integer"; @@ -138,6 +163,17 @@ function If(condition, thenexp, elseexp) { return this; } +function TypeVar(name) { + this.name = name; + return this; +} + +function TypeOp(name, params, body) { + this.name = name; + this.params = params; + this.body = body; + return this; +} //convenience function to construct binary operators //assumes that the identifier refers to the name of a primitive @@ -159,38 +195,59 @@ function makeApp(name, parameters) { } +function makeGensym() { + var n = 0; + return function() { + var x = "G"+n; + n = n + 1; + return x; + }; +} + +var gensym = makeGensym(); + OPInfo = {"+" : [3, "Left"], - "-" : [3, "Left"], - "*" : [4, "Left"], - "/" : [4, "Left"], - "^" : [5, "Right"], - "++" : [3, "Left"], - "==" : [2, "Left"], - ">" : [2, "Left"], - ">=" : [2, "Left"], - "<" : [2, "Left"], - "<=" : [2, "Left"], - ":" : [2, "Left"], - "$" : [1, "Left"], - ">>" : [1, "Left"], - ">>=" : [1, "Left"], - "<$>" : [1, "Left"], - "." : [1, "Left"]} + "-" : [3, "Left"], + "*" : [4, "Left"], + "/" : [4, "Left"], + "^" : [5, "Right"], + "++" : [3, "Left"], + "==" : [2, "Left"], + ">" : [2, "Left"], + ">=" : [2, "Left"], + "<" : [2, "Left"], + "<=" : [2, "Left"], + "&&" : [2, "Left"], + "||" : [2, "Left"], + "::" : [2, "Left"], + ":" : [1, "Left"], + "$" : [1, "Left"], + ">>" : [1, "Left"], + ">>=" : [1, "Left"], + "<$>" : [1, "Left"], + "." 
: [1, "Left"], + "," : [1, "Left"]}; module.exports = { IntT : IntT, - FloatT : FloatT, - StrT : StrT, - BoolT : BoolT, - ListT : ListT, - FuncT : FuncT, - App : App, - Name : Name, - Def : Def, - OpT : OpT, - OPInfo : OPInfo, - makeApp : makeApp, - If : If, + FloatT : FloatT, + StrT : StrT, + BoolT : BoolT, + ListT : ListT, + FuncT : FuncT, + App : App, + Name : Name, + Def : Def, + OpT : OpT, + OPInfo : OPInfo, + makeApp : makeApp, + If : If, DefFunc : DefFunc, UnaryOp : UnaryOp, - Nil : Nil } + Nil : Nil, + LetExp : LetExp, + gensym : gensym, + TypeVar : TypeVar, + TypeOp : TypeOp, + Closure : Closure + }; diff --git a/test.jl b/test.jl deleted file mode 100644 index bb84621..0000000 --- a/test.jl +++ /dev/null @@ -1,35 +0,0 @@ -def (f a b) - (a ++ b) - -def (add a b) - (a + b) - -def (catstrs strs) - (foldr f - (head strs) - (tail strs)) - -def strs ["aa", "bb"] - -def (mymap f xs) - if ((length xs) == 0) - then - xs - else - ((f (head xs)) - : (mymap f (tail xs))) - -def empty [] - -def getFile - (readFile "./parse.js") - -def (testUnary n) - ((-n) + n) - -def main - ((print (testUnary 6)) >> - if False - then - undefined - else getFile) diff --git a/test.js b/test.js index eff6f14..a360582 100755 --- a/test.js +++ b/test.js @@ -1,10 +1,111 @@ #! 
/usr/bin/node -var p = require("./parse.js"); -var pp = require("./pprint.js"); +var parser = require("./parse.js"); +var cexps = require("./cexps.js"); +var closures = require("./closure_conversion.js"); +var desugar = require("./desugar.js"); +var environments = require("./environments.js"); +var errors = require("./errors.js"); +var tokens = require("./tokenize.js"); var tools = require("./tools.js"); -var parse = tools.compose(pp.pprint, p.parse); -//console.log(parse("((map g [1,2,3]) >> (print 34))")); -//p.parse("((f [1,2,3,4,5]) >> (* 2 3))"); -//p.parse("[1] 45"); -//p.parse("(+ 2 3 4)"); +var typecheck = require("./typecheck.js"); +var _ = require("underscore"); + +var qc = require("quickcheck"); +var assert = require("assert"); + + +/* my own generators */ +function arbArray(gen) { + return qc.arbArray(gen); +} + +function arbStrings() { + return qc.arbArray(qc.arbString); +} + + +function arbPair(gen) { + return function() { + return [gen(), gen()]; + }; +} + +function arbArrayofPairs() { + return arbArray(function() { + return arbArray(arbPair(qc.arbString)); + }); +} + +function arbPairs() { + return arbArray(arbPair(qc.arbString)); +} + + +/* Tests for misc tools */ +function emptyProp(xs) { + return (tools.empty(xs) === tools.empty(xs) && + ((tools.empty(xs) === true) || + (tools.empty(xs) === false))); +} + + +function dictProp(pairs) { + var dict = tools.dict(pairs); + var result = _.map(pairs, + function(pair) { + if ((_.size(pair) < 2) || + (_.size(pair[0]) < 1) || + (_.size(pair[1]) < 1)) { + return true; + } + return dict[pair[0]] === pair[1]; + }); + if (_.every(result, _.identity)) { + return true; + } + return false; +} + +function opMatchProp(strings) { + var match = tools.opMatch(strings); + var result = _.every(_.map(strings, + function (str) { + if (str.replace(/ /g,'').length < 1) { + return true; + } + var res = match(str); + if (res !== false) { + console.log(str); + return true; + } + return false; + }), + _.identity); + return 
result; +} + +function extendProp(pair) { + if (pair.length < 2) { + // empty lists or lists with one item are undefined + // so just return true because extend can't handle them + return true; + } + var x = _.first(pair); + var y = _.first(_.rest(pair)); + var extended = tools.extend(x,y); + return x.length + y.length === extended.length; +} + +/* Tokenizer tests */ + + +function toolsTests() { + assert.equal(true, tools.empty([])); + assert.equal(true, qc.forAll(dictProp, arbArrayofPairs)); + assert.equal(true, qc.forAll(extendProp, arbPairs)); + //assert.equal(true, qc.forAll(opMatchProp, arbStrings)); +} + + +toolsTests(); diff --git a/tokenize.js b/tokenize.js index 28e4e01..47ffcc4 100755 --- a/tokenize.js +++ b/tokenize.js @@ -1,10 +1,10 @@ #! /usr/bin/node var rep = require("./representation.js"); -var tools = require("./tools.js"); +var $ = require("./tools.js"); +var error = require("./errors.js"); var operators = Object.keys(rep.OPInfo); - -var matchop = tools.opMatch(operators); +var _ = require("underscore"); function isDigit(a) { if (!a) @@ -26,20 +26,22 @@ function isIdentifier(a) { return code !== 41 && code !== 40 && code && 125 && code && 123 && code !== 93 && code !== 91 && code !== 44; } -function tokenizeNum(tokstream) { +function tokenizeNum(tokstream, charnum, linenum) { var number = []; var code = tokstream[0].charCodeAt(); var isFloat = false; var n = 0; // + - - if (code === 43 || code === 45) { + // might want to remove this since it probably won't ever get run? + if (code === 43 || code === 45) { // + or - number.push(tokstream[0]); tokstream = tokstream.substr(1); n++; } - else if (code === 46) { + else if (code === 46) { // . 
tokstream = tokstream.substr(1); n++; + charnum++; number.push('0'); number.push('.'); isFloat = true; @@ -48,24 +50,27 @@ function tokenizeNum(tokstream) { while (isDigit(tokstream[0]) && tokstream.length !== 0) { number.push(tokstream[0]); tokstream = tokstream.substr(1); + charnum++; n++; } if (tokstream[0] === '.' && isDigit(tokstream[1])) { number.push('.'); number.push(tokstream[1]); tokstream = tokstream.substr(2); + charnum++; charnum++; n++; n++; while (isDigit(tokstream[0]) && tokstream.length !== 0) { number.push(tokstream[0]); tokstream = tokstream.substr(1); n++; + charnum++; } - return [n, ["float", parseFloat(number.join(''), 10)]]; + return [n, ["float", parseFloat(number.join(''), 10), charnum, linenum]]; } if (!isFloat) - return [n, ["integer", parseInt(number.join(''), 10)]]; + return [n, ["integer", parseInt(number.join(''), 10), charnum, linenum]]; else - return [n, ["float", parseFloat(number.join(''), 10)]]; + return [n, ["float", parseFloat(number.join(''), 10), charnum, linenum]]; } /* Split up the tokenized identifier if an operator appears in it @@ -74,20 +79,21 @@ function tokenizeNum(tokstream) { * Everything after the operator goes back on to the token stream */ -function tokenizeIdent(tokstream) { +function tokenizeIdent(tokstream, matchop, charnum, linenum) { var identifier = []; var n = 0; while ((!isWhitespace(tokstream[0])) && isIdentifier(tokstream[0]) && !matchop(tokstream)) { identifier.push(tokstream[0]); tokstream = tokstream.substr(1); n++; + charnum++; } identifier = identifier.join(''); - return [[n, ["identifier", identifier]]]; + return [[n, ["identifier", identifier, charnum, linenum]]]; } -function tokenizeStr(tokstream) { +function tokenizeStr(tokstream, charnum, linenum) { var stringlit = []; var n = 1; tokstream = tokstream.substr(1); @@ -95,17 +101,17 @@ function tokenizeStr(tokstream) { stringlit.push(tokstream[0]); tokstream = tokstream.substr(1); n++; + charnum++; if (tokstream.length < 1) { - 
console.log("Error: missing quotation mark"); - process.exit(code=1); + throw error.JSyntaxError(linenum, charnum, "Error: missing quotation mark"); } } n++; - return [n, ["stringlit", stringlit.join('')]]; + return [n, ["stringlit", stringlit.join(''), charnum, linenum]]; } -function tokenizeT(tokstream) { +function tokenizeT(tokstream, charnum, linenum) { if (tokstream.length < 4) return false; var next4 = tokstream.substr(0,4); @@ -116,7 +122,7 @@ function tokenizeT(tokstream) { return false; } -function peek(tokstream, toktype, word) { +function peek(tokstream, toktype, word, charnum, linenum) { var n = word.length; if (tokstream.length < n) return false; @@ -127,189 +133,247 @@ function peek(tokstream, toktype, word) { return false; } -function tokenize(tokstream) { +function tokenize(tokstream, matchop) { var tokens = []; + var charnum = 1; + var linenum = 1; + var i, result, lambda, num; while (tokstream) { switch (tokstream[0].charCodeAt()) { + /* falls through */ case 9: // '\t' - tokens.push(["whitespace", '\t']); + charnum++; + tokens.push(["whitespace", '\t', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 32: // ' ' - tokens.push(["whitespace", ' ']); + charnum++; + tokens.push(["whitespace", ' ', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 10: // '\n' - tokens.push(["whitespace", '\n']); + linenum++; + charnum = 1; + tokens.push(["whitespace", '\n', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 44: // ',' - tokens.push(["comma", ","]); + charnum++; + tokens.push(["comma", ",", charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 40: // '(' - tokens.push(["left_paren", '(']); + charnum++; + tokens.push(["left_paren", '(', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 41: // ')' - tokens.push(["right_paren", ')']); + charnum++; + tokens.push(["right_paren", 
')', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 123: // '{' - tokens.push(["left_brace", '{']); + charnum++; + tokens.push(["left_brace", '{', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 125: // '}' - tokens.push(["right_brace", '}']); + charnum++; + tokens.push(["right_brace", '}', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 91: // '[' - tokens.push(["left_square", '[']); + charnum++; + tokens.push(["left_square", '[', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 93: // ']' - tokens.push(["right_square", ']']); + charnum++; + tokens.push(["right_square", ']', charnum, linenum]); tokstream = tokstream.substr(1); break; + /* falls through */ case 34: // '"' - var result = tokenizeStr(tokstream); + result = tokenizeStr(tokstream, charnum, linenum); var str = result[1]; - var i = result[0]; + i = result[0]; tokens.push(str); tokstream = tokstream.substr(i); break; -/* case 43: // '+' - if (isDigit(tokstream[1])) { - var result = tokenizeNum(tokstream); - var num = result[1]; - var i = result[0]; - if (num[1] !== NaN) - tokens.push(num); - tokstream = tokstream.substr(i); - break; - } -*/ + /* falls through */ case 45: // '-' - var lambda = peek(tokstream, "arrow", "->"); + lambda = peek(tokstream, "arrow", "->"); if (lambda) { tokens.push(lambda); tokstream = tokstream.substr(2); break; } else { - tokens.push(["identifier", "-"]); + tokens.push(["identifier", "-", charnum, linenum]); + charnum++; tokstream = tokstream.substr(1); break; } -/* if (isDigit(tokstream[1])) { - var result = tokenizeNum(tokstream); - var num = result[1]; - var i = result[0]; - if (num[1] !== NaN) - tokens.push(num); - tokstream = tokstream.substr(i); - break; - } -*/ + + /* falls through */ case 46: // '.' 
if (isDigit(tokstream[1])) { - var result = tokenizeNum(tokstream); - var num = result[1]; - var i = result[0]; - if (num[1] !== NaN) + result = tokenizeNum(tokstream, charnum, linenum); + num = result[1]; + i = result[0]; + if (!isNaN(num[1])) { tokens.push(num); + } tokstream = tokstream.substr(i); break; } + /* falls through */ case 116: // 't' - var result = tokenizeT(tokstream); + result = tokenizeT(tokstream); if (result) { tokens.push(result); tokstream = tokstream.substr(4); // 4 = length of either token break; } - + /* falls through */ case 105: // 'i' - var result = peek(tokstream, "ifexp", "if"); - if (result) { - tokens.push(result); + var ifexp = peek(tokstream, "ifexp", "if"); + if (ifexp) { + tokens.push(ifexp); tokstream = tokstream.substr(2); break; } + var inkeyword = peek(tokstream, "in", "in "); + if (inkeyword) { + tokens.push(inkeyword); + tokstream = tokstream.substr(3); + break; + } + /* falls through */ case 100: // 'd' - var result = peek(tokstream, "def", "def"); - if (result) { - tokens.push(result); + var defop = peek(tokstream, "defop", "defop"); + if (defop) { + tokens.push(["defop", "defop", charnum, linenum]); + tokstream = tokstream.substr(5); + break; + } + var def = peek(tokstream, "def", "def"); + if (def) { + tokens.push(["def", "def", charnum, linenum]); tokstream = tokstream.substr(3); break; } + /* falls through */ case 101: // e - var result = peek(tokstream, "elsexp", "else"); + result = peek(tokstream, "elsexp", "else"); if (result) { tokens.push(result); tokstream = tokstream.substr(4); break; } + /* falls through */ case 102: // f - var result = peek(tokstream, "falselit", "false"); + result = peek(tokstream, "falselit", "false"); if (result) { tokens.push(result); tokstream = tokstream.substr(5); break; } + /* falls through */ case 108: // l - var result = peek(tokstream, "lambda", "lambda"); - if (result) { - tokens.push(result); + lambda = peek(tokstream, "lambda", "lambda"); + if (lambda) { + tokens.push(lambda); 
tokstream = tokstream.substr(6); break; } + var letexp = peek(tokstream, "let", "let"); + if (letexp) { + tokens.push(letexp); + tokstream = tokstream.substr(3); + break; + } + /* falls through */ default: if (isDigit(tokstream[0])) { - var result = tokenizeNum(tokstream); - var num = result[1]; - var i = result[0]; - if (num[1] !== NaN) + result = tokenizeNum(tokstream, charnum, linenum); + num = result[1]; + i = result[0]; + if (!isNaN(num[1])) { tokens.push(num); + } tokstream = tokstream.substr(i); break; } var op = matchop(tokstream); if (op) { var l = op.length; + charnum = charnum + l; tokstream = tokstream.substr(l); - tokens.push(["identifier", op]); + tokens.push(["identifier", op, charnum, linenum]); } else { - var result = tokenizeIdent(tokstream); - result.map(function(x) { - tokens.push(x[1]); - tokstream = tokstream.substr(x[0]); - }); + result = tokenizeIdent(tokstream, matchop, charnum, linenum); + for(var index = 0; index < result.length; index++) { + charnum++; + tokens.push(result[index][1]); + tokstream = tokstream.substr(result[index][0]); + } } - } + } } return tokens; } -function tokenizeFull(input) { - return tokenize(input).reverse().filter(function(x) { - return x[0] !== "whitespace"; - }); +function tokenizeHelp(input, matchop, strip_whitespace) { + try { + return tokenize(input, matchop).reverse().filter(function(x) { + if (strip_whitespace) { + return x[0] !== "whitespace"; + } + else { + return true; + } + }); + } catch (e) { + console.log(e.stxerror()); + process.exit(1); + } } +var defop_pattern = ["defop", "integer", "identifier", + "left_paren", "identifier", + "identifier", "identifier", "right_paren"]; -module.exports = {tokenize : tokenizeFull}; - -//var tokstream = fs.readFileSync("/dev/stdin").toString(); -//console.log(tokenize(tokstream)); -//console.log(tools.buildTrie('', operators)[1][6]); -//console.log(isIdentifier(')')); -//console.log(tools.maxBy(tools.len, operators.filter(function (x) { return "#".indexOf(x) != 
-1;}))); -//console.log(tokenizeIdent("abc%%3")); +function tokenizeFull(input) { + var matchop = $.opMatch(operators); + var initialPass = tokenizeHelp(input, _.constant(false), true).reverse(); + for (var i = 0; i < initialPass.length; i++) { + if (initialPass.slice(i, i+8).map(_.first).every( + function(x, i) { + return x === defop_pattern[i]; + })) { + rep.OPInfo[initialPass[i+5][1]] = [parseInt(initialPass[i+1][1], 10), + initialPass[i+2][1]]; + } + } + operators = Object.keys(rep.OPInfo); + matchop = $.opMatch(operators); + return tokenizeHelp(input, matchop, true); +} +module.exports = {tokenize : tokenizeFull}; diff --git a/tools.js b/tools.js index 4da833f..5e65e34 100644 --- a/tools.js +++ b/tools.js @@ -1,157 +1,85 @@ -function identity(a) { - return a; -} +var _ = require("underscore"); -function compose(f, g) { - return function(x) { - return f(g(x)); - }; +function empty(xs) { + return _.size(xs) < 1; } function not(x) { return !x; } -function on(g, f) { - return function(x,y) { - return g(f(x), f(y)); - }; -} - -function maxf(f, a, b) { - if (f(a) >= f(b)) - return a; - return b; -} - -function max(a, b) { - if (a > b) - return 1; - else if (a < b) - return -1; - else - return 0; -} - function min(a, b) { - if (a < b) + if (a < b) { return 1; - else if (a > b) - return -1; - else - return 0; -} - -function maxBy(f, xs) { - if (xs.length < 1) - return false; - return xs.reduce(function(maxval, a) { return maxf(f, maxval, a); }); -} - -function sortBy(f, xs) { - return xs.sort(f); -} - -function len(xs) { - return xs.length; -} - -function takeWhile(f, xs) { - var result = []; - for (var i = 0; i < xs.length; i++) { - if (f(xs[i])) - result.push(xs[i]); - else - break; } - return result; -} - -function dropWhile(f, xs) { - for (i = 0; i < xs.length; i++) { - if (!f(xs[i])) - break; + else if (a > b) { + return -1; } - return xs.slice(i); -} - -function span(f, xs) { - return [takeWhile(f, xs), dropWhile(f, xs)]; -} - -function eq(a) { - return 
function(b) { - return a[0] === b[0]; - }; -} - -function equal(a) { - return function(b) { - return a === b; - }; -} - -function groupBy(eq, xs) { - var groups = []; - var spanned; - while (xs.length > 0) { - spanned = span(eq(xs[0]), xs.slice(1)); - groups.push([xs[0]].concat(spanned[0])); - xs = spanned[1]; + else { + return 0; } - return groups; } function groupOps(ops) { - return groupBy(eq, ops.sort()); + return _.groupBy(ops.sort(), _.isEqual); } function find(f, haystack) { for(var i = 0; i < haystack.length; i++) { - if (f(haystack[i])) + if (f(haystack[i])) { return i; + } } return false; } +function dict(pairs) { + var o = {}; + pairs.map(function(p) { + o[p[0]] = p[1]; + }); + return o; +} -/* - * Problem: - * >> > >>^ <- longest one must be matched - * regex? - */ +function extend(xs, ys) { + var result = _.clone(xs); + result.push.apply(result, ys); + return result; +} RegExp.escape= function(s) { return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); }; function operatorMatch(ops) { + ops = _.filter(ops, + function (op) { + return op.length > 0; + }); var rstring = ops.sort(min).reduce( function(acc, x) { + if (!x || x.length < 1) { + return ""; + } return acc + "(" + RegExp.escape(x) + ")|"; }, ""); var reg = new RegExp(rstring); return function(x) { var matched = reg.exec(x); - if (matched[0]) + if ((!(_.isNull(matched))) && matched[0]) { return matched[0]; - else + } + else { return false; + } }; } -/* -var print = console.log; - -var testOps = [">>", ">>&", ">", "aaaaa:", ">="]; -var matcher = operatorMatch(testOps); -print(matcher(">=")); -*/ - -module.exports = {compose : compose, - not : not, - on : on, - maxBy : maxBy, - len : len, - groupOps : groupOps, - opMatch : operatorMatch} +module.exports = { + not : not, + groupOps : groupOps, + opMatch : operatorMatch, + dict: dict, + extend : extend, + empty : empty, +}; diff --git a/typecheck.js b/typecheck.js new file mode 100644 index 0000000..7c198c6 --- /dev/null +++ b/typecheck.js @@ -0,0 +1,19 
@@ +/* + * Typecheck an AST with a given environment + * the environment maps variables to types + * a variable can either be bound or free + * when we say a variable is free that means that it is either + * unbound (which causes an exception to be raised immediately) + * or it is bound in the outer scope + * + * So the AST must first be converted to a form where each function body is tied + * to an environment mapping identifiers to types + */ + +var rep = require("./representation.js"); +var env = require("./environments.js"); + +var TypeOp = rep.TypeOp; +var TypeVar = rep.TypeVar; + +