An experiment in parentheses-free lisp (in JavaScript)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

369 lines
10 KiB

#! /usr/bin/node
var rep = require("./representation.js");
var tools = require("./tools.js");
var error = require("./errors.js");
var operators = Object.keys(rep.OPInfo);
function isDigit(a) {
if (!a)
return false;
var code = a.charCodeAt();
return (46 < code && code < 58 || code < 58 && code > 46);
}
function isWhitespace(a) {
if (!a)
return true;
var code = a.charCodeAt();
return (code === 9 || code === 32 || code === 10 || code === 13 || code === 11);
}
function isIdentifier(a) {
var code = a.charCodeAt();
return code !== 41 && code !== 40 && code && 125 && code && 123 && code !== 93 && code !== 91 && code !== 44;
}
function tokenizeNum(tokstream, charnum, linenum) {
var number = [];
var code = tokstream[0].charCodeAt();
var isFloat = false;
var n = 0;
// + -
// might want to remove this since it probably won't ever get run?
if (code === 43 || code === 45) { // + or -
number.push(tokstream[0]);
tokstream = tokstream.substr(1);
n++;
}
else if (code === 46) { // .
tokstream = tokstream.substr(1);
n++;
charnum++;
number.push('0');
number.push('.');
isFloat = true;
}
while (isDigit(tokstream[0]) && tokstream.length !== 0) {
number.push(tokstream[0]);
tokstream = tokstream.substr(1);
charnum++;
n++;
}
if (tokstream[0] === '.' && isDigit(tokstream[1])) {
number.push('.');
number.push(tokstream[1]);
tokstream = tokstream.substr(2);
charnum++; charnum++;
n++; n++;
while (isDigit(tokstream[0]) && tokstream.length !== 0) {
number.push(tokstream[0]);
tokstream = tokstream.substr(1);
n++;
charnum++;
}
return [n, ["float", parseFloat(number.join(''), 10), charnum, linenum]];
}
if (!isFloat)
return [n, ["integer", parseInt(number.join(''), 10), charnum, linenum]];
else
return [n, ["float", parseFloat(number.join(''), 10), charnum, linenum]];
}
/* Split up the tokenized identifier if an operator appears in it
* prefer longer identifiers that start with the same character(s) as shorter ones
* e.g. ++ over +
* Everything after the operator goes back on to the token stream
*/
function tokenizeIdent(tokstream, matchop, charnum, linenum) {
var identifier = [];
var n = 0;
while ((!isWhitespace(tokstream[0])) && isIdentifier(tokstream[0]) && !matchop(tokstream)) {
identifier.push(tokstream[0]);
tokstream = tokstream.substr(1);
n++;
charnum++;
}
identifier = identifier.join('');
return [[n, ["identifier", identifier, charnum, linenum]]];
}
function tokenizeStr(tokstream, charnum, linenum) {
var stringlit = [];
var n = 1;
tokstream = tokstream.substr(1);
while (tokstream[0].charCodeAt() !== 34) {
stringlit.push(tokstream[0]);
tokstream = tokstream.substr(1);
n++;
charnum++;
if (tokstream.length < 1) {
throw error.JSyntaxError(linenum, charnum, "Error: missing quotation mark");
}
}
n++;
return [n, ["stringlit", stringlit.join(''), charnum, linenum]];
}
function tokenizeT(tokstream, charnum, linenum) {
if (tokstream.length < 4)
return false;
var next4 = tokstream.substr(0,4);
if (next4 === "then")
return ["thenexp", "then"];
else if (next4 === "true")
return ["truelit", "true"];
return false;
}
function peek(tokstream, toktype, word, charnum, linenum) {
var n = word.length;
if (tokstream.length < n)
return false;
var nextN = tokstream.substr(0, n);
if (nextN == word) {
return [toktype, word];
}
return false;
}
function tokenize(tokstream, matchop) {
var tokens = [];
var charnum = 1;
var linenum = 1;
var i, result, lambda, num;
while (tokstream) {
switch (tokstream[0].charCodeAt()) {
/* falls through */
case 9: // '\t'
charnum++;
tokens.push(["whitespace", '\t', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 32: // ' '
charnum++;
tokens.push(["whitespace", ' ', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 10: // '\n'
linenum++;
charnum = 1;
tokens.push(["whitespace", '\n', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 44: // ','
charnum++;
tokens.push(["comma", ",", charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 40: // '('
charnum++;
tokens.push(["left_paren", '(', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 41: // ')'
charnum++;
tokens.push(["right_paren", ')', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 123: // '{'
charnum++;
tokens.push(["left_brace", '{', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 125: // '}'
charnum++;
tokens.push(["right_brace", '}', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 91: // '['
charnum++;
tokens.push(["left_square", '[', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 93: // ']'
charnum++;
tokens.push(["right_square", ']', charnum, linenum]);
tokstream = tokstream.substr(1);
break;
/* falls through */
case 34: // '"'
result = tokenizeStr(tokstream, charnum, linenum);
var str = result[1];
i = result[0];
tokens.push(str);
tokstream = tokstream.substr(i);
break;
/* falls through */
case 45: // '-'
lambda = peek(tokstream, "arrow", "->");
if (lambda) {
tokens.push(lambda);
tokstream = tokstream.substr(2);
break;
}
else {
tokens.push(["identifier", "-", charnum, linenum]);
charnum++;
tokstream = tokstream.substr(1);
break;
}
/* falls through */
case 46: // '.'
if (isDigit(tokstream[1])) {
result = tokenizeNum(tokstream, charnum, linenum);
num = result[1];
i = result[0];
if (!isNaN(num[1])) {
tokens.push(num);
}
tokstream = tokstream.substr(i);
break;
}
/* falls through */
case 116: // 't'
result = tokenizeT(tokstream);
if (result) {
tokens.push(result);
tokstream = tokstream.substr(4); // 4 = length of either token
break;
}
/* falls through */
case 105: // 'i'
var ifexp = peek(tokstream, "ifexp", "if");
if (ifexp) {
tokens.push(ifexp);
tokstream = tokstream.substr(2);
break;
}
var inkeyword = peek(tokstream, "in", "in ");
if (inkeyword) {
tokens.push(inkeyword);
tokstream = tokstream.substr(3);
break;
}
/* falls through */
case 100: // 'd'
var defop = peek(tokstream, "defop", "defop");
if (defop) {
tokens.push(["defop", "defop", charnum, linenum]);
tokstream = tokstream.substr(5);
break;
}
var def = peek(tokstream, "def", "def");
if (def) {
tokens.push(["def", "def", charnum, linenum]);
tokstream = tokstream.substr(3);
break;
}
/* falls through */
case 101: // e
result = peek(tokstream, "elsexp", "else");
if (result) {
tokens.push(result);
tokstream = tokstream.substr(4);
break;
}
/* falls through */
case 102: // f
result = peek(tokstream, "falselit", "false");
if (result) {
tokens.push(result);
tokstream = tokstream.substr(5);
break;
}
/* falls through */
case 108: // l
lambda = peek(tokstream, "lambda", "lambda");
if (lambda) {
tokens.push(lambda);
tokstream = tokstream.substr(6);
break;
}
var letexp = peek(tokstream, "let", "let");
if (letexp) {
tokens.push(letexp);
tokstream = tokstream.substr(3);
break;
}
/* falls through */
default:
if (isDigit(tokstream[0])) {
result = tokenizeNum(tokstream, charnum, linenum);
num = result[1];
i = result[0];
if (!isNaN(num[1])) {
tokens.push(num);
}
tokstream = tokstream.substr(i);
break;
}
var op = matchop(tokstream);
if (op) {
var l = op.length;
charnum = charnum + l;
tokstream = tokstream.substr(l);
tokens.push(["identifier", op, charnum, linenum]);
}
else {
result = tokenizeIdent(tokstream, matchop, charnum, linenum);
for(var index = 0; index < result.length; index++) {
charnum++;
tokens.push(result[index][1]);
tokstream = tokstream.substr(result[index][0]);
}
}
}
}
return tokens;
}
function tokenizeHelp(input, matchop) {
try {
return tokenize(input, matchop).reverse().filter(function(x) {
return x[0] !== "whitespace";
});
} catch (e) {
console.log(e.stxerror());
process.exit(1);
}
}
function tokenizeFull(input) {
var matchop = tools.opMatch(operators);
var initialPass = tokenizeHelp(input, matchop).reverse();
for (var i = 0; i < initialPass.length; i++) {
if (initialPass.slice(i, i+7).map(tools.fst) ===
["defop", "integer", "identifier", "Left", "Right",
"left_paren", "identifier", "identifier", "identifier",
"right_paren"])
rep.OPInfo[initialPass[i+5][1]] = [parseInt(initialPass[i+1][1], 10),
initialPass[i+2][1]];
}
operators = Object.keys(rep.OPInfo);
matchop = tools.opMatch(operators);
return tokenizeHelp(input, matchop);
}
module.exports = {tokenize : tokenizeFull};