Browse Source

did work on tokenizer, removed a bit of redundant code

pull/21/head
Wesley Kerfoot 12 years ago
parent
commit
86c2c5f76a
  1. 10
      parse.js
  2. 154
      tokenize.js

10
parse.js

@ -2,6 +2,7 @@
var typ = require("./representation.js"); var typ = require("./representation.js");
var tool = require("./tools.js"); var tool = require("./tools.js");
var fs = require("fs");
// Tokenization // Tokenization
@ -408,10 +409,13 @@ function pprint(expr) {
return pprintFunc(expr); return pprintFunc(expr);
} }
var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "");
var input = fs.readFileSync('/dev/stdin').toString();
//var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "");
var tokenized = tokenize(input).reverse(); var tokenized = tokenize(input).reverse();
//parse(tokenized); console.log(tokenized);
//console.log(parse(tokenized)) //console.log(parse(tokenized))
console.log(pprint(parse(tokenized))); //console.log(pprint(parse(tokenized)));
//console.log(tokenized); //console.log(tokenized);

154
tokenize.js

@ -1,115 +1,139 @@
#! /usr/bin/node #! /usr/bin/node
// Tokenization, with no regular expressions, ala Rob Pike :) var fs = require("fs");
function isDigit(a) { function isDigit(a) {
if (!a) if (!a)
return false; return false;
var code = a.charCodeAt(); var code = a.charCodeAt();
if (46 < code && code < 58 || code < 58 && code > 46) return (46 < code && code < 58 || code < 58 && code > 46);
return true;
return false;
} }
var TokenStream = { function isWhitespace(a) {
lookahead : if (!a)
function(n) { return true;
return this.tokstream[this.tokstream.length-n];
},
next :
function() {
return this.lookahead(2);
},
empty :
function() {
return this.tokstream.length === 0;
},
current :
function() {
return this.tokstream[this.tokstream.length-1];
},
pop :
function() {
this.tokstream.pop();
}
}
function MakeTokStream(tokens) { var code = a.charCodeAt();
this.tokstream = tokens; return (code === 9 || code === 32 || code === 10 || code === 13 || code === 11);
} }
MakeTokStream.prototype = TokenStream;
function tokenizeNum(tokstream) { function tokenizeNum() {
var number = []; var number = [];
tokstream.pop(); var code = tokstream[0].charCodeAt();
while (isDigit(tokstream.current()) && !tokstream.empty()) { var isFloat = false;
number.push(tokstream.current()); // + -
tokstream.pop();
if (code === 43 || code === 45) {
number.push(tokstream[0]);
tokstream = tokstream.substr(1);
}
else if (code === 46) {
tokstream = tokstream.substr(1);
number.push('0');
number.push('.');
isFloat = true;
}
while (isDigit(tokstream[0]) && tokstream.length !== 0) {
number.push(tokstream[0]);
tokstream = tokstream.substr(1);
} }
if (tokstream.current() === '.' && isDigit(tokstream.next())) { if (tokstream[0] === '.' && isDigit(tokstream[1])) {
number.push('.'); number.push('.');
number.push(tokstream.next()); number.push(tokstream[1]);
tokstream.pop(); tokstream = tokstream.substr(2);
tokstream.pop(); while (isDigit(tokstream[0]) && tokstream.length !== 0) {
while (isDigit(tokstream.current()) && !tokstream.empty()) { number.push(tokstream[0]);
number.push(tokstream.current()); tokstream = tokstream.substr(1);
tokstream.pop();
} }
return ["Float", parseFloat(number.join(''), 10)]; return ["Float", parseFloat(number.join(''), 10)];
} }
return ["Integer", parseInt(number.join(''), 10)]; if (!isFloat)
return ["Integer", parseInt(number.join(''), 10)];
else
return ["Float", parseFloat(number.join(''), 10)];
}
function tokenizeIdent() {
var identifier = [];
while (!isWhitespace(tokstream[0])) {
identifier.push(tokstream[0]);
tokstream = tokstream.substr(1);
}
return ["identifier", identifier.join('')];
} }
function tokenize(tokstream) { function tokenize() {
var tokens = []; var tokens = [];
while (!tokstream.empty()) { while (tokstream) {
switch (tokstream.current()) { switch (tokstream[0].charCodeAt()) {
case '(': case 9: // '\t'
tokens.push(["whitespace", '\t']);
tokstream = tokstream.substr(1);
break;
case 32: // ' '
tokens.push(["whitespace", ' ']);
tokstream = tokstream.substr(1);
break;
case 10: // '\n'
tokens.push(["whitespace", '\n']);
tokstream = tokstream.substr(1);
break;
case 40: // '('
tokens.push(["left_paren", '(']); tokens.push(["left_paren", '(']);
tokstream = tokstream.substr(1);
break; break;
case ')': case 41: // ')'
tokens.push(["right_paren", ')']); tokens.push(["right_paren", ')']);
tokstream = tokstream.substr(1);
break; break;
case '{': case 123: // '{'
tokens.push(["left_brace", '{']); tokens.push(["left_brace", '{']);
tokstream = tokstream.substr(1);
break; break;
case '}': case 125: // '}'
tokens.push(["right_brace", '}']); tokens.push(["right_brace", '}']);
tokstream = tokstream.substr(1);
break; break;
case '[': case 91: // '['
tokens.push(["left_square", '[']); tokens.push(["left_square", '[']);
tokstream = tokstream.substr(1);
break; break;
case ']': case 93: // ']'
tokens.push(["right_square", ']']); tokens.push(["right_square", ']']);
tokstream = tokstream.substr(1);
break; break;
case '+': case 43: // '+'
var num = tokenizeNum(tokstream); var num = tokenizeNum();
if (num !== NaN) if (num !== NaN)
tokens.push(num); tokens.push(num);
break; break;
case '-': case 45: // '-'
var num = tokenizeNum(tokstream); var num = tokenizeNum();
if (num !== NaN) if (num !== NaN)
tokens.push(num); tokens.push(num);
break; break;
case '.': case 46: // '.'
var num = tokenizeNum(tokstream); var num = tokenizeNum();
if (num !== NaN) if (num !== NaN)
tokens.push(num); tokens.push(num);
break; break;
default: default:
tokens.push(["identifier", tokstream.current()]); if (isDigit(tokstream[0])) {
tokstream.pop(); var num = tokenizeNum();
if (num !== NaN)
tokens.push(num);
break;
}
var ident = tokenizeIdent();
tokens.push(ident);
} }
} }
return tokens; return tokens;
} }
var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "").trim().split('').reverse(); //var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "").trim();
var tokstream = fs.readFileSync("/dev/stdin").toString();
var test = new MakeTokStream(input);
console.log(tokenize(test));
//console.log(isDigit('0'));
console.log(tokenize());

Loading…
Cancel
Save