From 86c2c5f76a07279fc7eeef6f56bd285102fc98dc Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Thu, 15 Aug 2013 22:24:46 -0400 Subject: [PATCH] did work on tokenizer, removed a bit of redundant code --- parse.js | 10 +++- tokenize.js | 154 ++++++++++++++++++++++++++++++---------------------- 2 files changed, 96 insertions(+), 68 deletions(-) diff --git a/parse.js b/parse.js index 39aa335..e97432f 100755 --- a/parse.js +++ b/parse.js @@ -2,6 +2,7 @@ var typ = require("./representation.js"); var tool = require("./tools.js"); +var fs = require("fs"); // Tokenization @@ -408,10 +409,13 @@ function pprint(expr) { return pprintFunc(expr); } -var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, ""); + + +var input = fs.readFileSync('/dev/stdin').toString(); +//var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, ""); var tokenized = tokenize(input).reverse(); -//parse(tokenized); +console.log(tokenized); //console.log(parse(tokenized)) -console.log(pprint(parse(tokenized))); +//console.log(pprint(parse(tokenized))); //console.log(tokenized); diff --git a/tokenize.js b/tokenize.js index fcf87e3..a15951c 100755 --- a/tokenize.js +++ b/tokenize.js @@ -1,115 +1,139 @@ #! 
/usr/bin/node -// Tokenization, with no regular expressions, ala Rob Pike :) +var fs = require("fs"); function isDigit(a) { if (!a) return false; var code = a.charCodeAt(); - if (46 < code && code < 58 || code < 58 && code > 46) - return true; - return false; + return (46 < code && code < 58 || code < 58 && code > 46); } -var TokenStream = { - lookahead : - function(n) { - return this.tokstream[this.tokstream.length-n]; - }, - next : - function() { - return this.lookahead(2); - }, - empty : - function() { - return this.tokstream.length === 0; - }, - current : - function() { - return this.tokstream[this.tokstream.length-1]; - }, - pop : - function() { - this.tokstream.pop(); - } -} +function isWhitespace(a) { + if (!a) + return true; -function MakeTokStream(tokens) { - this.tokstream = tokens; + var code = a.charCodeAt(); + return (code === 9 || code === 32 || code === 10 || code === 13 || code === 11); } -MakeTokStream.prototype = TokenStream; -function tokenizeNum(tokstream) { +function tokenizeNum() { var number = []; - tokstream.pop(); - while (isDigit(tokstream.current()) && !tokstream.empty()) { - number.push(tokstream.current()); - tokstream.pop(); + var code = tokstream[0].charCodeAt(); + var isFloat = false; + // + - + + if (code === 43 || code === 45) { + number.push(tokstream[0]); + tokstream = tokstream.substr(1); + } + else if (code === 46) { + tokstream = tokstream.substr(1); + number.push('0'); + number.push('.'); + isFloat = true; + } + + while (isDigit(tokstream[0]) && tokstream.length !== 0) { + number.push(tokstream[0]); + tokstream = tokstream.substr(1); } - if (tokstream.current() === '.' && isDigit(tokstream.next())) { + if (tokstream[0] === '.' 
&& isDigit(tokstream[1])) { number.push('.'); - number.push(tokstream.next()); - tokstream.pop(); - tokstream.pop(); - while (isDigit(tokstream.current()) && !tokstream.empty()) { - number.push(tokstream.current()); - tokstream.pop(); + number.push(tokstream[1]); + tokstream = tokstream.substr(2); + while (isDigit(tokstream[0]) && tokstream.length !== 0) { + number.push(tokstream[0]); + tokstream = tokstream.substr(1); } return ["Float", parseFloat(number.join(''), 10)]; } - return ["Integer", parseInt(number.join(''), 10)]; + if (!isFloat) + return ["Integer", parseInt(number.join(''), 10)]; + else + return ["Float", parseFloat(number.join(''), 10)]; +} + +function tokenizeIdent() { + var identifier = []; + while (!isWhitespace(tokstream[0])) { + identifier.push(tokstream[0]); + tokstream = tokstream.substr(1); + } + return ["identifier", identifier.join('')]; } -function tokenize(tokstream) { +function tokenize() { var tokens = []; - while (!tokstream.empty()) { - switch (tokstream.current()) { - case '(': + while (tokstream) { + switch (tokstream[0].charCodeAt()) { + case 9: // '\t' + tokens.push(["whitespace", '\t']); + tokstream = tokstream.substr(1); + break; + case 32: // ' ' + tokens.push(["whitespace", ' ']); + tokstream = tokstream.substr(1); + break; + case 10: // '\n' + tokens.push(["whitespace", '\n']); + tokstream = tokstream.substr(1); + break; + case 40: // '(' tokens.push(["left_paren", '(']); + tokstream = tokstream.substr(1); break; - case ')': + case 41: // ')' tokens.push(["right_paren", ')']); + tokstream = tokstream.substr(1); break; - case '{': + case 123: // '{' tokens.push(["left_brace", '{']); + tokstream = tokstream.substr(1); break; - case '}': + case 125: // '}' tokens.push(["right_brace", '}']); + tokstream = tokstream.substr(1); break; - case '[': + case 91: // '[' tokens.push(["left_square", '[']); + tokstream = tokstream.substr(1); break; - case ']': + case 93: // ']' tokens.push(["right_square", ']']); + tokstream = 
tokstream.substr(1); break; - case '+': - var num = tokenizeNum(tokstream); + case 43: // '+' + var num = tokenizeNum(); if (num !== NaN) tokens.push(num); break; - case '-': - var num = tokenizeNum(tokstream); + case 45: // '-' + var num = tokenizeNum(); if (num !== NaN) tokens.push(num); break; - case '.': - var num = tokenizeNum(tokstream); + case 46: // '.' + var num = tokenizeNum(); if (num !== NaN) tokens.push(num); break; default: - tokens.push(["identifier", tokstream.current()]); - tokstream.pop(); + if (isDigit(tokstream[0])) { + var num = tokenizeNum(); + if (num !== NaN) + tokens.push(num); + break; + } + var ident = tokenizeIdent(); + tokens.push(ident); } } return tokens; } -var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "").trim().split('').reverse(); - -var test = new MakeTokStream(input); - -console.log(tokenize(test)); -//console.log(isDigit('0')); +//var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "").trim(); +var tokstream = fs.readFileSync("/dev/stdin").toString(); +console.log(tokenize());