#! /usr/bin/node

// Tokenization, with no regular expressions, ala Rob Pike :)
var fs = require("fs");

// Report whether `a` begins with an ASCII decimal digit ('0'..'9').
//
// a: a string (normally one character) or a missing value. Falsy input
//    (undefined, "") is accepted so callers can probe past the end of the
//    input without checking bounds first.
// Returns true only when the first code unit of `a` is in '0'..'9'.
function isDigit(a) {
    if (!a)
        return false;

    var code = a.charCodeAt(0);

    // '0' is 48 and '9' is 57. '/' (47) sits directly below '0' and must be
    // rejected, so the lower bound is inclusive at 48.
    return code >= 48 && code <= 57;
}

// Report whether `a` is a whitespace character or is missing altogether.
//
// a: a string (normally one character) or a missing value.
// Returns true for tab (9), line feed (10), vertical tab (11), carriage
// return (13) and space (32). A falsy `a` (undefined, "") also counts as
// whitespace, so a loop that scans "until whitespace" stops at end of input.
function isWhitespace(a) {
    if (!a)
        return true;

    var code = a.charCodeAt(0);

    return code === 9 || code === 10 || code === 11 || code === 13 || code === 32;
}

// Consume a numeric literal from the front of the shared `tokstream` string
// and return it as a token.
//
// Accepted shape: an optional '+' or '-', a run of digits, and an optional
// fraction ('.' followed by at least one digit). A '.' in first position is
// always consumed, so a caller that dispatched on '.' is guaranteed progress.
//
// Returns ["Integer", n] or ["Float", x]. When no digits were found (a lone
// sign, a lone '.', or input that does not start a number) the value is NaN.
// Side effect: the consumed characters are removed from `tokstream`.
function tokenizeNum() {
    var text = tokstream;
    var end = 0;
    var isFloat = false;

    if (text[end] === '+' || text[end] === '-')
        end++;

    while (isDigit(text[end]))
        end++;

    // Only one fractional part belongs to a literal: in "1.2.3" the second
    // '.' starts the next token. A trailing '.' with no digit after it
    // ("5.") is left in the stream.
    if (text[end] === '.' && (end === 0 || isDigit(text[end + 1]))) {
        isFloat = true;
        end++;
        while (isDigit(text[end]))
            end++;
    }

    // Scan by index and cut once; slicing one character per step copies the
    // remaining input every time.
    var lexeme = text.slice(0, end);
    tokstream = text.slice(end);

    if (isFloat)
        return ["Float", parseFloat(lexeme)];

    return ["Integer", parseInt(lexeme, 10)];
}

// Consume an identifier from the front of the shared `tokstream` string.
//
// The identifier runs up to (not including) the next whitespace character,
// the next bracket character, or the end of input. Brackets are tokens of
// their own, so "foo)" must yield "foo" and leave ")" in the stream.
//
// Returns ["identifier", text].
// Side effect: the consumed characters are removed from `tokstream`.
function tokenizeIdent() {
    var DELIMITERS = "(){}[]";
    var end = 0;

    // isWhitespace() is also true for a missing character, which ends the
    // scan at end of input.
    while (!isWhitespace(tokstream[end]) && DELIMITERS.indexOf(tokstream[end]) === -1)
        end++;

    var text = tokstream.slice(0, end);
    tokstream = tokstream.slice(end);

    return ["identifier", text];
}

// Split the shared `tokstream` string into a list of [type, value] tokens,
// consuming it completely.
//
// Token types produced here: "whitespace" (one token per character),
// "left_paren", "right_paren", "left_brace", "right_brace", "left_square",
// "right_square"; numeric tokens come from tokenizeNum() and everything else
// from tokenizeIdent().
//
// Returns the array of tokens in input order.
// Side effect: `tokstream` is empty afterwards.
function tokenize() {
    var PUNCTUATION = {
        '(': "left_paren",
        ')': "right_paren",
        '{': "left_brace",
        '}': "right_brace",
        '[': "left_square",
        ']': "right_square"
    };

    // True when `text` begins with something tokenizeNum() can turn into a
    // real number: an optional sign, an optional '.', then a digit. A bare
    // "+", "-" or "." is therefore not a number and becomes an identifier.
    function startsNumber(text) {
        var i = 0;

        if (text[i] === '+' || text[i] === '-')
            i++;
        if (text[i] === '.')
            i++;

        return isDigit(text[i]);
    }

    var tokens = [];

    while (tokstream) {
        var ch = tokstream[0];

        if (isWhitespace(ch)) {
            // Every character isWhitespace() accepts is consumed here.
            // Otherwise tokenizeIdent() would return an empty identifier
            // without advancing and this loop would never end.
            tokens.push(["whitespace", ch]);
            tokstream = tokstream.slice(1);
        }
        else if (PUNCTUATION.hasOwnProperty(ch)) {
            tokens.push([PUNCTUATION[ch], ch]);
            tokstream = tokstream.slice(1);
        }
        else if (startsNumber(tokstream)) {
            tokens.push(tokenizeNum());
        }
        else {
            tokens.push(tokenizeIdent());
        }
    }

    return tokens;
}

// Driver: read the whole of standard input into `tokstream`, the string the
// tokenizer functions consume, then print the resulting token list.
var tokstream = fs.readFileSync("/dev/stdin").toString();

console.log(tokenize());