Browse Source

added string tokenizer

pull/21/head
Wesley Kerfoot 12 years ago
parent
commit
1883c370ab
  1. 89
      tokenize.js

89
tokenize.js

@ -17,18 +17,20 @@ function isWhitespace(a) {
return (code === 9 || code === 32 || code === 10 || code === 13 || code === 11); return (code === 9 || code === 32 || code === 10 || code === 13 || code === 11);
} }
function tokenizeNum() { function tokenizeNum(tokstream) {
var number = []; var number = [];
var code = tokstream[0].charCodeAt(); var code = tokstream[0].charCodeAt();
var isFloat = false; var isFloat = false;
var n = 0;
// + - // + -
if (code === 43 || code === 45) { if (code === 43 || code === 45) {
number.push(tokstream[0]); number.push(tokstream[0]);
tokstream = tokstream.substr(1); tokstream = tokstream.substr(1);
n++;
} }
else if (code === 46) { else if (code === 46) {
tokstream = tokstream.substr(1); tokstream = tokstream.substr(1);
n++;
number.push('0'); number.push('0');
number.push('.'); number.push('.');
isFloat = true; isFloat = true;
@ -37,33 +39,56 @@ function tokenizeNum() {
while (isDigit(tokstream[0]) && tokstream.length !== 0) { while (isDigit(tokstream[0]) && tokstream.length !== 0) {
number.push(tokstream[0]); number.push(tokstream[0]);
tokstream = tokstream.substr(1); tokstream = tokstream.substr(1);
n++;
} }
if (tokstream[0] === '.' && isDigit(tokstream[1])) { if (tokstream[0] === '.' && isDigit(tokstream[1])) {
number.push('.'); number.push('.');
number.push(tokstream[1]); number.push(tokstream[1]);
tokstream = tokstream.substr(2); tokstream = tokstream.substr(2);
n++; n++;
while (isDigit(tokstream[0]) && tokstream.length !== 0) { while (isDigit(tokstream[0]) && tokstream.length !== 0) {
number.push(tokstream[0]); number.push(tokstream[0]);
tokstream = tokstream.substr(1); tokstream = tokstream.substr(1);
n++;
} }
return ["Float", parseFloat(number.join(''), 10)]; return [n, ["Float", parseFloat(number.join(''), 10)]];
} }
if (!isFloat) if (!isFloat)
return ["Integer", parseInt(number.join(''), 10)]; return [n, ["Integer", parseInt(number.join(''), 10)]];
else else
return ["Float", parseFloat(number.join(''), 10)]; return [n, ["Float", parseFloat(number.join(''), 10)]];
} }
/**
 * Reads an identifier from the front of `tokstream`.
 *
 * The identifier is the longest prefix that contains no whitespace
 * character (as decided by `isWhitespace`) and no double quote. It may be
 * empty when the stream starts with one of those characters.
 *
 * @param {string} tokstream - Remaining input, starting at the identifier.
 * @returns {Array} `[n, ["identifier", text]]`, where `n` is the number of
 *     characters consumed and `text` is the identifier itself.
 */
function tokenizeIdent(tokstream) {
  var n = 0;
  // Check the bound first: an identifier may run to the very end of the
  // input, and indexing past the end yields undefined, which has no
  // charCodeAt().
  while (n < tokstream.length &&
         !isWhitespace(tokstream[n]) &&
         tokstream[n] !== '"') {
    n++;
  }
  return [n, ["identifier", tokstream.slice(0, n)]];
}
/**
 * Reads a double-quoted string literal from the front of `tokstream`.
 *
 * The first character is skipped as the opening quote; everything up to the
 * next double quote is the literal's text. No escape sequences are
 * interpreted.
 *
 * If no closing quote exists, an error message is printed and the process
 * exits with status 1.
 *
 * @param {string} tokstream - Remaining input, starting at the opening quote.
 * @returns {Array} `[n, ["stringlit", text]]`, where `n` is the number of
 *     characters consumed (both quotes included) and `text` is the content
 *     between the quotes.
 */
function tokenizeStr(tokstream) {
  // Search from index 1 so the opening quote is not mistaken for the
  // closing one.
  var close = tokstream.indexOf('"', 1);
  if (close === -1) {
    console.log("Error: missing quotation mark");
    process.exit(1);
  }
  return [close + 1, ["stringlit", tokstream.slice(1, close)]];
}
function tokenize(tokstream) {
var tokens = []; var tokens = [];
while (tokstream) { while (tokstream) {
@ -104,36 +129,60 @@ function tokenize() {
tokens.push(["right_square", ']']); tokens.push(["right_square", ']']);
tokstream = tokstream.substr(1); tokstream = tokstream.substr(1);
break; break;
case 34: // '"'
var result = tokenizeStr(tokstream);
var str = result[1];
var i = result[0];
tokens.push(str);
tokstream = tokstream.substr(i);
break;
case 43: // '+' case 43: // '+'
var num = tokenizeNum(); var result = tokenizeNum(tokstream);
if (num !== NaN) var num = result[1];
var i = result[0];
if (num[1] !== NaN)
tokens.push(num); tokens.push(num);
tokstream = tokstream.substr(i);
break; break;
case 45: // '-' case 45: // '-'
var num = tokenizeNum(); var result = tokenizeNum(tokstream);
if (num !== NaN) var num = result[1];
var i = result[0];
if (num[1] !== NaN)
tokens.push(num); tokens.push(num);
tokstream = tokstream.substr(i);
break; break;
case 46: // '.' case 46: // '.'
var num = tokenizeNum(); var result = tokenizeNum(tokstream);
if (num !== NaN) var num = result[1];
var i = result[0];
if (num[1] !== NaN)
tokens.push(num); tokens.push(num);
tokstream = tokstream.substr(i);
break; break;
default: default:
if (isDigit(tokstream[0])) { if (isDigit(tokstream[0])) {
var num = tokenizeNum(); var result = tokenizeNum(tokstream);
if (num !== NaN) var num = result[1];
var i = result[0];
if (num[1] !== NaN)
tokens.push(num); tokens.push(num);
tokstream = tokstream.substr(i);
break; break;
} }
var ident = tokenizeIdent(); var result = tokenizeIdent(tokstream);
var i = result[0];
var ident = result[1];
tokens.push(ident); tokens.push(ident);
tokstream = tokstream.substr(i);
} }
} }
return tokens; return tokens;
} }
//var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "").trim();
var tokstream = fs.readFileSync("/dev/stdin").toString(); var tokstream = fs.readFileSync("/dev/stdin").toString();
console.log(tokenize()); //console.log(tokenize(tokstream));
tokenize(tokstream);

Loading…
Cancel
Save