From 1883c370ab085d92ed3b2ef8e3b484fc3ce43328 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 17 Aug 2013 02:20:48 -0400 Subject: [PATCH] added string tokenizer --- tokenize.js | 91 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 21 deletions(-) diff --git a/tokenize.js b/tokenize.js index a15951c..8306c26 100755 --- a/tokenize.js +++ b/tokenize.js @@ -17,18 +17,20 @@ function isWhitespace(a) { return (code === 9 || code === 32 || code === 10 || code === 13 || code === 11); } -function tokenizeNum() { +function tokenizeNum(tokstream) { var number = []; var code = tokstream[0].charCodeAt(); var isFloat = false; + var n = 0; // + - - if (code === 43 || code === 45) { number.push(tokstream[0]); tokstream = tokstream.substr(1); + n++; } else if (code === 46) { tokstream = tokstream.substr(1); + n++; number.push('0'); number.push('.'); isFloat = true; @@ -37,33 +39,56 @@ function tokenizeNum() { while (isDigit(tokstream[0]) && tokstream.length !== 0) { number.push(tokstream[0]); tokstream = tokstream.substr(1); + n++; } if (tokstream[0] === '.' && isDigit(tokstream[1])) { number.push('.'); number.push(tokstream[1]); tokstream = tokstream.substr(2); + n++; n++; while (isDigit(tokstream[0]) && tokstream.length !== 0) { number.push(tokstream[0]); tokstream = tokstream.substr(1); + n++; } - return ["Float", parseFloat(number.join(''), 10)]; + return [n, ["Float", parseFloat(number.join(''), 10)]]; } if (!isFloat) - return ["Integer", parseInt(number.join(''), 10)]; + return [n, ["Integer", parseInt(number.join(''), 10)]]; else - return ["Float", parseFloat(number.join(''), 10)]; + return [n, ["Float", parseFloat(number.join(''), 10)]]; } -function tokenizeIdent() { +function tokenizeIdent(tokstream) { var identifier = []; - while (!isWhitespace(tokstream[0])) { + var n = 0; + while (!(isWhitespace(tokstream[0]) || tokstream[0].charCodeAt() === 34)) { identifier.push(tokstream[0]); tokstream = tokstream.substr(1); + n++; + } + return [n, ["identifier", identifier.join('')]]; +} + +function tokenizeStr(tokstream) { + var stringlit = []; + var n = 1; + tokstream = tokstream.substr(1); + while (tokstream[0].charCodeAt() !== 34) { + stringlit.push(tokstream[0]); + tokstream = tokstream.substr(1); + n++; + if (tokstream.length < 1) { + console.log("Error: missing quotation mark"); + process.exit(code=1); + } } - return ["identifier", identifier.join('')]; + n++; + return [n, ["stringlit", stringlit.join('')]]; + } -function tokenize() { +function tokenize(tokstream) { var tokens = []; while (tokstream) { @@ -104,36 +129,60 @@ function tokenize() { tokens.push(["right_square", ']']); tokstream = tokstream.substr(1); break; + case 34: // '"' + var result = tokenizeStr(tokstream); + var str = result[1]; + var i = result[0]; + tokens.push(str); + tokstream = tokstream.substr(i); + break; + case 43: // '+' - var num = tokenizeNum(); - if (num !== NaN) + var result = tokenizeNum(tokstream); + var num = result[1]; + var i = result[0]; + if (num[1] !== NaN) tokens.push(num); + tokstream = tokstream.substr(i); break; case 45: // '-' - var num = tokenizeNum(); - if (num !== NaN) + var result = tokenizeNum(tokstream); + var num = result[1]; + var i = result[0]; + if (num[1] !== NaN) tokens.push(num); + tokstream = tokstream.substr(i); break; case 46: // '.' - var num = tokenizeNum(); - if (num !== NaN) + var result = tokenizeNum(tokstream); + var num = result[1]; + var i = result[0]; + if (num[1] !== NaN) tokens.push(num); + tokstream = tokstream.substr(i); break; default: if (isDigit(tokstream[0])) { - var num = tokenizeNum(); - if (num !== NaN) + var result = tokenizeNum(tokstream); + var num = result[1]; + var i = result[0]; + if (num[1] !== NaN) tokens.push(num); - break; + tokstream = tokstream.substr(i); + break; } - var ident = tokenizeIdent(); + var result = tokenizeIdent(tokstream); + var i = result[0]; + var ident = result[1]; tokens.push(ident); + tokstream = tokstream.substr(i); } } return tokens; } -//var input = process.argv.slice(2).reduce(function(acc, x) {return acc + " " + x}, "").trim(); var tokstream = fs.readFileSync("/dev/stdin").toString(); -console.log(tokenize()); +//console.log(tokenize(tokstream)); +tokenize(tokstream); +