From 976aa271514cbe8006b56c50fe86099db5ef047c Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sun, 15 Sep 2013 22:05:25 -0400 Subject: [PATCH] worked on toknizer some more --- representation.js | 2 ++ tokenize.js | 22 +++++++------ tools.js | 82 +++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 91 insertions(+), 15 deletions(-) diff --git a/representation.js b/representation.js index d1067a1..8fcb323 100644 --- a/representation.js +++ b/representation.js @@ -161,7 +161,9 @@ OPInfo = {"+" : [3, "Left"], "++" : [3, "Left"], "==" : [2, "Left"], ">" : [2, "Left"], + ">=" : [2, "Left"], "<" : [2, "Left"], + "<=" : [2, "Left"], ":" : [2, "Left"], "$" : [1, "Left"]} diff --git a/tokenize.js b/tokenize.js index a860b12..0bcd5af 100755 --- a/tokenize.js +++ b/tokenize.js @@ -5,8 +5,6 @@ var rep = require("./representation.js"); var tools = require("./tools.js"); var operators = Object.keys(rep.OPInfo); -hasOp = tools.hasOperator(operators); - function isDigit(a) { if (!a) return false; @@ -84,13 +82,17 @@ function tokenizeIdent(tokstream) { n++; } identifier = identifier.join(''); - var op = hasOp(identifier); +/* var op = hasOp(identifier); if (op) { + if (identifier === op) + return [[tools.len(op), ["identifier", op]]]; var splitted = identifier.split(op); + console.log(splitted); var newIdent = splitted[0]; tokstream = splitted[1]+tokstream; return [[n-(tools.len(op)), ["identifier", newIdent]], [tools.len(op), ["identifier", op]]]; } +*/ return [[n, ["identifier", identifier]]]; } @@ -278,10 +280,10 @@ function tokenize(tokstream) { break; } var result = tokenizeIdent(tokstream); - var i = result[0]; - var ident = result[1]; - tokens.push(ident); - tokstream = tokstream.substr(i); + result.map(function(x) { + tokens.push(x[1]); + tokstream = tokstream.substr(x[0]); + }); } } return tokens; @@ -290,8 +292,10 @@ function tokenize(tokstream) { module.exports = {tokenize : tokenize}; //var tokstream = fs.readFileSync("/dev/stdin").toString(); -//console.log(isIdentifier(')')); //console.log(tokenize(tokstream)); +console.log(tools.buildTrie('', operators)[1][6]); +//console.log(isIdentifier(')')); //console.log(tools.maxBy(tools.len, operators.filter(function (x) { return "#".indexOf(x) != -1;}))); -console.log(tokenizeIdent("abc%%3")); +//console.log(tokenizeIdent("abc%%3")); + diff --git a/tools.js b/tools.js index 303006a..1b38181 100644 --- a/tools.js +++ b/tools.js @@ -30,17 +30,87 @@ function len(xs) { return xs.length; } -function hasOperator(operators) { - return function (str) { - if (len(operators) < 1) - return false; - return maxBy(len, operators.filter(function (x) { return str.indexOf(x) != -1; })); +function takeWhile(f, xs) { + var result = []; + for (i = 0; i < xs.length; i++) { + if (f(xs[i])) + result.push(xs[i]); + else + break; + } + return result; +} + +function dropWhile(f, xs) { + for (i = 0; i < xs.length; i++) { + if (!f(xs[i])) + break; + } + return xs.slice(i); +} + +function span(f, xs) { + return [takeWhile(f, xs), dropWhile(f, xs)]; +} + +function eq(a) { + return function(b) { + return a[0] === b[0]; }; } +function groupBy(eq, xs) { + var groups = []; + var spanned; + while (xs.length > 0) { + spanned = span(eq(xs[0]), xs.slice(1)); + groups.push([xs[0]].concat(spanned[0])); + xs = spanned[1]; + } + return groups; +} + +function groupOps(ops) { + return groupBy(eq, ops.sort()); +} + +// See: https://en.wikipedia.org/wiki/Trie +function buildTrie(prefix, operators) { + var grps = groupOps(operators); + return [prefix, grps.map( + function(ops) { + if (ops.length === 1) + return ops; + return buildTrie(ops[0][0], + ops.map( + function(op) { + return op.slice(1); + }).filter(function (x){ return x;})); + })]; +} + +function matchStr(n, str, trie) { + if (trie[0][0].length > 1 && str.length <= 1) + return false; + else if (trie[0].every(function(x){return x[0][0] !== str[0];})) { + return false; + } + else if (trie[0].length === 1) { + if (trie[0][0] !== str.slice(n, trie[0][0].length)) + return false; + return trie[0][0].length + n; + } + else { + return matchStr(n+1, str.slice(1), trie[0][1]) + } +} + +console.log(matchStr(0, "**^*$4545", buildTrie("", ["**", "**^"])[1])); + module.exports = {compose : compose, not : not, on : on, maxBy : maxBy, len : len, - hasOperator : hasOperator} + groupOps : groupOps, + buildTrie : buildTrie}