Browse Source

finally fixed the tokenizer

pull/21/head
Wesley Kerfoot 12 years ago
parent
commit
f57bbcce4e
  1. 4
      representation.js
  2. 30
      tokenize.js
  3. 126
      tools.js

4
representation.js

@ -165,7 +165,9 @@ OPInfo = {"+" : [3, "Left"],
"<" : [2, "Left"], "<" : [2, "Left"],
"<=" : [2, "Left"], "<=" : [2, "Left"],
":" : [2, "Left"], ":" : [2, "Left"],
"$" : [1, "Left"]} "$" : [1, "Left"],
">>" : [1, "Left"],
">>=" : [1, "Left"]}
module.exports = module.exports =
{ IntT : IntT, { IntT : IntT,

30
tokenize.js

@ -5,6 +5,8 @@ var rep = require("./representation.js");
var tools = require("./tools.js"); var tools = require("./tools.js");
var operators = Object.keys(rep.OPInfo); var operators = Object.keys(rep.OPInfo);
var matchop = tools.opMatch(operators);
function isDigit(a) { function isDigit(a) {
if (!a) if (!a)
return false; return false;
@ -76,23 +78,13 @@ function tokenizeNum(tokstream) {
function tokenizeIdent(tokstream) { function tokenizeIdent(tokstream) {
var identifier = []; var identifier = [];
var n = 0; var n = 0;
while ((!isWhitespace(tokstream[0])) && isIdentifier(tokstream[0])) { while ((!isWhitespace(tokstream[0])) && isIdentifier(tokstream[0]) && !matchop(tokstream)) {
identifier.push(tokstream[0]); identifier.push(tokstream[0]);
tokstream = tokstream.substr(1); tokstream = tokstream.substr(1);
n++; n++;
} }
identifier = identifier.join(''); identifier = identifier.join('');
/* var op = hasOp(identifier);
if (op) {
if (identifier === op)
return [[tools.len(op), ["identifier", op]]];
var splitted = identifier.split(op);
console.log(splitted);
var newIdent = splitted[0];
tokstream = splitted[1]+tokstream;
return [[n-(tools.len(op)), ["identifier", newIdent]], [tools.len(op), ["identifier", op]]];
}
*/
return [[n, ["identifier", identifier]]]; return [[n, ["identifier", identifier]]];
} }
@ -279,6 +271,13 @@ function tokenize(tokstream) {
tokstream = tokstream.substr(i); tokstream = tokstream.substr(i);
break; break;
} }
var op = matchop(tokstream);
if (op) {
var l = op.length;
tokstream = tokstream.substr(l);
tokens.push(["identifier", op]);
}
else {
var result = tokenizeIdent(tokstream); var result = tokenizeIdent(tokstream);
result.map(function(x) { result.map(function(x) {
tokens.push(x[1]); tokens.push(x[1]);
@ -286,14 +285,15 @@ function tokenize(tokstream) {
}); });
} }
} }
}
return tokens; return tokens;
} }
module.exports = {tokenize : tokenize}; module.exports = {tokenize : tokenize};
//var tokstream = fs.readFileSync("/dev/stdin").toString(); var tokstream = fs.readFileSync("/dev/stdin").toString();
//console.log(tokenize(tokstream)); console.log(tokenize(tokstream));
console.log(tools.buildTrie('', operators)[1][6]); //console.log(tools.buildTrie('', operators)[1][6]);
//console.log(isIdentifier(')')); //console.log(isIdentifier(')'));
//console.log(tools.maxBy(tools.len, operators.filter(function (x) { return "#".indexOf(x) != -1;}))); //console.log(tools.maxBy(tools.len, operators.filter(function (x) { return "#".indexOf(x) != -1;})));
//console.log(tokenizeIdent("abc%%3")); //console.log(tokenizeIdent("abc%%3"));

126
tools.js

@ -1,3 +1,7 @@
/**
 * Identity function: hands its argument straight back, unchanged.
 *
 * @param {*} a - Any value.
 * @returns {*} The very same value that was passed in.
 */
function identity(a) {
  var unchanged = a;
  return unchanged;
}
function compose(f, g) { function compose(f, g) {
return function(x) { return function(x) {
return f(g(x)); return f(g(x));
@ -14,16 +18,38 @@ function on(g, f) {
}; };
} }
function max(f, a, b) { function maxf(f, a, b) {
if (f(a) >= f(b)) if (f(a) >= f(b))
return a; return a;
return b; return b;
} }
/**
 * Three-way comparison of two values using the built-in `>` / `<` operators.
 *
 * Despite the name, this does not return the larger value; it returns a
 * sign describing how `a` relates to `b`.
 *
 * @param {*} a - Left-hand value.
 * @param {*} b - Right-hand value.
 * @returns {number} 1 when a > b, -1 when a < b, and 0 when neither holds
 *   (equal values, or incomparable ones such as NaN).
 */
function max(a, b) {
  return a > b ? 1 : (a < b ? -1 : 0);
}
/**
 * Reversed three-way comparison of two values using `<` / `>`.
 *
 * Despite the name, this does not return the smaller value; it returns a
 * sign with the opposite orientation of an ascending comparator.
 *
 * @param {*} a - Left-hand value.
 * @param {*} b - Right-hand value.
 * @returns {number} 1 when a < b, -1 when a > b, and 0 when neither holds
 *   (equal values, or incomparable ones such as NaN).
 */
function min(a, b) {
  return a < b ? 1 : (a > b ? -1 : 0);
}
function maxBy(f, xs) { function maxBy(f, xs) {
if (xs.length < 1) if (xs.length < 1)
return false; return false;
return xs.reduce(function(maxval, a) { return max(f, maxval, a); }); return xs.reduce(function(maxval, a) { return maxf(f, maxval, a); });
}
function sortBy(f, xs) {
return xs.sort(f);
} }
function len(xs) { function len(xs) {
@ -59,6 +85,12 @@ function eq(a) {
}; };
} }
/**
 * Curried strict equality: fixes one operand and returns a predicate that
 * tests other values against it with `===`.
 *
 * @param {*} a - The value to compare against.
 * @returns {function(*): boolean} Predicate that is true only for values
 *   strictly equal to `a` (no type coercion; NaN never equals NaN).
 */
function equal(a) {
  return function (b) {
    return b === a;
  };
}
function groupBy(eq, xs) { function groupBy(eq, xs) {
var groups = []; var groups = [];
var spanned; var spanned;
@ -75,7 +107,7 @@ function groupOps(ops) {
} }
// See: https://en.wikipedia.org/wiki/Trie // See: https://en.wikipedia.org/wiki/Trie
function buildTrie(prefix, operators) { /*function buildTrie(prefix, operators) {
var grps = groupOps(operators); var grps = groupOps(operators);
return [prefix, grps.map( return [prefix, grps.map(
function(ops) { function(ops) {
@ -87,7 +119,7 @@ function buildTrie(prefix, operators) {
return op.slice(1); return op.slice(1);
}).filter(function (x){ return x;})); }).filter(function (x){ return x;}));
})]; })];
} }*/
function find(f, haystack) { function find(f, haystack) {
for(var i = 0; i < haystack.length; i++) { for(var i = 0; i < haystack.length; i++) {
@ -97,26 +129,98 @@ function find(f, haystack) {
return false; return false;
} }
function matchNext(c, trie) { /*function matchNext(c, trie) {
var next = find(function(path) { var next = find(function(path) {
if (path.length === 1) { if (path.length === 1) {
return path[0][0] === c; return path[0][0] === c;
} }
return path[0] === c; return path[0] === c;
}, trie); }, trie);
if (next !== false)
return trie[next]; return trie[next];
return false;
}
function trieMatch(n, str, trie) {
if (trie.length === 1 && ((typeof trie[0]) === "string")) {
if (trie[0].slice(1) === str.slice(0, trie[0].length-1))
return trie[0].length + n;
return false;
}
var matched = matchNext(str[0], trie);
if (matched && matched.length > 1)
return trieMatch(n+1, str.slice(1), matched[1]);
else if (matched)
return trieMatch(n, str.slice(1), matched)
else
return n;
} }
function trieMatch(matches, iterated, n, str, trie) {
console.log(str, trie);
iterated = iterated + str[0];
if (matchable(iterated)) {
matches.push(n);
}
var matched = matchNext(str[0], trie);
if (matched) {
return trieMatch(matches, iterated, n+1, str.slice(1), matched);
}
else
return matches;
}
var trie = buildTrie("", ["**a","**%*^", "$$"])[1]; var ops = ["**","**%a&&","**%*^", "$$", "&s"];
function matchable(x) {
return find(equal(x), ops) !== false;
}
console.log(find(function(x) { return x === "a";}, "tybabb")); var trie = buildTrie("", ["**","**%a&&","**%*^", "$$", "&s"])[1];
console.log(matchNext("%", matchNext("*", matchNext('*',trie)[1])[1])); //console.log(find(function(x) { return x === "a";}, "tybabb"));
//console.log(trie); //console.log(matchNext('*', matchNext('*',trie)[1]));
//function matchStr(n, str, trie) { //function matchStr(n, str, trie) {
//} //}
//
var str = "**%a&&345454";
var matched = trieMatch([], "", 0, str, trie);
console.log(matched);
//console.log(matchStr(0, "**^*$4545", buildTrie("", ["**", "**^"])[1])); //console.log(matchStr(0, "**^*$4545", buildTrie("", ["**", "**^"])[1]));
*/
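/*
* Problem:
* When several operators share a prefix (e.g. ">", ">>" and ">>^"),
* the longest one that fits must be the one that is matched.
* Possible solution: build a regular expression from the operator list.
*/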
/**
 * Installs an `escape` helper on the global RegExp constructor.
 *
 * The helper prefixes every regex metacharacter in `s`
 * (- / \ ^ $ * + ? . ( ) | [ ] { }) with a backslash so the result can be
 * embedded in a pattern and matched literally.
 *
 * NOTE(review): this assignment is unconditional, so it replaces any
 * `RegExp.escape` the runtime may already provide — confirm that is intended.
 *
 * @param {string} s - Text to escape; must support `.replace`.
 * @returns {string} `s` with all metacharacters backslash-escaped.
 */
RegExp.escape = function(s) {
  var escaped = s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
  return escaped;
};
/**
 * Builds a matcher that recognises an operator at the very start of a string.
 *
 * When several operators share a prefix (e.g. ">", ">>" and ">>="), the
 * longest operator that fits is the one reported.
 *
 * Changes from the previous implementation:
 *  - the caller's `ops` array is no longer sorted in place;
 *  - the pattern is explicitly anchored with `^` instead of relying on a
 *    trailing empty alternative to stop the search at index 0;
 *  - the result of `exec` is null-checked before it is dereferenced;
 *  - alternatives are ordered by length, which states the longest-match
 *    requirement directly;
 *  - escaping is done locally, so no global `RegExp.escape` is required.
 *
 * @param {string[]} ops - Operator spellings to recognise (may be empty).
 * @returns {function(string): (string|false)} Matcher returning the longest
 *   operator that prefixes its argument, or `false` when none does.
 */
function operatorMatch(ops) {
  // Backslash-escape regex metacharacters so "+", "$", "*" etc. match literally.
  function escapeOperator(op) {
    return op.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
  }

  // Copy before sorting so the caller's array keeps its order. Regex
  // alternation takes the first alternative that matches, so longer
  // operators must come before their own prefixes.
  var alternatives = ops
    .slice()
    .sort(function(a, b) { return b.length - a.length; })
    .map(escapeOperator);

  // Non-capturing group: only the overall match is ever read.
  var reg = new RegExp("^(?:" + alternatives.join("|") + ")");

  return function(x) {
    var matched = reg.exec(x);
    // An empty match (no operators, or an empty operator) counts as no match.
    if (matched && matched[0])
      return matched[0];
    return false;
  };
}
/*
var print = console.log;
var testOps = [">>", ">>&", ">", "aaaaa:", ">="];
var matcher = operatorMatch(testOps);
print(matcher(">="));
*/
module.exports = {compose : compose, module.exports = {compose : compose,
not : not, not : not,
@ -124,4 +228,4 @@ module.exports = {compose : compose,
maxBy : maxBy, maxBy : maxBy,
len : len, len : len,
groupOps : groupOps, groupOps : groupOps,
buildTrie : buildTrie} opMatch : operatorMatch}

Loading…
Cancel
Save