2 changed files with 1 additions and 155 deletions
@ -1,153 +0,0 @@ |
|||
##! /usr/bin/python3 |
|||
from itertools import groupby, chain |
|||
from sys import stdout |
|||
from functools import partial |
|||
from json import dumps |
|||
|
|||
def gensymer():
    """Create an independent generator of sequential string labels.

    Returns:
        A zero-argument callable. Each call returns the next non-negative
        integer as a string, starting at "0". Every callable produced by
        ``gensymer`` keeps its own private counter.
    """
    counter = 0

    def inner():
        nonlocal counter
        label = str(counter)
        counter += 1
        return label

    return inner


# Module-wide label source shared by everything that needs a fresh node id.
gensym = gensymer()
|||
|
|||
def printTrie(graph, prev, trie, weight):
    """Add ``trie`` and all of its descendants to ``graph``.

    Args:
        graph: Object exposing ``node(name, label)`` and
            ``edge(tail, head, label=...)`` -- presumably a graphviz graph;
            confirm against the caller.
        prev: Name of the already-drawn node that ``trie`` hangs from.
        trie: Node to draw; its ``letter`` becomes the node label.
        weight: Number shown (two decimals) on the edge from ``prev``.
    """
    node_id = str(gensym())
    graph.node(node_id, "%s" % trie.letter)
    graph.edge(prev, node_id, label="%.2f" % weight)
    if trie.children:
        # Children and weights are paired positionally.
        for subtrie, subweight in zip(trie.children, trie.ws):
            printTrie(graph, node_id, subtrie, subweight)
|||
|
|||
|
|||
class Trie:
    """A single node of the prefix tree.

    Attributes are plain and mutable; other functions in this module
    reassign ``children`` and ``ws`` after construction.
    """

    def __init__(self, letter, children, ws):
        # letter:   text carried by this node
        # children: list of child nodes
        # ws:       weights paired positionally with ``children``
        self.letter, self.children, self.ws = letter, children, ws
|||
|
|||
def probweight(suffixes):
    """Normalise the ``"value"`` of each entry into a probability.

    Args:
        suffixes: Iterable of mappings whose ``"value"`` is convertible
            with ``float``.

    Returns:
        A list of floats, one per entry and in the same order, that sums
        to 1. An empty input yields an empty list. When the values sum to
        zero every entry gets the same share instead of raising
        ``ZeroDivisionError``.
    """
    weights = [float(entry["value"]) for entry in suffixes]
    total = sum(weights)
    if not total:
        # Nothing to normalise by: fall back to a uniform distribution
        # (or an empty list when there are no entries at all).
        return [1.0 / len(weights)] * len(weights) if weights else []
    return [w / total for w in weights]
|||
|
|||
def buildtrie(trie, suffixes):
    """Build a trie (prefix tree) of all the possible completions.

    Args:
        trie: Node to populate; its ``children`` list is replaced.
        suffixes: Output of ``partition``: ``(letter, entries)`` pairs where
            ``entries`` are the word tails that follow ``letter``.

    Returns:
        The same ``trie`` object, with one child per pair in ``suffixes``.

    Each non-leaf child gets ``ws`` with exactly one weight per grandchild,
    in the same order as its ``children``. The weight of a grandchild is the
    summed ``"value"`` of the words below it, normalised by ``probweight``.
    Previously the weights were computed per word rather than per group, so
    ``ws[i]`` did not describe ``children[i]`` whenever two words shared a
    letter.
    """
    trie.children = []
    for letter, suffs in suffixes:
        # Partition once; the groups drive both the leaf test and recursion.
        groups = partition(suffs)
        if any(first for first, _ in groups):
            # At least one word continues past this letter.
            group_totals = [
                {"value": sum(float(s["value"]) for s in members)}
                for _, members in groups
            ]
            node = Trie(letter, [], probweight(group_totals))
            trie.children.append(buildtrie(node, groups))
        else:
            # We've reached the end of this word so just include the final
            # letter. [1] = there is a probability of 1 of reaching this
            # single leaf node, since it is the only possible completion.
            trie.children.append(Trie(letter, [], [1]))
    return trie
|||
|
|||
|
|||
def keyf(x):
    """Return the first element of ``x["key"]``, or "" when the key is empty."""
    key = x["key"]
    return key[0] if key else ""
|||
|
|||
def tails(words):
    """Lazily yield a copy of each entry with the first key element removed.

    Each yielded dict has exactly two items: ``"key"`` (the original key
    without its first element) and ``"value"`` (passed through unchanged).
    """
    for entry in words:
        remainder = entry["key"][1:]
        yield dict(key=remainder, value=entry["value"])
|||
|
|||
def partition(words):
    """Group the words by their first character.

    Returns:
        A list of ``(first_character, tails)`` tuples ordered by
        ``first_character``, where ``tails`` is the list produced by
        ``tails`` for the words in that group.
    """
    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(words, key=keyf)
    groups = []
    for first, members in groupby(ordered, key=keyf):
        groups.append((first, list(tails(members))))
    return groups
|||
|
|||
|
|||
def flatten_helper(letter, trie):
    """Pair each child of ``trie`` with ``letter`` extended by its letter.

    Returns:
        A 2-tuple ``(prefixes, children)``: ``prefixes[i]`` is
        ``letter + children[i].letter`` and ``children`` is
        ``trie.children`` itself.
    """
    children = trie.children
    extended = [letter + node.letter for node in children]
    return (extended, children)
|||
|
|||
def flatten(trie):
    """Spell out every path below ``trie``.

    Returns:
        ``trie.letter`` when the node has no children; otherwise a list
        (possibly nested) whose string entries are ``trie.letter`` followed
        by the letters along each path to a childless node.
    """
    if not trie.children:
        return trie.letter
    words, nodes = flatten_helper(trie.letter, trie)
    nested = []
    for word, node in zip(words, nodes):
        # Re-root each child with the accumulated prefix as its letter.
        nested.append(flatten(Trie(word, node.children, node.ws)))
    return nested
|||
|
|||
def flattenlist(xs):
    """Return the non-list items of ``xs`` in order, descending into lists.

    Only ``list`` instances are expanded; any other item (including tuples
    and strings) is kept as a single element.
    """
    flat = []
    # Stack of iterators: the top one is the list currently being walked.
    pending = [iter(xs)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Current iterator is exhausted; resume its parent.
            pending.pop()
    return flat
|||
|
|||
def matchc(trie, prefix):
    """Return the children of ``trie`` whose letter matches ``prefix``.

    When ``prefix`` has more than one element only its first element is
    compared; otherwise ``prefix`` itself is compared.
    """
    wanted = prefix[0] if len(prefix) > 1 else prefix
    hits = []
    for child in trie.children:
        if child.letter == wanted:
            hits.append(child)
    return hits
|||
|
|||
def match(trie, word):
    """Walk ``trie`` along the characters of ``word``.

    Returns:
        The list of nodes visited, one per matched character, stopping at
        the first character with no matching child. Empty when ``word`` is
        empty or its first character does not match.
    """
    path = []
    node = trie
    for ch in word or "":
        hits = matchc(node, ch)
        if not hits:
            break
        # Follow the first matching child.
        node = hits[0]
        path.append(node)
    return path
|||
|
|||
def complete(trie, word):
    """Return up to ten completions of ``word`` as a JSON array string.

    Args:
        trie: Root node to search from.
        word: Prefix typed so far.

    Returns:
        ``False`` when ``word`` cannot be matched in full; otherwise the
        JSON encoding of at most the first ten completions, each starting
        with ``word``.

    An empty ``word`` is completed from ``trie`` itself. Previously it fell
    through the length check with an empty match list and raised
    ``IndexError`` on ``m[-1]``.
    """
    path = match(trie, word)
    if len(path) != len(word):
        return False
    # Empty prefix: nothing was matched, so complete from the root.
    anchor = path[-1] if path else trie
    # Every flattened string starts with the anchor's own letter, which is
    # already the tail of ``word``; drop it before gluing the two together.
    skip = len(anchor.letter)
    completions = [word + full[skip:] for full in flattenlist(flatten(anchor))]
    return dumps(completions[:10])
|||
|
|||
def sortTrie(trie):
    """Sort the children of every node by descending weight, in place.

    ``children`` and ``ws`` are paired positionally, reordered together and
    written back; children with equal weight keep their relative order.
    Nodes without children are left untouched. Returns ``None``.
    """
    pending = [trie]
    while pending:
        node = pending.pop()
        if not node.children:
            continue
        ranked = list(zip(node.children, node.ws))
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        node.children = [child for child, _ in ranked]
        node.ws = [w for _, w in ranked]
        pending.extend(node.children)
|||
|
|||
def toTrie(words):
    """Build a sorted trie from ``words``.

    Args:
        words: Entries with a string ``"key"`` and a ``"value"``. Each
            entry's ``"key"`` is lower-cased in place.

    Returns:
        The root node (empty letter). Its children all carry weight 1 and
        every level has been passed through ``sortTrie``.
    """
    for entry in words:
        entry["key"] = entry["key"].lower()
    root = Trie("", [], [1])
    buildtrie(root, partition(words))
    # The root's children are all given the same weight.
    root.ws = [1] * len(root.children)
    sortTrie(root)
    return root
|||
|
|||
def testkey(w): |
|||
return { |
|||
"key" : w, |
|||
"value" : "1" |
|||
} |
Loading…
Reference in new issue