Find Cheaper University Textbooks
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

153 lines
4.0 KiB

##! /usr/bin/python2
from itertools import groupby, chain
from sys import stdout
from functools import partial
from json import dumps
def gensymer():
    """Create an independent source of sequential string identifiers.

    Returns a zero-argument callable. Each call returns the next counter
    value as a decimal string ("0", "1", "2", ...). Every callable produced
    by gensymer() keeps its own counter.
    """
    # The counter lives in a mutable container so the closure can update it
    # without rebinding a name in the enclosing scope.
    state = {"next": 0}

    def inner():
        current = state["next"]
        state["next"] = current + 1
        return str(current)

    return inner


# Module-wide identifier source; printTrie takes its node names from it.
gensym = gensymer()
def printTrie(graph, prev, trie, weight):
    """Add `trie` and all of its descendants to `graph`.

    A node labelled with the trie's letter is created under a fresh gensym()
    name and joined to `prev` by an edge labelled with `weight` formatted to
    two decimals. Each child is then added recursively, using the entry of
    `trie.ws` at the same position as its edge weight.

    graph  -- object exposing node(name, label) and edge(tail, head, label=...)
              (presumably a graphviz graph -- confirm against the caller)
    prev   -- name of the already existing parent node
    trie   -- Trie node to render
    weight -- number shown on the edge from `prev` to the new node
    """
    node_name = str(gensym())
    graph.node(node_name, "%s" % trie.letter)
    graph.edge(prev, node_name, label="%.2f" % weight)
    if trie.children:
        for child, child_weight in zip(trie.children, trie.ws):
            printTrie(graph, node_name, child, child_weight)
class Trie(object):
    """One node of the completion trie.

    letter   -- the text this node contributes
    children -- list of child Trie nodes
    ws       -- weights, paired by position with `children` by the code
                that sorts and prints the trie
    """

    def __init__(self, letter, children, ws):
        self.letter, self.children, self.ws = letter, children, ws
def probweight(suffixes):
    """Normalise the "value" of each entry into a probability.

    suffixes -- iterable of dicts whose "value" is a number or numeric string

    Returns a list of floats, one per entry and in the same order, each being
    that entry's value divided by the total of all values. An empty input
    gives an empty list. When the values sum to zero no ratio can be formed,
    so every entry receives an equal share instead of the division raising
    ZeroDivisionError.
    """
    weights = [float(entry["value"]) for entry in suffixes]
    if not weights:
        return []
    total = float(sum(weights))
    if total == 0:
        # No information to rank by: fall back to a uniform distribution.
        return [1.0 / len(weights)] * len(weights)
    return [w / total for w in weights]
def buildtrie(trie, suffixes):
    """
    Build a trie, also known as a prefix tree, of all the possible completions

    trie     -- Trie node whose children are (re)built; its existing
                children list is replaced
    suffixes -- list of (letter, entries) pairs as produced by partition(),
                where every entry is a dict with "key" and "value"

    Returns `trie` itself.

    Each child that has further characters below it receives one weight per
    grandchild group, in the same order as its own children: the summed
    "value" of the words in that group, normalised by probweight(). This
    keeps `ws` aligned with `children`, which sortTrie and printTrie rely on
    when they zip the two lists together.
    """
    trie.children = []
    for letter, suffs in suffixes:
        ped = partition(suffs)
        if any(prefix for prefix, _ in ped):
            # There are longer words below this letter. Weigh every next-letter
            # group by the total value of the words it contains.
            group_totals = [
                {"value": sum(float(entry["value"]) for entry in group)}
                for _, group in ped
            ]
            child = Trie(letter, [], probweight(group_totals))
            trie.children.append(buildtrie(child, ped))
        else:
            # we've reached the end of this word so just include the final letter
            # [1] = there is a probability of 1 of reaching this single leaf node,
            # since it is the only possible completion here
            trie.children.append(Trie(letter, [], [1]))
    return trie
def keyf(x):
    """Return the first element of x["key"], or "" when the key is empty."""
    key = x["key"]
    return key[0] if key else ""
def tails(words):
    """Lazily yield a copy of each entry with the first character of its key removed.

    Every yielded dict has the shortened "key" and the unchanged "value".
    """
    for entry in words:
        remainder = entry["key"][1:]
        yield {"key": remainder, "value": entry["value"]}
def partition(words):
    """
    Partition the words into different prefixes based on the first character

    Returns a list of (first_character, entries) pairs ordered by first
    character, where `entries` are the words of that group with the first
    character stripped by tails(). Words with an empty key are grouped
    under "".
    """
    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(words, key=keyf)
    groups = []
    for first, members in groupby(ordered, key=keyf):
        groups.append((first, list(tails(members))))
    return groups
def flatten_helper(letter, trie):
    """Pair each child of `trie` with the text `letter` extended by that child.

    Returns a tuple (prefixes, children): `prefixes` holds `letter` followed
    by each child's letter, and `children` is the trie's own children list.
    """
    kids = trie.children
    extended = []
    for kid in kids:
        extended.append(letter + kid.letter)
    return (extended, kids)
def flatten(trie):
    """Expand a trie into the texts spelled out along its branches.

    A node without children yields its own letter (a string). Any other node
    yields a list with one entry per child, each being the flattened result
    of that child with this node's letter prepended -- so the result is a
    nested list mirroring the shape of the trie.
    """
    if trie.children:
        prefixes, nodes = flatten_helper(trie.letter, trie)
        nested = []
        for prefix, node in zip(prefixes, nodes):
            # Re-root the child under the accumulated prefix and recurse.
            nested.append(flatten(Trie(prefix, node.children, node.ws)))
        return nested
    return trie.letter
def flattenlist(xs):
    """Return the non-list items of an arbitrarily nested list, in order.

    Only `list` instances are descended into; every other item is kept
    as-is. `xs` itself is simply iterated, so a string argument yields its
    characters.
    """
    flat = []
    for item in xs:
        if isinstance(item, list):
            flat += flattenlist(item)
        else:
            flat.append(item)
    return flat
def matchc(trie, prefix):
    """Return the children of `trie` whose letter equals the start of `prefix`.

    When `prefix` is longer than one element only its first element is
    compared; otherwise `prefix` itself is compared.
    """
    wanted = prefix[0] if len(prefix) > 1 else prefix
    hits = []
    for node in trie.children:
        if node.letter == wanted:
            hits.append(node)
    return hits
def match(trie, word):
    """Walk `word` down the trie and return the nodes matched along the way.

    The result holds one node per matched character, in order, and stops at
    the first character with no matching child. When several children
    match, the first one is followed. An empty `word` gives an empty list.
    """
    path = []
    node = trie
    for ch in word:
        hits = matchc(node, ch)
        if not hits:
            break
        node = hits[0]
        path.append(node)
    return path
def complete(trie, word):
    """Return up to ten completions of `word` as a JSON array string.

    trie -- root Trie to search
    word -- prefix typed so far

    Returns False when `word` cannot be matched in full against the trie.
    Otherwise returns json.dumps() of at most ten completed words, in the
    order the matched node's children are stored. An empty `word` matches
    trivially and is completed from the root instead of raising IndexError.
    """
    m = match(trie, word)
    if len(m) != len(word):
        return False
    # With an empty prefix nothing was matched, so start from the root.
    node = m[-1] if m else trie
    # flatten() prefixes every result with the starting node's own letter,
    # which `word` already contains (and which is empty for the root).
    skip = len(node.letter)
    completions = [word + tail[skip:] for tail in flattenlist(flatten(node))]
    return dumps(completions[:10])
def sortTrie(trie):
    """
    Reorder, in place, the children of every node so that the child with the
    highest weight comes first; `ws` is reordered alongside so both lists
    stay paired. Children with equal weights keep their relative order.
    Nodes without children are left untouched. Returns None.
    """
    if trie.children:
        ranked = sorted(
            zip(trie.children, trie.ws),
            key=lambda pair: pair[1],
            reverse=True,
        )
        trie.children = [child for child, _ in ranked]
        trie.ws = [w for _, w in ranked]
        for child in trie.children:
            sortTrie(child)
def toTrie(words):
    """Build a sorted completion trie from an iterable of word entries.

    words -- iterable of dicts with a string "key" (the word) and a "value"
             (its weight, a number or numeric string)

    Keys are lower-cased on copies of the entries, so the caller's dicts are
    not modified and a one-shot iterable such as a generator is consumed
    only once. The root's children all get weight 1, and every node's
    children are then ordered by sortTrie().

    Returns the root Trie, whose letter is "".
    """
    lowered = [dict(word, key=word["key"].lower()) for word in words]
    trie = buildtrie(Trie("", [], [1]), partition(lowered))
    trie.ws = [1] * len(trie.children)
    sortTrie(trie)
    return trie
def testkey(w):
return {
"key" : w,
"value" : "1"
}