Revert "Explain the purpose of the Racket files"

This reverts commit dc1d0c87b2.
9 years ago · a225b94832
24 changed files with 2062 additions and 8 deletions
--- a/archive.py
+++ b/archive.py
@ -0,0 +1,34 @@
 #! /usr/bin/python2
 from urllib import quote
 from json import loads, dumps
 import requests as req
 searchUrl = "https://archive.org/advancedsearch.php?q={0}&fl%5B%5D=avg_rating&fl%5B%5D=description&fl%5B%5D=identifier&fl%5B%5D=type&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&page=1&output=json&callback=callback&save=yes#raw"
 def searchIA(title, author):
    """
    Search The Internet Archive for a book.

    title and author are joined with a space, URL-quoted and substituted
    into searchUrl. The response is requested as JSONP (callback=callback),
    so the wrapper around the JSON document is stripped before parsing.

    Returns a list of up to three "https://archive.org/details/<identifier>"
    URLs, or an empty list when the response cannot be parsed or holds
    no documents.
    """
    print("running a search")
    requrl = searchUrl.format(quote(title + " " + author))
    try:
        # Drop the first 9 characters and the final one (the JSONP wrapper)
        # so that only the JSON document is handed to loads
        results = loads(req.get(requrl).text[9:-1])
    except ValueError:
        return []
    docs = results["response"]["docs"]
    if not docs:
        # responseHeader.params.rows appears to echo the rows=50 request
        # parameter rather than a hit count, so emptiness is judged from
        # the documents that actually came back
        print("Couldn't find results for %s %s" % (title, author))
        return []
    return ["https://archive.org/details/%s" % doc["identifier"]
            for doc in docs[0:3]]
 # Example, search for David Hume's Enquiry Concerning Human Understanding
 #for url in searchIA("Hume", "Enquiry Concerning Human Understanding"):
    #print url
--- a/course_mapping.rkt
+++ b/course_mapping.rkt
@ -1,11 +1,5 @@
 #! /usr/bin/racket
 #lang racket
 ; This file is used to generate the mapping for elasticsearch
 ; It is written in Racket (a dialect of Scheme)
 ; It will not be necessary to run unless you want to change the elasticsearch mapping
 ; This may be necessary if you have fields you want to add, or need some other customization
 ; You may also edit the JSON mapping directly, or use whatever tool you want to edit the mapping with
 (require "schemadsl.rkt")
 (displayln
--- a/database.py
+++ b/database.py
@ -0,0 +1,62 @@
 #! /usr/bin/python2
 from sys import argv
 from hashlib import sha1
 def truncate(docid):
    """
    Shorten a document id to its first 12 characters and return it as an int.

    The id is expected to be derived from a hash of unique identifiers.
    """
    digits = str(docid)
    return int(digits[:12])
 def createResource(textbookInfo, course, dept, coursecode, docid):
    """
    Save (or update) the resource document that belongs to one course.

    textbookInfo is a callable taking (dept, coursecode) and returning the
    textbooks for the course; course is stored as-is under "courseinfo".
    A course looks like this, for example:
    {
     'books': [],
     'dept': 'COLLAB',
     'code': '2C03',
     'sections': [
                    {
                     'prof': 'Lisa Pender',
                     'sem': '2015/09/08 - 2015/12/08',
                     'day': 'Mo'
                     },
                     {
                      'prof': 'Staff',
                      'sem': '2015/09/08 - 2015/12/08',
                      'day': 'Th'
                      }
                  ],
     'title': 'COLLAB 2C03 - Sociology I'
     }

    Returns whatever localdb.save returns, or None when the save conflicts.
    """
    department = dept.strip()
    code = coursecode.strip()
    textbooks = textbookInfo(department, code)
    # The id is truncated so the URL of the course's resource page stays short
    _id = str(truncate(docid))
    fields = {
            "_id" : _id,
            "textbooks" : textbooks,
            "coursetitle" : "%s %s" % (department, code),
            "courseinfo" : course
            }
    try:
        revisions = list(localdb.revisions(_id))
        if revisions:
            # Saving over an existing document needs its current revision
            fields["_rev"] = dict(revisions[0])["_rev"]
        return localdb.save(fields)
    except ResourceConflict:
        print("Resource for %s already exists, not creating a new one" % (docid))
--- a/goasearch.py
+++ b/goasearch.py
@ -0,0 +1,14 @@
 #! /usr/bin/python2
 # predictive data
 # switch to elasticsearch's prediction
 import database
 import predictions
 class GOASearch(object):
    """
    Placeholder for the search front end; it holds no state yet.
    """
    def __init__(self):
        # __init__ must return None: returning self made every
        # GOASearch() call raise TypeError
        pass
--- a/mcmaster/init.py
+++ b/mcmaster/init.py
--- a/mcmaster/classes.py
+++ b/mcmaster/classes.py
@ -0,0 +1,349 @@
 #! /usr/bin/python2
 from sys import argv
 from itertools import chain, islice, izip as zip
 from re import search, sub
 from functools import total_ordering
 from sylla import textbookInfo
 from collections import MutableMapping
 import datetime as dt
 import lxml.html as lxh
 import requests
 import sys
 import copy
 # Semester codes; allCourses passes each one on as the "semester" that is
 # formatted into the CLASS_SRCH_WRK2_STRM field of the request payloads
 # NOTE(review): the values look tied to one academic year - confirm before reuse
 fall = "2159"
 spring_summer = "2165"
 winter = "2161"
 # threading stuff
 import Queue as q
 import threading as thd
 # Page fetched first by MosReq.__init__ so the server can set cookies
 baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
 # Endpoint that every later GET/POST of the class search goes to
 searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
 # Headers sent with the POST requests (form-encoded body, browser user agent)
 custom_headers = {
        "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0",
        "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8",
        }
 # Form body used by MosReq.getCodes, first request: {0} = ICStateNum, {1} = semester
 courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
 # Form body used by MosReq.getCodes, second request: {0} = ICStateNum, {1} = letter, {2} = semester
 courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}"
 # Form body posted by MosReq.getlist when the first response contains
 # "Your search will return over": {0} = ICStateNum, {1} = semester
 payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
 # Form body of the class search itself: {0} = ICStateNum, {1} = subject, {2} = semester
 payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}"
 # Current year and month, captured once when the module is imported
 year = dt.date.today().year
 month = dt.date.today().month
 # Two-letter day abbreviation -> position in the week (Monday = 0);
 # Section.__gt__ uses it to order sections by day
 days = {
        "Mo" : 0,
        "Tu" : 1,
        "We" : 2,
        "Th" : 3,
        "Fr" : 4,
        "Sa" : 5,
        "Su" : 6
        }
 # Two-letter day abbreviation -> space-separated alternative spellings
 # NOTE(review): "T" presumably covers sections whose time is "TBA", where
 # the day ends up as the first character of that string - confirm
 day_descs = {
        "Mo" : "Monday Mon Mo",
        "Tu" : "Tuesday Tues Tu Tue",
        "We" : "Wednesday Wed We",
        "Th" : "Thursday Th Thurs",
        "Fr" : "Friday Fr Fri",
        "Sa" : "Saturday Sat Sa",
        "Su" : "Sunday Su Sun",
        "T"  : "TBA"
        }
 def timeparse(time):
    """
    Parse a 12-hour clock string into 24-hour parts.

    time is "HH:MMAM"/"HH:MMPM" (7 characters) or "H:MMAM"/"H:MMPM"
    (6 characters).

    Returns a tuple (hour, minutes, half) where hour and minutes are the
    decimal strings of the parsed integers (so "05" minutes becomes "5")
    and half is the "AM"/"PM" marker.
    """
    if len(time) == 7:
        hour, minutes, half = int(time[0:2]), int(time[3:5]), time[5:7]
    else:
        hour, minutes, half = int(time[0]), int(time[2:4]), time[4:6]
    if half == "PM" and hour < 12:
        hour += 12
    elif half == "AM" and hour == 12:
        # 12:xx AM is the first hour of the day, not the twelfth
        hour = 0
    return (str(hour), str(minutes), half)
 class Class(object):
    """
    A course: its department, its UTF-8 encoded title and its sections.

    Iterating yields one (title, section) pair per section.
    """
    def __init__(self, dept, title, sections):
        self.title = title.encode("UTF-8")
        self.dept = dept
        self.sections = sections
    def __repr__(self):
        summary = (self.title, self.sections)
        return repr(summary)
    def __iter__(self):
        return ((self.title, section) for section in self.sections)
    def hasCode(self):
        """
        True when the title starts with two space-separated words that
        contain no lowercase letters (e.g. "COLLAB 2C03 ...")
        """
        words = self.title.strip().split(" ")
        if len(words) < 2:
            return False
        first, second = words[0], words[1]
        return first.upper() == first and second.upper() == second
    @property
    def code(self):
        """
        The second word of the title, or False when hasCode() fails
        """
        if not self.hasCode():
            return False
        return self.title.strip().split(" ")[1].strip()
    @property
    def books(self):
        """
        Textbooks (with prices) for this course, or False when the
        department or the course code is missing
        """
        if not (self.dept and self.code):
            return False
        return textbookInfo(self.dept, self.code, withPrices=True)
@total_ordering
 class Section(dict):
    """
    One scheduled section of a class: time, location, instructor, semester.

    All four constructor arguments are stored UTF-8 encoded. total_ordering
    derives the remaining comparisons from __gt__ and __eq__ below.
    """
    def __init__(self, time, loc, prof, sem):
        self.time = time.encode("UTF-8")
        self.loc = loc.encode("UTF-8")
        self.prof = prof.encode("UTF-8")
        self.sem = sem.encode("UTF-8")
        # Last value computed by the date property (False until then)
        self._date = False
        # When set (to a two-letter day), overrides the day parsed from time
        self._day = False
    @property
    def date(self):
        """
        (day, start, end) parsed from time, or the raw time when it is "TBA".

        time is split on whitespace into four fields: days, start, a
        separator that is ignored, and end. start and end go through
        timeparse. day is _day when that is set, otherwise the list of
        two-character chunks of the days field.
        """
        if self.time != "TBA":
            day, start, _, end = self.time.split()
            if self._day:
                assert len(self._day) == 2
                day = self._day
            else:
                # e.g. "MoWeFr" -> ["Mo", "We", "Fr"]
                day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)]
            self._date = (day, timeparse(start), timeparse(end))
            return self._date
        return self.time
    @property
    def day(self):
        # First element of date; for a "TBA" time that is the character "T"
        return self.date[0]
    @property
    def start(self):
        # Hour and minute strings of the start time, concatenated
        return self.date[1][0] + self.date[1][1]
    def __repr__(self):
        return ("""
                Time = %s, Location = %s, Instructor = %s, Semester Running = %s
                 """ % (self.date, self.loc, self.prof, self.sem))
    def __gt__(self, x):
        # Ordering is only defined once the section is pinned to a single day
        if isinstance(self.day, list):
            raise NotImplementedError
        if (self.date == "TBA" or
            x.date == "TBA"):
            return False
        # Later day wins; on the same day the later start wins
        # NOTE(review): start values are strings, so this comparison is
        # lexicographic rather than numeric - confirm that is intended
        return ((days[self.day] > days[x.day]) or
                ((self.day == x.day) and
                 (self.start > x.start)))
    def __eq__(self, x):
        return (x.date == self.date and
                x.prof == self.prof and
                x.loc == self.loc and
                x.sem == self.sem)
 def getStateNum(html):
    """
    Pull the ICStateNum value out of a Mosaic page.

    Returns the value of the first <input name="ICStateNum"> element;
    raises IndexError when the page has none.
    """
    tree = lxh.fromstring(html)
    state_inputs = tree.xpath(".//input[@name=\"ICStateNum\"]")
    return state_inputs[0].value
 def parseSection(section):
    """
    Build a Section from one table row.

    section is a parsed HTML element that must contain exactly four <td>
    cells, read in order as time, location, instructor and semester.
    """
    cols = section.xpath(".//td")
    assert len(cols) == 4
    # Section.__init__ does the UTF-8 encoding. Encoding here as well meant
    # encoding an already encoded byte string, which Python 2 rejects with
    # UnicodeDecodeError as soon as a cell holds non-ASCII text
    time, loc, prof, sem = [col.text_content().strip() for col in cols]
    return Section(time, loc, prof, sem)
 def getSectionInfo(table):
    """
    Yield a parsed Section for every row of table whose id matches
    SSR_CLSRCH; rows without an id are skipped.
    """
    for row in table.xpath(".//tr"):
        row_ids = row.xpath("@id")
        if row_ids and search(r"SSR_CLSRCH", row_ids[0]):
            yield parseSection(row)
 def parseColumns(subject, html):
    """
    Pair every class name found in html with the list of its sections.

    Returns a lazy iterator of ((subject, name), [Section, ...]) tuples;
    names and section tables are matched up purely by document order.
    """
    parsed = lxh.fromstring(html)
    # One list of sections per table whose id looks like ICField<n>$scroll;
    # islice(..., 1, sys.maxint) skips the first such table
    classInfo = (list(getSectionInfo(table)) for table in
                  islice((table for table in parsed.xpath(".//table")
                    if table.xpath("@id") and
                    search(r"ICField[0-9]+\$scroll", table.xpath("@id")[0])), 1, sys.maxint))
    # (subject, stripped text) for every span whose id contains DERIVED_CLSRCH_DESCR
    classNames = ((subject, span.text_content().strip()) for span in parsed.xpath(".//span")
                    if span.xpath("@id") and
                       search(r"DERIVED_CLSRCH_DESCR", span.xpath("@id")[0]))
    # zip is itertools.izip in this module, so nothing is parsed until consumed
    return zip(classNames, classInfo)
 def getCodes(html):
    """
    Lazily yield the UTF-8 encoded text of every <span> whose id matches
    SSR_CLSRCH_SUBJ_SUBJECT$<number>.
    """
    tree = lxh.fromstring(html)
    return (span.text_content().encode("UTF-8")
            for span in tree.xpath("//span")
            if span.xpath("@id") and
            search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", span.xpath("@id")[0]))
 class MosReq(object):
    """
    A requester for the class search of one semester.

    Construction fetches baseurl once and keeps the cookies the server set;
    every later request sends those cookies to searchurl.
    """
    def __init__(self, semester):
        self.semester = semester
        s = requests.Session()
        resp = s.get(baseurl, allow_redirects=True, headers=custom_headers).content
        # Let the server set some cookies before doing the searching
        cookies = {}
        for key, val in s.cookies.iteritems():
            cookies[key] = val
        self.cookies = cookies
        # Most recent ICStateNum read from a response (False until the first request)
        self.statenum = False
        # Cache filled by the codes property
        self.codes_ = []
    def getlist(self, subject):
        """
        Run the class search for subject and return the raw response body.
        """
        sys.stderr.write("Getting " + subject + "\n")
        first_req = requests.get(searchurl, cookies=self.cookies).content
        # for some reason Mosaic wants us to request it twice, ??????????????????
        self.statenum = getStateNum(first_req)
        first_req = requests.post(searchurl,
                                  data=payload.format(self.statenum, subject, self.semester),
                                  cookies=self.cookies,
                                  allow_redirects=False,
                                  headers=custom_headers).content
        # we make a first request to get the ICStateNum in case it thinks there are too many results
        try:
            self.statenum = getStateNum(first_req)
        except IndexError:
            # No ICStateNum input in the response: keep the previous value
            pass
        if "Your search will return over" in first_req:
            # Post payload2 and hand back that response instead
            return requests.post(searchurl,
                                 data=payload2.format(self.statenum, self.semester),
                                 cookies=self.cookies,
                                 allow_redirects=False,
                                 headers=custom_headers).content
        else:
            return first_req
    def classes(self, subject):
        """
        Return the list of ((subject, name), sections) pairs for subject.
        """
        return list(parseColumns(subject, self.getlist(subject)))
    def getCodes(self, letter):
        """
        Return a generator over the subject codes listed under letter.
        """
        sys.stderr.write("Getting letter " + letter + "\n")
        first_req = requests.get(searchurl, cookies=self.cookies).content
        self.statenum = getStateNum(first_req)
        # Post courseCodes1 first and read the new ICStateNum from its response
        self.statenum = getStateNum(requests.post(searchurl,
                                    data=courseCodes1.format(self.statenum, self.semester),
                                    cookies=self.cookies,
                                    headers=custom_headers).content)
        # Then request the codes for the letter; the module-level getCodes parses them
        return getCodes(requests.post(searchurl,
                             data=courseCodes2.format(self.statenum, letter, self.semester),
                             cookies=self.cookies,
                             allow_redirects=False,
                             headers=custom_headers).content)
    @property
    def codes(self):
        """
        All subject codes for the letters A to Z, fetched on first access and cached.
        """
        if not self.codes_:
            # chr(65)..chr(90) are "A".."Z"
            self.codes_ = list(chain.from_iterable(
                                map((lambda l:
                                    self.getCodes(chr(l))),
                                    xrange(65, 91))))
        return self.codes_
 def request(codes, lists, semester):
    """
    Worker body: take subject codes off the codes queue until it is empty
    and put the class list of each subject onto the lists queue.

    codes and lists are Queue objects; semester is handed to MosReq.
    task_done() is called once for every code taken, whether or not
    fetching it succeeded.
    """
    requester = MosReq(semester)
    while True:
        try:
            # get_nowait cannot block: with empty() followed by get(),
            # another worker could take the last code in between and
            # leave this one waiting forever
            code = codes.get_nowait()
        except q.Empty:
            return
        try:
            lists.put(requester.classes(code))
        except Exception as e:
            # Report the failure and carry on with the next code; stopping
            # here would leave the queue undrained and could hang join()
            sys.stderr.write("Failed to get %s: %s\n" % (code, e))
        finally:
            codes.task_done()
 class CourseInfo(object):
    """
    Collects every class of one semester using threadcount worker threads.
    """
    def __init__(self, threadcount, semester):
        # Cache for the codes property (False until first access)
        self._codes = False
        self.threadcount = threadcount
        self.semester = semester
    @property
    def codes(self):
        """
        Subject codes of the semester, fetched through MosReq on first access.
        """
        if not self._codes:
            req = MosReq(self.semester)
            self._codes = req.codes
        return self._codes
    def classes(self):
        """
        Generator yielding one Class per class found in the semester.

        Every subject code is queued, the worker threads run request() over
        the queue, and the collected results are then turned into Class
        objects with one Section per meeting day.
        """
        qcodes = q.Queue()
        for code in self.codes:
            qcodes.put(code)
        lists = q.Queue()
        threads = []
        thread = None
        for i in xrange(self.threadcount):
            thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester))
            threads.append(thread)
            thread.start()
        # Wait until every queued code has been marked done, then for the threads
        qcodes.join()
        for t in threads:
            t.join()
        sections = []
        while not lists.empty():
            sections.append(lists.get())
        # Each cl is ((subject, name), [Section, ...])
        for cl in chain.from_iterable(sections):
            new_sections = []
            for sec in cl[1]:
                if len(sec.day) > 1:
                    # Several meeting days: one copy of the section per day
                    for day in sec.day:
                        new_sections.append(copy.deepcopy(sec))
                        new_sections[-1]._day = day
                else:
                    sec._day = sec.day[0]
                    new_sections.append(sec)
            # "\xa0" runs are removed from the name before it becomes the title
            yield Class(cl[0][0], sub("\xa0+", "", cl[0][1]), sorted(new_sections))
 def getCourses(semester, threadcount=10):
    """
    Return the generator of Class objects for one semester, fetched with
    threadcount worker threads.
    """
    info = CourseInfo(threadcount, semester)
    return info.classes()
 def allCourses():
    """
    Chain the courses of the fall, winter and spring/summer semesters,
    in that order, into one lazy iterator.
    """
    semesters = (fall, winter, spring_summer)
    per_semester = (getCourses(sem, threadcount=10) for sem in semesters)
    return chain.from_iterable(per_semester)
 #for course in allCourses():
    #sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books))
    #print course.sections
--- a/mcmaster/site.py
+++ b/mcmaster/site.py
@ -0,0 +1,9 @@
 from oersearch import Search
 from classes import getCourses
 from sylla import getTextbooks
 # Script entry: create the search object for McMaster, give it the course
 # source (getCourses) and start it
 # NOTE(review): Search comes from oersearch, which is not visible here;
 # what setup() and run() do should be confirmed there
 mcmasterSearch = Search("McMaster")
 mcmasterSearch.setup(getCourses)
 mcmasterSearch.run()
--- a/mcmaster/sylla.py
+++ b/mcmaster/sylla.py
@ -0,0 +1,117 @@
 #! /usr/bin/python2
 from sys import argv
 from itertools import chain, islice, izip_longest, izip as zip
 from re import search, sub
 from functools import total_ordering
 from re import sub
 import datetime as dt
 import lxml.html as lxh
 import requests
 # The purpose of this module is to download and parse syllabi from various departments
 # so that they can be correlated with individual courses
 class Price(object):
    """
    A price with its status label.

    amnt is a string whose first character (the currency sign) is dropped
    before the rest is converted to a float.
    """
    def __init__(self, amnt, status):
        amount = amnt[1:]
        self.dollars = float(amount)
        self.status = status
    def __repr__(self):
        return "$%r %s" % (self.dollars, self.status)
 class Book(object):
    """
    A book title together with its price.
    """
    def __init__(self, title, price):
        self.price = price
        self.title = title
    def __repr__(self):
        # Rendered as a two-element, JSON-looking list of strings
        return '["%s", "%r"]' % (self.title, self.price)
 def grouper(n, iterable, fillvalue=None):
    """
    Collect the items of iterable into tuples of length n, padding the
    last tuple with fillvalue: grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # n references to the same iterator, so each tuple takes n consecutive items
    shared = iter(iterable)
    return izip_longest(*([shared] * n), fillvalue=fillvalue)
 searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1"
 def normalize(word):
    """
    Uppercase the first character of word and lowercase the rest.
    Words of one character or less are returned unchanged.
    """
    if len(word) <= 1:
        return word
    head = word[0]
    tail = "".join(word[1:])
    return "%s%s" % (head.upper(), tail.lower())
 def parseAuthor(author):
    """
    Turn "Lastname Firstname ..." into "Firstname Lastname".
    Only the first two space-separated words are used; a single word is
    returned as it is.
    """
    parts = author.split(" ")
    if len(parts) > 1:
        lastname, firstname = parts[0], parts[1]
        return "%s %s" % (firstname, lastname)
    return author
 def normwords(phrase):
    """
    Apply normalize to every space-separated word of phrase.
    """
    return " ".join(normalize(word) for word in phrase.split(" "))
 def books(dept, code, withPrices):
    """
    Yield the textbooks the campus store lists for a course.

    dept and code are substituted into searchUrl. Each result is a tuple
    (title, author) or, when withPrices is true, (title, author, price)
    where price is the repr of a Price, or "None" when the page listed
    fewer prices than books.
    """
    req = searchUrl % (dept, code)
    html = requests.get(req).text
    parsed = lxh.fromstring(html)
    pricelist = prices(parsed)
    for div in parsed.xpath(".//div"):
        if not ("id" in div.attrib and "prodDesc" in div.attrib["id"]):
            continue
        textbook = div.text_content()
        # The author is whatever follows the first ":" in the first inline
        # span of the parent element, with commas removed
        label = div.getparent().xpath(".//span[@class='inline']")[0].text_content()
        author = sub(r',', '', "".join(label.split(":")[1:]).strip())
        # prices() may come back empty or short; popping unconditionally
        # raised IndexError and lost every remaining book
        price = pricelist.pop() if pricelist else None
        if withPrices:
            yield (normwords(textbook), normwords(author), repr(price))
        else:
            yield (normwords(textbook), normwords(author))
 def prices(html):
    """
    Collect the prices on a search result page.

    Returns a list of Price objects in reverse page order (so that pop()
    hands them out in page order), or [] when none could be built.
    """
    checkboxes = html.xpath("//p/input[@type='checkbox']")
    # First whitespace-separated token of each checkbox's parent paragraph
    ps = [box.getparent().text_content().split()[0] for box in checkboxes]
    pairs = list(grouper(2, ps))
    pairs.reverse()
    try:
        amts, stats = zip(*pairs)
        return map(Price, amts, stats)
    except ValueError:
        return []
 def textbookInfo(dept, code, withPrices=False):
    """
    List every textbook found for a course (see books for the tuple shape).
    """
    found = books(dept, code, withPrices)
    return list(found)
 def humanities():
    """
    Download humanities syllabi (currently always an empty list).
    """
    syllabi = []
    return syllabi
 # Example: getting the textbooks for Personality Theory (PSYCH = department, 2B03 = course code)
 # print textbookInfo("PSYCH", "2B03")
--- a/openlibrary.py
+++ b/openlibrary.py
@ -0,0 +1,24 @@
 #! /usr/bin/python2
 from urllib import quote
 from json import loads, dumps
 import requests as req
 #query = "https://openlibrary.org/query.json?type=/type/edition&title=%s&author=%s"
 searchurl = 'http://openlibrary.org/search.json?author=%s&title=%s'
 def bookUrls(title, author):
    """
    Yield Open Library URLs for a book.

    Only the part of title before the first ":" is searched for. At most
    the first two search results are considered, and a result without any
    edition key is skipped.
    """
    print("%s %s" % (title, author))
    if ":" in title:
        title = title.split(":")[0]
    requrl = searchurl % (quote(author), quote(title))
    results = loads(req.get(requrl).text)
    for result in results["docs"][0:2]:
        # A missing or empty edition list is skipped rather than indexed
        editions = result.get("edition_key")
        if editions:
            yield "https://openlibrary.org/books/%s" % editions[0]
 # 'http://openlibrary.org/query.json?type=/type/edition&title=The+Personality+Puzzle'
 #for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
    #print book
--- a/predictions.py
+++ b/predictions.py
@ -0,0 +1,153 @@
 ##! /usr/bin/python2
 from itertools import groupby, chain
 from sys import stdout
 from functools import partial
 from json import dumps
 def gensymer():
    """
    Make a generator of unique names: every call of the returned function
    gives the next integer, starting at 0, as a string.
    """
    state = {"next": 0}
    def inner():
        current = state["next"]
        state["next"] = current + 1
        return str(current)
    return inner
 gensym = gensymer()
 def printTrie(graph, prev, trie, weight):
    """
    Add trie to graph: one node labelled with the trie's letter, an edge
    from prev labelled with weight (two decimals), then the children.
    """
    node_id = str(gensym())
    graph.node(node_id, "%s" % trie.letter)
    graph.edge(prev, node_id, label="%.2f" % weight)
    if trie.children:
        for child, child_weight in zip(trie.children, trie.ws):
            printTrie(graph, node_id, child, child_weight)
 class Trie(object):
    """
    A node of the prefix tree: its letter, its child nodes and the
    weights (ws) that go with those children.
    """
    def __init__(self, letter, children, ws):
        self.ws = ws
        self.children = children
        self.letter = letter
 def probweight(suffixes):
    """
    Turn the "value" of every suffix into its share of the total.
    """
    weights = [float(entry["value"]) for entry in suffixes]
    total = float(sum(weights))
    return [weight / total for weight in weights]
 def buildtrie(trie, suffixes):
    """
    Build a trie, also known as a prefix tree, of all the possible completions

    suffixes is the output of partition: (letter, remaining words) pairs.
    One child is added to trie per pair, and trie is returned.
    """
    trie.children = []
    for letter, suffs in suffixes:
        groups = partition(suffs)
        if any(group[0] for group in groups):
            # The node gets exactly one child per group, so it needs one
            # weight per group: that group's share of the total value.
            # One weight per word, as before, mispaired weights and
            # children whenever several words shared their next letter
            totals = [sum(float(s["value"]) for s in members)
                      for _, members in groups]
            whole = float(sum(totals))
            node = Trie(letter, [], [t / whole for t in totals])
            trie.children.append(buildtrie(node, groups))
        else:
            # we've reached the end of this word so just include the final letter
            # [1] = there is a probability of 1 of reaching this single leaf node,
            # since it is the only possible completion here
            trie.children.append(Trie(letter, [], [1]))
    return trie
 def keyf(x):
    """
    First character of x["key"], or "" when the key is empty.
    """
    key = x["key"]
    return key[0] if key else ""
 def tails(words):
    """
    Yield a copy of every word with the first character of its key removed.
    """
    for entry in words:
        shortened = {"key": entry["key"][1:], "value": entry["value"]}
        yield shortened
 def partition(words):
    """
    Group the words by the first character of their key.

    Returns a list of (character, tails) pairs ordered by character, where
    tails are the group's words with that first character removed.
    """
    ordered = sorted(words, key=keyf)
    groups = []
    for first, members in groupby(ordered, key=keyf):
        groups.append((first, list(tails(members))))
    return groups
 def flatten_helper(letter, trie):
    """
    Return (prefixes, children): letter joined with each child's letter,
    and the children themselves.
    """
    children = trie.children
    prefixes = [letter + node.letter for node in children]
    return (prefixes, children)
 def flatten(trie):
    """
    Spell out every word below trie: a leaf gives its letter, any other
    node gives a (nested) list built from its children.
    """
    if not trie.children:
        return trie.letter
    prefixes, suffixes = flatten_helper(trie.letter, trie)
    spelled = []
    for prefix, node in zip(prefixes, suffixes):
        spelled.append(flatten(Trie(prefix, node.children, node.ws)))
    return spelled
 def flattenlist(xs):
    """
    Flatten arbitrarily nested lists into one flat list, keeping order.
    """
    flat = []
    for item in xs:
        if isinstance(item, list):
            flat.extend(flattenlist(item))
        else:
            flat.append(item)
    return flat
 def matchc(trie, prefix):
    """
    Children of trie whose letter equals the first character of prefix
    (or prefix itself when it is at most one character long).
    """
    wanted = prefix[0] if len(prefix) > 1 else prefix
    return [child for child in trie.children if child.letter == wanted]
 def match(trie, word):
    """
    Follow word down the trie and return the nodes visited, stopping at
    the first character that has no matching child.
    """
    if not word:
        return []
    candidates = matchc(trie, word[0])
    if not candidates:
        return []
    node = candidates[0]
    return [node] + match(node, word[1:])
 def complete(trie, word):
    """
    Return up to ten completions of word as a JSON array string.

    Returns False when word is empty or is not a prefix stored in trie.
    """
    if not word:
        # Nothing to follow: an empty match list would otherwise be
        # indexed with [-1] below and raise IndexError
        return False
    m = match(trie, word)
    if len(word) != len(m):
        return False
    # Every flattened string starts with the letter of the last matched
    # node, which word already ends with, hence x[1:]
    completions = [word + x[1:] for x in flattenlist(flatten(m[-1]))]
    return dumps(completions[:10])
 def sortTrie(trie):
    """
    Reorder the children of every node, in place, by descending weight,
    i.e. the most probable completion first.
    """
    if trie.children:
        ranked = sorted(zip(trie.children, trie.ws),
                        key=lambda pair: pair[1],
                        reverse=True)
        trie.children = [child for child, _ in ranked]
        trie.ws = [weight for _, weight in ranked]
        for child in trie.children:
            sortTrie(child)
 def toTrie(words):
    """
    Build the sorted completion trie for words.

    The key of every word is lowercased in place first; the children of
    the root all get weight 1.
    """
    for entry in words:
        entry["key"] = entry["key"].lower()
    root = Trie("", [], [1])
    trie = buildtrie(root, partition(words))
    trie.ws = [1] * len(trie.children)
    sortTrie(trie)
    return trie
 def testkey(w):
    return {
            "key" : w,
            "value" : "1"
            }
--- a/schemadsl.rkt
+++ b/schemadsl.rkt
@ -1,6 +1,5 @@
 #lang racket
-; This file is used to generate the elasticsearch mapping
+
 ; It is written in Racket (a dialect of Scheme)
 (require json)
 (define (root name type)
--- a/search.py
+++ b/search.py
@ -0,0 +1,237 @@
 #! /usr/bin/python2
 import elasticsearch
 from elasticsearch_dsl import FacetedSearch, Search, Q
 from elasticsearch_dsl.aggs import Terms, DateHistogram
 from sys import exit, stderr
 from json import dumps, loads
 from itertools import chain, imap
 from hashlib import sha1
 from textbookExceptions import UnIndexable
 from mcmaster.classes import allCourses
 # Generic instance of elasticsearch right now
 es = elasticsearch.Elasticsearch()
 def summarize(text):
    """
    Cut text down to its first four space-separated words followed by
    ".."; shorter texts are returned unchanged.
    """
    words = text.split(" ")
    if len(words) <= 4:
        return text
    return " ".join(words[:4]) + ".."
 def sectionToJSON(section):
    """
    Reduce a section to a dict with its instructor, semester and day.
    """
    return dict(prof=section.prof, sem=section.sem, day=section.day)
 def classToJSON(clss):
    """
    Convert a class into the dict that gets indexed.

    Keys: "title", "sections" (one dict per section), "dept", "code" and
    "books" (a list, empty when the class has no books).
    """
    # Read books once: on Class it is a property that performs a textbook
    # lookup on every access, so reading it twice doubled that work
    books = clss.books
    return {
            "title"    : clss.title,
            "sections" : [sectionToJSON(section) for section in clss.sections],
            "dept"     : clss.dept,
            "code"     : clss.code,
            "books"    : list(books) if books else []
            }
 def truncate(docid):
    """
    Shorten a document id to its first 12 characters and return it as an int.

    The id is expected to be derived from a hash of unique identifiers.
    """
    digits = str(docid)
    return int(digits[:12])
 def hashsec(course):
    """
    Hash a course into a usable id

    The id is the SHA-1, as an integer, of the course code, the title and
    the semester of the first section, concatenated. A missing code, title
    or section list contributes the empty string.

    Raises UnIndexable when the course has neither a code nor a title.
    """
    code = course["code"] or ""
    title = course["title"] or ""
    if not (code or title):
        raise UnIndexable(course)
    sections = course["sections"]
    # Without sections there is no semester to mix in. Assigning into the
    # empty list, as was done before, raised instead of supplying a default
    sem = sections[0]["sem"] if sections else ""
    payload = code + title + sem
    if not isinstance(payload, bytes):
        # Hash the UTF-8 bytes when the parts are text rather than bytes
        payload = payload.encode("UTF-8")
    h = sha1()
    h.update(payload)
    return int(h.hexdigest(), 16)
 def createIndex(name):
    """
    Create the elasticsearch index called name (an index plays the role a
    schema has in a regular database) and install the "course" mapping
    read from ./course.json. Both responses are printed.
    """
    indices = elasticsearch.client.IndicesClient(es)
    print(indices.create(name))
    with open("./course.json", "r") as mapping:
        body = loads(mapping.read())
        print(indices.put_mapping("course", body, name))
 def indexListing(course):
    """
    Index one course in the "oersearch" index under the id given by
    hashsec, and print the response. A course looks like this:
    {
     'books': [],
     'dept': 'COLLAB',
     'code': '2C03',
     'sections': [
                    {
                     'prof': 'Lisa Pender',
                     'sem': '2015/09/08 - 2015/12/08',
                     'day': 'Mo'
                     },
                     {
                      'prof': 'Staff',
                      'sem': '2015/09/08 - 2015/12/08',
                      'day': 'Th'
                      }
                  ],
     'title': 'COLLAB 2C03 - Sociology I'
     }
    """
    courseID = hashsec(course)
    response = es.index(index="oersearch",
                        doc_type="course",
                        id=courseID,
                        body=course)
    print(response)
    # For every course we index, we also create a resource for it
    # This should be an idempotent operation because we're putting it in couchdb
    # And we're using the id obtained from the hash function, so it should just update the document
    # no need to delete anything
    #try:
        #courseDept = course[0]["title"].strip().split(" ")[0].strip()
        #courseCode = course[0]["title"].strip().split(" ")[1].strip()
        #print "DEPARTMENT = \"%s\", COURSECODE = \"%s\"" % (courseDept, courseCode)
        #print createResource(textbookInfo, course[0], courseDept, courseCode, courseID)
    #except:
        #print "Couldn't create the resource associated with %s" % course
 def termSearch(field):
    """
    Return a function that builds an exact-match ("term") query on
    sections.<field> for the term it is given.
    """
    path = "sections." + field
    def t(term):
        return Q("term", **{path: term})
    return t
 def search(field):
    """
    Return a function that builds a "match" query on field for the term
    it is given.
    """
    def s(term):
        criteria = {field: term}
        return Q("match", **criteria)
    return s
 def join(x, y):
    """
    Combine two queries with & (both must match).
    """
    both = x & y
    return both
 def filterSections(secs):
    """
    Drop the sections taught by "Staff": those are almost always
    tutorials. This is only a heuristic.

    Returns the remaining sections, or False when none are left.
    """
    kept = [section for section in secs.sections if "Staff" not in section.prof]
    return kept or False
 def searchTerms(terms):
    """
    Run a search for courses

    terms maps field names to search terms; empty terms and fields without
    a searcher are ignored. All remaining queries must match.

    Returns a JSON array string with one object per matching course that
    has at least one non-"Staff" section; the object is built from the
    first such section plus the course id, title, department and books.
    """
    # A list of all the queries we want to run
    qs = [searchers[field](term)
          for field, term in terms.items()
          if term and field in searchers]
    if not qs:
        # No queries = no results
        return dumps([])
    # Reduce joins all of the queries into one query
    # It will search for the conjunction of all of them
    # So that means it cares about each query equally
    q = reduce(join, qs)
    s = (Search(using=es, index="oersearch")
        .query(q))[0:100] # only return up to 100 results for now
    results = []
    for obj in s.execute():
        # Filter once per hit and reuse the result (get rid of tutorials)
        lectures = filterSections(obj)
        if not lectures:
            continue
        secs = lectures[0].to_dict()
        # Add the truncated course id
        # This is used to point to the resource page for that course
        secs["id"] = truncate(obj.meta.id)
        secs["title"] = obj.title
        if obj["dept"] not in secs["title"]:
            secs["dept"] = obj.dept
        if obj.books:
            secs["books"] = [
                             {
                               "booktitle"  : summarize(book[0].encode("ASCII")),
                               "bookauthor" : book[1].encode("ASCII"),
                               "bookprice"  : book[2].encode("ASCII")
                             }
                                for book in obj.books
                            ]
        else:
            secs["books"] = ""
        results.append(secs)
    return dumps(results)
 # Query builder for every field searchTerms accepts; terms for any other
 # field are ignored there
 searchers = {
    "title" : search("title"),
    "loc"   : search("loc"),
    "time"  : search("time"),
    "prof"  : search("prof"),
    "day"   : search("day"),
    }
 #print searchTerms({"title" : "PHILOS"})
 #for c in imap(classToJSON, allCourses()):
    #try:
        #print indexListing(c)
    #except UnIndexable as e:
--- a/src/archive.py
+++ b/src/archive.py
@ -0,0 +1,34 @@
 #! /usr/bin/python2
 from urllib import quote
 from json import loads, dumps
 import requests as req
 searchUrl = "https://archive.org/advancedsearch.php?q={0}&fl%5B%5D=avg_rating&fl%5B%5D=description&fl%5B%5D=identifier&fl%5B%5D=type&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&page=1&output=json&callback=callback&save=yes#raw"
 def searchIA(title, author):
    """
    Do a search on The Internet Archive for a book
    """
    print "running a search"
    requrl = searchUrl.format(quote(title + " " + author))
    try:
        results = loads(req.get(requrl).text[9:][0:-1])
    except ValueError:
        return []
    rownum = results["responseHeader"]["params"]["rows"]
    if rownum < 1:
        print "Couldn't find results for %s %s" % (title, author)
        return []
    docs = results["response"]["docs"]
    urls = []
    for result in results["response"]["docs"][0:3]:
        urls.append("https://archive.org/details/%s" % result["identifier"])
    return urls
 # Example: search for David Hume's Enquiry Concerning Human Understanding
 #for url in searchIA("Enquiry Concerning Human Understanding", "Hume"):
    #print url
--- a/src/database.py
+++ b/src/database.py
@ -0,0 +1,62 @@
 #! /usr/bin/python2
 from sys import argv
 from hashlib import sha1
 def truncate(docid):
    """
    Shorten a document id to its first 12 characters

    The id is rendered with str(), cut to at most 12 characters and
    converted back to an int, so shorter ids come back unchanged.
    The document ID should be based on a hash of unique identifiers.
    """
    leading = str(docid)[:12]
    return int(leading)
 def createResource(textbookInfo, course, dept, coursecode, docid):
    """
    Create a document associated with a course
    This document contains any/all resources associated
    with that course
    example,
    {
     'books': [],
     'dept': 'COLLAB',
     'code': '2C03',
     'sections': [
                    {
                     'prof': 'Lisa Pender',
                     'sem': '2015/09/08 - 2015/12/08',
                     'day': 'Mo'
                     },
                     {
                      'prof': 'Staff',
                      'sem': '2015/09/08 - 2015/12/08',
                      'day': 'Th'
                      }
                  ],
     'title': 'COLLAB 2C03 - Sociology I'
     }
    """
    textbooks = textbookInfo(dept.strip(), coursecode.strip())
    # We truncate the id so we can have nicer looking URLs
    # Since the id will be used to point to the resource page for that course
    _id = str(truncate(docid))
    fields = {
            "_id" : _id,
            "textbooks" : textbooks,
            "coursetitle" : "%s %s" % (dept.strip(), coursecode.strip()),
            "courseinfo" : course
            #"Syllabus" : "blah"
            }
    try:
        revisions = list(localdb.revisions(_id))
        if not revisions:
            return localdb.save(fields)
        else:
            rev = dict(revisions[0])["_rev"]
            fields["_rev"] = rev
            return localdb.save(fields)
    except ResourceConflict:
        print "Resource for %s already exists, not creating a new one" % (docid)
--- a/src/goasearch.py
+++ b/src/goasearch.py
@ -0,0 +1,14 @@
 #! /usr/bin/python2
 # predictive data
 # switch to elasticsearch's prediction
 import database
 import predictions
 class GOASearch(object):
    """
    Currently an empty shell: it holds no state and defines no behaviour
    """
    def __init__(self):
        # __init__ must return None; returning self makes every
        # instantiation fail with "TypeError: __init__() should return None"
        pass
--- a/src/openlibrary.py
+++ b/src/openlibrary.py
@ -0,0 +1,24 @@
 #! /usr/bin/python2
 from urllib import quote
 from json import loads, dumps
 import requests as req
 #query = "https://openlibrary.org/query.json?type=/type/edition&title=%s&author=%s"
 searchurl = 'http://openlibrary.org/search.json?author=%s&title=%s'
 def bookUrls(title, author):
    print title, author
    if ":" in title:
        title = title.split(":")[0]
    requrl = searchurl % (quote(author), quote(title))
    results = loads(req.get(requrl).text)
    for result in results["docs"][0:2]:
        if result.has_key("edition_key"):
            yield "https://openlibrary.org/books/%s" % result["edition_key"][0]
 # 'http://openlibrary.org/query.json?type=/type/edition&title=The+Personality+Puzzle'
 #for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
    #print book
--- a/src/predictions.py
+++ b/src/predictions.py
@ -0,0 +1,153 @@
 ##! /usr/bin/python2
 from itertools import groupby, chain
 from sys import stdout
 from functools import partial
 from json import dumps
 def gensymer():
    """
    Build a source of fresh symbol names

    Returns a zero-argument function; successive calls return "0", "1",
    "2", ... as strings. Every call to gensymer() starts a new sequence.
    """
    # The counter lives in a mutable container so the inner function can
    # update it without rebinding an outer name
    state = {"next" : 0}
    def inner():
        current = state["next"]
        state["next"] = current + 1
        return str(current)
    return inner
 gensym = gensymer()
 def printTrie(graph, prev, trie, weight):
    """
    Draw trie, and everything below it, into graph

    A new node labelled with trie.letter is added and connected to the
    node named prev by an edge labelled with weight (two decimals).
    The children are drawn recursively, each paired with its entry of
    trie.ws as edge weight.
    """
    node_name = str(gensym())
    graph.node(node_name, "%s" % trie.letter)
    graph.edge(prev, node_name, label="%.2f" % weight)
    if trie.children:
        for child, child_weight in zip(trie.children, trie.ws):
            printTrie(graph, node_name, child, child_weight)
 class Trie(object):
    """
    One node of the completion trie

    letter   -- the text stored at this node
    children -- list of child nodes
    ws       -- list of weights, paired by position with children
    """
    def __init__(self, letter, children, ws):
        self.letter, self.children, self.ws = letter, children, ws
 def probweight(suffixes):
    """
    Turn the "value" of every suffix into its share of the total

    Each value is converted with float(); the result is a list, in the
    same order as suffixes, of value / sum of all values.
    """
    values = []
    for suffix in suffixes:
        values.append(float(suffix["value"]))
    total = float(sum(values))
    return [value/total for value in values]
 def buildtrie(trie, suffixes):
    """
    Build a trie, also known as a prefix tree, of all the possible completions

    trie     -- the node to attach children to; its children and ws are
                replaced
    suffixes -- (letter, words) pairs as returned by partition(), one
                pair per child to create
    Returns trie.

    trie.ws[i] is the share of the summed "value" carried by the words
    below trie.children[i]. Weights used to be computed per word, which
    did not line up with the per-letter children (several words can share
    one child), so zip(children, ws) paired children with the wrong
    weights.
    """
    trie.children = []
    totals = []
    for letter, suffs in suffixes:
        totals.append(sum(float(s["value"]) for s in suffs))
        # partition once and reuse the result for the recursion
        ped = partition(suffs)
        if any(p[0] for p in ped):
            # at least one word continues past this letter
            trie.children.append(buildtrie(Trie(letter, [], []), ped))
        else:
            # we've reached the end of this word so just include the final letter
            # [1] = there is a probability of 1 of reaching this single leaf node,
            # since it is the only possible completion here
            trie.children.append(Trie(letter, [], [1]))
    grand_total = sum(totals)
    # guard against a division by zero when every value is 0
    trie.ws = [t / grand_total if grand_total else 0.0 for t in totals]
    return trie
 def keyf(x):
    """
    Grouping key of a word: the first character of x["key"], or "" when
    the key is empty
    """
    key = x["key"]
    return key[0] if key else ""
 def tails(words):
    """
    Yield a copy of every word with the first character of its key removed

    The "value" of each word is carried over unchanged.
    """
    for entry in words:
        rest = entry["key"][1:]
        yield dict(key=rest, value=entry["value"])
 def partition(words):
    """
    Partition the words into different prefixes based on the first character

    Returns a list of (first character, tails) pairs ordered by first
    character, where tails holds the words of that group with the first
    character stripped from their keys.
    """
    # groupby only merges neighbours, so sort by the same key first
    ordered = sorted(words, key=keyf)
    groups = []
    for first, members in groupby(ordered, key=keyf):
        groups.append((first, list(tails(members))))
    return groups
 def flatten_helper(letter, trie):
    """
    Pair every child of trie with its letter prefixed by letter

    Returns (prefixed, children): prefixed[i] is
    letter + children[i].letter and children is trie.children itself.
    """
    kids = trie.children
    prefixed = []
    for kid in kids:
        prefixed.append(letter + kid.letter)
    return (prefixed, kids)
 def flatten(trie):
    """
    Spell out every path below trie

    A node without children yields its letter (a string). Otherwise the
    result is a list with one entry per child, obtained by flattening a
    copy of that child whose letter is prefixed with trie.letter; the
    list is therefore nested as deeply as the trie.
    """
    if not trie.children:
        return trie.letter
    spelled = []
    for child in trie.children:
        extended = Trie(trie.letter + child.letter, child.children, child.ws)
        spelled.append(flatten(extended))
    return spelled
 def flattenlist(xs):
    """
    Collapse arbitrarily nested lists into one flat list, keeping order

    Only list instances are descended into; anything else is an element.
    """
    flat = []
    for item in xs:
        if isinstance(item, list):
            flat.extend(flattenlist(item))
            continue
        flat.append(item)
    return flat
 def matchc(trie, prefix):
    """
    Return the children of trie whose letter equals the first character
    of prefix (or prefix itself when it is at most one character long)
    """
    wanted = prefix if len(prefix) <= 1 else prefix[0]
    return list(filter(lambda node: node.letter == wanted, trie.children))
 def match(trie, word):
    """
    Walk down trie along the characters of word

    Returns the list of nodes visited, one per matched character; the
    walk stops at the first character that has no matching child, so the
    list is shorter than word when word is not fully contained.
    """
    if not word:
        return []
    path = []
    node = trie
    for ch in word:
        hits = matchc(node, ch)
        if not hits:
            break
        node = hits[0]
        path.append(node)
    return path
 def complete(trie, word):
    """
    List up to ten completions of word found in trie

    Returns a JSON array (as a string) of completions, or False when
    word is empty or is not a prefix of anything stored in the trie.
    """
    if not word:
        # match() returns [] for an empty word and m[-1] below would
        # raise IndexError
        return False
    m = match(trie, word)
    if len(word) != len(m):
        return False
    flat = flatten(m[-1])
    if not isinstance(flat, list):
        # a node without children flattens to a bare string; wrap it so
        # it counts as one completion instead of being iterated per
        # character
        flat = [flat]
    # every flattened entry starts with the letter of the last matched
    # node, which is already the last character of word
    completions = [word+x[1:] for x in flattenlist(flat)]
    # slicing copes with fewer than ten entries on its own
    return dumps(completions[0:10])
 def sortTrie(trie):
    """
    Reorder, in place, the children of every node by descending weight

    children and ws are rearranged together so they stay paired; the
    weight is the probability that the child is the completion of
    whatever has been typed so far. Nothing is returned.
    """
    if trie.children:
        ranked = sorted(zip(trie.children, trie.ws),
                        key=lambda pair: pair[1],
                        reverse=True)
        trie.children = [node for node, _ in ranked]
        trie.ws = [w for _, w in ranked]
        for node in trie.children:
            sortTrie(node)
 def toTrie(words):
    """
    Turn a list of {"key": ..., "value": ...} words into a sorted trie

    The keys of words are lower-cased in place. The root has an empty
    letter and gives each of its children a weight of 1; the whole tree
    is ordered with sortTrie before it is returned.
    """
    for entry in words:
        entry["key"] = entry["key"].lower()
    root = Trie("", [], [1])
    root = buildtrie(root, partition(words))
    root.ws = [1 for _ in root.children]
    sortTrie(root)
    return root
 def testkey(w):
    return {
            "key" : w,
            "value" : "1"
            }
--- a/src/search.py
+++ b/src/search.py
@ -0,0 +1,237 @@
 #! /usr/bin/python2
 import elasticsearch
 from elasticsearch_dsl import FacetedSearch, Search, Q
 from elasticsearch_dsl.aggs import Terms, DateHistogram
 from sys import exit, stderr
 from json import dumps, loads
 from itertools import chain, imap
 from hashlib import sha1
 from textbookExceptions import UnIndexable
 from mcmaster.classes import allCourses
 # Generic instance of elasticsearch right now
 es = elasticsearch.Elasticsearch()
 def summarize(text):
    """
    Shorten text to its first four space-separated words followed by ".."

    Text of four words or fewer is returned unchanged.
    """
    words = text.split(" ")
    if len(words) <= 4:
        return text
    return "%s.." % " ".join(words[:4])
 def sectionToJSON(section):
    """
    Copy the prof, sem and day attributes of section into a dict
    """
    return dict(
            (field, getattr(section, field))
            for field in ("prof", "sem", "day")
            )
 def classToJSON(clss):
    """
    Convert a course object into a plain dict

    The result has the keys title, sections (each section converted with
    sectionToJSON), dept, code and books (a list, empty when the course
    has no books).
    """
    record = {"title" : clss.title}
    record["sections"] = [sectionToJSON(section) for section in clss.sections]
    record["dept"] = clss.dept
    record["code"] = clss.code
    record["books"] = list(clss.books) if clss.books else []
    return record
 def truncate(docid):
    """
    Shorten a document id to its first 12 characters

    The id is rendered with str(), cut to at most 12 characters and
    converted back to an int, so shorter ids come back unchanged.
    The document ID should be based on a hash of unique identifiers.
    """
    leading = str(docid)[:12]
    return int(leading)
 def hashsec(course):
    """
    Hash a course into a usable id

    The id is the SHA-1 of code + title + the "sem" of the first
    section, returned as an integer. A missing code or title counts as
    an empty string.
    Raises UnIndexable when the course has no sections, or has neither a
    code nor a title.
    """
    code = course["code"] or ""
    title = course["title"] or ""
    sections = course["sections"]
    if not sections:
        # There is no first section to take the semester from. This case
        # used to assign into the empty list and crash with IndexError.
        raise UnIndexable(course)
    if not (code or title):
        raise UnIndexable(course)
    payload = code + title + sections[0]["sem"]
    if not isinstance(payload, bytes):
        # sha1 wants bytes; encode text explicitly instead of relying on
        # an implicit ASCII conversion that fails on accented characters
        payload = payload.encode("utf-8")
    h = sha1()
    h.update(payload)
    return int(h.hexdigest(), 16)
 def createIndex(name):
    """
    This creates a new index in elasticsearch
    An index is like a schema in a regular database
    Create an elasticsearch index
    """
    indices = elasticsearch.client.IndicesClient(es)
    print indices.create(name)
    with open("./course.json", "r") as mapping:
        print indices.put_mapping("course", loads(mapping.read()), name)
 def indexListing(course):
    """
    Index a specific course in the database (using the courses index)
    example,
    {
     'books': [],
     'dept': 'COLLAB',
     'code': '2C03',
     'sections': [
                    {
                     'prof': 'Lisa Pender',
                     'sem': '2015/09/08 - 2015/12/08',
                     'day': 'Mo'
                     },
                     {
                      'prof': 'Staff',
                      'sem': '2015/09/08 - 2015/12/08',
                      'day': 'Th'
                      }
                  ],
     'title': 'COLLAB 2C03 - Sociology I'
     }
    """
    courseID = hashsec(course)
    print es.index(index="oersearch",
            doc_type="course",
            id=courseID,
            body=course)
    # For every course we index, we also create a resource for it
    # This should be an idempotent operation because we're putting it in couchdb
    # And we're using the id obtained from the hash function, so it should just update the document
    # no need to delete anything
    #try:
        #courseDept = course[0]["title"].strip().split(" ")[0].strip()
        #courseCode = course[0]["title"].strip().split(" ")[1].strip()
        #print "DEPARTMENT = \"%s\", COURSECODE = \"%s\"" % (courseDept, courseCode)
        #print createResource(textbookInfo, course[0], courseDept, courseCode, courseID)
    #except:
        #print "Couldn't create the resource associated with %s" % course
 def termSearch(field):
    """
    Build an exact-match ("term") query maker for a field of the sections

    Returns a function that takes the term to look for and returns the
    Q("term", ...) query on "sections.<field>".
    """
    path = "sections." + field
    def make_query(term):
        return Q("term", **{path : term})
    return make_query
 def search(field):
    """
    Build a "match" query maker for field

    Returns a function that takes the term to look for and returns the
    Q("match", ...) query on that field.
    """
    def make_query(term):
        criteria = {field : term}
        return Q("match", **criteria)
    return make_query
 def join(x, y):
    """
    Combine two queries into their conjunction (x & y)
    """
    both = x & y
    return both
 def filterSections(secs):
    """
    Keep only the sections whose prof does not contain "Staff"

    This is a heuristic for dropping tutorial sections, which almost
    always list "Staff" as the instructor.
    Returns the remaining sections as a list, or False when none remain.
    """
    taught = []
    for section in secs.sections:
        if "Staff" in section.prof:
            continue
        taught.append(section)
    return taught or False
 def searchTerms(terms):
    """
    Run a search for courses

    terms maps field names to search terms. Entries with an empty term,
    or whose field has no entry in searchers, are ignored.
    Returns a JSON array (as a string), empty when no usable term was
    given. Each element is the first non-"Staff" section of a matching
    course, extended with the truncated course id, the title, the books
    ("" when there are none) and -- when the title does not already
    contain it -- the department. Courses whose sections are all taught
    by "Staff" are left out.
    """
    # A list of all the queries we want to run
    qs = [searchers[field](term) for
            field, term in
            terms.items() if
                term and field in searchers]
    if not qs:
        # No queries = no results
        return dumps([])
    # Reduce joins all of the queries into one query
    # It will search for the conjunction of all of them
    # So that means it cares about each query equally
    q = reduce(join, qs)
    s = (Search(using=es, index="oersearch")
        .query(q))[0:100] # only return up to 100 results for now
    results = []
    for obj in s.execute():
        # get rid of tutorials; filter once per course rather than twice
        sections = filterSections(obj)
        if not sections:
            continue
        secs = sections[0].to_dict()
        # Add the truncated course id
        # This is used to point to the resource page for that course
        secs["id"] = truncate(obj.meta.id)
        secs["title"] = obj.title
        if obj["dept"] not in secs["title"]:
            secs["dept"] = obj.dept
        if obj.books:
            # No .encode("ASCII") here: it raised UnicodeEncodeError for
            # any accented title or author, and dumps() escapes non-ASCII
            # text by itself
            secs["books"] = [
                             {
                               "booktitle"  : summarize(book[0]),
                               "bookauthor" : book[1],
                               "bookprice"  : book[2]
                             }
                                for book in obj.books
                            ]
        else:
            secs["books"] = ""
        results.append(secs)
    return dumps(results)
 # Every searchable field mapped to the match-query maker for that field
 searchers = dict(
    (field, search(field))
    for field in ("title", "loc", "time", "prof", "day")
    )
 #print searchTerms({"title" : "PHILOS"})
 #for c in imap(classToJSON, allCourses()):
    #try:
        #print indexListing(c)
    #except UnIndexable as e:
--- a/src/textbookExceptions.py
+++ b/src/textbookExceptions.py
@ -0,0 +1,24 @@
 #! /usr/bin/python2
 class UnIndexable(Exception):
    """
    Raised for a course that cannot be indexed

    course -- the offending course, indexable by "code", "title" and
              "sections"
    """
    def __init__(self, course):
        self.course = course
    @property
    def reason(self):
        """
        Multi-line explanation naming the most likely missing piece
        """
        course = self.course
        # An elif chain, most specific case first: with independent ifs the
        # combined "no code and no title" message was always overwritten,
        # and a course that matched no test left message undefined
        if not course["code"] and not course["title"]:
            message = "there was no course code and no title defined"
        elif not course["code"]:
            message = "there was no course code defined"
        elif not course["title"]:
            message = "there was no course title defined"
        elif not course["sections"]:
            message = "there were no sections defined"
        else:
            message = "something else was wrong with the course data"
        return """
        There was a problem with indexing this course.
        %s
        There could be several reasons why, my best guess is that %s
        We need at least the course code, title, and one or more sections to index
        """ % (course, message)
--- a/src/visualize.py
+++ b/src/visualize.py
@ -0,0 +1,97 @@
 #! /usr/bin/python2
 from json import loads, load
 from re import sub, split
 from itertools import groupby
 from numpy import mean
 from operator import attrgetter
 import pygal
 import csv
 class Textbook(object):
    """
    A textbook used by a course

    price is converted with float() on construction; the other fields
    are stored as given.
    """
    def __init__(self, dept, code, title, author, price):
        self.dept, self.code = dept, code
        self.title, self.author = title, author
        self.price = float(price)
    def __repr__(self):
        parts = (self.dept, self.code, self.title, self.author, self.price)
        return "Dept = %s, Code = %s, %s by %s, costs $%s" % parts
 def courses():
    """
    Yield every row of ./books.csv as parsed by csv.reader
    """
    # NOTE(review): groupDept() reads a .dept attribute, which plain csv
    # rows do not have -- presumably rows were meant to be wrapped in
    # Textbook; confirm the column order of books.csv first
    with open("./books.csv", "r") as handle:
        for record in csv.reader(handle):
            yield record
 def groupDept(courselist):
    """
    Group the items of courselist by their dept attribute

    Yields (dept, items) pairs in ascending dept order; items is a list.
    """
    by_dept = attrgetter("dept")
    # groupby only merges neighbours, so sort by the same key first
    ordered = sorted(courselist, key=by_dept)
    for dept, members in groupby(ordered, by_dept):
        yield dept, list(members)
 def meanPrice(books):
    """
    Return the mean of the price attribute over books
    """
    prices = [book.price for book in books]
    return mean(prices)
 # Questions,
 # mean cost per department
 # mean cost per faculty
 # mean difference between book store copies and other copies per dept and faculty
 # number of overlapping books per faculty, do eng students benefit from that?
 # maybe a survey for students to see how often they buy books from other sources
 # correlate with how much they could be saving?
 # Display name of every faculty code
 facultyDesc = {
        "hum" : "Humanities",
        "bus" : "Business",
        "hlth" : "Health Science",
        "eng" : "Engineering",
        "sci" : "Science",
        "socsci" : "Social Sciences",
        "artsci" : "Arts & Sciences",
        "meld" : "MELD"
 }
 # Loaded through a context manager so the file handle is closed as soon
 # as the JSON has been read. categorize() looks departments up in this
 # and feeds the result to facultyDesc.
 with open("./faculties.json") as facultiesFile:
    faculties = load(facultiesFile)
 def categorize(dept):
    """
    Return the display name of the faculty that dept belongs to

    dept is looked up in faculties and the result in facultyDesc; False
    is returned when either lookup finds nothing.
    """
    faculty_code = faculties.get(dept, False)
    return facultyDesc.get(faculty_code, False)
 def byFaculty():
    """
    Yield (faculty, dept, books) for every department produced by
    groupDept(courses()), where faculty is categorize(dept)
    """
    grouped = groupDept(courses())
    for dept, books in grouped:
        faculty = categorize(dept)
        yield (faculty, dept, books)
 def meanFacultyCosts():
    """
    Render a bar chart of the mean textbook price of every faculty

    Departments are grouped by the faculty categorize() assigns them and
    the mean is taken over the books of all departments in the group.
    Returns the rendered chart.
    """
    byfac = list(byFaculty())
    graph = pygal.Bar()
    graph.title = "Mean textbook cost by faculty"
    # groupby only merges neighbours, so sort by faculty first
    sortedFacs = sorted(byfac, key=lambda x: x[0])
    for faculty, depts in groupby(sortedFacs, lambda x: x[0]):
        # Pool the books of every department of the faculty; using only
        # the first department's books misreported the faculty mean
        books = [book for _, _, deptBooks in depts for book in deptBooks]
        graph.add(faculty, meanPrice(books))
    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
    return graph.render(transpose=True)
 def meanCosts():
    """
    Render a table of the mean textbook price of every department

    One bar-chart series is added per department from
    groupDept(courses()); the chart is returned rendered as a table.
    """
    graph = pygal.Bar()
    graph.title = "Mean textbook cost by department"
    for dept, books in groupDept(courses()):
        graph.add(dept, meanPrice(books))
    #graph.render_to_file("./test_graph.svg")
    # Prices are shown as dollars with two decimals
    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
    return graph.render_table(style=True, transpose=True)
 # Prints every row of ./books.csv; this sits at module level, so it runs
 # whenever the module is executed or imported
 for x in courses():
    print x
 #print meanCosts()
 #print meanFacultyCosts()
--- a/src/website.py
+++ b/src/website.py
@ -0,0 +1,148 @@
 #! /usr/bin/python2
 from functools import partial
 from couchdb import ResourceConflict
 from flask import Flask, render_template, flash, request, send_from_directory
 from flask_bootstrap import Bootstrap
 from flask_appconfig import AppConfig
 from urllib import unquote
 from search import searchTerms
 from openlibrary import bookUrls
 from archive import searchIA
 from urllib import quote, unquote
 from json import dumps, loads
 from werkzeug.contrib.cache import MemcachedCache
 cache = MemcachedCache(['127.0.0.1:11211'])
 import os
 def predict(fieldtype, term):
    print fieldtype
    print term
    if not term:
        return "[]"
    else:
        try:
            cs = completers[fieldtype](term.lower())
        except KeyError:
            return "[]"
    if cs:
        return cs
    return "[]"
 def predictor(fieldtype):
    """
    Build a request handler that predicts completions for fieldtype

    The returned function takes a request and passes its "term" argument
    to predict().
    """
    def inner(request):
        term = dict(request.args.items())["term"]
        return predict(fieldtype, term)
    return inner
 def cacheit(key, thunk):
    """
    Tries to find a cached version of ``key''
    If there is no cached version then it will
    evaluate thunk (which must return an iterable),
    store the result as a list in the cache and return that list
    """
    if not isinstance(key, bytes):
        # quote() raises KeyError on non-ASCII unicode text in Python 2,
        # and the titles and authors used as keys are decoded from JSON
        key = key.encode("utf-8")
    # Quote once so the lookup and the store always use the same key
    cache_key = quote(key)
    cached = cache.get(cache_key)
    if cached is None:
        result = list(thunk())
        cache.set(cache_key, result)
        return result
    return cached
 def ClassSearch(configfile=None):
    defaults = {"Day", "Building", "Exact Location", "Department"}
    app = Flask(__name__)
    AppConfig(app, configfile)  # Flask-Appconfig is not necessary, but
                                # highly recommend =)
                                # https://github.com/mbr/flask-appconfig
    Bootstrap(app)
    app.config["scripts"] = "/home/wes/MGOAL/scripts"
    app.config["styles"] = "/home/wes/MGOAL/styles"
    @app.route('/favicon.ico')
    def favicon():
        return send_from_directory("/srv/http/goal/favicon.ico",
                                   'favicon.ico', mimetype='image/vnd.microsoft.icon')
    @app.route("/buildpred", methods=("GET", "POST"))
    def buildpred():
        return predictbuild(request)
    @app.route("/locpred", methods=("GET", "POST"))
    def locpred():
        return predictloc(request)
    @app.route("/daypred", methods=("GET", "POST"))
    def daypred():
        return predictday(request)
    @app.route("/deptpred", methods=("GET", "POST"))
    def deptpred():
        return predictdept(request)
    @app.route("/titlepred", methods=("GET", "POST"))
    def titlepred():
        return predicttitle(request)
    @app.route("/", methods=("GET", "POST"))
    def index():
        return render_template("search.html")
    @app.route("/fc", methods=("GET", "POST"))
    def fc():
        """ Filter Courses """
        print "trying to get courses"
        params = dict(request.args.items())
        for key, val in params.iteritems():
            if val in defaults:
                del params[key]
        results = searchTerms(params)
        return results
    @app.route("/resources", methods=("GET", "POST"))
    def resources():
        """ Get Resources """
        notRequired = False
        params = loads(dict(request.args.items())["data"])
        print params
        author = params["author"]
        title = params["title"]
        if ("No Textbooks" in title or
            "No Adoption" in title):
            return dumps("false")
        # Cache the result of the open library search
        openlib = cacheit("openlib"+title+author, lambda : bookUrls(title, author))
        print openlib
        # cache the result of an internet archive search
        iarchive = cacheit("iarchive"+title+author, lambda : searchIA(title, author))
        print iarchive
        if not (any(openlib) or any(iarchive)):
            # We literally could not find ANYTHING
            return dumps("false")
        return dumps({
                       "iarchive" : iarchive,
                       "openlib" : openlib
                     })
    @app.route("/scripts/<filename>")
    def send_script(filename):
        return send_from_directory(app.config["scripts"], filename)
    @app.route("/styles/<filename>")
    def send_style(filename):
        return send_from_directory(app.config["styles"], filename)
    return app
 # When run as a script, serve the application on port 8001 with Flask's
 # debug mode switched on
 if __name__ == "__main__":
    ClassSearch().run(port=8001, debug=True)
--- a/textbookExceptions.py
+++ b/textbookExceptions.py
@ -0,0 +1,24 @@
 #! /usr/bin/python2
 class UnIndexable(Exception):
    """
    Raised for a course that cannot be indexed

    course -- the offending course, indexable by "code", "title" and
              "sections"
    """
    def __init__(self, course):
        self.course = course
    @property
    def reason(self):
        """
        Multi-line explanation naming the most likely missing piece
        """
        course = self.course
        # An elif chain, most specific case first: with independent ifs the
        # combined "no code and no title" message was always overwritten,
        # and a course that matched no test left message undefined
        if not course["code"] and not course["title"]:
            message = "there was no course code and no title defined"
        elif not course["code"]:
            message = "there was no course code defined"
        elif not course["title"]:
            message = "there was no course title defined"
        elif not course["sections"]:
            message = "there were no sections defined"
        else:
            message = "something else was wrong with the course data"
        return """
        There was a problem with indexing this course.
        %s
        There could be several reasons why, my best guess is that %s
        We need at least the course code, title, and one or more sections to index
        """ % (course, message)
--- a/visualize.py
+++ b/visualize.py
@ -0,0 +1,97 @@
 #! /usr/bin/python2
 from json import loads, load
 from re import sub, split
 from itertools import groupby
 from numpy import mean
 from operator import attrgetter
 import pygal
 import csv
 class Textbook(object):
    """
    A textbook used by a course

    price is converted with float() on construction; the other fields
    are stored as given.
    """
    def __init__(self, dept, code, title, author, price):
        self.dept, self.code = dept, code
        self.title, self.author = title, author
        self.price = float(price)
    def __repr__(self):
        parts = (self.dept, self.code, self.title, self.author, self.price)
        return "Dept = %s, Code = %s, %s by %s, costs $%s" % parts
 def courses():
    """
    Yield every row of ./books.csv as parsed by csv.reader
    """
    # NOTE(review): groupDept() reads a .dept attribute, which plain csv
    # rows do not have -- presumably rows were meant to be wrapped in
    # Textbook; confirm the column order of books.csv first
    with open("./books.csv", "r") as handle:
        for record in csv.reader(handle):
            yield record
 def groupDept(courselist):
    """
    Group the items of courselist by their dept attribute

    Yields (dept, items) pairs in ascending dept order; items is a list.
    """
    by_dept = attrgetter("dept")
    # groupby only merges neighbours, so sort by the same key first
    ordered = sorted(courselist, key=by_dept)
    for dept, members in groupby(ordered, by_dept):
        yield dept, list(members)
 def meanPrice(books):
    """
    Return the mean of the price attribute over books
    """
    prices = [book.price for book in books]
    return mean(prices)
 # Questions,
 # mean cost per department
 # mean cost per faculty
 # mean difference between book store copies and other copies per dept and faculty
 # number of overlapping books per faculty, do eng students benefit from that?
 # maybe a survey for students to see how often they buy books from other sources
 # correlate with how much they could be saving?
 # Display name of every faculty code
 facultyDesc = {
        "hum" : "Humanities",
        "bus" : "Business",
        "hlth" : "Health Science",
        "eng" : "Engineering",
        "sci" : "Science",
        "socsci" : "Social Sciences",
        "artsci" : "Arts & Sciences",
        "meld" : "MELD"
 }
 # Loaded through a context manager so the file handle is closed as soon
 # as the JSON has been read. categorize() looks departments up in this
 # and feeds the result to facultyDesc.
 with open("./faculties.json") as facultiesFile:
    faculties = load(facultiesFile)
 def categorize(dept):
    """
    Return the display name of the faculty that dept belongs to

    dept is looked up in faculties and the result in facultyDesc; False
    is returned when either lookup finds nothing.
    """
    faculty_code = faculties.get(dept, False)
    return facultyDesc.get(faculty_code, False)
 def byFaculty():
    """
    Yield (faculty, dept, books) for every department produced by
    groupDept(courses()), where faculty is categorize(dept)
    """
    grouped = groupDept(courses())
    for dept, books in grouped:
        faculty = categorize(dept)
        yield (faculty, dept, books)
 def meanFacultyCosts():
    """
    Render a bar chart of the mean textbook price of every faculty

    Departments are grouped by the faculty categorize() assigns them and
    the mean is taken over the books of all departments in the group.
    Returns the rendered chart.
    """
    byfac = list(byFaculty())
    graph = pygal.Bar()
    graph.title = "Mean textbook cost by faculty"
    # groupby only merges neighbours, so sort by faculty first
    sortedFacs = sorted(byfac, key=lambda x: x[0])
    for faculty, depts in groupby(sortedFacs, lambda x: x[0]):
        # Pool the books of every department of the faculty; using only
        # the first department's books misreported the faculty mean
        books = [book for _, _, deptBooks in depts for book in deptBooks]
        graph.add(faculty, meanPrice(books))
    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
    return graph.render(transpose=True)
 def meanCosts():
    """
    Render a table of the mean textbook price of every department

    One bar-chart series is added per department from
    groupDept(courses()); the chart is returned rendered as a table.
    """
    graph = pygal.Bar()
    graph.title = "Mean textbook cost by department"
    for dept, books in groupDept(courses()):
        graph.add(dept, meanPrice(books))
    #graph.render_to_file("./test_graph.svg")
    # Prices are shown as dollars with two decimals
    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
    return graph.render_table(style=True, transpose=True)
 # Prints every row of ./books.csv; this sits at module level, so it runs
 # whenever the module is executed or imported
 for x in courses():
    print x
 #print meanCosts()
 #print meanFacultyCosts()
--- a/website.py
+++ b/website.py
@ -0,0 +1,148 @@
 #! /usr/bin/python2
 from functools import partial
 from couchdb import ResourceConflict
 from flask import Flask, render_template, flash, request, send_from_directory
 from flask_bootstrap import Bootstrap
 from flask_appconfig import AppConfig
 from urllib import unquote
 from search import searchTerms
 from openlibrary import bookUrls
 from archive import searchIA
 from urllib import quote, unquote
 from json import dumps, loads
 from werkzeug.contrib.cache import MemcachedCache
 cache = MemcachedCache(['127.0.0.1:11211'])
 import os
 def predict(fieldtype, term):
    print fieldtype
    print term
    if not term:
        return "[]"
    else:
        try:
            cs = completers[fieldtype](term.lower())
        except KeyError:
            return "[]"
    if cs:
        return cs
    return "[]"
 def predictor(fieldtype):
    """
    Build a request handler that predicts completions for fieldtype

    The returned function takes a request and passes its "term" argument
    to predict().
    """
    def inner(request):
        term = dict(request.args.items())["term"]
        return predict(fieldtype, term)
    return inner
 def cacheit(key, thunk):
    """
    Tries to find a cached version of ``key''
    If there is no cached version then it will
    evaluate thunk (which must return an iterable),
    store the result as a list in the cache and return that list
    """
    if not isinstance(key, bytes):
        # quote() raises KeyError on non-ASCII unicode text in Python 2,
        # and the titles and authors used as keys are decoded from JSON
        key = key.encode("utf-8")
    # Quote once so the lookup and the store always use the same key
    cache_key = quote(key)
    cached = cache.get(cache_key)
    if cached is None:
        result = list(thunk())
        cache.set(cache_key, result)
        return result
    return cached
 def ClassSearch(configfile=None):
    defaults = {"Day", "Building", "Exact Location", "Department"}
    app = Flask(__name__)
    AppConfig(app, configfile)  # Flask-Appconfig is not necessary, but
                                # highly recommend =)
                                # https://github.com/mbr/flask-appconfig
    Bootstrap(app)
    app.config["scripts"] = "/home/wes/MGOAL/scripts"
    app.config["styles"] = "/home/wes/MGOAL/styles"
    @app.route('/favicon.ico')
    def favicon():
        return send_from_directory("/srv/http/goal/favicon.ico",
                                   'favicon.ico', mimetype='image/vnd.microsoft.icon')
    @app.route("/buildpred", methods=("GET", "POST"))
    def buildpred():
        return predictbuild(request)
    @app.route("/locpred", methods=("GET", "POST"))
    def locpred():
        return predictloc(request)
    @app.route("/daypred", methods=("GET", "POST"))
    def daypred():
        return predictday(request)
    @app.route("/deptpred", methods=("GET", "POST"))
    def deptpred():
        return predictdept(request)
    @app.route("/titlepred", methods=("GET", "POST"))
    def titlepred():
        return predicttitle(request)
    @app.route("/", methods=("GET", "POST"))
    def index():
        return render_template("search.html")
    @app.route("/fc", methods=("GET", "POST"))
    def fc():
        """ Filter Courses """
        print "trying to get courses"
        params = dict(request.args.items())
        for key, val in params.iteritems():
            if val in defaults:
                del params[key]
        results = searchTerms(params)
        return results
    @app.route("/resources", methods=("GET", "POST"))
    def resources():
        """ Get Resources """
        notRequired = False
        params = loads(dict(request.args.items())["data"])
        print params
        author = params["author"]
        title = params["title"]
        if ("No Textbooks" in title or
            "No Adoption" in title):
            return dumps("false")
        # Cache the result of the open library search
        openlib = cacheit("openlib"+title+author, lambda : bookUrls(title, author))
        print openlib
        # cache the result of an internet archive search
        iarchive = cacheit("iarchive"+title+author, lambda : searchIA(title, author))
        print iarchive
        if not (any(openlib) or any(iarchive)):
            # We literally could not find ANYTHING
            return dumps("false")
        return dumps({
                       "iarchive" : iarchive,
                       "openlib" : openlib
                     })
    @app.route("/scripts/<filename>")
    def send_script(filename):
        return send_from_directory(app.config["scripts"], filename)
    @app.route("/styles/<filename>")
    def send_style(filename):
        return send_from_directory(app.config["styles"], filename)
    return app
 # When run as a script, serve the application on port 8001 with Flask's
 # debug mode switched on
 if __name__ == "__main__":
    ClassSearch().run(port=8001, debug=True)