Revert "Explain the purpose of the Racket files"

This reverts commit dc1d0c87b2.
10 years ago · a225b94832
24 changed files with 2062 additions and 8 deletions
--- a/archive.py
+++ b/archive.py
@ -0,0 +1,34 @@
+#! /usr/bin/python2
+
+from urllib import quote
+from json import loads, dumps
+
+import requests as req
+
+searchUrl = "https://archive.org/advancedsearch.php?q={0}&fl%5B%5D=avg_rating&fl%5B%5D=description&fl%5B%5D=identifier&fl%5B%5D=type&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&page=1&output=json&callback=callback&save=yes#raw"
+
+def searchIA(title, author):
+    """
+    Do a search on The Internet Archive for a book
+
+    title and author are joined with a space, URL-quoted and substituted
+    into searchUrl as the query
+
+    Returns a list of at most three "https://archive.org/details/<identifier>"
+    URLs, or an empty list when the response can't be parsed or has no documents
+    """
+    print "running a search"
+    requrl = searchUrl.format(quote(title + " " + author))
+    try:
+        # searchUrl asks for JSONP (callback=callback), so drop the 9 character
+        # "callback(" prefix and the closing ")" to get at the JSON itself
+        results = loads(req.get(requrl).text[9:][0:-1])
+    except ValueError:
+        return []
+
+    # responseHeader.params.rows only echoes the rows=50 we asked for,
+    # so look at the documents that actually came back instead
+    docs = results["response"]["docs"]
+    if not docs:
+        print "Couldn't find results for %s %s" % (title, author)
+        return []
+    return ["https://archive.org/details/%s" % doc["identifier"] for doc in docs[0:3]]
+
+
+# Example, search for David Hume's Enquiry Concerning Human Understanding
+#for url in searchIA("Enquiry Concerning Human Understanding", "Hume"):
+    #print url
--- a/course_mapping.rkt
+++ b/course_mapping.rkt
@ -1,11 +1,5 @@
 #! /usr/bin/racket
 #lang racket
-; This file is used to generate the mapping for elasticsearch
-; It is written in Racket (a dialect of Scheme)
-; It will not be necessary to run unless you want to change the elasticsearch mapping
-; This may be necessary if you have fields you want to add, or need some other customization
-; You may also edit the JSON mapping directly, or use whatever tool you want to edit the mapping with
-
 (require "schemadsl.rkt")

 (displayln
--- a/database.py
+++ b/database.py
@ -0,0 +1,62 @@
+#! /usr/bin/python2
+
+from sys import argv
+from hashlib import sha1
+
+def truncate(docid):
+    """
+    Keep only the first 12 characters of a document id
+    and hand them back as an integer
+    The document ID should be based on a
+    hash of unique identifiers
+    """
+    leading = str(docid)[:12]
+    return int(leading)
+
+def createResource(textbookInfo, course, dept, coursecode, docid):
+    """
+    Create a document associated with a course
+    This document contains any/all resources associated
+    with that course
+
+    textbookInfo: callable taking (dept, coursecode), its result is stored as "textbooks"
+    course:       stored as is under "courseinfo"
+    dept:         department string, stripped before use
+    coursecode:   course code string, stripped before use
+    docid:        id that is truncated to 12 digits and used as the document "_id"
+
+    Returns whatever localdb.save returns, or None when a ResourceConflict is raised
+
+    example,
+    {
+     'books': [],
+     'dept': 'COLLAB',
+     'code': '2C03',
+     'sections': [
+                    {
+                     'prof': 'Lisa Pender',
+                     'sem': '2015/09/08 - 2015/12/08',
+                     'day': 'Mo'
+                     },
+                     {
+                      'prof': 'Staff',
+                      'sem': '2015/09/08 - 2015/12/08',
+                      'day': 'Th'
+                      }
+                  ],
+     'title': 'COLLAB 2C03 - Sociology I'
+     }
+    """
+    textbooks = textbookInfo(dept.strip(), coursecode.strip())
+
+    # We truncate the id so we can have nicer looking URLs
+    # Since the id will be used to point to the resource page for that course
+    _id = str(truncate(docid))
+
+    fields = {
+            "_id" : _id,
+            "textbooks" : textbooks,
+            "coursetitle" : "%s %s" % (dept.strip(), coursecode.strip()),
+            "courseinfo" : course
+            #"Syllabus" : "blah"
+            }
+    # NOTE(review): localdb and ResourceConflict are not defined or imported
+    # anywhere in the visible part of this file, presumably a CouchDB
+    # database handle and its conflict exception -- confirm before relying on this
+    try:
+        revisions = list(localdb.revisions(_id))
+        if not revisions:
+            # nothing stored under this id yet, save a fresh document
+            return localdb.save(fields)
+        else:
+            # reuse the _rev of the first listed revision so the save
+            # is treated as an update of the existing document
+            rev = dict(revisions[0])["_rev"]
+            fields["_rev"] = rev
+            return localdb.save(fields)
+    except ResourceConflict:
+        print "Resource for %s already exists, not creating a new one" % (docid)
--- a/goasearch.py
+++ b/goasearch.py
@ -0,0 +1,14 @@
+#! /usr/bin/python2
+
+# predictive data
+# switch to elasticsearch's prediction
+
+
+
+import database
+import predictions
+
+class GOASearch(object):
+    """
+    Placeholder for the search front end, it holds no state yet
+    """
+    def __init__(self):
+        # __init__ has to return None, returning self here made
+        # every GOASearch() call raise a TypeError
+        pass
+
--- a/mcmaster/init.py
+++ b/mcmaster/init.py
--- a/mcmaster/classes.py
+++ b/mcmaster/classes.py
@ -0,0 +1,349 @@
+#! /usr/bin/python2
+
+from sys import argv
+from itertools import chain, islice, izip as zip
+from re import search, sub
+from functools import total_ordering
+
+from sylla import textbookInfo
+from collections import MutableMapping
+
+import datetime as dt
+import lxml.html as lxh
+import requests
+import sys
+import copy
+
+fall = "2159"
+spring_summer = "2165"
+winter = "2161"
+
+# threading stuff
+import Queue as q
+import threading as thd
+
+baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
+
+searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
+
+custom_headers = {
+        "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0",
+        "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8",
+        }
+
+courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
+
+courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}"
+
+payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
+
+payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}"
+
+
+year = dt.date.today().year
+month = dt.date.today().month
+
+days = {
+        "Mo" : 0,
+        "Tu" : 1,
+        "We" : 2,
+        "Th" : 3,
+        "Fr" : 4,
+        "Sa" : 5,
+        "Su" : 6
+        }
+
+day_descs = {
+        "Mo" : "Monday Mon Mo",
+        "Tu" : "Tuesday Tues Tu Tue",
+        "We" : "Wednesday Wed We",
+        "Th" : "Thursday Th Thurs",
+        "Fr" : "Friday Fr Fri",
+        "Sa" : "Saturday Sat Sa",
+        "Su" : "Sunday Su Sun",
+        "T"  : "TBA"
+        }
+
+def timeparse(time):
+    """
+    Parse a 12 hour clock time such as "9:30AM" or "10:30PM" into numbers
+
+    Returns a tuple (hour, minutes, half) of strings, hour is converted
+    to the 24 hour clock and half is the two character AM/PM suffix
+    Neither hour nor minutes are zero padded ("9:05AM" gives ("9", "5", "AM"))
+    """
+    half = time[-2:]
+    hour_text, _, minute_text = time[:-2].partition(":")
+    hour = int(hour_text)
+    minutes = int(minute_text)
+    if half == "PM":
+        if hour < 12:
+            hour = hour + 12
+    elif half == "AM" and hour == 12:
+        # 12:xxAM is just after midnight, so it is hour 0 and not hour 12
+        hour = 0
+
+    return (str(hour), str(minutes), half)
+
+class Class(object):
+    """
+    A course listing: its department, its title (stored UTF-8 encoded)
+    and the sections it runs in
+    """
+    def __init__(self, dept, title, sections):
+        self.dept = dept
+        self.sections = sections
+        self.title = title.encode("UTF-8")
+
+    def __repr__(self):
+        pair = (self.title, self.sections)
+        return repr(pair)
+
+    def __iter__(self):
+        # one (title, section) pair per section
+        for section in self.sections:
+            yield (self.title, section)
+
+    def hasCode(self):
+        # True when the title starts with two words that contain no
+        # lower case letters, something shaped like "COLLAB 2C03"
+        words = self.title.strip().split(" ")
+        if len(words) < 2:
+            return False
+        first, second = words[0], words[1]
+        return first.upper() == first and second.upper() == second
+
+    @property
+    def code(self):
+        # second word of the title, or False when the title has no code
+        if not self.hasCode():
+            return False
+        return self.title.strip().split(" ")[1].strip()
+
+    @property
+    def books(self):
+        # textbooks with prices for this course, or False when either
+        # the department or the course code is missing
+        if not (self.dept and self.code):
+            return False
+        return textbookInfo(self.dept, self.code, withPrices=True)
+
+@total_ordering
+class Section(dict):
+    """
+    One scheduled section of a class: when, where, who teaches it and in which semester
+
+    time is either "TBA" or four whitespace separated parts: the days (a run of
+    two letter day codes), the start time, a separator and the end time
+    All four constructor arguments are stored UTF-8 encoded
+
+    Sections order by day of the week and then by start time
+    """
+    def __init__(self, time, loc, prof, sem):
+        self.time = time.encode("UTF-8")
+        self.loc = loc.encode("UTF-8")
+        self.prof = prof.encode("UTF-8")
+        self.sem = sem.encode("UTF-8")
+        self._date = False
+        # set from outside to a single two letter day code once a
+        # multi-day section has been split into one section per day
+        self._day = False
+
+    @property
+    def date(self):
+        """
+        "TBA", or a tuple (day, start, end) where start and end come from timeparse
+        day is the single day code in _day when that is set,
+        otherwise the list of all day codes found in the time string
+        """
+        if self.time != "TBA":
+            day, start, _, end = self.time.split()
+
+            if self._day:
+                assert len(self._day) == 2
+                day = self._day
+            else:
+                # cut "MoWeFr" into ["Mo", "We", "Fr"]
+                day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)]
+
+            self._date = (day, timeparse(start), timeparse(end))
+
+            return self._date
+
+        return self.time
+
+    @property
+    def day(self):
+        # for a "TBA" section this is the first character of "TBA", i.e. "T"
+        return self.date[0]
+
+    @property
+    def start(self):
+        # hour and minutes of the start time glued together as text
+        return self.date[1][0] + self.date[1][1]
+
+    def _startkey(self):
+        # (hour, minutes) of the start time as integers
+        # Comparing the start strings put "95" (9:05) after "1030" (10:30)
+        return (int(self.date[1][0]), int(self.date[1][1]))
+
+    def __repr__(self):
+        return ("""
+                Time = %s, Location = %s, Instructor = %s, Semester Running = %s
+                 """ % (self.date, self.loc, self.prof, self.sem))
+    def __gt__(self, x):
+        # a section that still covers several days has no single place in the week
+        if isinstance(self.day, list):
+            raise NotImplementedError
+
+        if (self.date == "TBA" or
+            x.date == "TBA"):
+            return False
+
+        if self.day != x.day:
+            return days[self.day] > days[x.day]
+        return self._startkey() > x._startkey()
+
+    def __eq__(self, x):
+        return (x.date == self.date and
+                x.prof == self.prof and
+                x.loc == self.loc and
+                x.sem == self.sem)
+
+    def __ne__(self, x):
+        # without this != uses the inherited dict comparison, which only looks
+        # at the (always empty) dict contents and so never reports a difference
+        return not self.__eq__(x)
+
+
+def getStateNum(html):
+    """
+    Pull the value of the first ICStateNum input out of a Mosaic page
+    This is unique to each requester
+    Raises IndexError when the page has no such input
+    """
+    inputs = lxh.fromstring(html).xpath(".//input[@name=\"ICStateNum\"]")
+    return inputs[0].value
+
+def parseSection(section):
+    """
+    Turn one row of the class search results into a Section
+    The row must hold exactly four cells: time, location, instructor and semester
+    """
+    cols = section.xpath(".//td")
+    assert len(cols) == 4
+    # Section encodes its arguments to UTF-8 itself. Encoding them here as well
+    # made Python 2 decode the bytes as ASCII first, which fails on any
+    # non-ASCII text such as an accented instructor name
+    time, loc, prof, sem = [col.text_content().strip() for col in cols]
+
+    return Section(time, loc, prof, sem)
+
+def getSectionInfo(table):
+    """
+    Generate a Section for every row of the table whose id contains SSR_CLSRCH
+    """
+    for row in table.xpath(".//tr"):
+        ids = row.xpath("@id")
+        if ids and search(r"SSR_CLSRCH", ids[0]):
+            yield parseSection(row)
+
+def parseColumns(subject, html):
+    """
+    Pair every class name on a search result page with its sections
+
+    Returns a lazy iterator (zip is itertools.izip in this module) of
+    ((subject, class name), [Section, ...]) tuples
+    """
+    parsed = lxh.fromstring(html)
+
+    # One list of sections per table whose id looks like ICField<n>$scroll
+    # islice(..., 1, sys.maxint) skips the first matching table
+    # NOTE(review): presumably the first match is the outer table wrapping
+    # all of the results -- confirm against a real page
+    classInfo = (list(getSectionInfo(table)) for table in
+                  islice((table for table in parsed.xpath(".//table")
+                    if table.xpath("@id") and
+                    search(r"ICField[0-9]+\$scroll", table.xpath("@id")[0])), 1, sys.maxint))
+
+    # The class names are the text of the spans whose id contains DERIVED_CLSRCH_DESCR
+    classNames = ((subject, span.text_content().strip()) for span in parsed.xpath(".//span")
+                    if span.xpath("@id") and
+                       search(r"DERIVED_CLSRCH_DESCR", span.xpath("@id")[0]))
+
+    # Names and section tables are matched up purely by their order on the page
+    return zip(classNames, classInfo)
+
+def getCodes(html):
+    """
+    Lazily produce the UTF-8 encoded text of every span on the page
+    whose id matches SSR_CLSRCH_SUBJ_SUBJECT$<number>
+    """
+    spans = lxh.fromstring(html).xpath("//span")
+
+    return (span.text_content().encode("UTF-8") for span in spans
+                if span.xpath("@id") and
+                   search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", span.xpath("@id")[0]))
+
+class MosReq(object):
+    """
+    A session with the Mosaic class search for one semester
+    Holds the cookies handed out by the server and the last ICStateNum seen
+    """
+    def __init__(self, semester):
+        self.semester = semester
+        s = requests.Session()
+        # the response body is not used, the request is only made for its cookies
+        resp = s.get(baseurl, allow_redirects=True, headers=custom_headers).content
+
+        # Let the server set some cookies before doing the searching
+        cookies = {}
+        for key, val in s.cookies.iteritems():
+            cookies[key] = val
+        self.cookies = cookies
+        # False until the first page has been fetched
+        self.statenum = False
+        # cache for the codes property
+        self.codes_ = []
+
+    def getlist(self, subject):
+        """
+        Return the HTML of the class search results for one subject code
+        """
+        sys.stderr.write("Getting " + subject + "\n")
+        first_req = requests.get(searchurl, cookies=self.cookies).content
+        # Mosaic has to be asked twice: the GET above only serves to read the
+        # current ICStateNum, which the search POST below has to send along
+        self.statenum = getStateNum(first_req)
+        first_req = requests.post(searchurl,
+                                  data=payload.format(self.statenum, subject, self.semester),
+                                  cookies=self.cookies,
+                                  allow_redirects=False,
+                                  headers=custom_headers).content
+        # we make a first request to get the ICStateNum in case it thinks there are too many results
+        try:
+            self.statenum = getStateNum(first_req)
+        except IndexError:
+            # the response had no ICStateNum input, keep the previous one
+            pass
+        if "Your search will return over" in first_req:
+
+            # Mosaic warns before returning a large result set,
+            # payload2 (ICAction=#ICSave) is posted to get the results anyway
+            return requests.post(searchurl,
+                                 data=payload2.format(self.statenum, self.semester),
+                                 cookies=self.cookies,
+                                 allow_redirects=False,
+                                 headers=custom_headers).content
+        else:
+            return first_req
+
+    def classes(self, subject):
+        """
+        Return the list of ((subject, class name), sections) pairs for a subject
+        """
+        return list(parseColumns(subject, self.getlist(subject)))
+
+    def getCodes(self, letter):
+        """
+        Return a generator of the subject codes listed under one letter
+        """
+        sys.stderr.write("Getting letter " + letter + "\n")
+        first_req = requests.get(searchurl, cookies=self.cookies).content
+        self.statenum = getStateNum(first_req)
+
+        # first POST (courseCodes1), its response carries the ICStateNum
+        # that the per-letter request below has to send
+        self.statenum = getStateNum(requests.post(searchurl,
+                                    data=courseCodes1.format(self.statenum, self.semester),
+                                    cookies=self.cookies,
+                                    headers=custom_headers).content)
+
+        # second POST (courseCodes2) asks for the subjects under the given letter
+        return getCodes(requests.post(searchurl,
+                             data=courseCodes2.format(self.statenum, letter, self.semester),
+                             cookies=self.cookies,
+                             allow_redirects=False,
+                             headers=custom_headers).content)
+    @property
+    def codes(self):
+        """
+        All subject codes for the letters A to Z, fetched on first use and then cached
+        """
+        if not self.codes_:
+            # 65..90 are the character codes of "A".."Z"
+            self.codes_ = list(chain.from_iterable(
+                                map((lambda l:
+                                    self.getCodes(chr(l))),
+                                    xrange(65, 91))))
+        return self.codes_
+
+def request(codes, lists, semester):
+    """
+    Worker run by each thread: take subject codes off the codes queue and
+    put the parsed class list of each one on the lists queue
+
+    codes:    queue of subject codes, task_done is called once per code taken
+    lists:    queue receiving the result of MosReq.classes for each code
+    semester: semester passed on to MosReq
+
+    Returns when the codes queue is empty
+    """
+    requester = MosReq(semester)
+    while True:
+        try:
+            # get_nowait instead of empty() followed by get(): another worker can
+            # take the last code between the two calls, and get() would then block forever
+            code = codes.get_nowait()
+        except q.Empty:
+            return
+        try:
+            lists.put(requester.classes(code))
+        except Exception as e:
+            # A subject that fails to download or parse is skipped
+            # The worker has to keep draining the queue, if every worker
+            # quit on an error codes.join() would never return
+            sys.stderr.write("Skipping %s: %r\n" % (code, e))
+        finally:
+            codes.task_done()
+
+
+class CourseInfo(object):
+    """
+    Downloads every class of one semester using a pool of worker threads
+    """
+    def __init__(self, threadcount, semester):
+        # False until the subject codes have been fetched
+        self._codes = False
+        self.threadcount = threadcount
+        self.semester = semester
+
+    @property
+    def codes(self):
+        """
+        The subject codes of the semester, fetched once through MosReq
+        """
+        if not self._codes:
+            req = MosReq(self.semester)
+            self._codes = req.codes
+        return self._codes
+
+    def classes(self):
+        """
+        Generate a Class for every class found in the semester
+        Sections that run on several days are split into one copy per day,
+        and the sections of each class are sorted
+        """
+        # work queue of subject codes shared by all of the workers
+        qcodes = q.Queue()
+        for code in self.codes:
+            qcodes.put(code)
+        # the workers put one list of classes per subject on this queue
+        lists = q.Queue()
+        threads = []
+        thread = None
+        for i in xrange(self.threadcount):
+            thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester))
+            threads.append(thread)
+            thread.start()
+        # wait until task_done has been called for every code, then for the threads
+        qcodes.join()
+        for t in threads:
+            t.join()
+
+        sections = []
+        while not lists.empty():
+            sections.append(lists.get())
+
+        # each cl is a ((dept, title), [Section, ...]) pair from parseColumns
+        for cl in chain.from_iterable(sections):
+            new_sections = []
+            for sec in cl[1]:
+                if len(sec.day) > 1:
+                    # several days: one deep copy of the section per day
+                    for day in sec.day:
+                        new_sections.append(copy.deepcopy(sec))
+                        new_sections[-1]._day = day
+                else:
+                    # a single day (or the "T" of a TBA section)
+                    sec._day = sec.day[0]
+                    new_sections.append(sec)
+            # the "\xa0+" pattern removes runs of \xa0 characters from the title
+            yield Class(cl[0][0], sub("\xa0+", "", cl[0][1]), sorted(new_sections))
+
+def getCourses(semester, threadcount=10):
+    """
+    Generate the classes of one semester, downloading with threadcount threads
+    """
+    info = CourseInfo(threadcount, semester)
+    return info.classes()
+
+def allCourses():
+    """
+    Chain together the classes of the fall, winter and
+    spring/summer semesters, in that order
+    """
+    semesters = (fall, winter, spring_summer)
+    return chain.from_iterable(
+        getCourses(sem, threadcount=10) for sem in semesters)
+
+#for course in allCourses():
+    #sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books))
+    #print course.sections
--- a/mcmaster/site.py
+++ b/mcmaster/site.py
@ -0,0 +1,9 @@
+from oersearch import Search
+from classes import getCourses
+from sylla import getTextbooks
+
+mcmasterSearch = Search("McMaster")
+
+mcmasterSearch.setup(getCourses)
+
+mcmasterSearch.run()
--- a/mcmaster/sylla.py
+++ b/mcmaster/sylla.py
@ -0,0 +1,117 @@
+#! /usr/bin/python2
+
+from sys import argv
+from itertools import chain, islice, izip_longest, izip as zip
+from re import search, sub
+from functools import total_ordering
+from re import sub
+
+import datetime as dt
+import lxml.html as lxh
+import requests
+
+# The purpose of this module is to download and parse syllabi from various departments
+# so that they can be correlated with individual courses
+
+class Price(object):
+    """
+    An amount of money together with a status text
+    amnt is a string, its first character is dropped
+    and the rest is read as a float
+    """
+    def __init__(self, amnt, status):
+        self.status = status
+        self.dollars = float(amnt[1:])
+
+    def __repr__(self):
+        amount = repr(self.dollars)
+        return "$%s %s" % (amount, self.status)
+
+
+class Book(object):
+    """
+    A book title together with its price
+    """
+    def __init__(self, title, price):
+        self.price = price
+        self.title = title
+
+    def __repr__(self):
+        shown = (self.title, repr(self.price))
+        return '["%s", "%s"]' % shown
+
+
+def grouper(n, iterable, fillvalue=None):
+    "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
+    # the very same iterator n times over, so that filling
+    # one tuple consumes n consecutive items
+    shared = iter(iterable)
+    return izip_longest(*([shared] * n), fillvalue=fillvalue)
+
+searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1"
+
+def normalize(word):
+    """
+    Upper case the first character of a word and lower case the rest
+    Words of one character or less come back untouched
+    """
+    if len(word) <= 1:
+        return word
+    head, tail = word[0], word[1:]
+    return "%s%s" % (head.upper(), "".join(tail).lower())
+
+def parseAuthor(author):
+    """
+    Swap the first two space separated words of an author name,
+    "Lastname Firstname" becomes "Firstname Lastname"
+    Any further words are dropped, a single word comes back as it is
+    """
+    parts = author.split(" ")
+    if len(parts) > 1:
+        return "%s %s" % (parts[1], parts[0])
+    return author
+
+def normwords(phrase):
+    """
+    Run normalize over every space separated word of a phrase
+    """
+    return " ".join(normalize(word) for word in phrase.split(" "))
+
+def books(dept, code, withPrices):
+    """
+    Snatch me up a book title or three
+
+    Looks the course up on the campus store and generates one tuple per book,
+    (title, author, price) when withPrices is true and (title, author) otherwise
+    Title and author are run through normwords, price is the repr of a Price,
+    or "" when no price could be read from the page for that book
+    """
+    req = searchUrl % (dept, code)
+
+    html = requests.get(req).text
+
+    parsed = lxh.fromstring(html)
+
+    # prices() hands the prices back reversed, so pop() returns them in page order
+    pricelist = prices(parsed)
+
+    for div in parsed.xpath(".//div"):
+        if "prodDesc" in div.attrib.get("id", ""):
+
+            textbook = div.text_content()
+            # the author is whatever follows the first ":" in the first
+            # span of class "inline" under the parent, with commas removed
+            author = sub(r',', '',
+                           "".join(
+                            (div.getparent()
+                            .xpath(".//span[@class='inline']")
+                            [0].text_content()
+                            .split(":")[1:])).strip())
+            # prices() returns [] when it can't make sense of the page, and may
+            # return fewer prices than there are books. Popping from the empty
+            # list used to raise IndexError and lose every book of the course
+            price = repr(pricelist.pop()) if pricelist else ""
+            if withPrices:
+                yield (normwords(textbook), normwords(author), price)
+            else:
+                yield (normwords(textbook), normwords(author))
+
+def prices(html):
+    """
+    Get the prices from a search result page
+
+    html is an already parsed lxml tree
+    Returns a list of Price objects in reverse page order, or an empty
+    list when the collected entries can't be turned into prices
+    """
+    # first whitespace separated word of the text around
+    # every checkbox input that sits directly inside a <p>
+    ps = [
+           p.getparent().text_content().split()[0]
+             for p in html.xpath("//p/input[@type='checkbox']")
+         ]
+
+    try:
+        # grouper pairs the entries up two at a time, the first of each pair is used
+        # as the amount and the second as the status. zip (itertools.izip in this
+        # module) then turns the reversed pairs into all amounts and all statuses
+        # Unpacking raises ValueError when there are no pairs at all,
+        # and so does float() inside Price for an amount it can't parse
+        amts, stats = zip(*list(reversed(list(grouper(2, ps)))))
+        return map(Price, amts, stats)
+    except ValueError:
+        return []
+
+def textbookInfo(dept, code, withPrices=False):
+    """
+    Collect everything books() generates for a course into a list
+    """
+    found = books(dept, code, withPrices)
+    return list(found)
+
+def humanities():
+    """
+    Download humanities syllabi
+    Not implemented yet, always gives back an empty list
+    """
+    syllabi = []
+    return syllabi
+
+# Example, getting the textbook info for Personality Theory (PSYCH = Department, 2B03 = Course code)
+# print textbookInfo("PSYCH", "2B03")
--- a/openlibrary.py
+++ b/openlibrary.py
@ -0,0 +1,24 @@
+#! /usr/bin/python2
+
+from urllib import quote
+from json import loads, dumps
+
+import requests as req
+
+#query = "https://openlibrary.org/query.json?type=/type/edition&title=%s&author=%s"
+searchurl = 'http://openlibrary.org/search.json?author=%s&title=%s'
+
+def bookUrls(title, author):
+    """
+    Generate an Open Library URL for the first edition of each of
+    the first two search hits for the given title and author
+    Only the part of the title before the first colon is searched for
+    """
+    print title, author
+    # for a title without a colon this is the whole title
+    title = title.split(":")[0]
+    requrl = searchurl % (quote(author), quote(title))
+    response = req.get(requrl)
+    hits = loads(response.text)["docs"][0:2]
+    for hit in hits:
+        if "edition_key" in hit:
+            yield "https://openlibrary.org/books/%s" % hit["edition_key"][0]
+
+# 'http://openlibrary.org/query.json?type=/type/edition&title=The+Personality+Puzzle'
+
+#for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
+    #print book
--- a/predictions.py
+++ b/predictions.py
@ -0,0 +1,153 @@
+##! /usr/bin/python2
+from itertools import groupby, chain
+from sys import stdout
+from functools import partial
+from json import dumps
+
+def gensymer():
+    """
+    Make a function that returns "0", "1", "2", ... on successive calls
+    Every function made this way counts on its own
+    """
+    state = {"next": 0}
+    def inner():
+        current = state["next"]
+        state["next"] = current + 1
+        return str(current)
+    return inner
+
+gensym = gensymer()
+
+def printTrie(graph, prev, trie, weight):
+    """
+    Add a trie to a graph, one node per trie node
+
+    Each node gets a fresh name from gensym and the letter as its text,
+    and is joined to prev by an edge labelled with weight to two decimals
+    """
+    new_node = str(gensym())
+    graph.node(new_node, "%s" % trie.letter)
+    graph.edge(prev, new_node, label="%.2f" % weight)
+    if trie.children:
+        for child, childweight in zip(trie.children, trie.ws):
+            printTrie(graph, new_node, child, childweight)
+
+
+class Trie(object):
+    """
+    A node of the prefix tree: its letter, its child
+    nodes and the list of weights kept alongside them
+    """
+    def __init__(self, letter, children, ws):
+        self.ws = ws
+        self.children = children
+        self.letter = letter
+
+def probweight(suffixes):
+    """
+    Turn the "value" of every suffix into its share of the total of all values
+    """
+    values = [float(entry["value"]) for entry in suffixes]
+    total = float(sum(values))
+    return [value / total for value in values]
+
+def buildtrie(trie, suffixes):
+    """
+    Build a trie, also known as a prefix tree, of all the possible completions
+
+    suffixes is a list of (letter, words) groups as made by partition
+    The children of trie are replaced and trie itself is returned
+    """
+    trie.children = []
+    for letter, suffs in suffixes:
+        ped = partition(suffs)
+        # a group with a non-empty first letter means there are letters left to place
+        haschildren = any(p[0] for p in ped)
+        if haschildren:
+            node = Trie(letter, [], probweight(suffs))
+            trie.children.append(buildtrie(node, ped))
+        else:
+            # the word ends here, so this letter is a leaf
+            # [1] = this single leaf is reached with a probability of 1,
+            # since it is the only possible completion
+            trie.children.append(Trie(letter, [], [1]))
+    return trie
+
+
+def keyf(x):
+    """
+    First character of the "key" of a word, or "" when the key is empty
+    """
+    key = x["key"]
+    return key[0] if key else ""
+
+def tails(words):
+    """
+    Generate a copy of every word with the first character of its key removed
+    """
+    for word in words:
+        rest = word["key"][1:]
+        yield {"key" : rest, "value" : word["value"]}
+
+def partition(words):
+    """
+    Group the words by the first character of their key
+
+    Returns a list of (character, words) pairs sorted by character,
+    the words in each pair have that first character removed from their key
+    """
+    # groupby only groups neighbours, so the words have to be sorted first
+    ordered = sorted(words, key=keyf)
+    groups = []
+    for first, members in groupby(ordered, key=keyf):
+        groups.append((first, list(tails(members))))
+    return groups
+
+
+def flatten_helper(letter, trie):
+    """
+    Pair up letter + child letter for every child of trie with the children themselves
+    """
+    children = trie.children
+    prefixes = [letter + child.letter for child in children]
+    return (prefixes, children)
+
+def flatten(trie):
+    """
+    Spell out every path below trie
+    A leaf gives its letter, any other node gives a (nested)
+    list with the flattened form of each of its children
+    """
+    if not trie.children:
+        return trie.letter
+    flattened = []
+    for child in trie.children:
+        # carry the letters collected so far down into the child
+        prefix = trie.letter + child.letter
+        flattened.append(flatten(Trie(prefix, child.children, child.ws)))
+    return flattened
+
+def flattenlist(xs):
+    """
+    Collapse arbitrarily nested lists into one flat list
+    """
+    flat = []
+    for x in xs:
+        if isinstance(x, list):
+            flat.extend(flattenlist(x))
+        else:
+            flat.append(x)
+    return flat
+
+def matchc(trie, prefix):
+    """
+    The children of trie whose letter equals the first character of prefix
+    """
+    c = prefix[0] if len(prefix) > 1 else prefix
+    found = []
+    for ch in trie.children:
+        if ch.letter == c:
+            found.append(ch)
+    return found
+
+def match(trie, word):
+    """
+    Walk down the trie along the letters of word
+    Returns the nodes visited, stopping at the first letter that has no matching child
+    """
+    path = []
+    node = trie
+    for letter in word:
+        found = matchc(node, letter)
+        if not found:
+            break
+        node = found[0]
+        path.append(node)
+    return path
+
+def complete(trie, word):
+    """
+    Find the completions of word in the trie
+
+    Returns a JSON list of at most 10 completions, or False
+    when word is empty or is not a prefix of anything in the trie
+    """
+    m = match(trie, word)
+    # an empty word matches no nodes at all, which used to slip through
+    # the length check and then fail on m[-1] with an IndexError
+    if not m or len(word) != len(m):
+        return False
+    # every flattened path starts with the letter of the last matched node,
+    # which word already ends with, hence the x[1:]
+    completions = [word+x[1:] for x in flattenlist(flatten(m[-1]))]
+    return dumps(completions[0:10])
+
+def sortTrie(trie):
+    """
+    Reorder the children of every node, highest weight first
+    The weights in ws are reordered along with the children
+    The trie is changed in place and nothing is returned
+    """
+    if trie.children:
+        ranked = sorted(zip(trie.children, trie.ws), key=lambda pair: pair[1], reverse=True)
+        trie.children = [pair[0] for pair in ranked]
+        trie.ws = [pair[1] for pair in ranked]
+        for child in trie.children:
+            sortTrie(child)
+
+def toTrie(words):
+    """
+    Build a sorted trie out of a list of {"key": ..., "value": ...} words
+    The keys of the words passed in are lower cased in place
+    """
+    for word in words:
+        word["key"] = word["key"].lower()
+    root = Trie("", [], [1])
+    trie = buildtrie(root, partition(words))
+    # every child of the root starts out with the same weight
+    trie.ws = [1]*len(trie.children)
+    sortTrie(trie)
+    return trie
+
+def testkey(w):
+    """
+    Wrap a string as a word with a value of "1"
+    """
+    return dict(key=w, value="1")
--- a/schemadsl.rkt
+++ b/schemadsl.rkt
@ -1,6 +1,5 @@
 #lang racket
-; This file is used to generate the elasticsearch mapping
-; It is written in Racket (a dialect of Scheme)
+
 (require json)

 (define (root name type)
--- a/search.py
+++ b/search.py
@ -0,0 +1,237 @@
+#! /usr/bin/python2
+
+import elasticsearch
+
+from elasticsearch_dsl import FacetedSearch, Search, Q
+from elasticsearch_dsl.aggs import Terms, DateHistogram
+from sys import exit, stderr
+from json import dumps, loads
+from itertools import chain, imap
+
+from hashlib import sha1
+
+from textbookExceptions import UnIndexable
+
+from mcmaster.classes import allCourses
+
+# Generic instance of elasticsearch right now
+es = elasticsearch.Elasticsearch()
+
+def summarize(text):
+    """
+    Cut a text down to its first four space separated words followed by ".."
+    Texts of four words or less come back unchanged
+    """
+    words = text.split(" ")
+    if len(words) <= 4:
+        return text
+    return " ".join(words[:4]) + ".."
+
+def sectionToJSON(section):
+    """
+    Reduce a section to a dict of its instructor, semester and day
+    """
+    return dict(prof=section.prof, sem=section.sem, day=section.day)
+
+def classToJSON(clss):
+    """
+    Turn a class into the dict that gets indexed
+    "books" is an empty list when the class has no books
+    """
+    # books is a property on mcmaster.classes.Class that looks the textbooks
+    # up over the network on every access, so read it only once
+    books = clss.books
+    return {
+            "title"    : clss.title,
+            "sections" : map(sectionToJSON, clss.sections),
+            "dept"     : clss.dept,
+            "code"     : clss.code,
+            "books"    : list(books) if books else []
+            }
+
+
+def truncate(docid):
+    """
+    Keep only the first 12 characters of a document id
+    and hand them back as an integer
+    The document ID should be based on a
+    hash of unique identifiers
+    """
+    leading = str(docid)[:12]
+    return int(leading)
+
+def hashsec(course):
+    """
+    Hash a course into a usable id
+
+    The id is the SHA-1 of the course code, the title and the
+    semester of the first section, read as one big integer
+    A course without sections is hashed with an empty semester
+    Raises UnIndexable when the course has neither a code nor a title
+    """
+    code = course["code"] or ""
+    title = course["title"] or ""
+
+    if not (code or title):
+        raise UnIndexable(course)
+
+    sections = course["sections"]
+    # This used to assign "" to sections[0], which raises IndexError on an empty
+    # list and would not have supported the ["sem"] lookup below anyway
+    sem = sections[0]["sem"] if sections else ""
+
+    h = sha1()
+    h.update(code + title + sem)
+    return int(h.hexdigest(), 16)
+
+def createIndex(name):
+    """
+    This creates a new index in elasticsearch
+    An index is like a schema in a regular database
+
+    name is the name of the index to create
+    The mapping for the "course" document type is read
+    from ./course.json, relative to the working directory
+    The responses to both requests are printed
+    """
+    indices = elasticsearch.client.IndicesClient(es)
+
+    print indices.create(name)
+    with open("./course.json", "r") as mapping:
+        print indices.put_mapping("course", loads(mapping.read()), name)
+
+def indexListing(course):
+    """
+    Index a specific course in the "oersearch" index as a "course" document
+    The document id comes from hashsec, which raises UnIndexable
+    for a course without a code and a title
+    The response from elasticsearch is printed, nothing is returned
+
+    example,
+    {
+     'books': [],
+     'dept': 'COLLAB',
+     'code': '2C03',
+     'sections': [
+                    {
+                     'prof': 'Lisa Pender',
+                     'sem': '2015/09/08 - 2015/12/08',
+                     'day': 'Mo'
+                     },
+                     {
+                      'prof': 'Staff',
+                      'sem': '2015/09/08 - 2015/12/08',
+                      'day': 'Th'
+                      }
+                  ],
+     'title': 'COLLAB 2C03 - Sociology I'
+     }
+
+    """
+    courseID = hashsec(course)
+    # the id is derived from the course itself, so the same course keeps the same id
+    print es.index(index="oersearch",
+            doc_type="course",
+            id=courseID,
+            body=course)
+
+    # For every course we index, we also create a resource for it
+    # This should be an idempotent operation because we're putting it in couchdb
+    # And we're using the id obtained from the hash function, so it should just update the document
+    # no need to delete anything
+    #try:
+        #courseDept = course[0]["title"].strip().split(" ")[0].strip()
+        #courseCode = course[0]["title"].strip().split(" ")[1].strip()
+        #print "DEPARTMENT = \"%s\", COURSECODE = \"%s\"" % (courseDept, courseCode)
+        #print createResource(textbookInfo, course[0], courseDept, courseCode, courseID)
+    #except:
+        #print "Couldn't create the resource associated with %s" % course
+
+def termSearch(field):
+    """
+    Make a function that builds an exact match ("term")
+    query on the given field of the sections
+    """
+    def t(term):
+        params = {"sections."+field : term}
+        return Q("term", **params)
+    return t
+
+def search(field):
+    """
+    Make a function that builds a "match" query on the given field
+    """
+    def s(term):
+        params = {field : term}
+        return Q("match", **params)
+    return s
+
+def join(x, y):
+    """
+    Combine two queries with the & operator
+    """
+    both = x & y
+    return both
+
+def filterSections(secs):
+    """
+    Drop the sections taught by "Staff"
+    Those are almost always tutorials, though this is only a heuristic
+    Returns the remaining sections, or False when none are left
+    """
+    kept = []
+    for section in secs.sections:
+        if "Staff" not in section.prof:
+            kept.append(section)
+    return kept if kept else False
+
+def searchTerms(terms):
+    """
+    Run a search for courses
+
+    terms maps field names to search terms, empty terms and
+    fields without an entry in searchers are ignored
+    Returns a JSON list with one entry per course that has a section left
+    after filterSections, built from the first such section
+    """
+
+    # A list of all the queries we want to run
+    qs = [searchers[field](term) for
+            field, term in
+            terms.iteritems() if
+                term and searchers.has_key(field)]
+
+    if not qs:
+        # No queries = no results
+        return dumps([])
+
+    # Reduce joins all of the queries into one query
+    # It will search for the conjunction of all of them
+    # So that means it cares about each query equally
+    q = reduce(join, qs)
+
+    s = (Search(using=es, index="oersearch")
+        .query(q))[0:100] # only return up to 100 results for now
+
+    results = s.execute()
+
+    # pairs of (hit, first section of the hit not taught by "Staff" as a dict)
+    filtered = [
+                 (secs, filterSections(secs)[0].to_dict()) # get rid of tutorials
+                  for secs in results
+                    if filterSections(secs)
+               ]
+    results = []
+    for obj, secs in filtered:
+        # Add the truncated course id
+        # This is used to point to the resource page for that course
+        secs["id"] = truncate(obj.meta.id)
+        secs["title"] = obj.title
+        # the department is only added when the title doesn't mention it already
+        if obj["dept"] not in secs["title"]:
+            secs["dept"] = obj.dept
+        if obj.books:
+            # each book is indexed as a (title, author, price) sequence
+            # encode("ASCII") raises on any non-ASCII character
+            secs["books"] = [
+                             {
+                               "booktitle"  : summarize(book[0].encode("ASCII")),
+                               "bookauthor" : book[1].encode("ASCII"),
+                               "bookprice"  : book[2].encode("ASCII")
+                             }
+                                for book in obj.books
+                            ]
+        else:
+            secs["books"] = ""
+        results.append(secs)
+
+    return dumps(results)
+
+
+searchers = {
+    "title" : search("title"),
+    "loc"   : search("loc"),
+    "time"  : search("time"),
+    "prof"  : search("prof"),
+    "day"   : search("day"),
+    }
+
+#print searchTerms({"title" : "PHILOS"})
+
+#for c in imap(classToJSON, allCourses()):
+    #try:
+        #print indexListing(c)
+    #except UnIndexable as e:
--- a/src/archive.py
+++ b/src/archive.py
@ -0,0 +1,34 @@
+#! /usr/bin/python2
+
+from urllib import quote
+from json import loads, dumps
+
+import requests as req
+
+# Internet Archive advanced-search URL template; {0} receives the
+# URL-quoted query.  It asks for JSON output wrapped in a JSONP call
+# named "callback", 50 rows, first page
+searchUrl = "https://archive.org/advancedsearch.php?q={0}&fl%5B%5D=avg_rating&fl%5B%5D=description&fl%5B%5D=identifier&fl%5B%5D=type&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&page=1&output=json&callback=callback&save=yes#raw"
+
+def searchIA(title, author):
+    """
+    Do a search on The Internet Archive for a book
+    """
+    print "running a search"
+    requrl = searchUrl.format(quote(title + " " + author))
+    try:
+        results = loads(req.get(requrl).text[9:][0:-1])
+    except ValueError:
+        return []
+
+    rownum = results["responseHeader"]["params"]["rows"]
+    if rownum < 1:
+        print "Couldn't find results for %s %s" % (title, author)
+        return []
+    docs = results["response"]["docs"]
+    urls = []
+    for result in results["response"]["docs"][0:3]:
+        urls.append("https://archive.org/details/%s" % result["identifier"])
+    return urls
+
+
+# Example, search for David Hume's Enquiry Concerning Human Understanding
+#for url in searchIA("Hume", "Enquiry Concerning Human Understanding"):
+    #print url
--- a/src/database.py
+++ b/src/database.py
@ -0,0 +1,62 @@
+#! /usr/bin/python2
+
+from sys import argv
+from hashlib import sha1
+
+def truncate(docid):
+    """
+    Truncate a document id to 12 digits
+    The document ID should be based on a
+    hash of unique identifiers
+    """
+    return int(str(docid)[0:12])
+
+def createResource(textbookInfo, course, dept, coursecode, docid):
+    """
+    Create a document associated with a course
+    This document contains any/all resources associated
+    with that course
+
+    example,
+    {
+     'books': [],
+     'dept': 'COLLAB',
+     'code': '2C03',
+     'sections': [
+                    {
+                     'prof': 'Lisa Pender',
+                     'sem': '2015/09/08 - 2015/12/08',
+                     'day': 'Mo'
+                     },
+                     {
+                      'prof': 'Staff',
+                      'sem': '2015/09/08 - 2015/12/08',
+                      'day': 'Th'
+                      }
+                  ],
+     'title': 'COLLAB 2C03 - Sociology I'
+     }
+    """
+    textbooks = textbookInfo(dept.strip(), coursecode.strip())
+
+    # We truncate the id so we can have nicer looking URLs
+    # Since the id will be used to point to the resource page for that course
+    _id = str(truncate(docid))
+
+    fields = {
+            "_id" : _id,
+            "textbooks" : textbooks,
+            "coursetitle" : "%s %s" % (dept.strip(), coursecode.strip()),
+            "courseinfo" : course
+            #"Syllabus" : "blah"
+            }
+    try:
+        revisions = list(localdb.revisions(_id))
+        if not revisions:
+            return localdb.save(fields)
+        else:
+            rev = dict(revisions[0])["_rev"]
+            fields["_rev"] = rev
+            return localdb.save(fields)
+    except ResourceConflict:
+        print "Resource for %s already exists, not creating a new one" % (docid)
--- a/src/goasearch.py
+++ b/src/goasearch.py
@ -0,0 +1,14 @@
+#! /usr/bin/python2
+
+# predictive data
+# switch to elasticsearch's prediction
+
+
+
+import database
+import predictions
+
+class GOASearch(object):
+    def __init__(self):
+        return self
+
--- a/src/openlibrary.py
+++ b/src/openlibrary.py
@ -0,0 +1,24 @@
+#! /usr/bin/python2
+
+from urllib import quote
+from json import loads, dumps
+
+import requests as req
+
+#query = "https://openlibrary.org/query.json?type=/type/edition&title=%s&author=%s"
+# Open Library search endpoint; the two %s slots take the URL-quoted
+# author and title, in that order
+searchurl = 'http://openlibrary.org/search.json?author=%s&title=%s'
+
+def bookUrls(title, author):
+    print title, author
+    if ":" in title:
+        title = title.split(":")[0]
+    requrl = searchurl % (quote(author), quote(title))
+    results = loads(req.get(requrl).text)
+    for result in results["docs"][0:2]:
+        if result.has_key("edition_key"):
+            yield "https://openlibrary.org/books/%s" % result["edition_key"][0]
+
+# 'http://openlibrary.org/query.json?type=/type/edition&title=The+Personality+Puzzle'
+
+#for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
+    #print book
--- a/src/predictions.py
+++ b/src/predictions.py
@ -0,0 +1,153 @@
+##! /usr/bin/python2
+from itertools import groupby, chain
+from sys import stdout
+from functools import partial
+from json import dumps
+
+def gensymer():
+    n = [0]
+    def inner():
+        result = str(n[0])
+        n[0] += 1
+        return result
+    return inner
+
+gensym = gensymer()
+
+def printTrie(graph, prev, trie, weight):
+    new_node = str(gensym())
+    graph.node(new_node, "%s" % trie.letter)
+    graph.edge(prev, new_node, label="%.2f" % weight)
+    if not trie.children:
+        return
+    for child, weight in zip(trie.children, trie.ws):
+        printTrie(graph, new_node, child, weight)
+
+
+class Trie(object):
+    def __init__(self, letter, children, ws):
+        self.letter = letter
+        self.children = children
+        self.ws = ws
+
+def probweight(suffixes):
+    weights = [float(s["value"]) for s in suffixes]
+    s = float(sum(weights))
+    ws = [w/s for w in weights]
+    return ws
+
+def buildtrie(trie, suffixes):
+    """
+    Build a trie, also known as a prefix tree, of all the possible completions
+    """
+    trie.children = []
+    for letter, suffs in suffixes:
+        ped = partition(suffs)
+        if any(map(lambda p: p[0], ped)):
+            # check if there are any children
+            trie.children.append(buildtrie(Trie(letter, [], probweight(suffs)), partition(suffs)))
+        else:
+            # we've reached the end of this word so just include the final letter
+            # [1] = there is a probability of 1 of reaching this single leaf node,
+            # since it is the only possible completion here
+            trie.children.append(Trie(letter, [], [1]))
+    return trie
+
+
+def keyf(x):
+    if not x["key"]:
+        return ""
+    return x["key"][0]
+
+def tails(words):
+    for word in words:
+        yield {
+               "key" : word["key"][1:],
+               "value" : word["value"]
+               }
+
+def partition(words):
+    """
+    Partition the words into different prefixes based on the first character
+    """
+    groups = [
+            (g[0], list(tails(g[1])))
+                for g in groupby(
+                    sorted(words, key=keyf),
+                    key=keyf)
+             ]
+    return groups
+
+
+def flatten_helper(letter, trie):
+    return ([letter + child.letter for
+            child in trie.children], trie.children)
+
+def flatten(trie):
+    if not trie.children:
+        return trie.letter
+    prefixes, suffixes = flatten_helper(trie.letter, trie)
+    return [flatten(Trie(p, s2.children, s2.ws)) for p, s2 in zip(prefixes, suffixes)]
+
+def flattenlist(xs):
+    locs = []
+    for x in xs:
+        if not isinstance(x, list):
+            locs.append(x)
+        else:
+            locs.extend(flattenlist(x))
+    return locs
+
+def matchc(trie, prefix):
+    c = None
+    if len(prefix) > 1:
+        c = prefix[0]
+    else:
+        c = prefix
+    return [ch for ch in trie.children if ch.letter == c]
+
+def match(trie, word):
+    if not word:
+        return []
+    m = matchc(trie, word[0])
+    if not m:
+        return []
+    else:
+        return [m[0]] + match(m[0], word[1:])
+
+def complete(trie, word):
+    m = match(trie, word)
+    if len(word) != len(m):
+        return False
+    completions = [word+x[1:] for x in flattenlist(flatten(m[-1]))]
+    if len(completions) > 10:
+        return dumps(completions[0:10])
+    return dumps(completions)
+
+def sortTrie(trie):
+    """
+    Sort the children of each node in descending order
+    of the probability that each child would be the completion
+    of whatever that word is
+    """
+    if not trie.children:
+        return
+    sortedChilds = sorted(zip(trie.children, trie.ws), key=lambda x: x[1], reverse=True)
+    trie.children = [x[0] for x in sortedChilds]
+    trie.ws = [x[1] for x in sortedChilds]
+    for child in trie.children:
+        sortTrie(child)
+
+def toTrie(words):
+    for word in words:
+        word["key"] = word["key"].lower()
+    trie = buildtrie(Trie("", [], [1]), partition(words))
+    trie.ws = [1]*len(trie.children)
+    sortTrie(trie)
+    return trie
+
+def testkey(w):
+    return {
+            "key" : w,
+            "value" : "1"
+            }
--- a/src/search.py
+++ b/src/search.py
@ -0,0 +1,237 @@
+#! /usr/bin/python2
+
+import elasticsearch
+
+from elasticsearch_dsl import FacetedSearch, Search, Q
+from elasticsearch_dsl.aggs import Terms, DateHistogram
+from sys import exit, stderr
+from json import dumps, loads
+from itertools import chain, imap
+
+from hashlib import sha1
+
+from textbookExceptions import UnIndexable
+
+from mcmaster.classes import allCourses
+
+# Client shared by the whole module, created with the library defaults
+# (no host or other options are passed)
+es = elasticsearch.Elasticsearch()
+
+def summarize(text):
+    splitted = text.split(" ")
+    if len(splitted) > 4:
+        return " ".join(splitted[0:4]) + ".."
+    return text
+
+def sectionToJSON(section):
+    return {
+            "prof" : section.prof,
+            "sem"  : section.sem,
+            "day"  : section.day
+            }
+
+def classToJSON(clss):
+    return {
+            "title"    : clss.title,
+            "sections" : map(sectionToJSON, clss.sections),
+            "dept"     : clss.dept,
+            "code"     : clss.code,
+            "books"    : list(clss.books) if clss.books else []
+            }
+
+
+def truncate(docid):
+    """
+    Truncate a document id to 12 digits
+    The document ID should be based on a
+    hash of unique identifiers
+    """
+    return int(str(docid)[0:12])
+
+def hashsec(course):
+    """
+    Hash a course into a usable id
+    """
+    if not course["code"]:
+        code = ""
+    else:
+        code = course["code"]
+    if not course["title"]:
+        title = ""
+    else:
+        title = course["title"]
+
+    if not course["sections"] or len(course["sections"]) < 1:
+        course["sections"][0] = ""
+
+    if not (code or title):
+        raise UnIndexable(course)
+
+    h = sha1()
+    h.update(code + title + course["sections"][0]["sem"])
+    return int(h.hexdigest(), 16)
+
+def createIndex(name):
+    """
+    This creates a new index in elasticsearch
+    An index is like a schema in a regular database
+    Create an elasticsearch index
+
+    """
+    indices = elasticsearch.client.IndicesClient(es)
+
+    print indices.create(name)
+    with open("./course.json", "r") as mapping:
+        print indices.put_mapping("course", loads(mapping.read()), name)
+
+def indexListing(course):
+    """
+    Index a specific course in the database (using the courses index)
+    example,
+    {
+     'books': [],
+     'dept': 'COLLAB',
+     'code': '2C03',
+     'sections': [
+                    {
+                     'prof': 'Lisa Pender',
+                     'sem': '2015/09/08 - 2015/12/08',
+                     'day': 'Mo'
+                     },
+                     {
+                      'prof': 'Staff',
+                      'sem': '2015/09/08 - 2015/12/08',
+                      'day': 'Th'
+                      }
+                  ],
+     'title': 'COLLAB 2C03 - Sociology I'
+     }
+
+    """
+    courseID = hashsec(course)
+    print es.index(index="oersearch",
+            doc_type="course",
+            id=courseID,
+            body=course)
+
+    # For every course we index, we also create a resource for it
+    # This should be an idempotent operation because we're putting it in couchdb
+    # And we're using the id obtained from the hash function, so it should just update the document
+    # no need to delete anything
+    #try:
+        #courseDept = course[0]["title"].strip().split(" ")[0].strip()
+        #courseCode = course[0]["title"].strip().split(" ")[1].strip()
+        #print "DEPARTMENT = \"%s\", COURSECODE = \"%s\"" % (courseDept, courseCode)
+        #print createResource(textbookInfo, course[0], courseDept, courseCode, courseID)
+    #except:
+        #print "Couldn't create the resource associated with %s" % course
+
+def termSearch(field):
+    """
+    Make a term search (exact match)
+    """
+    def t(term):
+        q = Q("term",
+                **{
+                    "sections."+field : term
+                    })
+        return q
+    return t
+
+def search(field):
+    """
+    Make a match search
+    """
+    def s(term):
+        q = Q("match",
+                 **{
+                     field : term
+                    })
+        return q
+    return s
+
+def join(x, y):
+    """
+    Join two queries
+    Combines x and y with the & operator and returns the result;
+    searchTerms folds its list of queries with this function
+    """
+    return x & y
+
+def filterSections(secs):
+    """
+    Get rid of tutorial sections
+    because they almost always have "Staff" as the instructor
+    This is just a heuristic of course
+    """
+    filtered = [s for s in secs.sections if "Staff" not in s.prof]
+    if len(filtered) > 0:
+        return filtered
+    return False
+
+def searchTerms(terms):
+    """
+    Run a search for courses
+    """
+
+    # A list of all the queries we want to run
+    qs = [searchers[field](term) for
+            field, term in
+            terms.iteritems() if
+                term and searchers.has_key(field)]
+
+    if not qs:
+        # No queries = no results
+        return dumps([])
+
+    # Reduce joins all of the queries into one query
+    # It will search for the conjunction of all of them
+    # So that means it cares about each query equally
+    q = reduce(join, qs)
+
+    s = (Search(using=es, index="oersearch")
+        .query(q))[0:100] # only return up to 100 results for now
+
+    results = s.execute()
+
+    filtered = [
+                 (secs, filterSections(secs)[0].to_dict()) # get rid of tutorials
+                  for secs in results
+                    if filterSections(secs)
+               ]
+    results = []
+    for obj, secs in filtered:
+        # Add the truncated course id
+        # This is used to point to the resource page for that course
+        secs["id"] = truncate(obj.meta.id)
+        secs["title"] = obj.title
+        if obj["dept"] not in secs["title"]:
+            secs["dept"] = obj.dept
+        if obj.books:
+            secs["books"] = [
+                             {
+                               "booktitle"  : summarize(book[0].encode("ASCII")),
+                               "bookauthor" : book[1].encode("ASCII"),
+                               "bookprice"  : book[2].encode("ASCII")
+                             }
+                                for book in obj.books
+                            ]
+        else:
+            secs["books"] = ""
+        results.append(secs)
+
+    return dumps(results)
+
+
+searchers = {
+    "title" : search("title"),
+    "loc"   : search("loc"),
+    "time"  : search("time"),
+    "prof"  : search("prof"),
+    "day"   : search("day"),
+    }
+
+#print searchTerms({"title" : "PHILOS"})
+
+#for c in imap(classToJSON, allCourses()):
+    #try:
+        #print indexListing(c)
+    #except UnIndexable as e:
--- a/src/textbookExceptions.py
+++ b/src/textbookExceptions.py
@ -0,0 +1,24 @@
+#! /usr/bin/python2
+
+class UnIndexable(Exception):
+    def __init__(self, course):
+        self.course = course
+
+    @property
+    def reason(self):
+        course = self.course
+        if not course["code"] and not course["title"]:
+            message = "there was no course code and no title defined"
+        if not course["code"]:
+            message = "there was no course code defined"
+        if not course["title"]:
+            message = "there was no course title defined"
+        if not course["sections"]:
+            message = "there were no sections defined"
+        return """
+        There was a problem with indexing this course.
+        %s
+        There could be several reasons why, my best guess is that %s
+        We need at least the course code, title, and one or more sections to index
+
+        """ % (course, message)
--- a/src/visualize.py
+++ b/src/visualize.py
@ -0,0 +1,97 @@
+#! /usr/bin/python2
+
+from json import loads, load
+from re import sub, split
+from itertools import groupby
+from numpy import mean
+from operator import attrgetter
+
+import pygal
+import csv
+
+class Textbook(object):
+    def __init__(self, dept, code, title, author, price):
+        self.dept = dept
+        self.code = code
+        self.title = title
+        self.author = author
+        self.price = float(price)
+
+    def __repr__(self):
+        return "Dept = %s, Code = %s, %s by %s, costs $%s" % (self.dept,
+                                                              self.code,
+                                                              self.title,
+                                                              self.author,
+                                                              self.price)
+
+
+def courses():
+    with open("./books.csv", "r") as books:
+        booksreader = csv.reader(books)
+        for row in booksreader:
+            yield row
+
+
+def groupDept(courselist):
+    sortedCourses = sorted(courselist, key=attrgetter("dept"))
+    for course in groupby(sortedCourses, attrgetter("dept")):
+        yield course[0], list(course[1])
+
+def meanPrice(books):
+    return mean([book.price for book in books])
+
+# Questions,
+# mean cost per department
+# mean cost per faculty
+# mean difference between book store copies and other copies per dept and faculty
+# number of overlapping books per faculty, do eng students benefit from that?
+
+# maybe a survey for students to see how often they buy books from other sources
+# correlate with how much they could be saving?
+
+# Display name for each faculty code; categorize() looks up the code
+# that faculties (loaded from faculties.json) gives for a department
+facultyDesc = {
+        "hum" : "Humanities",
+        "bus" : "Business",
+        "hlth" : "Health Science",
+        "eng" : "Engineering",
+        "sci" : "Science",
+        "socsci" : "Social Sciences",
+        "artsci" : "Arts & Sciences",
+        "meld" : "MELD"
+}
+
+faculties = load(open("./faculties.json"))
+
+def categorize(dept):
+    # faculties
+    return facultyDesc.get(faculties.get(dept, False), False)
+
+def byFaculty():
+    for dept, books in groupDept(courses()):
+        yield (categorize(dept), dept, books)
+
+def meanFacultyCosts():
+    byfac = list(byFaculty())
+    graph = pygal.Bar()
+    graph.title = "Mean textbook cost by faculty"
+    sortedFacs = sorted(byfac, key=lambda x: x[0])
+    for fac in groupby(sortedFacs, lambda x: x[0]):
+        graph.add(fac[0], meanPrice(list(fac[1])[0][2]))
+    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
+    return graph.render(transpose=True)
+
+def meanCosts():
+    cs = groupDept(courses())
+    graph = pygal.Bar()
+    graph.title = "Mean textbook cost by department"
+    for c in cs:
+        dept, books = c
+        graph.add(dept, meanPrice(books))
+    #graph.render_to_file("./test_graph.svg")
+    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
+    return graph.render_table(style=True, transpose=True)
+
+# Print every row of books.csv; this is module level code, so it
+# also runs whenever the module is imported
+for x in courses():
+    print x
+#print meanCosts()
+#print meanFacultyCosts()
--- a/src/website.py
+++ b/src/website.py
@ -0,0 +1,148 @@
+#! /usr/bin/python2
+from functools import partial
+from couchdb import ResourceConflict
+
+from flask import Flask, render_template, flash, request, send_from_directory
+from flask_bootstrap import Bootstrap
+from flask_appconfig import AppConfig
+from urllib import unquote
+from search import searchTerms
+
+from openlibrary import bookUrls
+
+from archive import searchIA
+from urllib import quote, unquote
+from json import dumps, loads
+
+from werkzeug.contrib.cache import MemcachedCache
+cache = MemcachedCache(['127.0.0.1:11211'])
+
+import os
+
+def predict(fieldtype, term):
+    print fieldtype
+    print term
+    if not term:
+        return "[]"
+    else:
+        try:
+            cs = completers[fieldtype](term.lower())
+        except KeyError:
+            return "[]"
+    if cs:
+        return cs
+    return "[]"
+
+def predictor(fieldtype):
+    def inner(request):
+        params = dict(request.args.items())
+        return predict(fieldtype, params["term"])
+    return inner
+
+def cacheit(key, thunk):
+    """
+    Tries to find a cached version of ``key''
+    If there is no cached version then it will
+    evaluate thunk (which must be a generator)
+    and cache that, then return the result
+    """
+    cached = cache.get(quote(key))
+    if cached is None:
+        result = list(thunk())
+        cache.set(quote(key), result)
+        return result
+    return cached
+
+def ClassSearch(configfile=None):
+    defaults = {"Day", "Building", "Exact Location", "Department"}
+    app = Flask(__name__)
+    AppConfig(app, configfile)  # Flask-Appconfig is not necessary, but
+                                # highly recommend =)
+                                # https://github.com/mbr/flask-appconfig
+    Bootstrap(app)
+
+    app.config["scripts"] = "/home/wes/MGOAL/scripts"
+    app.config["styles"] = "/home/wes/MGOAL/styles"
+
+    @app.route('/favicon.ico')
+    def favicon():
+        return send_from_directory("/srv/http/goal/favicon.ico",
+                                   'favicon.ico', mimetype='image/vnd.microsoft.icon')
+
+
+    @app.route("/buildpred", methods=("GET", "POST"))
+    def buildpred():
+        return predictbuild(request)
+
+    @app.route("/locpred", methods=("GET", "POST"))
+    def locpred():
+        return predictloc(request)
+
+    @app.route("/daypred", methods=("GET", "POST"))
+    def daypred():
+        return predictday(request)
+
+    @app.route("/deptpred", methods=("GET", "POST"))
+    def deptpred():
+        return predictdept(request)
+
+    @app.route("/titlepred", methods=("GET", "POST"))
+    def titlepred():
+        return predicttitle(request)
+
+    @app.route("/", methods=("GET", "POST"))
+    def index():
+        return render_template("search.html")
+
+    @app.route("/fc", methods=("GET", "POST"))
+    def fc():
+        """ Filter Courses """
+        print "trying to get courses"
+        params = dict(request.args.items())
+        for key, val in params.iteritems():
+            if val in defaults:
+                del params[key]
+        results = searchTerms(params)
+        return results
+
+    @app.route("/resources", methods=("GET", "POST"))
+    def resources():
+        """ Get Resources """
+        notRequired = False
+        params = loads(dict(request.args.items())["data"])
+        print params
+        author = params["author"]
+        title = params["title"]
+
+        if ("No Textbooks" in title or
+            "No Adoption" in title):
+            return dumps("false")
+
+        # Cache the result of the open library search
+        openlib = cacheit("openlib"+title+author, lambda : bookUrls(title, author))
+        print openlib
+
+        # cache the result of an internet archive search
+        iarchive = cacheit("iarchive"+title+author, lambda : searchIA(title, author))
+        print iarchive
+
+        if not (any(openlib) or any(iarchive)):
+            # We literally could not find ANYTHING
+            return dumps("false")
+            
+        return dumps({
+                       "iarchive" : iarchive,
+                       "openlib" : openlib
+                     })
+
+    @app.route("/scripts/<filename>")
+    def send_script(filename):
+        return send_from_directory(app.config["scripts"], filename)
+
+    @app.route("/styles/<filename>")
+    def send_style(filename):
+        return send_from_directory(app.config["styles"], filename)
+    return app
+
+# Serve the app on port 8001 with debug enabled when run as a script
+if __name__ == "__main__":
+    ClassSearch().run(port=8001, debug=True)
--- a/textbookExceptions.py
+++ b/textbookExceptions.py
@ -0,0 +1,24 @@
+#! /usr/bin/python2
+
+class UnIndexable(Exception):
+    def __init__(self, course):
+        self.course = course
+
+    @property
+    def reason(self):
+        course = self.course
+        if not course["code"] and not course["title"]:
+            message = "there was no course code and no title defined"
+        if not course["code"]:
+            message = "there was no course code defined"
+        if not course["title"]:
+            message = "there was no course title defined"
+        if not course["sections"]:
+            message = "there were no sections defined"
+        return """
+        There was a problem with indexing this course.
+        %s
+        There could be several reasons why, my best guess is that %s
+        We need at least the course code, title, and one or more sections to index
+
+        """ % (course, message)
--- a/visualize.py
+++ b/visualize.py
@ -0,0 +1,97 @@
+#! /usr/bin/python2
+
+from json import loads, load
+from re import sub, split
+from itertools import groupby
+from numpy import mean
+from operator import attrgetter
+
+import pygal
+import csv
+
+class Textbook(object):
+    def __init__(self, dept, code, title, author, price):
+        self.dept = dept
+        self.code = code
+        self.title = title
+        self.author = author
+        self.price = float(price)
+
+    def __repr__(self):
+        return "Dept = %s, Code = %s, %s by %s, costs $%s" % (self.dept,
+                                                              self.code,
+                                                              self.title,
+                                                              self.author,
+                                                              self.price)
+
+
+def courses():
+    with open("./books.csv", "r") as books:
+        booksreader = csv.reader(books)
+        for row in booksreader:
+            yield row
+
+
+def groupDept(courselist):
+    sortedCourses = sorted(courselist, key=attrgetter("dept"))
+    for course in groupby(sortedCourses, attrgetter("dept")):
+        yield course[0], list(course[1])
+
+def meanPrice(books):
+    return mean([book.price for book in books])
+
+# Questions,
+# mean cost per department
+# mean cost per faculty
+# mean difference between book store copies and other copies per dept and faculty
+# number of overlapping books per faculty, do eng students benefit from that?
+
+# maybe a survey for students to see how often they buy books from other sources
+# correlate with how much they could be saving?
+
+# Display name for each faculty code; categorize() looks up the code
+# that faculties (loaded from faculties.json) gives for a department
+facultyDesc = {
+        "hum" : "Humanities",
+        "bus" : "Business",
+        "hlth" : "Health Science",
+        "eng" : "Engineering",
+        "sci" : "Science",
+        "socsci" : "Social Sciences",
+        "artsci" : "Arts & Sciences",
+        "meld" : "MELD"
+}
+
+faculties = load(open("./faculties.json"))
+
+def categorize(dept):
+    # faculties
+    return facultyDesc.get(faculties.get(dept, False), False)
+
+def byFaculty():
+    for dept, books in groupDept(courses()):
+        yield (categorize(dept), dept, books)
+
+def meanFacultyCosts():
+    byfac = list(byFaculty())
+    graph = pygal.Bar()
+    graph.title = "Mean textbook cost by faculty"
+    sortedFacs = sorted(byfac, key=lambda x: x[0])
+    for fac in groupby(sortedFacs, lambda x: x[0]):
+        graph.add(fac[0], meanPrice(list(fac[1])[0][2]))
+    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
+    return graph.render(transpose=True)
+
+def meanCosts():
+    cs = groupDept(courses())
+    graph = pygal.Bar()
+    graph.title = "Mean textbook cost by department"
+    for c in cs:
+        dept, books = c
+        graph.add(dept, meanPrice(books))
+    #graph.render_to_file("./test_graph.svg")
+    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
+    return graph.render_table(style=True, transpose=True)
+
+# Print every row of books.csv; this is module level code, so it
+# also runs whenever the module is imported
+for x in courses():
+    print x
+#print meanCosts()
+#print meanFacultyCosts()
--- a/website.py
+++ b/website.py
@ -0,0 +1,148 @@
+#! /usr/bin/python2
+from functools import partial
+from couchdb import ResourceConflict
+
+from flask import Flask, render_template, flash, request, send_from_directory
+from flask_bootstrap import Bootstrap
+from flask_appconfig import AppConfig
+from urllib import unquote
+from search import searchTerms
+
+from openlibrary import bookUrls
+
+from archive import searchIA
+from urllib import quote, unquote
+from json import dumps, loads
+
+from werkzeug.contrib.cache import MemcachedCache
+cache = MemcachedCache(['127.0.0.1:11211'])
+
+import os
+
+def predict(fieldtype, term):
+    print fieldtype
+    print term
+    if not term:
+        return "[]"
+    else:
+        try:
+            cs = completers[fieldtype](term.lower())
+        except KeyError:
+            return "[]"
+    if cs:
+        return cs
+    return "[]"
+
+def predictor(fieldtype):
+    def inner(request):
+        params = dict(request.args.items())
+        return predict(fieldtype, params["term"])
+    return inner
+
+def cacheit(key, thunk):
+    """
+    Tries to find a cached version of ``key''
+    If there is no cached version then it will
+    evaluate thunk (which must be a generator)
+    and cache that, then return the result
+    """
+    cached = cache.get(quote(key))
+    if cached is None:
+        result = list(thunk())
+        cache.set(quote(key), result)
+        return result
+    return cached
+
+def ClassSearch(configfile=None):
+    defaults = {"Day", "Building", "Exact Location", "Department"}
+    app = Flask(__name__)
+    AppConfig(app, configfile)  # Flask-Appconfig is not necessary, but
+                                # highly recommend =)
+                                # https://github.com/mbr/flask-appconfig
+    Bootstrap(app)
+
+    app.config["scripts"] = "/home/wes/MGOAL/scripts"
+    app.config["styles"] = "/home/wes/MGOAL/styles"
+
+    @app.route('/favicon.ico')
+    def favicon():
+        return send_from_directory("/srv/http/goal/favicon.ico",
+                                   'favicon.ico', mimetype='image/vnd.microsoft.icon')
+
+
+    @app.route("/buildpred", methods=("GET", "POST"))
+    def buildpred():
+        return predictbuild(request)
+
+    @app.route("/locpred", methods=("GET", "POST"))
+    def locpred():
+        return predictloc(request)
+
+    @app.route("/daypred", methods=("GET", "POST"))
+    def daypred():
+        return predictday(request)
+
+    @app.route("/deptpred", methods=("GET", "POST"))
+    def deptpred():
+        return predictdept(request)
+
+    @app.route("/titlepred", methods=("GET", "POST"))
+    def titlepred():
+        return predicttitle(request)
+
+    @app.route("/", methods=("GET", "POST"))
+    def index():
+        return render_template("search.html")
+
+    @app.route("/fc", methods=("GET", "POST"))
+    def fc():
+        """ Filter Courses """
+        print "trying to get courses"
+        params = dict(request.args.items())
+        for key, val in params.iteritems():
+            if val in defaults:
+                del params[key]
+        results = searchTerms(params)
+        return results
+
+    @app.route("/resources", methods=("GET", "POST"))
+    def resources():
+        """ Get Resources """
+        notRequired = False
+        params = loads(dict(request.args.items())["data"])
+        print params
+        author = params["author"]
+        title = params["title"]
+
+        if ("No Textbooks" in title or
+            "No Adoption" in title):
+            return dumps("false")
+
+        # Cache the result of the open library search
+        openlib = cacheit("openlib"+title+author, lambda : bookUrls(title, author))
+        print openlib
+
+        # cache the result of an internet archive search
+        iarchive = cacheit("iarchive"+title+author, lambda : searchIA(title, author))
+        print iarchive
+
+        if not (any(openlib) or any(iarchive)):
+            # We literally could not find ANYTHING
+            return dumps("false")
+            
+        return dumps({
+                       "iarchive" : iarchive,
+                       "openlib" : openlib
+                     })
+
+    @app.route("/scripts/<filename>")
+    def send_script(filename):
+        return send_from_directory(app.config["scripts"], filename)
+
+    @app.route("/styles/<filename>")
+    def send_style(filename):
+        return send_from_directory(app.config["styles"], filename)
+    return app
+
+if __name__ == "__main__":
+    ClassSearch().run(port=8001, debug=True)