diff --git a/archive.py b/archive.py
deleted file mode 100755
index 73fcde7..0000000
--- a/archive.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#! /usr/bin/python2
-
-from urllib import quote
-from json import loads, dumps
-
-import requests as req
-
-searchUrl = "https://archive.org/advancedsearch.php?q={0}&fl%5B%5D=avg_rating&fl%5B%5D=description&fl%5B%5D=identifier&fl%5B%5D=type&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&page=1&output=json&callback=callback&save=yes#raw"
-
-def searchIA(title, author):
-    """
-    Do a search on The Internet Archive for a book
-    """
-    print "running a search"
-    requrl = searchUrl.format(quote(title + " " + author))
-    try:
-        results = loads(req.get(requrl).text[9:][0:-1])
-    except ValueError:
-        return []
-
-    rownum = results["responseHeader"]["params"]["rows"]
-    if rownum < 1:
-        print "Couldn't find results for %s %s" % (title, author)
-        return []
-    docs = results["response"]["docs"]
-    urls = []
-    for result in results["response"]["docs"][0:3]:
-        urls.append("https://archive.org/details/%s" % result["identifier"])
-    return urls
-
-
-# Example, search for David Hume's Enquiry Concerning Human Understanding
-#for url in searchIA("Hume", "Enquiry Concerning Human Understanding"):
-    #print url
diff --git a/database.py b/database.py
deleted file mode 100755
index a19272c..0000000
--- a/database.py
+++ /dev/null
@@ -1,62 +0,0 @@
-#! /usr/bin/python2
-
-from sys import argv
-from hashlib import sha1
-
-def truncate(docid):
-    """
-    Truncate a document id to 12 digits
-    The document ID should be based on a
-    hash of unique identifiers
-    """
-    return int(str(docid)[0:12])
-
-def createResource(textbookInfo, course, dept, coursecode, docid):
-    """
-    Create a document associated with a course
-    This document contains any/all resources associated
-    with that course
-
-    example,
-    {
-        'books': [],
-        'dept': 'COLLAB',
-        'code': '2C03',
-        'sections': [
-            {
-                'prof': 'Lisa Pender',
-                'sem': '2015/09/08 - 2015/12/08',
-                'day': 'Mo'
-            },
-            {
-                'prof': 'Staff',
-                'sem': '2015/09/08 - 2015/12/08',
-                'day': 'Th'
-            }
-        ],
-        'title': 'COLLAB 2C03 - Sociology I'
-    }
-    """
-    textbooks = textbookInfo(dept.strip(), coursecode.strip())
-
-    # We truncate the id so we can have nicer looking URLs
-    # Since the id will be used to point to the resource page for that course
-    _id = str(truncate(docid))
-
-    fields = {
-        "_id" : _id,
-        "textbooks" : textbooks,
-        "coursetitle" : "%s %s" % (dept.strip(), coursecode.strip()),
-        "courseinfo" : course
-        #"Syllabus" : "blah"
-    }
-    try:
-        revisions = list(localdb.revisions(_id))
-        if not revisions:
-            return localdb.save(fields)
-        else:
-            rev = dict(revisions[0])["_rev"]
-            fields["_rev"] = rev
-            return localdb.save(fields)
-    except ResourceConflict:
-        print "Resource for %s already exists, not creating a new one" % (docid)
diff --git a/goasearch.py b/goasearch.py
deleted file mode 100755
index 3dca7eb..0000000
--- a/goasearch.py
+++ /dev/null
@@ -1,14 +0,0 @@
-#! /usr/bin/python2
-
-# predictive data
-# switch to elasticsearch's prediction
-
-
-
-import database
-import predictions
-
-class GOASearch(object):
-    def __init__(self):
-        return self
-
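Note for reviewers: in archive.py above, the odd `.text[9:][0:-1]` slice strips the JSONP wrapper the advancedsearch endpoint returns when `callback=callback` is set (`len("callback(") == 9`, plus the trailing `)`). Separately, database.py uses `localdb` and `ResourceConflict` without ever binding them; website.py below suggests they came from the `couchdb` package. A minimal sketch of that missing setup, where the server URL and the database name "courses" are guesses:

```python
# Hypothetical glue for the names database.py uses but never imports.
import couchdb
from couchdb import ResourceConflict  # same import website.py does

couch = couchdb.Server("http://127.0.0.1:5984/")  # default local CouchDB
localdb = couch["courses"]  # Server supports dict-style database access
```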
diff --git a/openlibrary.py b/openlibrary.py
deleted file mode 100755
index d558c21..0000000
--- a/openlibrary.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#! /usr/bin/python2
-
-from urllib import quote
-from json import loads, dumps
-
-import requests as req
-
-#query = "https://openlibrary.org/query.json?type=/type/edition&title=%s&author=%s"
-searchurl = 'http://openlibrary.org/search.json?author=%s&title=%s'
-
-def bookUrls(title, author):
-    print title, author
-    if ":" in title:
-        title = title.split(":")[0]
-    requrl = searchurl % (quote(author), quote(title))
-    results = loads(req.get(requrl).text)
-    for result in results["docs"][0:2]:
-        if result.has_key("edition_key"):
-            yield "https://openlibrary.org/books/%s" % result["edition_key"][0]
-
-# 'http://openlibrary.org/query.json?type=/type/edition&title=The+Personality+Puzzle'
-
-#for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
-    #print book
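For reference, the same Open Library lookup without the Python 2-isms (`urllib.quote`, `dict.has_key`); an illustrative sketch, not part of the original module — the `/search.json` endpoint and its `docs`/`edition_key` fields are as used above:

```python
# Python 3 sketch of bookUrls; requests encodes the query string itself,
# and "in" replaces dict.has_key(), which was removed in Python 3.
import requests

def book_urls(title, author):
    title = title.split(":")[0]  # the original truncated subtitles too
    docs = requests.get("https://openlibrary.org/search.json",
                        params={"title": title, "author": author}).json()["docs"]
    for doc in docs[:2]:
        if "edition_key" in doc:
            yield "https://openlibrary.org/books/%s" % doc["edition_key"][0]
```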
diff --git a/predictions.py b/predictions.py
deleted file mode 100755
index b770a0b..0000000
--- a/predictions.py
+++ /dev/null
@@ -1,153 +0,0 @@
-##! /usr/bin/python2
-from itertools import groupby, chain
-from sys import stdout
-from functools import partial
-from json import dumps
-
-def gensymer():
-    n = [0]
-    def inner():
-        result = str(n[0])
-        n[0] += 1
-        return result
-    return inner
-
-gensym = gensymer()
-
-def printTrie(graph, prev, trie, weight):
-    new_node = str(gensym())
-    graph.node(new_node, "%s" % trie.letter)
-    graph.edge(prev, new_node, label="%.2f" % weight)
-    if not trie.children:
-        return
-    for child, weight in zip(trie.children, trie.ws):
-        printTrie(graph, new_node, child, weight)
-
-
-class Trie(object):
-    def __init__(self, letter, children, ws):
-        self.letter = letter
-        self.children = children
-        self.ws = ws
-
-def probweight(suffixes):
-    weights = [float(s["value"]) for s in suffixes]
-    s = float(sum(weights))
-    ws = [w/s for w in weights]
-    return ws
-
-def buildtrie(trie, suffixes):
-    """
-    Build a trie, also known as a prefix tree, of all the possible completions
-    """
-    trie.children = []
-    for letter, suffs in suffixes:
-        ped = partition(suffs)
-        if any(map(lambda p: p[0], ped)):
-            # check if there are any children
-            trie.children.append(buildtrie(Trie(letter, [], probweight(suffs)), partition(suffs)))
-        else:
-            # we've reached the end of this word so just include the final letter
-            # [1] = there is a probability of 1 of reaching this single leaf node,
-            # since it is the only possible completion here
-            trie.children.append(Trie(letter, [], [1]))
-    return trie
-
-
-def keyf(x):
-    if not x["key"]:
-        return ""
-    return x["key"][0]
-
-def tails(words):
-    for word in words:
-        yield {
-            "key" : word["key"][1:],
-            "value" : word["value"]
-        }
-
-def partition(words):
-    """
-    Partition the words into different prefixes based on the first character
-    """
-    groups = [
-        (g[0], list(tails(g[1])))
-        for g in groupby(
-            sorted(words, key=keyf),
-            key=keyf)
-    ]
-    return groups
-
-
-def flatten_helper(letter, trie):
-    return ([letter + child.letter for
-             child in trie.children], trie.children)
-
-def flatten(trie):
-    if not trie.children:
-        return trie.letter
-    prefixes, suffixes = flatten_helper(trie.letter, trie)
-    return [flatten(Trie(p, s2.children, s2.ws)) for p, s2 in zip(prefixes, suffixes)]
-
-def flattenlist(xs):
-    locs = []
-    for x in xs:
-        if not isinstance(x, list):
-            locs.append(x)
-        else:
-            locs.extend(flattenlist(x))
-    return locs
-
-def matchc(trie, prefix):
-    c = None
-    if len(prefix) > 1:
-        c = prefix[0]
-    else:
-        c = prefix
-    return [ch for ch in trie.children if ch.letter == c]
-
-def match(trie, word):
-    if not word:
-        return []
-    m = matchc(trie, word[0])
-    if not m:
-        return []
-    else:
-        return [m[0]] + match(m[0], word[1:])
-
-def complete(trie, word):
-    m = match(trie, word)
-    if len(word) != len(m):
-        return False
-    completions = [word+x[1:] for x in flattenlist(flatten(m[-1]))]
-    if len(completions) > 10:
-        return dumps(completions[0:10])
-    return dumps(completions)
-
-def sortTrie(trie):
-    """
-    Sort the children of each node in descending order
-    of the probability that each child would be the completion
-    of whatever that word is
-    """
-    if not trie.children:
-        return
-    sortedChilds = sorted(zip(trie.children, trie.ws), key=lambda x: x[1], reverse=True)
-    trie.children = [x[0] for x in sortedChilds]
-    trie.ws = [x[1] for x in sortedChilds]
-    for child in trie.children:
-        sortTrie(child)
-
-def toTrie(words):
-    for word in words:
-        word["key"] = word["key"].lower()
-    trie = buildtrie(Trie("", [], [1]), partition(words))
-    trie.ws = [1]*len(trie.children)
-    sortTrie(trie)
-    return trie
-
-def testkey(w):
-    return {
-        "key" : w,
-        "value" : "1"
-    }
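predictions.py carried no in-module usage example; here is how the pieces above fit together, using its own `toTrie`, `complete`, and `testkey` helpers (the word list is made up):

```python
# Build a weighted prefix tree from {"key": word, "value": count} records
# and ask it for completions. complete() returns a JSON-encoded list capped
# at 10 entries, or False when the prefix matches nothing in the trie.
words = [testkey(w) for w in ["philosophy", "physics", "philology"]]
trie = toTrie(words)
print(complete(trie, "ph"))   # all three words
print(complete(trie, "phi"))  # "philosophy" and "philology"
```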
diff --git a/search.py b/search.py
deleted file mode 100755
index 777222f..0000000
--- a/search.py
+++ /dev/null
@@ -1,237 +0,0 @@
-#! /usr/bin/python2
-
-import elasticsearch
-
-from elasticsearch_dsl import FacetedSearch, Search, Q
-from elasticsearch_dsl.aggs import Terms, DateHistogram
-from sys import exit, stderr
-from json import dumps, loads
-from itertools import chain, imap
-
-from hashlib import sha1
-
-from textbookExceptions import UnIndexable
-
-from mcmaster.classes import allCourses
-
-# Generic instance of elasticsearch right now
-es = elasticsearch.Elasticsearch()
-
-def summarize(text):
-    splitted = text.split(" ")
-    if len(splitted) > 4:
-        return " ".join(splitted[0:4]) + ".."
-    return text
-
-def sectionToJSON(section):
-    return {
-        "prof" : section.prof,
-        "sem" : section.sem,
-        "day" : section.day
-    }
-
-def classToJSON(clss):
-    return {
-        "title" : clss.title,
-        "sections" : map(sectionToJSON, clss.sections),
-        "dept" : clss.dept,
-        "code" : clss.code,
-        "books" : list(clss.books) if clss.books else []
-    }
-
-
-def truncate(docid):
-    """
-    Truncate a document id to 12 digits
-    The document ID should be based on a
-    hash of unique identifiers
-    """
-    return int(str(docid)[0:12])
-
-def hashsec(course):
-    """
-    Hash a course into a usable id
-    """
-    if not course["code"]:
-        code = ""
-    else:
-        code = course["code"]
-    if not course["title"]:
-        title = ""
-    else:
-        title = course["title"]
-
-    if not course["sections"] or len(course["sections"]) < 1:
-        course["sections"][0] = ""
-
-    if not (code or title):
-        raise UnIndexable(course)
-
-    h = sha1()
-    h.update(code + title + course["sections"][0]["sem"])
-    return int(h.hexdigest(), 16)
-
-def createIndex(name):
-    """
-    This creates a new index in elasticsearch
-    An index is like a schema in a regular database
-    Create an elasticsearch index
-
-    """
-    indices = elasticsearch.client.IndicesClient(es)
-
-    print indices.create(name)
-    with open("./course.json", "r") as mapping:
-        print indices.put_mapping("course", loads(mapping.read()), name)
-
-def indexListing(course):
-    """
-    Index a specific course in the database (using the courses index)
-    example,
-    {
-        'books': [],
-        'dept': 'COLLAB',
-        'code': '2C03',
-        'sections': [
-            {
-                'prof': 'Lisa Pender',
-                'sem': '2015/09/08 - 2015/12/08',
-                'day': 'Mo'
-            },
-            {
-                'prof': 'Staff',
-                'sem': '2015/09/08 - 2015/12/08',
-                'day': 'Th'
-            }
-        ],
-        'title': 'COLLAB 2C03 - Sociology I'
-    }
-
-    """
-    courseID = hashsec(course)
-    print es.index(index="oersearch",
-                   doc_type="course",
-                   id=courseID,
-                   body=course)
-
-    # For every course we index, we also create a resource for it
-    # This should be an idempotent operation because we're putting it in couchdb
-    # And we're using the id obtained from the hash function, so it should just update the document
-    # no need to delete anything
-    #try:
-        #courseDept = course[0]["title"].strip().split(" ")[0].strip()
-        #courseCode = course[0]["title"].strip().split(" ")[1].strip()
-        #print "DEPARTMENT = \"%s\", COURSECODE = \"%s\"" % (courseDept, courseCode)
-        #print createResource(textbookInfo, course[0], courseDept, courseCode, courseID)
-    #except:
-        #print "Couldn't create the resource associated with %s" % course
-
-def termSearch(field):
-    """
-    Make a term search (exact match)
-    """
-    def t(term):
-        q = Q("term",
-              **{
-                  "sections."+field : term
-              })
-        return q
-    return t
-
-def search(field):
-    """
-    Make a match search
-    """
-    def s(term):
-        q = Q("match",
-              **{
-                  field : term
-              })
-        return q
-    return s
-
-def join(x, y):
-    """
-    Join two queries
-    """
-    return x & y
-
-def filterSections(secs):
-    """
-    Get rid of tutorial sections
-    because they almost always have "Staff" as the instructor
-    This is just a heuristic of course
-    """
-    filtered = [s for s in secs.sections if "Staff" not in s.prof]
-    if len(filtered) > 0:
-        return filtered
-    return False
-
-def searchTerms(terms):
-    """
-    Run a search for courses
-    """
-
-    # A list of all the queries we want to run
-    qs = [searchers[field](term) for
-          field, term in
-          terms.iteritems() if
-          term and searchers.has_key(field)]
-
-    if not qs:
-        # No queries = no results
-        return dumps([])
-
-    # Reduce joins all of the queries into one query
-    # It will search for the conjunction of all of them
-    # So that means it cares about each query equally
-    q = reduce(join, qs)
-
-    s = (Search(using=es, index="oersearch")
-         .query(q))[0:100]  # only return up to 100 results for now
-
-    results = s.execute()
-
-    filtered = [
-        (secs, filterSections(secs)[0].to_dict())  # get rid of tutorials
-        for secs in results
-        if filterSections(secs)
-    ]
-    results = []
-    for obj, secs in filtered:
-        # Add the truncated course id
-        # This is used to point to the resource page for that course
-        secs["id"] = truncate(obj.meta.id)
-        secs["title"] = obj.title
-        if obj["dept"] not in secs["title"]:
-            secs["dept"] = obj.dept
-        if obj.books:
-            secs["books"] = [
-                {
-                    "booktitle" : summarize(book[0].encode("ASCII")),
-                    "bookauthor" : book[1].encode("ASCII"),
-                    "bookprice" : book[2].encode("ASCII")
-                }
-                for book in obj.books
-            ]
-        else:
-            secs["books"] = ""
-        results.append(secs)
-
-    return dumps(results)
-
-
-searchers = {
-    "title" : search("title"),
-    "loc" : search("loc"),
-    "time" : search("time"),
-    "prof" : search("prof"),
-    "day" : search("day"),
-    }
-
-#print searchTerms({"title" : "PHILOS"})
-
-#for c in imap(classToJSON, allCourses()):
-    #try:
-        #print indexListing(c)
-    #except UnIndexable as e:
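The core of `searchTerms` is `reduce(join, qs)`, which folds the per-field queries into one conjunctive query. A standalone sketch of what that builds — `Q` and the `&` operator are real elasticsearch-dsl API, the field values are made up:

```python
from functools import reduce  # a builtin in the Python 2 code above
from elasticsearch_dsl import Q

qs = [Q("match", title="PHILOS"), Q("match", prof="Lisa Pender")]
combined = reduce(lambda x, y: x & y, qs)  # one bool query requiring both clauses
print(combined.to_dict())
```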
diff --git a/textbookExceptions.py b/textbookExceptions.py
deleted file mode 100644
index 999ff3e..0000000
--- a/textbookExceptions.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#! /usr/bin/python2
-
-class UnIndexable(Exception):
-    def __init__(self, course):
-        self.course = course
-
-    @property
-    def reason(self):
-        course = self.course
-        if not course["code"] and not course["title"]:
-            message = "there was no course code and no title defined"
-        if not course["code"]:
-            message = "there was no course code defined"
-        if not course["title"]:
-            message = "there was no course title defined"
-        if not course["sections"]:
-            message = "there were no sections defined"
-        return """
-        There was a problem with indexing this course.
-        %s
-        There could be several reasons why, my best guess is that %s
-        We need at least the course code, title, and one or more sections to index
-
-        """ % (course, message)
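Usage, as hinted by the commented-out loop at the bottom of search.py (`hashsec` raises UnIndexable when a course has neither code nor title); the sample course dict is made up:

```python
# Catch the exception and print its human-readable diagnosis.
course = {"code": "", "title": "", "sections": []}
try:
    raise UnIndexable(course)
except UnIndexable as e:
    print(e.reason)
```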
diff --git a/visualize.py b/visualize.py
deleted file mode 100755
index b46a67d..0000000
--- a/visualize.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#! /usr/bin/python2
-
-from json import loads, load
-from re import sub, split
-from itertools import groupby
-from numpy import mean
-from operator import attrgetter
-
-import pygal
-import csv
-
-class Textbook(object):
-    def __init__(self, dept, code, title, author, price):
-        self.dept = dept
-        self.code = code
-        self.title = title
-        self.author = author
-        self.price = float(price)
-
-    def __repr__(self):
-        return "Dept = %s, Code = %s, %s by %s, costs $%s" % (self.dept,
-                                                              self.code,
-                                                              self.title,
-                                                              self.author,
-                                                              self.price)
-
-
-def courses():
-    with open("./books.csv", "r") as books:
-        booksreader = csv.reader(books)
-        for row in booksreader:
-            yield row
-
-
-def groupDept(courselist):
-    sortedCourses = sorted(courselist, key=attrgetter("dept"))
-    for course in groupby(sortedCourses, attrgetter("dept")):
-        yield course[0], list(course[1])
-
-def meanPrice(books):
-    return mean([book.price for book in books])
-
-# Questions,
-# mean cost per department
-# mean cost per faculty
-# mean difference between book store copies and other copies per dept and faculty
-# number of overlapping books per faculty, do eng students benefit from that?
-
-# maybe a survey for students to see how often they buy books from other sources
-# correlate with how much they could be saving?
-
-facultyDesc = {
-    "hum" : "Humanities",
-    "bus" : "Business",
-    "hlth" : "Health Science",
-    "eng" : "Engineering",
-    "sci" : "Science",
-    "socsci" : "Social Sciences",
-    "artsci" : "Arts & Sciences",
-    "meld" : "MELD"
-}
-
-faculties = load(open("./faculties.json"))
-
-def categorize(dept):
-    # faculties
-    return facultyDesc.get(faculties.get(dept, False), False)
-
-def byFaculty():
-    for dept, books in groupDept(courses()):
-        yield (categorize(dept), dept, books)
-
-def meanFacultyCosts():
-    byfac = list(byFaculty())
-    graph = pygal.Bar()
-    graph.title = "Mean textbook cost by faculty"
-    sortedFacs = sorted(byfac, key=lambda x: x[0])
-    for fac in groupby(sortedFacs, lambda x: x[0]):
-        graph.add(fac[0], meanPrice(list(fac[1])[0][2]))
-    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
-    return graph.render(transpose=True)
-
-def meanCosts():
-    cs = groupDept(courses())
-    graph = pygal.Bar()
-    graph.title = "Mean textbook cost by department"
-    for c in cs:
-        dept, books = c
-        graph.add(dept, meanPrice(books))
-    #graph.render_to_file("./test_graph.svg")
-    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
-    return graph.render_table(style=True, transpose=True)
-
-for x in courses():
-    print x
-#print meanCosts()
-#print meanFacultyCosts()
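One caveat for anyone resurrecting visualize.py: `groupDept` and `meanPrice` expect Textbook objects (`attrgetter("dept")`, `book.price`), but `courses()` yields raw CSV rows. A sketch of the missing glue, assuming the books.csv columns are (dept, code, title, author, price):

```python
import csv

def textbooks(path="./books.csv"):
    # Adapt raw CSV rows into the Textbook objects the helpers expect.
    with open(path) as f:
        for dept, code, title, author, price in csv.reader(f):
            yield Textbook(dept, code, title, author, price)

for dept, books in groupDept(textbooks()):
    print("%s: $%.2f" % (dept, meanPrice(books)))
```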
diff --git a/website.py b/website.py
deleted file mode 100755
index 1fc9374..0000000
--- a/website.py
+++ /dev/null
@@ -1,148 +0,0 @@
-#! /usr/bin/python2
-from functools import partial
-from couchdb import ResourceConflict
-
-from flask import Flask, render_template, flash, request, send_from_directory
-from flask_bootstrap import Bootstrap
-from flask_appconfig import AppConfig
-from urllib import unquote
-from search import searchTerms
-
-from openlibrary import bookUrls
-
-from archive import searchIA
-from urllib import quote, unquote
-from json import dumps, loads
-
-from werkzeug.contrib.cache import MemcachedCache
-cache = MemcachedCache(['127.0.0.1:11211'])
-
-import os
-
-def predict(fieldtype, term):
-    print fieldtype
-    print term
-    if not term:
-        return "[]"
-    else:
-        try:
-            cs = completers[fieldtype](term.lower())
-        except KeyError:
-            return "[]"
-        if cs:
-            return cs
-        return "[]"
-
-def predictor(fieldtype):
-    def inner(request):
-        params = dict(request.args.items())
-        return predict(fieldtype, params["term"])
-    return inner
-
-def cacheit(key, thunk):
-    """
-    Tries to find a cached version of ``key''
-    If there is no cached version then it will
-    evaluate thunk (which must be a generator)
-    and cache that, then return the result
-    """
-    cached = cache.get(quote(key))
-    if cached is None:
-        result = list(thunk())
-        cache.set(quote(key), result)
-        return result
-    return cached
-
-def ClassSearch(configfile=None):
-    defaults = {"Day", "Building", "Exact Location", "Department"}
-    app = Flask(__name__)
-    AppConfig(app, configfile)  # Flask-Appconfig is not necessary, but
-                                # highly recommend =)
-                                # https://github.com/mbr/flask-appconfig
-    Bootstrap(app)
-
-    app.config["scripts"] = "/home/wes/MGOAL/scripts"
-    app.config["styles"] = "/home/wes/MGOAL/styles"
-
-    @app.route('/favicon.ico')
-    def favicon():
-        return send_from_directory("/srv/http/goal/favicon.ico",
-                                   'favicon.ico', mimetype='image/vnd.microsoft.icon')
-
-
-    @app.route("/buildpred", methods=("GET", "POST"))
-    def buildpred():
-        return predictbuild(request)
-
-    @app.route("/locpred", methods=("GET", "POST"))
-    def locpred():
-        return predictloc(request)
-
-    @app.route("/daypred", methods=("GET", "POST"))
-    def daypred():
-        return predictday(request)
-
-    @app.route("/deptpred", methods=("GET", "POST"))
-    def deptpred():
-        return predictdept(request)
-
-    @app.route("/titlepred", methods=("GET", "POST"))
-    def titlepred():
-        return predicttitle(request)
-
-    @app.route("/", methods=("GET", "POST"))
-    def index():
-        return render_template("search.html")
-
-    @app.route("/fc", methods=("GET", "POST"))
-    def fc():
-        """ Filter Courses """
-        print "trying to get courses"
-        params = dict(request.args.items())
-        for key, val in params.iteritems():
-            if val in defaults:
-                del params[key]
-        results = searchTerms(params)
-        return results
-
-    @app.route("/resources", methods=("GET", "POST"))
-    def resources():
-        """ Get Resources """
-        notRequired = False
-        params = loads(dict(request.args.items())["data"])
-        print params
-        author = params["author"]
-        title = params["title"]
-
-        if ("No Textbooks" in title or
-            "No Adoption" in title):
-            return dumps("false")
-
-        # Cache the result of the open library search
-        openlib = cacheit("openlib"+title+author, lambda : bookUrls(title, author))
-        print openlib
-
-        # cache the result of an internet archive search
-        iarchive = cacheit("iarchive"+title+author, lambda : searchIA(title, author))
-        print iarchive
-
-        if not (any(openlib) or any(iarchive)):
-            # We literally could not find ANYTHING
-            return dumps("false")
-
-        return dumps({
-            "iarchive" : iarchive,
-            "openlib" : openlib
-        })
-
-    @app.route("/scripts/<path:filename>")
-    def send_script(filename):
-        return send_from_directory(app.config["scripts"], filename)
-
-    @app.route("/styles/<path:filename>")
-    def send_style(filename):
-        return send_from_directory(app.config["styles"], filename)
-
-    return app
-
-if __name__ == "__main__":
-    ClassSearch().run(port=8001, debug=True)
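The `*pred` routes in website.py call `predictbuild`, `predictloc`, `predictday`, `predictdept`, and `predicttitle`, none of which are defined in the file; they were presumably built with `predictor()`, closing over a `completers` table that is also missing. A sketch of the likely wiring — the field keys and the word lists are assumptions, only `predictor` and the predictions.py helpers come from the source:

```python
# Hypothetical reconstruction of the missing completers/predict* bindings,
# reusing the trie helpers from the deleted predictions.py.
from predictions import toTrie, complete, testkey

def completer(words):
    trie = toTrie([testkey(w) for w in words])
    return lambda term: complete(trie, term)  # False when nothing matches

# The real word lists would be derived from the indexed course data.
completers = {
    "building": completer(["hamilton hall", "health sciences"]),
    "day": completer(["monday", "tuesday", "thursday"]),
}

predictbuild = predictor("building")
predictday = predictor("day")
```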