25 changed files with 6 additions and 2061 deletions
@@ -0,0 +1,5 @@
*.swp
*.pyc
*.swo
.vim-session
*~
Binary file not shown.
@@ -0,0 +1 @@
,wes,ismeta,12.03.2016 20:10,file:///home/wes/.config/libreoffice/4;
@@ -1,34 +0,0 @@
#! /usr/bin/python2

from urllib import quote
from json import loads, dumps

import requests as req

searchUrl = "https://archive.org/advancedsearch.php?q={0}&fl%5B%5D=avg_rating&fl%5B%5D=description&fl%5B%5D=identifier&fl%5B%5D=type&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&page=1&output=json&callback=callback&save=yes#raw"

def searchIA(title, author):
    """
    Do a search on The Internet Archive for a book
    """
    print "running a search"
    requrl = searchUrl.format(quote(title + " " + author))
    try:
        results = loads(req.get(requrl).text[9:][0:-1])
    except ValueError:
        return []

    rownum = results["responseHeader"]["params"]["rows"]
    if rownum < 1:
        print "Couldn't find results for %s %s" % (title, author)
        return []
    docs = results["response"]["docs"]
    urls = []
    for result in results["response"]["docs"][0:3]:
        urls.append("https://archive.org/details/%s" % result["identifier"])
    return urls


# Example, search for David Hume's Enquiry Concerning Human Understanding
#for url in searchIA("Hume", "Enquiry Concerning Human Understanding"):
    #print url
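For reference, a minimal usage sketch for the `searchIA` helper removed above (Python 2, matching the module). The two arguments are simply concatenated into one query string, which is why the file's own commented example gets away with passing the author first; the `[9:][0:-1]` slice assumes the response is still wrapped in a `callback(...)` JSONP envelope:

    from archive import searchIA

    for url in searchIA("Enquiry Concerning Human Understanding", "Hume"):
        print url  # e.g. https://archive.org/details/<identifier>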
@@ -1,62 +0,0 @@
#! /usr/bin/python2

from sys import argv
from hashlib import sha1

def truncate(docid):
    """
    Truncate a document id to 12 digits
    The document ID should be based on a
    hash of unique identifiers
    """
    return int(str(docid)[0:12])

def createResource(textbookInfo, course, dept, coursecode, docid):
    """
    Create a document associated with a course
    This document contains any/all resources associated
    with that course

    example,
    {
     'books': [],
     'dept': 'COLLAB',
     'code': '2C03',
     'sections': [
       {
        'prof': 'Lisa Pender',
        'sem': '2015/09/08 - 2015/12/08',
        'day': 'Mo'
       },
       {
        'prof': 'Staff',
        'sem': '2015/09/08 - 2015/12/08',
        'day': 'Th'
       }
     ],
     'title': 'COLLAB 2C03 - Sociology I'
    }
    """
    textbooks = textbookInfo(dept.strip(), coursecode.strip())

    # We truncate the id so we can have nicer looking URLs
    # Since the id will be used to point to the resource page for that course
    _id = str(truncate(docid))

    fields = {
        "_id" : _id,
        "textbooks" : textbooks,
        "coursetitle" : "%s %s" % (dept.strip(), coursecode.strip()),
        "courseinfo" : course
        #"Syllabus" : "blah"
    }
    try:
        revisions = list(localdb.revisions(_id))
        if not revisions:
            return localdb.save(fields)
        else:
            rev = dict(revisions[0])["_rev"]
            fields["_rev"] = rev
            return localdb.save(fields)
    except ResourceConflict:
        print "Resource for %s already exists, not creating a new one" % (docid)
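Note that `localdb` and `ResourceConflict` are referenced here but never defined in the file; they presumably came from CouchDB setup code elsewhere in the deleted tree. A hedged sketch of that assumed wiring (the server URL and database name are guesses, not taken from the diff; the `ResourceConflict` import matches the one the Flask server below actually uses):

    import couchdb
    from couchdb import ResourceConflict

    couch = couchdb.Server("http://127.0.0.1:5984/")  # assumed local CouchDB instance
    localdb = couch["resources"]                      # assumed database name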
@@ -1,14 +0,0 @@
#! /usr/bin/python2

# predictive data
# switch to elasticsearch's prediction



import database
import predictions

class GOASearch(object):
    def __init__(self):
        return self
@@ -1,349 +0,0 @@
#! /usr/bin/python2

from sys import argv
from itertools import chain, islice, izip as zip
from re import search, sub
from functools import total_ordering

from sylla import textbookInfo
from collections import MutableMapping

import datetime as dt
import lxml.html as lxh
import requests
import sys
import copy

fall = "2159"
spring_summer = "2165"
winter = "2161"

# threading stuff
import Queue as q
import threading as thd

baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"

searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"

custom_headers = {
    "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0",
    "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8",
}

courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"

courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}"

payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"

payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}"


year = dt.date.today().year
month = dt.date.today().month

days = {
    "Mo" : 0,
    "Tu" : 1,
    "We" : 2,
    "Th" : 3,
    "Fr" : 4,
    "Sa" : 5,
    "Su" : 6
}

day_descs = {
    "Mo" : "Monday Mon Mo",
    "Tu" : "Tuesday Tues Tu Tue",
    "We" : "Wednesday Wed We",
    "Th" : "Thursday Th Thurs",
    "Fr" : "Friday Fr Fri",
    "Sa" : "Saturday Sat Sa",
    "Su" : "Sunday Su Sun",
    "T" : "TBA"
}

def timeparse(time):
    """
    Parse the time into numbers
    """
    if len(time) == 7:
        hour = int(time[0:2])
        minutes = int(time[3:5])
        half = time[5:7]
    else:
        hour = int(time[0])
        minutes = int(time[2:4])
        half = time[4:6]
    if half == "PM":
        if hour < 12:
            hour = hour + 12

    return (str(hour), str(minutes), half)

class Class(object):
    def __init__(self, dept, title, sections):
        self.title = title.encode("UTF-8")
        self.sections = sections
        self.dept = dept

    def __repr__(self):
        return repr((self.title, self.sections))

    def __iter__(self):
        return iter((self.title, sec) for sec in self.sections)

    def hasCode(self):
        splitted = self.title.strip().split(" ")
        return ((len(splitted) >= 2) and
                (splitted[0].upper() == splitted[0]) and
                (splitted[1].upper() == splitted[1]))

    @property
    def code(self):
        if self.hasCode():
            return self.title.strip().split(" ")[1].strip()
        return False

    @property
    def books(self):
        if self.dept and self.code:
            return textbookInfo(self.dept, self.code, withPrices=True)
        return False

@total_ordering
class Section(dict):
    def __init__(self, time, loc, prof, sem):
        self.time = time.encode("UTF-8")
        self.loc = loc.encode("UTF-8")
        self.prof = prof.encode("UTF-8")
        self.sem = sem.encode("UTF-8")
        self._date = False
        self._day = False

    @property
    def date(self):
        if self.time != "TBA":
            day, start, _, end = self.time.split()

            if self._day:
                assert len(self._day) == 2
                day = self._day
            else:
                day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)]

            self._date = (day, timeparse(start), timeparse(end))

            return self._date

        return self.time

    @property
    def day(self):
        return self.date[0]

    @property
    def start(self):
        return self.date[1][0] + self.date[1][1]

    def __repr__(self):
        return ("""
        Time = %s, Location = %s, Instructor = %s, Semester Running = %s
        """ % (self.date, self.loc, self.prof, self.sem))

    def __gt__(self, x):
        if isinstance(self.day, list):
            raise NotImplementedError

        if (self.date == "TBA" or
            x.date == "TBA"):
            return False

        return ((days[self.day] > days[x.day]) or
                ((self.day == x.day) and
                 (self.start > x.start)))

    def __eq__(self, x):
        return (x.date == self.date and
                x.prof == self.prof and
                x.loc == self.loc and
                x.sem == self.sem)


def getStateNum(html):
    """
    Get the state num from Mosaic
    This is unique to each requester
    """
    parsed = lxh.fromstring(html)
    return parsed.xpath(".//input[@name=\"ICStateNum\"]")[0].value

def parseSection(section):
    cols = section.xpath(".//td")
    assert len(cols) == 4
    time, loc, prof, sem = [col.text_content().encode("UTF-8").strip() for col in cols]

    classinfo = Section(time, loc, prof, sem)
    return classinfo

def getSectionInfo(table):
    trs = table.xpath(".//tr")
    for tr in trs:
        if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]):
            yield parseSection(tr)

def parseColumns(subject, html):
    parsed = lxh.fromstring(html)

    classInfo = (list(getSectionInfo(table)) for table in
                 islice((table for table in parsed.xpath(".//table")
                         if table.xpath("@id") and
                         search(r"ICField[0-9]+\$scroll", table.xpath("@id")[0])), 1, sys.maxint))

    classNames = ((subject, span.text_content().strip()) for span in parsed.xpath(".//span")
                  if span.xpath("@id") and
                  search(r"DERIVED_CLSRCH_DESCR", span.xpath("@id")[0]))

    return zip(classNames, classInfo)

def getCodes(html):
    parsed = lxh.fromstring(html)

    return (code.text_content().encode("UTF-8") for code in
            parsed.xpath("//span")
            if code.xpath("@id") and
            search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", code.xpath("@id")[0]))

class MosReq(object):
    def __init__(self, semester):
        self.semester = semester
        s = requests.Session()
        resp = s.get(baseurl, allow_redirects=True, headers=custom_headers).content

        # Let the server set some cookies before doing the searching
        cookies = {}
        for key, val in s.cookies.iteritems():
            cookies[key] = val
        self.cookies = cookies
        self.statenum = False
        self.codes_ = []

    def getlist(self, subject):
        sys.stderr.write("Getting " + subject + "\n")
        first_req = requests.get(searchurl, cookies=self.cookies).content
        # for some reason Mosaic wants us to request it twice, ??????????????????
        self.statenum = getStateNum(first_req)
        first_req = requests.post(searchurl,
                                  data=payload.format(self.statenum, subject, self.semester),
                                  cookies=self.cookies,
                                  allow_redirects=False,
                                  headers=custom_headers).content
        # we make a first request to get the ICStateNum in case it thinks there are too many results
        try:
            self.statenum = getStateNum(first_req)
        except IndexError:
            pass
        if "Your search will return over" in first_req:

            return requests.post(searchurl,
                                 data=payload2.format(self.statenum, self.semester),
                                 cookies=self.cookies,
                                 allow_redirects=False,
                                 headers=custom_headers).content
        else:
            return first_req

    def classes(self, subject):
        return list(parseColumns(subject, self.getlist(subject)))

    def getCodes(self, letter):
        sys.stderr.write("Getting letter " + letter + "\n")
        first_req = requests.get(searchurl, cookies=self.cookies).content
        self.statenum = getStateNum(first_req)

        self.statenum = getStateNum(requests.post(searchurl,
                                                  data=courseCodes1.format(self.statenum, self.semester),
                                                  cookies=self.cookies,
                                                  headers=custom_headers).content)

        return getCodes(requests.post(searchurl,
                                      data=courseCodes2.format(self.statenum, letter, self.semester),
                                      cookies=self.cookies,
                                      allow_redirects=False,
                                      headers=custom_headers).content)

    @property
    def codes(self):
        if not self.codes_:
            self.codes_ = list(chain.from_iterable(
                map((lambda l:
                     self.getCodes(chr(l))),
                    xrange(65, 91))))
        return self.codes_

def request(codes, lists, semester):
    requester = MosReq(semester)
    while not codes.empty():
        code = codes.get()
        try:
            lists.put(requester.classes(code))
        except:
            codes.task_done()
            return
        codes.task_done()


class CourseInfo(object):
    def __init__(self, threadcount, semester):
        self._codes = False
        self.threadcount = threadcount
        self.semester = semester

    @property
    def codes(self):
        if not self._codes:
            req = MosReq(self.semester)
            self._codes = req.codes
        return self._codes

    def classes(self):
        qcodes = q.Queue()
        for code in self.codes:
            qcodes.put(code)
        lists = q.Queue()
        threads = []
        thread = None
        for i in xrange(self.threadcount):
            thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester))
            threads.append(thread)
            thread.start()
        qcodes.join()
        for t in threads:
            t.join()

        sections = []
        while not lists.empty():
            sections.append(lists.get())

        for cl in chain.from_iterable(sections):
            new_sections = []
            for sec in cl[1]:
                if len(sec.day) > 1:
                    for day in sec.day:
                        new_sections.append(copy.deepcopy(sec))
                        new_sections[-1]._day = day
                else:
                    sec._day = sec.day[0]
                    new_sections.append(sec)
            yield Class(cl[0][0], sub("\xa0+", "", cl[0][1]), sorted(new_sections))

def getCourses(semester, threadcount=10):
    return CourseInfo(threadcount, semester).classes()

def allCourses():
    return chain.from_iterable(
        (getCourses(sem, threadcount=10)
         for sem in (fall, winter, spring_summer)))

#for course in allCourses():
    #sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books))
    #print course.sections
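A hedged sketch of driving the scraper above (Python 2; `fall` is the term constant "2159" defined near the top of the file, and iterating performs the full threaded Mosaic scrape, so this is slow and network-bound):

    from mcmaster.classes import getCourses, fall

    for course in getCourses(fall, threadcount=10):
        # code is False when the title carries no course code
        print course.dept, course.code, course.title
        for title, section in course:  # Class.__iter__ yields (title, Section) pairs
            print "  ", section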
@@ -1,9 +0,0 @@
from oersearch import Search
from classes import getCourses
from sylla import getTextbooks

mcmasterSearch = Search("McMaster")

mcmasterSearch.setup(getCourses)

mcmasterSearch.run()
@@ -1,117 +0,0 @@
#! /usr/bin/python2

from sys import argv
from itertools import chain, islice, izip_longest, izip as zip
from re import search, sub
from functools import total_ordering
from re import sub

import datetime as dt
import lxml.html as lxh
import requests

# Purpose of this module is to download and parse syllabi from various departments
# In order to be correlated with individual courses

class Price(object):
    def __init__(self, amnt, status):
        self.dollars = float(amnt[1:])
        self.status = status

    def __repr__(self):
        return "$%s %s" % (repr(self.dollars), self.status)


class Book(object):
    def __init__(self, title, price):
        self.title = title
        self.price = price

    def __repr__(self):
        return '["%s", "%s"]' % (self.title, repr(self.price))


def grouper(n, iterable, fillvalue=None):
    "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)

searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1"

def normalize(word):
    if len(word) > 1:
        return ("%s%s" %
                (word[0].upper(),
                 "".join(word[1:]).lower()))
    return word

def parseAuthor(author):
    split = author.split(" ")
    if len(split) <= 1:
        return author
    lastname = split[0]
    firstname = split[1]
    return "%s %s" % (firstname, lastname)

def normwords(phrase):
    words = phrase.split(" ")
    return " ".join(map(normalize, words))

def books(dept, code, withPrices):
    """
    Snatch me up a book title or three
    """
    req = searchUrl % (dept, code)

    html = requests.get(req).text

    parsed = lxh.fromstring(html)

    pricelist = prices(parsed)

    for div in parsed.xpath(".//div"):
        if (div.attrib.has_key("id") and
            "prodDesc" in div.attrib["id"]):

            textbook = div.text_content()
            author = sub(r',', '',
                         "".join(
                             (div.getparent()
                              .xpath(".//span[@class='inline']")
                              [0].text_content()
                              .split(":")[1:])).strip())
            price = pricelist.pop()
            if withPrices:
                yield (normwords(textbook), normwords(author), repr(price))
            else:
                yield (normwords(textbook), normwords(author))

def prices(html):
    """
    Get the prices from a search result page
    """
    ps = [
        p.getparent().text_content().split()[0]
        for p in html.xpath("//p/input[@type='checkbox']")
    ]

    try:
        amts, stats = zip(*list(reversed(list(grouper(2, ps)))))
        return map(Price, amts, stats)
    except ValueError:
        return []

def textbookInfo(dept, code, withPrices=False):
    """
    Return all the textbooks for a course
    """
    return list(books(dept, code, withPrices))

def humanities():
    """
    Download humanities syllabi
    """
    return []

# Example, getting the course info for Personality Theory (PSYCH = Department, 2B03 = Course code)
# print list(courseInfo("PSYCH", "2B03"))
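A minimal usage sketch matching the comment above (assumes the campus store endpoint still answers; with withPrices=True each result is a (title, author, price) triple):

    from sylla import textbookInfo

    for title, author, price in textbookInfo("PSYCH", "2B03", withPrices=True):
        print "%s by %s: %s" % (title, author, price)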
@@ -1,24 +0,0 @@
#! /usr/bin/python2

from urllib import quote
from json import loads, dumps

import requests as req

#query = "https://openlibrary.org/query.json?type=/type/edition&title=%s&author=%s"
searchurl = 'http://openlibrary.org/search.json?author=%s&title=%s'

def bookUrls(title, author):
    print title, author
    if ":" in title:
        title = title.split(":")[0]
    requrl = searchurl % (quote(author), quote(title))
    results = loads(req.get(requrl).text)
    for result in results["docs"][0:2]:
        if result.has_key("edition_key"):
            yield "https://openlibrary.org/books/%s" % result["edition_key"][0]

# 'http://openlibrary.org/query.json?type=/type/edition&title=The+Personality+Puzzle'

#for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
    #print book
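A usage sketch matching the commented example above (Python 2; `bookUrls` is a generator, and each yielded URL is built from the first `edition_key` of a search result):

    from openlibrary import bookUrls

    for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
        print book  # https://openlibrary.org/books/<edition_key>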
@@ -1,153 +0,0 @@
##! /usr/bin/python2
from itertools import groupby, chain
from sys import stdout
from functools import partial
from json import dumps

def gensymer():
    n = [0]
    def inner():
        result = str(n[0])
        n[0] += 1
        return result
    return inner

gensym = gensymer()

def printTrie(graph, prev, trie, weight):
    new_node = str(gensym())
    graph.node(new_node, "%s" % trie.letter)
    graph.edge(prev, new_node, label="%.2f" % weight)
    if not trie.children:
        return
    for child, weight in zip(trie.children, trie.ws):
        printTrie(graph, new_node, child, weight)


class Trie(object):
    def __init__(self, letter, children, ws):
        self.letter = letter
        self.children = children
        self.ws = ws

def probweight(suffixes):
    weights = [float(s["value"]) for s in suffixes]
    s = float(sum(weights))
    ws = [w/s for w in weights]
    return ws

def buildtrie(trie, suffixes):
    """
    Build a trie, also known as a prefix tree, of all the possible completions
    """
    trie.children = []
    for letter, suffs in suffixes:
        ped = partition(suffs)
        if any(map(lambda p: p[0], ped)):
            # check if there are any children
            trie.children.append(buildtrie(Trie(letter, [], probweight(suffs)), partition(suffs)))
        else:
            # we've reached the end of this word so just include the final letter
            # [1] = there is a probability of 1 of reaching this single leaf node,
            # since it is the only possible completion here
            trie.children.append(Trie(letter, [], [1]))
    return trie


def keyf(x):
    if not x["key"]:
        return ""
    return x["key"][0]

def tails(words):
    for word in words:
        yield {
            "key" : word["key"][1:],
            "value" : word["value"]
        }

def partition(words):
    """
    Partition the words into different prefixes based on the first character
    """
    groups = [
        (g[0], list(tails(g[1])))
        for g in groupby(
            sorted(words, key=keyf),
            key=keyf)
    ]
    return groups


def flatten_helper(letter, trie):
    return ([letter + child.letter for
             child in trie.children], trie.children)

def flatten(trie):
    if not trie.children:
        return trie.letter
    prefixes, suffixes = flatten_helper(trie.letter, trie)
    return [flatten(Trie(p, s2.children, s2.ws)) for p, s2 in zip(prefixes, suffixes)]

def flattenlist(xs):
    locs = []
    for x in xs:
        if not isinstance(x, list):
            locs.append(x)
        else:
            locs.extend(flattenlist(x))
    return locs

def matchc(trie, prefix):
    c = None
    if len(prefix) > 1:
        c = prefix[0]
    else:
        c = prefix
    return [ch for ch in trie.children if ch.letter == c]

def match(trie, word):
    if not word:
        return []
    m = matchc(trie, word[0])
    if not m:
        return []
    else:
        return [m[0]] + match(m[0], word[1:])

def complete(trie, word):
    m = match(trie, word)
    if len(word) != len(m):
        return False
    completions = [word+x[1:] for x in flattenlist(flatten(m[-1]))]
    if len(completions) > 10:
        return dumps(completions[0:10])
    return dumps(completions)

def sortTrie(trie):
    """
    Sort the children of each node in descending order
    of the probability that each child would be the completion
    of whatever that word is
    """
    if not trie.children:
        return
    sortedChilds = sorted(zip(trie.children, trie.ws), key=lambda x: x[1], reverse=True)
    trie.children = [x[0] for x in sortedChilds]
    trie.ws = [x[1] for x in sortedChilds]
    for child in trie.children:
        sortTrie(child)

def toTrie(words):
    for word in words:
        word["key"] = word["key"].lower()
    trie = buildtrie(Trie("", [], [1]), partition(words))
    trie.ws = [1]*len(trie.children)
    sortTrie(trie)
    return trie

def testkey(w):
    return {
        "key" : w,
        "value" : "1"
    }
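A hedged sketch of the completion pipeline above: `testkey` wraps bare words with weight "1", `toTrie` builds and probability-sorts the trie, and `complete` returns a JSON list of up to ten completions, or False when the prefix is absent:

    words = map(testkey, ["philosophy", "physics", "psychology"])
    trie = toTrie(words)
    print complete(trie, "ph")   # e.g. ["philosophy", "physics"]
    print complete(trie, "xyz")  # False: no such prefix in the trie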
@@ -1,237 +0,0 @@
#! /usr/bin/python2

import elasticsearch

from elasticsearch_dsl import FacetedSearch, Search, Q
from elasticsearch_dsl.aggs import Terms, DateHistogram
from sys import exit, stderr
from json import dumps, loads
from itertools import chain, imap

from hashlib import sha1

from textbookExceptions import UnIndexable

from mcmaster.classes import allCourses

# Generic instance of elasticsearch right now
es = elasticsearch.Elasticsearch()

def summarize(text):
    splitted = text.split(" ")
    if len(splitted) > 4:
        return " ".join(splitted[0:4]) + ".."
    return text

def sectionToJSON(section):
    return {
        "prof" : section.prof,
        "sem" : section.sem,
        "day" : section.day
    }

def classToJSON(clss):
    return {
        "title" : clss.title,
        "sections" : map(sectionToJSON, clss.sections),
        "dept" : clss.dept,
        "code" : clss.code,
        "books" : list(clss.books) if clss.books else []
    }


def truncate(docid):
    """
    Truncate a document id to 12 digits
    The document ID should be based on a
    hash of unique identifiers
    """
    return int(str(docid)[0:12])

def hashsec(course):
    """
    Hash a course into a usable id
    """
    if not course["code"]:
        code = ""
    else:
        code = course["code"]
    if not course["title"]:
        title = ""
    else:
        title = course["title"]

    if not course["sections"] or len(course["sections"]) < 1:
        course["sections"][0] = ""

    if not (code or title):
        raise UnIndexable(course)

    h = sha1()
    h.update(code + title + course["sections"][0]["sem"])
    return int(h.hexdigest(), 16)

def createIndex(name):
    """
    This creates a new index in elasticsearch
    An index is like a schema in a regular database
    Create an elasticsearch index

    """
    indices = elasticsearch.client.IndicesClient(es)

    print indices.create(name)
    with open("./course.json", "r") as mapping:
        print indices.put_mapping("course", loads(mapping.read()), name)

def indexListing(course):
    """
    Index a specific course in the database (using the courses index)
    example,
    {
     'books': [],
     'dept': 'COLLAB',
     'code': '2C03',
     'sections': [
       {
        'prof': 'Lisa Pender',
        'sem': '2015/09/08 - 2015/12/08',
        'day': 'Mo'
       },
       {
        'prof': 'Staff',
        'sem': '2015/09/08 - 2015/12/08',
        'day': 'Th'
       }
     ],
     'title': 'COLLAB 2C03 - Sociology I'
    }

    """
    courseID = hashsec(course)
    print es.index(index="oersearch",
                   doc_type="course",
                   id=courseID,
                   body=course)

    # For every course we index, we also create a resource for it
    # This should be an idempotent operation because we're putting it in couchdb
    # And we're using the id obtained from the hash function, so it should just update the document
    # no need to delete anything
    #try:
        #courseDept = course[0]["title"].strip().split(" ")[0].strip()
        #courseCode = course[0]["title"].strip().split(" ")[1].strip()
        #print "DEPARTMENT = \"%s\", COURSECODE = \"%s\"" % (courseDept, courseCode)
        #print createResource(textbookInfo, course[0], courseDept, courseCode, courseID)
    #except:
        #print "Couldn't create the resource associated with %s" % course

def termSearch(field):
    """
    Make a term search (exact match)
    """
    def t(term):
        q = Q("term",
              **{
                  "sections."+field : term
              })
        return q
    return t

def search(field):
    """
    Make a match search
    """
    def s(term):
        q = Q("match",
              **{
                  field : term
              })
        return q
    return s

def join(x, y):
    """
    Join two queries
    """
    return x & y

def filterSections(secs):
    """
    Get rid of tutorial sections
    because they almost always have "Staff" as the instructor
    This is just a heuristic of course
    """
    filtered = [s for s in secs.sections if "Staff" not in s.prof]
    if len(filtered) > 0:
        return filtered
    return False

def searchTerms(terms):
    """
    Run a search for courses
    """

    # A list of all the queries we want to run
    qs = [searchers[field](term) for
          field, term in
          terms.iteritems() if
          term and searchers.has_key(field)]

    if not qs:
        # No queries = no results
        return dumps([])

    # Reduce joins all of the queries into one query
    # It will search for the conjunction of all of them
    # So that means it cares about each query equally
    q = reduce(join, qs)

    s = (Search(using=es, index="oersearch")
         .query(q))[0:100]  # only return up to 100 results for now

    results = s.execute()

    filtered = [
        (secs, filterSections(secs)[0].to_dict())  # get rid of tutorials
        for secs in results
        if filterSections(secs)
    ]
    results = []
    for obj, secs in filtered:
        # Add the truncated course id
        # This is used to point to the resource page for that course
        secs["id"] = truncate(obj.meta.id)
        secs["title"] = obj.title
        if obj["dept"] not in secs["title"]:
            secs["dept"] = obj.dept
        if obj.books:
            secs["books"] = [
                {
                    "booktitle" : summarize(book[0].encode("ASCII")),
                    "bookauthor" : book[1].encode("ASCII"),
                    "bookprice" : book[2].encode("ASCII")
                }
                for book in obj.books
            ]
        else:
            secs["books"] = ""
        results.append(secs)

    return dumps(results)


searchers = {
    "title" : search("title"),
    "loc" : search("loc"),
    "time" : search("time"),
    "prof" : search("prof"),
    "day" : search("day"),
}

#print searchTerms({"title" : "PHILOS"})

#for c in imap(classToJSON, allCourses()):
    #try:
        #print indexListing(c)
    #except UnIndexable as e:
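For reference, the file's own commented smoke test, runnable once the index is populated; `searchTerms` AND-joins one query per recognized field and returns a JSON string of at most 100 matches:

    print searchTerms({"title" : "PHILOS"})  # JSON list of up to 100 PHILOS listings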
@@ -1,24 +0,0 @@
#! /usr/bin/python2

class UnIndexable(Exception):
    def __init__(self, course):
        self.course = course

    @property
    def reason(self):
        course = self.course
        if not course["code"] and not course["title"]:
            message = "there was no course code and no title defined"
        if not course["code"]:
            message = "there was no course code defined"
        if not course["title"]:
            message = "there was no course title defined"
        if not course["sections"]:
            message = "there were no sections defined"
        return """
        There was a problem with indexing this course.
        %s
        There could be several reasons why, my best guess is that %s
        We need at least the course code, title, and one or more sections to index

        """ % (course, message)
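A hedged sketch of how the indexer in search.py above surfaces this error; the `reason` property composes its message from whichever required fields are missing:

    from textbookExceptions import UnIndexable

    course = {"code" : "", "title" : "", "sections" : [{"sem" : "2015/09/08 - 2015/12/08"}]}
    try:
        raise UnIndexable(course)
    except UnIndexable as e:
        print e.reason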
@@ -1,97 +0,0 @@
#! /usr/bin/python2

from json import loads, load
from re import sub, split
from itertools import groupby
from numpy import mean
from operator import attrgetter

import pygal
import csv

class Textbook(object):
    def __init__(self, dept, code, title, author, price):
        self.dept = dept
        self.code = code
        self.title = title
        self.author = author
        self.price = float(price)

    def __repr__(self):
        return "Dept = %s, Code = %s, %s by %s, costs $%s" % (self.dept,
                                                              self.code,
                                                              self.title,
                                                              self.author,
                                                              self.price)


def courses():
    with open("./books.csv", "r") as books:
        booksreader = csv.reader(books)
        for row in booksreader:
            yield row


def groupDept(courselist):
    sortedCourses = sorted(courselist, key=attrgetter("dept"))
    for course in groupby(sortedCourses, attrgetter("dept")):
        yield course[0], list(course[1])

def meanPrice(books):
    return mean([book.price for book in books])

# Questions,
# mean cost per department
# mean cost per faculty
# mean difference between book store copies and other copies per dept and faculty
# number of overlapping books per faculty, do eng students benefit from that?

# maybe a survey for students to see how often they buy books from other sources
# correlate with how much they could be saving?

facultyDesc = {
    "hum" : "Humanities",
    "bus" : "Business",
    "hlth" : "Health Science",
    "eng" : "Engineering",
    "sci" : "Science",
    "socsci" : "Social Sciences",
    "artsci" : "Arts & Sciences",
    "meld" : "MELD"
}

faculties = load(open("./faculties.json"))

def categorize(dept):
    # faculties
    return facultyDesc.get(faculties.get(dept, False), False)

def byFaculty():
    for dept, books in groupDept(courses()):
        yield (categorize(dept), dept, books)

def meanFacultyCosts():
    byfac = list(byFaculty())
    graph = pygal.Bar()
    graph.title = "Mean textbook cost by faculty"
    sortedFacs = sorted(byfac, key=lambda x: x[0])
    for fac in groupby(sortedFacs, lambda x: x[0]):
        graph.add(fac[0], meanPrice(list(fac[1])[0][2]))
    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
    return graph.render(transpose=True)

def meanCosts():
    cs = groupDept(courses())
    graph = pygal.Bar()
    graph.title = "Mean textbook cost by department"
    for c in cs:
        dept, books = c
        graph.add(dept, meanPrice(books))
    #graph.render_to_file("./test_graph.svg")
    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
    return graph.render_table(style=True, transpose=True)

for x in courses():
    print x
#print meanCosts()
#print meanFacultyCosts()
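Note that `courses()` yields raw CSV rows while `groupDept` and `meanPrice` expect `Textbook` objects, so a caller presumably mapped rows first. A hedged sketch, with the column order assumed to match `Textbook.__init__`:

    books = [Textbook(*row) for row in courses()]  # assumes columns: dept, code, title, author, price
    for dept, deptbooks in groupDept(books):
        print "%s: $%.2f" % (dept, meanPrice(deptbooks))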
@ -1,148 +0,0 @@ |
|||||
#! /usr/bin/python2 |
|
||||
from functools import partial |
|
||||
from couchdb import ResourceConflict |
|
||||
|
|
||||
from flask import Flask, render_template, flash, request, send_from_directory |
|
||||
from flask_bootstrap import Bootstrap |
|
||||
from flask_appconfig import AppConfig |
|
||||
from urllib import unquote |
|
||||
from search import searchTerms |
|
||||
|
|
||||
from openlibrary import bookUrls |
|
||||
|
|
||||
from archive import searchIA |
|
||||
from urllib import quote, unquote |
|
||||
from json import dumps, loads |
|
||||
|
|
||||
from werkzeug.contrib.cache import MemcachedCache |
|
||||
cache = MemcachedCache(['127.0.0.1:11211']) |
|
||||
|
|
||||
import os |
|
||||
|
|
||||
def predict(fieldtype, term): |
|
||||
print fieldtype |
|
||||
print term |
|
||||
if not term: |
|
||||
return "[]" |
|
||||
else: |
|
||||
try: |
|
||||
cs = completers[fieldtype](term.lower()) |
|
||||
except KeyError: |
|
||||
return "[]" |
|
||||
if cs: |
|
||||
return cs |
|
||||
return "[]" |
|
||||
|
|
||||
def predictor(fieldtype): |
|
||||
def inner(request): |
|
||||
params = dict(request.args.items()) |
|
||||
return predict(fieldtype, params["term"]) |
|
||||
return inner |
|
||||
|
|
||||
def cacheit(key, thunk): |
|
||||
""" |
|
||||
Tries to find a cached version of ``key'' |
|
||||
If there is no cached version then it will |
|
||||
evaluate thunk (which must be a generator) |
|
||||
and cache that, then return the result |
|
||||
""" |
|
||||
cached = cache.get(quote(key)) |
|
||||
if cached is None: |
|
||||
result = list(thunk()) |
|
||||
cache.set(quote(key), result) |
|
||||
return result |
|
||||
return cached |
|
||||


def ClassSearch(configfile=None):
    defaults = {"Day", "Building", "Exact Location", "Department"}
    app = Flask(__name__)
    AppConfig(app, configfile)  # Flask-Appconfig is not necessary, but
                                # highly recommended =)
                                # https://github.com/mbr/flask-appconfig
    Bootstrap(app)

    app.config["scripts"] = "/home/wes/MGOAL/scripts"
    app.config["styles"] = "/home/wes/MGOAL/styles"

    @app.route('/favicon.ico')
    def favicon():
        # send_from_directory takes the directory and the filename separately;
        # as deleted, the full file path was passed as the directory.
        return send_from_directory("/srv/http/goal", 'favicon.ico',
                                   mimetype='image/vnd.microsoft.icon')

    @app.route("/buildpred", methods=("GET", "POST"))
    def buildpred():
        return predictbuild(request)

    @app.route("/locpred", methods=("GET", "POST"))
    def locpred():
        return predictloc(request)

    @app.route("/daypred", methods=("GET", "POST"))
    def daypred():
        return predictday(request)

    @app.route("/deptpred", methods=("GET", "POST"))
    def deptpred():
        return predictdept(request)

    @app.route("/titlepred", methods=("GET", "POST"))
    def titlepred():
        return predicttitle(request)

    @app.route("/", methods=("GET", "POST"))
    def index():
        return render_template("search.html")

    @app.route("/fc", methods=("GET", "POST"))
    def fc():
        """ Filter courses """
        print "trying to get courses"
        params = dict(request.args.items())
        # Iterate over a copy of the items: deleting from a dict while
        # iterating with iteritems() raises a RuntimeError in Python 2.
        for key, val in params.items():
            if val in defaults:
                del params[key]
        results = searchTerms(params)
        return results
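For illustration, a hypothetical /fc request (the parameter names are assumptions, since the search form is not part of this diff):

    # GET /fc?Department=COLLAB&Day=Day
    # "Day" is one of the placeholder defaults, so that pair is dropped and
    # only {"Department": "COLLAB"} is passed on to searchTerms.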

    @app.route("/resources", methods=("GET", "POST"))
    def resources():
        """ Get resources for a given title and author """
        params = loads(dict(request.args.items())["data"])
        print params
        author = params["author"]
        title = params["title"]

        if ("No Textbooks" in title or
                "No Adoption" in title):
            return dumps("false")

        # Cache the result of the Open Library search
        openlib = cacheit("openlib" + title + author,
                          lambda: bookUrls(title, author))
        print openlib

        # Cache the result of an Internet Archive search
        iarchive = cacheit("iarchive" + title + author,
                           lambda: searchIA(title, author))
        print iarchive

        if not (any(openlib) or any(iarchive)):
            # We could not find anything at all
            return dumps("false")

        return dumps({
            "iarchive": iarchive,
            "openlib": openlib
        })
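The /resources endpoint expects a data query parameter holding a JSON object with author and title keys, and returns either "false" or an object with both result lists. A hypothetical client call, assuming the dev server from the __main__ block below:

    import requests
    from json import dumps

    resp = requests.get("http://localhost:8001/resources",
                        params={"data": dumps({"author": "David Hume",
                                               "title": "An Enquiry Concerning Human Understanding"})})
    # prints either "false" or {"iarchive": [...], "openlib": [...]}
    print resp.json()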

    @app.route("/scripts/<filename>")
    def send_script(filename):
        return send_from_directory(app.config["scripts"], filename)

    @app.route("/styles/<filename>")
    def send_style(filename):
        return send_from_directory(app.config["styles"], filename)

    return app


if __name__ == "__main__":
    ClassSearch().run(port=8001, debug=True)
@ -1,24 +0,0 @@
#! /usr/bin/python2


class UnIndexable(Exception):
    def __init__(self, course):
        self.course = course

    @property
    def reason(self):
        course = self.course
        # Use an if/elif chain so that earlier matches are not overwritten
        # and ``message'' is always bound; as deleted, independent ifs meant
        # later checks clobbered earlier ones, and no match at all raised
        # an UnboundLocalError.
        if not course["code"] and not course["title"]:
            message = "there was no course code and no title defined"
        elif not course["code"]:
            message = "there was no course code defined"
        elif not course["title"]:
            message = "there was no course title defined"
        elif not course["sections"]:
            message = "there were no sections defined"
        else:
            message = "the course data was incomplete in some other way"
        return """
        There was a problem with indexing this course.
        %s
        There could be several reasons why; my best guess is that %s.
        We need at least the course code, title, and one or more sections
        to index it.
        """ % (course, message)
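A hypothetical usage sketch for UnIndexable, assuming the indexer validates course dicts of the shape above before indexing:

    course = {"code": "", "title": "Sociology I", "sections": []}
    try:
        if not (course["code"] and course["title"] and course["sections"]):
            raise UnIndexable(course)
    except UnIndexable as e:
        print e.reason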