From 144d493b870ac7f11e8b3294f16fd0f4ac2b6228 Mon Sep 17 00:00:00 2001
From: wes
Date: Mon, 20 Jun 2016 04:19:24 -0400
Subject: [PATCH] test

---
 .gitignore                |   5 +
 .search.py.swn            | Bin 0 -> 16384 bytes
 .~lock.books.csv#         |   1 +
 archive.py                |  34 ----
 database.py               |  62 -------
 goasearch.py              |  14 --
 mcmaster/__init__.py      |   0
 mcmaster/classes.py       | 349 --------------------------------------
 mcmaster/site.py          |   9 -
 mcmaster/sylla.py         | 117 -------------
 openlibrary.py            |  24 ---
 predictions.py            | 153 -----------------
 search.py                 | 237 --------------------------
 src/archive.py            |  34 ----
 src/database.py           |  62 -------
 src/goasearch.py          |  14 --
 src/openlibrary.py        |  24 ---
 src/predictions.py        | 153 -----------------
 src/search.py             | 237 --------------------------
 src/textbookExceptions.py |  24 ---
 src/visualize.py          |  97 -----------
 src/website.py            | 148 ----------------
 textbookExceptions.py     |  24 ---
 visualize.py              |  97 -----------
 website.py                | 148 ----------------
 25 files changed, 6 insertions(+), 2061 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 .search.py.swn
 create mode 100644 .~lock.books.csv#
 delete mode 100755 archive.py
 delete mode 100755 database.py
 delete mode 100755 goasearch.py
 delete mode 100644 mcmaster/__init__.py
 delete mode 100755 mcmaster/classes.py
 delete mode 100644 mcmaster/site.py
 delete mode 100755 mcmaster/sylla.py
 delete mode 100755 openlibrary.py
 delete mode 100755 predictions.py
 delete mode 100755 search.py
 delete mode 100755 src/archive.py
 delete mode 100755 src/database.py
 delete mode 100755 src/goasearch.py
 delete mode 100755 src/openlibrary.py
 delete mode 100755 src/predictions.py
 delete mode 100755 src/search.py
 delete mode 100644 src/textbookExceptions.py
 delete mode 100755 src/visualize.py
 delete mode 100755 src/website.py
 delete mode 100644 textbookExceptions.py
 delete mode 100755 visualize.py
 delete mode 100755 website.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..69243cd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.swp
+*.pyc
+*.swo
+.vim-session
+*~
diff --git a/.search.py.swn b/.search.py.swn
new file mode 100644
index 0000000000000000000000000000000000000000..4e90bb678f3402575138e50842030cef4aab0e37
GIT binary patch
literal 16384
[binary payload omitted (Vim swap file, 16384 bytes)]
diff --git a/.~lock.books.csv# b/.~lock.books.csv#
new file mode 100644
index 0000000..3a148d9
--- /dev/null
+++ b/.~lock.books.csv#
@@ -0,0 +1 @@
+,wes,ismeta,12.03.2016 20:10,file:///home/wes/.config/libreoffice/4;
\ No newline at end of file
diff --git a/archive.py b/archive.py
deleted file mode 100755
index 73fcde7..0000000
--- a/archive.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#! /usr/bin/python2
-
-from urllib import quote
-from json import loads, dumps
-
-import requests as req
-
-searchUrl = "https://archive.org/advancedsearch.php?q={0}&fl%5B%5D=avg_rating&fl%5B%5D=description&fl%5B%5D=identifier&fl%5B%5D=type&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&page=1&output=json&callback=callback&save=yes#raw"
-
-def searchIA(title, author):
-    """
-    Do a search on The Internet Archive for a book
-    """
-    print "running a search"
-    requrl = searchUrl.format(quote(title + " " + author))
-    try:
-        results = loads(req.get(requrl).text[9:][0:-1])
-    except ValueError:
-        return []
-
-    rownum = results["responseHeader"]["params"]["rows"]
-    if rownum < 1:
-        print "Couldn't find results for %s %s" % (title, author)
-        return []
-    docs = results["response"]["docs"]
-    urls = []
-    for result in results["response"]["docs"][0:3]:
-        urls.append("https://archive.org/details/%s" % result["identifier"])
-    return urls
-
-
-# Example, search for David Hume's Enquiry Concerning Human Understanding
-#for url in searchIA("Hume", "Enquiry Concerning Human Understanding"):
-    #print url
diff --git a/database.py b/database.py
deleted file mode 100755
index a19272c..0000000
--- a/database.py
+++ /dev/null
@@ -1,62 +0,0 @@
-#! /usr/bin/python2
-
-from sys import argv
-from hashlib import sha1
-
-def truncate(docid):
-    """
-    Truncate a document id to 12 digits
-    The document ID should be based on a
-    hash of unique identifiers
-    """
-    return int(str(docid)[0:12])
-
-def createResource(textbookInfo, course, dept, coursecode, docid):
-    """
-    Create a document associated with a course
-    This document contains any/all resources associated
-    with that course
-
-    example,
-      {
-       'books': [],
-       'dept': 'COLLAB',
-       'code': '2C03',
-       'sections': [
-         {
-          'prof': 'Lisa Pender',
-          'sem': '2015/09/08 - 2015/12/08',
-          'day': 'Mo'
-         },
-         {
-          'prof': 'Staff',
-          'sem': '2015/09/08 - 2015/12/08',
-          'day': 'Th'
-         }
-        ],
-       'title': 'COLLAB 2C03 - Sociology I'
-      }
-    """
-    textbooks = textbookInfo(dept.strip(), coursecode.strip())
-
-    # We truncate the id so we can have nicer looking URLs
-    # Since the id will be used to point to the resource page for that course
-    _id = str(truncate(docid))
-
-    fields = {
-        "_id" : _id,
-        "textbooks" : textbooks,
-        "coursetitle" : "%s %s" % (dept.strip(), coursecode.strip()),
-        "courseinfo" : course
-        #"Syllabus" : "blah"
-    }
-    try:
-        revisions = list(localdb.revisions(_id))
-        if not revisions:
-            return localdb.save(fields)
-        else:
-            rev = dict(revisions[0])["_rev"]
-            fields["_rev"] = rev
-            return localdb.save(fields)
-    except ResourceConflict:
-        print "Resource for %s already exists, not creating a new one" % (docid)
diff --git a/goasearch.py b/goasearch.py
deleted file mode 100755
index 3dca7eb..0000000
--- a/goasearch.py
+++ /dev/null
@@ -1,14 +0,0 @@
-#! /usr/bin/python2
-
-# predictive data
-# switch to elasticsearch's prediction
-
-
-
-import database
-import predictions
-
-class GOASearch(object):
-    def __init__(self):
-        return self
-
diff --git a/mcmaster/__init__.py b/mcmaster/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/mcmaster/classes.py b/mcmaster/classes.py
deleted file mode 100755
index 54687df..0000000
--- a/mcmaster/classes.py
+++ /dev/null
@@ -1,349 +0,0 @@
-#! /usr/bin/python2
-
-from sys import argv
-from itertools import chain, islice, izip as zip
-from re import search, sub
-from functools import total_ordering
-
-from sylla import textbookInfo
-from collections import MutableMapping
-
-import datetime as dt
-import lxml.html as lxh
-import requests
-import sys
-import copy
-
-fall = "2159"
-spring_summer = "2165"
-winter = "2161"
-
-# threading stuff
-import Queue as q
-import threading as thd
-
-baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
-
-searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
-
-custom_headers = {
-    "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0",
-    "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8",
-    }
-
-courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
-
-courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}"
-
-payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
-
-payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}"
-
-
-year = dt.date.today().year
-month = dt.date.today().month
-
-days = {
-    "Mo" : 0,
-    "Tu" : 1,
-    "We" : 2,
-    "Th" : 3,
-    "Fr" : 4,
-    "Sa" : 5,
-    "Su" : 6
-    }
-
-day_descs = {
-    "Mo" : "Monday Mon Mo",
-    "Tu" : "Tuesday Tues Tu Tue",
-    "We" : "Wednesday Wed We",
-    "Th" : "Thursday Th Thurs",
-    "Fr" : "Friday Fr Fri",
-    "Sa" : "Saturday Sat Sa",
-    "Su" : "Sunday Su Sun",
-    "T" : "TBA"
-    }
-
-def timeparse(time):
-    """
-    Parse the time into numbers
-    """
-    if len(time) == 7:
-        hour = int(time[0:2])
-        minutes = int(time[3:5])
-        half = time[5:7]
-    else:
-        hour = int(time[0])
-        minutes = int(time[2:4])
-        half = time[4:6]
-    if half == "PM":
-        if hour < 12:
-            hour = hour + 12
-
-    return (str(hour), str(minutes), half)
-
-class Class(object):
-    def __init__(self, dept, title, sections):
-        self.title = title.encode("UTF-8")
-        self.sections = sections
-        self.dept = dept
-
-    def __repr__(self):
-        return repr((self.title, self.sections))
-
-    def __iter__(self):
-        return iter((self.title, sec) for sec in self.sections)
-
-    def hasCode(self):
-        splitted = self.title.strip().split(" ")
-        return ((len(splitted) >= 2) and
-                (splitted[0].upper() == splitted[0]) and
-                (splitted[1].upper() == splitted[1]))
-
-    @property
-    def code(self):
-        if self.hasCode():
-            return self.title.strip().split(" ")[1].strip()
-        return False
-
-    @property
-    def books(self):
-        if self.dept and self.code:
-            return textbookInfo(self.dept, self.code, withPrices=True)
-        return False
-
-@total_ordering
-class Section(dict):
-    def __init__(self, time, loc, prof, sem):
-        self.time = time.encode("UTF-8")
-        self.loc = loc.encode("UTF-8")
-        self.prof = prof.encode("UTF-8")
-        self.sem = sem.encode("UTF-8")
-        self._date = False
-        self._day = False
-
-    @property
-    def date(self):
-        if self.time != "TBA":
-            day, start, _, end = self.time.split()
-
-            if self._day:
-                assert len(self._day) == 2
-                day = self._day
-            else:
-                day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)]
-
-            self._date = (day, timeparse(start), timeparse(end))
-
-            return self._date
-
-        return self.time
-
-    @property
-    def day(self):
-        return self.date[0]
-
-    @property
-    def start(self):
-        return self.date[1][0] + self.date[1][1]
-
-    def __repr__(self):
-        return ("""
-        Time = %s, Location = %s, Instructor = %s, Semester Running = %s
-        """ % (self.date, self.loc, self.prof, self.sem))
-    def __gt__(self, x):
-        if isinstance(self.day, list):
-            raise NotImplementedError
-
-        if (self.date == "TBA" or
-            x.date == "TBA"):
-            return False
-
-        return ((days[self.day] > days[x.day]) or
-                ((self.day == x.day) and
-                 (self.start > x.start)))
-
-    def __eq__(self, x):
-        return (x.date == self.date and
-                x.prof == self.prof and
-                x.loc == self.loc and
-                x.sem == self.sem)
-
-
-def getStateNum(html):
-    """
-    Get the state num from Mosaic
-    This is unique to each requester
-    """
-    parsed = lxh.fromstring(html)
-    return parsed.xpath(".//input[@name=\"ICStateNum\"]")[0].value
-
-def parseSection(section):
-    cols = section.xpath(".//td")
-    assert len(cols) == 4
-    time, loc, prof, sem = [col.text_content().encode("UTF-8").strip() for col in cols]
-
-    classinfo = Section(time, loc, prof, sem)
-    return classinfo
-
-def getSectionInfo(table):
-    trs = table.xpath(".//tr")
-    for tr in trs:
-        if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]):
-            yield parseSection(tr)
-
-def parseColumns(subject, html):
-    parsed = lxh.fromstring(html)
-
-    classInfo = (list(getSectionInfo(table)) for table in
-                 islice((table for table in parsed.xpath(".//table")
-                         if table.xpath("@id") and
-                         search(r"ICField[0-9]+\$scroll", table.xpath("@id")[0])), 1, sys.maxint))
-
-    classNames = ((subject, span.text_content().strip()) for span in parsed.xpath(".//span")
-                  if span.xpath("@id") and
-                  search(r"DERIVED_CLSRCH_DESCR", span.xpath("@id")[0]))
-
-    return zip(classNames, classInfo)
-
-def getCodes(html):
-    parsed = lxh.fromstring(html)
-
-    return (code.text_content().encode("UTF-8") for code in
-            parsed.xpath("//span")
-            if code.xpath("@id") and
-            search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", code.xpath("@id")[0]))
-
-class MosReq(object):
-    def __init__(self, semester):
-        self.semester = semester
-        s = requests.Session()
-        resp = s.get(baseurl, allow_redirects=True, headers=custom_headers).content
-
-        # Let the server set some cookies before doing the searching
-        cookies = {}
-        for key, val in s.cookies.iteritems():
-            cookies[key] = val
-        self.cookies = cookies
-        self.statenum = False
-        self.codes_ = []
-
-    def getlist(self, subject):
-        sys.stderr.write("Getting " + subject + "\n")
-        first_req = requests.get(searchurl, cookies=self.cookies).content
-        # for some reason Mosaic wants us to request it twice, ??????????????????
-        self.statenum = getStateNum(first_req)
-        first_req = requests.post(searchurl,
-                                  data=payload.format(self.statenum, subject, self.semester),
-                                  cookies=self.cookies,
-                                  allow_redirects=False,
-                                  headers=custom_headers).content
-        # we make a first request to get the ICStateNum in case it thinks there are too many results
-        try:
-            self.statenum = getStateNum(first_req)
-        except IndexError:
-            pass
-        if "Your search will return over" in first_req:
-
-            return requests.post(searchurl,
-                                 data=payload2.format(self.statenum, self.semester),
-                                 cookies=self.cookies,
-                                 allow_redirects=False,
-                                 headers=custom_headers).content
-        else:
-            return first_req
-
-    def classes(self, subject):
-        return list(parseColumns(subject, self.getlist(subject)))
-
-    def getCodes(self, letter):
-        sys.stderr.write("Getting letter " + letter + "\n")
-        first_req = requests.get(searchurl, cookies=self.cookies).content
-        self.statenum = getStateNum(first_req)
-
-        self.statenum = getStateNum(requests.post(searchurl,
-                                                  data=courseCodes1.format(self.statenum, self.semester),
-                                                  cookies=self.cookies,
-                                                  headers=custom_headers).content)
-
-        return getCodes(requests.post(searchurl,
-                                      data=courseCodes2.format(self.statenum, letter, self.semester),
-                                      cookies=self.cookies,
-                                      allow_redirects=False,
-                                      headers=custom_headers).content)
-    @property
-    def codes(self):
-        if not self.codes_:
-            self.codes_ = list(chain.from_iterable(
-                map((lambda l:
-                     self.getCodes(chr(l))),
-                    xrange(65, 91))))
-        return self.codes_
-
-def request(codes, lists, semester):
-    requester = MosReq(semester)
-    while not codes.empty():
-        code = codes.get()
-        try:
-            lists.put(requester.classes(code))
-        except:
-            codes.task_done()
-            return
-        codes.task_done()
-
-
-class CourseInfo(object):
-    def __init__(self, threadcount, semester):
-        self._codes = False
-        self.threadcount = threadcount
-        self.semester = semester
-
-    @property
-    def codes(self):
-        if not self._codes:
-            req = MosReq(self.semester)
-            self._codes = req.codes
-        return self._codes
-
-    def classes(self):
-        qcodes = q.Queue()
-        for code in self.codes:
-            qcodes.put(code)
-        lists = q.Queue()
-        threads = []
-        thread = None
-        for i in xrange(self.threadcount):
-            thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester))
-            threads.append(thread)
-            thread.start()
-        qcodes.join()
-        for t in threads:
-            t.join()
-
-        sections = []
-        while not lists.empty():
-            sections.append(lists.get())
-
-        for cl in chain.from_iterable(sections):
-            new_sections = []
-            for sec in cl[1]:
-                if len(sec.day) > 1:
-                    for day in sec.day:
-                        new_sections.append(copy.deepcopy(sec))
-                        new_sections[-1]._day = day
-                else:
-                    sec._day = sec.day[0]
-                    new_sections.append(sec)
-            yield Class(cl[0][0], sub("\xa0+", "", cl[0][1]), sorted(new_sections))
-
-def getCourses(semester, threadcount=10):
-    return CourseInfo(threadcount, semester).classes()
-
-def allCourses():
-    return chain.from_iterable(
-        (getCourses(sem, threadcount=10)
-         for sem in (fall, winter, spring_summer)))
-
-#for course in allCourses():
-    #sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books))
-    #print course.sections
diff --git a/mcmaster/site.py b/mcmaster/site.py
deleted file mode 100644
index 42c07aa..0000000
--- a/mcmaster/site.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from oersearch import Search
-from classes import getCourses
-from sylla import getTextbooks
-
-mcmasterSearch = Search("McMaster")
-
-mcmasterSearch.setup(getCourses)
-
-mcmasterSearch.run()
diff --git a/mcmaster/sylla.py b/mcmaster/sylla.py
deleted file mode 100755
index 6347e70..0000000
--- a/mcmaster/sylla.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#! /usr/bin/python2
-
-from sys import argv
-from itertools import chain, islice, izip_longest, izip as zip
-from re import search, sub
-from functools import total_ordering
-from re import sub
-
-import datetime as dt
-import lxml.html as lxh
-import requests
-
-# Purpose of this module is to download and parse syllabi from various departments
-# In order to be corellated with individual courses
-
-class Price(object):
-    def __init__(self, amnt, status):
-        self.dollars = float(amnt[1:])
-        self.status = status
-
-    def __repr__(self):
-        return "$%s %s" % (repr(self.dollars), self.status)
-
-
-class Book(object):
-    def __init__(self, title, price):
-        self.title = title
-        self.price = price
-
-    def __repr__(self):
-        return '["%s", "%s"]' % (self.title, repr(self.price))
-
-
-def grouper(n, iterable, fillvalue=None):
-    "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
-    args = [iter(iterable)] * n
-    return izip_longest(fillvalue=fillvalue, *args)
-
-searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1"
-
-def normalize(word):
-    if len(word) > 1:
-        return ("%s%s" %
-                (word[0].upper(),
-                 "".join(word[1:]).lower()))
-    return word
-
-def parseAuthor(author):
-    split = author.split(" ")
-    if len(split) <= 1:
-        return author
-    lastname = split[0]
-    firstname = split[1]
-    return "%s %s" % (firstname, lastname)
-
-def normwords(phrase):
-    words = phrase.split(" ")
-    return " ".join(map(normalize, words))
-
-def books(dept, code, withPrices):
-    """
-    Snatch me up a book title or three
-    """
-    req = searchUrl % (dept, code)
-
-    html = requests.get(req).text
-
-    parsed = lxh.fromstring(html)
-
-    pricelist = prices(parsed)
-
-    for div in parsed.xpath(".//div"):
-        if (div.attrib.has_key("id") and
-            "prodDesc" in div.attrib["id"]):
-
-            textbook = div.text_content()
-            author = sub(r',', '',
-                         "".join(
-                             (div.getparent()
-                              .xpath(".//span[@class='inline']")
-                              [0].text_content()
-                              .split(":")[1:])).strip())
-            price = pricelist.pop()
-            if withPrices:
-                yield (normwords(textbook), normwords(author), repr(price))
-            else:
-                yield (normwords(textbook), normwords(author))
-
-def prices(html):
-    """
-    Get the prices from a search result page
-    """
-    ps = [
-        p.getparent().text_content().split()[0]
-        for p in html.xpath("//p/input[@type='checkbox']")
-    ]
-
-    try:
-        amts, stats = zip(*list(reversed(list(grouper(2, ps)))))
-        return map(Price, amts, stats)
-    except ValueError:
-        return []
-
-def textbookInfo(dept, code, withPrices=False):
-    """
-    Return all the textbooks for a course
-    """
-    return list(books(dept, code, withPrices))
-
-def humanities():
-    """
-    Download humanities syllabi
-    """
-    return []
-
-# Example, getting the course info for Personality Theory (PSYCH = Department, 2B03 = Course code)
-# print list(courseInfo("PSYCH", "2B03"))
diff --git a/openlibrary.py b/openlibrary.py
deleted file mode 100755
index d558c21..0000000
--- a/openlibrary.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#! /usr/bin/python2
-
-from urllib import quote
-from json import loads, dumps
-
-import requests as req
-
-#query = "https://openlibrary.org/query.json?type=/type/edition&title=%s&author=%s"
-searchurl = 'http://openlibrary.org/search.json?author=%s&title=%s'
-
-def bookUrls(title, author):
-    print title, author
-    if ":" in title:
-        title = title.split(":")[0]
-    requrl = searchurl % (quote(author), quote(title))
-    results = loads(req.get(requrl).text)
-    for result in results["docs"][0:2]:
-        if result.has_key("edition_key"):
-            yield "https://openlibrary.org/books/%s" % result["edition_key"][0]
-
-# 'http://openlibrary.org/query.json?type=/type/edition&title=The+Personality+Puzzle'
-
-#for book in bookUrls("Philosophy Of Physics", "Tim Maudlin"):
-    #print book
diff --git a/predictions.py b/predictions.py
deleted file mode 100755
index b770a0b..0000000
--- a/predictions.py
+++ /dev/null
@@ -1,153 +0,0 @@
-##! /usr/bin/python2
-from itertools import groupby, chain
-from sys import stdout
-from functools import partial
-from json import dumps
-
-def gensymer():
-    n = [0]
-    def inner():
-        result = str(n[0])
-        n[0] += 1
-        return result
-    return inner
-
-gensym = gensymer()
-
-def printTrie(graph, prev, trie, weight):
-    new_node = str(gensym())
-    graph.node(new_node, "%s" % trie.letter)
-    graph.edge(prev, new_node, label="%.2f" % weight)
-    if not trie.children:
-        return
-    for child, weight in zip(trie.children, trie.ws):
-        printTrie(graph, new_node, child, weight)
-
-
-class Trie(object):
-    def __init__(self, letter, children, ws):
-        self.letter = letter
-        self.children = children
-        self.ws = ws
-
-def probweight(suffixes):
-    weights = [float(s["value"]) for s in suffixes]
-    s = float(sum(weights))
-    ws = [w/s for w in weights]
-    return ws
-
-def buildtrie(trie, suffixes):
-    """
-    Build a trie, also known as a prefix tree, of all the possible completions
-    """
-    trie.children = []
-    for letter, suffs in suffixes:
-        ped = partition(suffs)
-        if any(map(lambda p: p[0], ped)):
-            # check if there are any children
-            trie.children.append(buildtrie(Trie(letter, [], probweight(suffs)), partition(suffs)))
-        else:
-            # we've reached the end of this word so just include the final letter
-            # [1] = there is a probability of 1 of reaching this single leaf node,
-            # since it is the only possible completion here
-            trie.children.append(Trie(letter, [], [1]))
-    return trie
-
-
-def keyf(x):
-    if not x["key"]:
-        return ""
-    return x["key"][0]
-
-def tails(words):
-    for word in words:
-        yield {
-            "key" : word["key"][1:],
-            "value" : word["value"]
-        }
-
-def partition(words):
-    """
-    Partition the words into different prefixes based on the first character
-    """
-    groups = [
-        (g[0], list(tails(g[1])))
-        for g in groupby(
-            sorted(words, key=keyf),
-            key=keyf)
-    ]
-    return groups
-
-
-def flatten_helper(letter, trie):
-    return ([letter + child.letter for
-             child in trie.children], trie.children)
-
-def flatten(trie):
-    if not trie.children:
-        return trie.letter
-    prefixes, suffixes = flatten_helper(trie.letter, trie)
-    return [flatten(Trie(p, s2.children, s2.ws)) for p, s2 in zip(prefixes, suffixes)]
-
-def flattenlist(xs):
-    locs = []
-    for x in xs:
-        if not isinstance(x, list):
-            locs.append(x)
-        else:
-            locs.extend(flattenlist(x))
-    return locs
-
-def matchc(trie, prefix):
-    c = None
-    if len(prefix) > 1:
-        c = prefix[0]
-    else:
-        c = prefix
-    return [ch for ch in trie.children if ch.letter == c]
-
-def match(trie, word):
-    if not word:
-        return []
-    m = matchc(trie, word[0])
-    if not m:
-        return []
-    else:
-        return [m[0]] + match(m[0], word[1:])
-
-def complete(trie, word):
-    m = match(trie, word)
-    if len(word) != len(m):
-        return False
-    completions = [word+x[1:] for x in flattenlist(flatten(m[-1]))]
-    if len(completions) > 10:
-        return dumps(completions[0:10])
-    return dumps(completions)
-
-def sortTrie(trie):
-    """
-    Sort the children of each node in descending order
-    of the probability that each child would be the completion
-    of whatever that word is
-    """
-    if not trie.children:
-        return
-    sortedChilds = sorted(zip(trie.children, trie.ws), key=lambda x: x[1], reverse=True)
-    trie.children = [x[0] for x in sortedChilds]
-    trie.ws = [x[1] for x in sortedChilds]
-    for child in trie.children:
-        sortTrie(child)
-
-def toTrie(words):
-    for word in words:
-        word["key"] = word["key"].lower()
-    trie = buildtrie(Trie("", [], [1]), partition(words))
-    trie.ws = [1]*len(trie.children)
-    sortTrie(trie)
-    return trie
-
-def testkey(w):
-    return {
-        "key" : w,
-        "value" : "1"
-    }
diff --git a/search.py b/search.py
deleted file mode 100755
index 777222f..0000000
--- a/search.py
+++ /dev/null
@@ -1,237 +0,0 @@
-#! /usr/bin/python2
-
-import elasticsearch
-
-from elasticsearch_dsl import FacetedSearch, Search, Q
-from elasticsearch_dsl.aggs import Terms, DateHistogram
-from sys import exit, stderr
-from json import dumps, loads
-from itertools import chain, imap
-
-from hashlib import sha1
-
-from textbookExceptions import UnIndexable
-
-from mcmaster.classes import allCourses
-
-# Generic instance of elasticsearch right now
-es = elasticsearch.Elasticsearch()
-
-def summarize(text):
-    splitted = text.split(" ")
-    if len(splitted) > 4:
-        return " ".join(splitted[0:4]) + ".."
-    return text
-
-def sectionToJSON(section):
-    return {
-        "prof" : section.prof,
-        "sem" : section.sem,
-        "day" : section.day
-    }
-
-def classToJSON(clss):
-    return {
-        "title" : clss.title,
-        "sections" : map(sectionToJSON, clss.sections),
-        "dept" : clss.dept,
-        "code" : clss.code,
-        "books" : list(clss.books) if clss.books else []
-    }
-
-
-def truncate(docid):
-    """
-    Truncate a document id to 12 digits
-    The document ID should be based on a
-    hash of unique identifiers
-    """
-    return int(str(docid)[0:12])
-
-def hashsec(course):
-    """
-    Hash a course into a usable id
-    """
-    if not course["code"]:
-        code = ""
-    else:
-        code = course["code"]
-    if not course["title"]:
-        title = ""
-    else:
-        title = course["title"]
-
-    if not course["sections"] or len(course["sections"]) < 1:
-        course["sections"][0] = ""
-
-    if not (code or title):
-        raise UnIndexable(course)
-
-    h = sha1()
-    h.update(code + title + course["sections"][0]["sem"])
-    return int(h.hexdigest(), 16)
-
-def createIndex(name):
-    """
-    This creates a new index in elasticsearch
-    An index is like a schema in a regular database
-    Create an elasticsearch index
-
-    """
-    indices = elasticsearch.client.IndicesClient(es)
-
-    print indices.create(name)
-    with open("./course.json", "r") as mapping:
-        print indices.put_mapping("course", loads(mapping.read()), name)
-
-def indexListing(course):
-    """
-    Index a specific course in the database (using the courses index)
-    example,
-      {
-       'books': [],
-       'dept': 'COLLAB',
-       'code': '2C03',
-       'sections': [
-         {
-          'prof': 'Lisa Pender',
-          'sem': '2015/09/08 - 2015/12/08',
-          'day': 'Mo'
-         },
-         {
-          'prof': 'Staff',
-          'sem': '2015/09/08 - 2015/12/08',
-          'day': 'Th'
-         }
-        ],
-       'title': 'COLLAB 2C03 - Sociology I'
-      }
-
-    """
-    courseID = hashsec(course)
-    print es.index(index="oersearch",
doc_type="course", - id=courseID, - body=course) - - # For every course we index, we also create a resource for it - # This should be an idempotent operation because we're putting it in couchdb - # And we're using the id obtained from the hash function, so it should just update the document - # no need to delete anything - #try: - #courseDept = course[0]["title"].strip().split(" ")[0].strip() - #courseCode = course[0]["title"].strip().split(" ")[1].strip() - #print "DEPARTMENT = \"%s\", COURSECODE = \"%s\"" % (courseDept, courseCode) - #print createResource(textbookInfo, course[0], courseDept, courseCode, courseID) - #except: - #print "Couldn't create the resource associated with %s" % course - -def termSearch(field): - """ - Make a term search (exact match) - """ - def t(term): - q = Q("term", - **{ - "sections."+field : term - }) - return q - return t - -def search(field): - """ - Make a match search - """ - def s(term): - q = Q("match", - **{ - field : term - }) - return q - return s - -def join(x, y): - """ - Join two queries - """ - return x & y - -def filterSections(secs): - """ - Get rid of tutorial sections - because they almost always have "Staff" as the instructor - This is just a heuristic of course - """ - filtered = [s for s in secs.sections if "Staff" not in s.prof] - if len(filtered) > 0: - return filtered - return False - -def searchTerms(terms): - """ - Run a search for courses - """ - - # A list of all the queries we want to run - qs = [searchers[field](term) for - field, term in - terms.iteritems() if - term and searchers.has_key(field)] - - if not qs: - # No queries = no results - return dumps([]) - - # Reduce joins all of the queries into one query - # It will search for the conjunction of all of them - # So that means it cares about each query equally - q = reduce(join, qs) - - s = (Search(using=es, index="oersearch") - .query(q))[0:100] # only return up to 100 results for now - - results = s.execute() - - filtered = [ - (secs, filterSections(secs)[0].to_dict()) # get rid of tutorials - for secs in results - if filterSections(secs) - ] - results = [] - for obj, secs in filtered: - # Add the truncated course id - # This is used to point to the resource page for that course - secs["id"] = truncate(obj.meta.id) - secs["title"] = obj.title - if obj["dept"] not in secs["title"]: - secs["dept"] = obj.dept - if obj.books: - secs["books"] = [ - { - "booktitle" : summarize(book[0].encode("ASCII")), - "bookauthor" : book[1].encode("ASCII"), - "bookprice" : book[2].encode("ASCII") - } - for book in obj.books - ] - else: - secs["books"] = "" - results.append(secs) - - return dumps(results) - - -searchers = { - "title" : search("title"), - "loc" : search("loc"), - "time" : search("time"), - "prof" : search("prof"), - "day" : search("day"), - } - -#print searchTerms({"title" : "PHILOS"}) - -#for c in imap(classToJSON, allCourses()): - #try: - #print indexListing(c) - #except UnIndexable as e: diff --git a/src/archive.py b/src/archive.py deleted file mode 100755 index 73fcde7..0000000 --- a/src/archive.py +++ /dev/null @@ -1,34 +0,0 @@ -#! 
diff --git a/src/database.py b/src/database.py
deleted file mode 100755
index a19272c..0000000
--- a/src/database.py
+++ /dev/null
@@ -1,62 +0,0 @@
[file body omitted: byte-for-byte identical to database.py, deleted above]
diff --git a/src/goasearch.py b/src/goasearch.py
deleted file mode 100755
index 3dca7eb..0000000
--- a/src/goasearch.py
+++ /dev/null
@@ -1,14 +0,0 @@
[file body omitted: byte-for-byte identical to goasearch.py, deleted above]
diff --git a/src/openlibrary.py b/src/openlibrary.py
deleted file mode 100755
index d558c21..0000000
--- a/src/openlibrary.py
+++ /dev/null
@@ -1,24 +0,0 @@
[file body omitted: byte-for-byte identical to openlibrary.py, deleted above]
diff --git a/src/predictions.py b/src/predictions.py
deleted file mode 100755
index b770a0b..0000000
--- a/src/predictions.py
+++ /dev/null
@@ -1,153 +0,0 @@
[file body omitted: byte-for-byte identical to predictions.py, deleted above]
diff --git a/src/search.py b/src/search.py
deleted file mode 100755
index 777222f..0000000
--- a/src/search.py
+++ /dev/null
@@ -1,237 +0,0 @@
[file body omitted: byte-for-byte identical to search.py, deleted above]
diff --git a/src/textbookExceptions.py b/src/textbookExceptions.py
deleted file mode 100644
index 999ff3e..0000000
--- a/src/textbookExceptions.py
+++ /dev/null
@@ -1,24 +0,0 @@
[file body omitted: byte-for-byte identical to textbookExceptions.py, deleted below]
diff --git a/src/visualize.py b/src/visualize.py
deleted file mode 100755
index b46a67d..0000000
--- a/src/visualize.py
+++ /dev/null
@@ -1,97 +0,0 @@
[file body omitted: byte-for-byte identical to visualize.py, deleted below]
diff --git a/src/website.py b/src/website.py
deleted file mode 100755
index 1fc9374..0000000
--- a/src/website.py
+++ /dev/null
@@ -1,148 +0,0 @@
[file body omitted: byte-for-byte identical to website.py, deleted below]
diff --git a/textbookExceptions.py b/textbookExceptions.py
deleted file mode 100644
index 999ff3e..0000000
--- a/textbookExceptions.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#! /usr/bin/python2
-
-class UnIndexable(Exception):
-    def __init__(self, course):
-        self.course = course
-
-    @property
-    def reason(self):
-        course = self.course
-        if not course["code"] and not course["title"]:
-            message = "there was no course code and no title defined"
-        if not course["code"]:
-            message = "there was no course code defined"
-        if not course["title"]:
-            message = "there was no course title defined"
-        if not course["sections"]:
-            message = "there were no sections defined"
-        return """
-        There was a problem with indexing this course.
-        %s
-        There could be several reasons why, my best guess is that %s
-        We need at least the course code, title, and one or more sections to index
-
-        """ % (course, message)
diff --git a/visualize.py b/visualize.py
deleted file mode 100755
index b46a67d..0000000
--- a/visualize.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#! /usr/bin/python2
-
-from json import loads, load
-from re import sub, split
-from itertools import groupby
-from numpy import mean
-from operator import attrgetter
-
-import pygal
-import csv
-
-class Textbook(object):
-    def __init__(self, dept, code, title, author, price):
-        self.dept = dept
-        self.code = code
-        self.title = title
-        self.author = author
-        self.price = float(price)
-
-    def __repr__(self):
-        return "Dept = %s, Code = %s, %s by %s, costs $%s" % (self.dept,
-                                                              self.code,
-                                                              self.title,
-                                                              self.author,
-                                                              self.price)
-
-
-def courses():
-    with open("./books.csv", "r") as books:
-        booksreader = csv.reader(books)
-        for row in booksreader:
-            yield row
-
-
-def groupDept(courselist):
-    sortedCourses = sorted(courselist, key=attrgetter("dept"))
-    for course in groupby(sortedCourses, attrgetter("dept")):
-        yield course[0], list(course[1])
-
-def meanPrice(books):
-    return mean([book.price for book in books])
-
-# Questions,
-# mean cost per department
-# mean cost per faculty
-# mean difference between book store copies and other copies per dept and faculty
-# number of overlapping books per faculty, do eng students benefit from that?
-
-# maybe a survey for students to see how often they buy books from other sources
-# correlate with how much they could be saving?
-
-facultyDesc = {
-    "hum" : "Humanities",
-    "bus" : "Business",
-    "hlth" : "Health Science",
-    "eng" : "Engineering",
-    "sci" : "Science",
-    "socsci" : "Social Sciences",
-    "artsci" : "Arts & Sciences",
-    "meld" : "MELD"
-}
-
-faculties = load(open("./faculties.json"))
-
-def categorize(dept):
-    # faculties
-    return facultyDesc.get(faculties.get(dept, False), False)
-
-def byFaculty():
-    for dept, books in groupDept(courses()):
-        yield (categorize(dept), dept, books)
-
-def meanFacultyCosts():
-    byfac = list(byFaculty())
-    graph = pygal.Bar()
-    graph.title = "Mean textbook cost by faculty"
-    sortedFacs = sorted(byfac, key=lambda x: x[0])
-    for fac in groupby(sortedFacs, lambda x: x[0]):
-        graph.add(fac[0], meanPrice(list(fac[1])[0][2]))
-    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
-    return graph.render(transpose=True)
-
-def meanCosts():
-    cs = groupDept(courses())
-    graph = pygal.Bar()
-    graph.title = "Mean textbook cost by department"
-    for c in cs:
-        dept, books = c
-        graph.add(dept, meanPrice(books))
-    #graph.render_to_file("./test_graph.svg")
-    graph.value_formatter = lambda x: '$%.2f' % x if x is not None else "None"
-    return graph.render_table(style=True, transpose=True)
-
-for x in courses():
-    print x
-#print meanCosts()
-#print meanFacultyCosts()
diff --git a/website.py b/website.py
deleted file mode 100755
index 1fc9374..0000000
--- a/website.py
+++ /dev/null
@@ -1,148 +0,0 @@
-#! /usr/bin/python2
-from functools import partial
-from couchdb import ResourceConflict
-
-from flask import Flask, render_template, flash, request, send_from_directory
-from flask_bootstrap import Bootstrap
-from flask_appconfig import AppConfig
-from urllib import unquote
-from search import searchTerms
-
-from openlibrary import bookUrls
-
-from archive import searchIA
-from urllib import quote, unquote
-from json import dumps, loads
-
-from werkzeug.contrib.cache import MemcachedCache
-cache = MemcachedCache(['127.0.0.1:11211'])
-
-import os
-
-def predict(fieldtype, term):
-    print fieldtype
-    print term
-    if not term:
-        return "[]"
-    else:
-        try:
-            cs = completers[fieldtype](term.lower())
-        except KeyError:
-            return "[]"
-        if cs:
-            return cs
-        return "[]"
-
-def predictor(fieldtype):
-    def inner(request):
-        params = dict(request.args.items())
-        return predict(fieldtype, params["term"])
-    return inner
-
-def cacheit(key, thunk):
-    """
-    Tries to find a cached version of ``key''
-    If there is no cached version then it will
-    evaluate thunk (which must be a generator)
-    and cache that, then return the result
-    """
-    cached = cache.get(quote(key))
-    if cached is None:
-        result = list(thunk())
-        cache.set(quote(key), result)
-        return result
-    return cached
-
-def ClassSearch(configfile=None):
-    defaults = {"Day", "Building", "Exact Location", "Department"}
-    app = Flask(__name__)
-    AppConfig(app, configfile)  # Flask-Appconfig is not necessary, but
-                                # highly recommend =)
-                                # https://github.com/mbr/flask-appconfig
-    Bootstrap(app)
-
-    app.config["scripts"] = "/home/wes/MGOAL/scripts"
-    app.config["styles"] = "/home/wes/MGOAL/styles"
-
-    @app.route('/favicon.ico')
-    def favicon():
-        return send_from_directory("/srv/http/goal/favicon.ico",
-                                   'favicon.ico', mimetype='image/vnd.microsoft.icon')
-
-
-    @app.route("/buildpred", methods=("GET", "POST"))
-    def buildpred():
-        return predictbuild(request)
-
-    @app.route("/locpred", methods=("GET", "POST"))
-    def locpred():
-        return predictloc(request)
-
-    @app.route("/daypred", methods=("GET", "POST"))
-    def daypred():
-        return predictday(request)
-
-    @app.route("/deptpred", methods=("GET", "POST"))
-    def deptpred():
-        return predictdept(request)
-
-    @app.route("/titlepred", methods=("GET", "POST"))
-    def titlepred():
-        return predicttitle(request)
-
-    @app.route("/", methods=("GET", "POST"))
-    def index():
-        return render_template("search.html")
-
-    @app.route("/fc", methods=("GET", "POST"))
-    def fc():
-        """ Filter Courses """
-        print "trying to get courses"
-        params = dict(request.args.items())
-        for key, val in params.iteritems():
-            if val in defaults:
-                del params[key]
-        results = searchTerms(params)
-        return results
-
-    @app.route("/resources", methods=("GET", "POST"))
-    def resources():
-        """ Get Resources """
-        notRequired = False
-        params = loads(dict(request.args.items())["data"])
-        print params
-        author = params["author"]
-        title = params["title"]
-
-        if ("No Textbooks" in title or
-            "No Adoption" in title):
-            return dumps("false")
-
-        # Cache the result of the open library search
-        openlib = cacheit("openlib"+title+author, lambda : bookUrls(title, author))
-        print openlib
-
-        # cache the result of an internet archive search
-        iarchive = cacheit("iarchive"+title+author, lambda : searchIA(title, author))
-        print iarchive
-
-        if not (any(openlib) or any(iarchive)):
-            # We literally could not find ANYTHING
-            return dumps("false")
-
-        return dumps({
-            "iarchive" : iarchive,
-            "openlib" : openlib
-        })
-
-    @app.route("/scripts/<path:filename>")
-    def send_script(filename):
-        return send_from_directory(app.config["scripts"], filename)
-
-    @app.route("/styles/<path:filename>")
-    def send_style(filename):
-        return send_from_directory(app.config["styles"], filename)
-    return app
-
-if __name__ == "__main__":
-    ClassSearch().run(port=8001, debug=True)