From bca93d226c1f48ec88fc8fff6b3582a95a21d439 Mon Sep 17 00:00:00 2001 From: wes Date: Mon, 20 Jun 2016 04:31:01 -0400 Subject: [PATCH] move mcmaster directory --- src/__init__.py | 0 src/mcmaster/__init__.py | 0 src/mcmaster/classes.py | 349 +++++++++++++++++++++++++++++++++++++++ src/mcmaster/site.py | 9 + src/mcmaster/sylla.py | 117 +++++++++++++ 5 files changed, 475 insertions(+) create mode 100755 src/__init__.py create mode 100644 src/mcmaster/__init__.py create mode 100755 src/mcmaster/classes.py create mode 100644 src/mcmaster/site.py create mode 100755 src/mcmaster/sylla.py diff --git a/src/__init__.py b/src/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/src/mcmaster/__init__.py b/src/mcmaster/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/mcmaster/classes.py b/src/mcmaster/classes.py new file mode 100755 index 0000000..54687df --- /dev/null +++ b/src/mcmaster/classes.py @@ -0,0 +1,349 @@ +#! /usr/bin/python2 + +from sys import argv +from itertools import chain, islice, izip as zip +from re import search, sub +from functools import total_ordering + +from sylla import textbookInfo +from collections import MutableMapping + +import datetime as dt +import lxml.html as lxh +import requests +import sys +import copy + +fall = "2159" +spring_summer = "2165" +winter = "2161" + +# threading stuff +import Queue as q +import threading as thd + +baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL" + +searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL" + +custom_headers = { + "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0", + "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8", + } + +courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}" + +courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}" + +payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}" + +payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}" + + +year = dt.date.today().year +month = dt.date.today().month + +days = { + "Mo" : 0, + "Tu" : 1, + "We" : 2, + "Th" : 3, + "Fr" : 4, + "Sa" : 5, + "Su" : 6 + } + +day_descs = { + "Mo" : "Monday Mon Mo", + "Tu" : "Tuesday Tues Tu Tue", + "We" : "Wednesday Wed We", + "Th" : "Thursday Th Thurs", + "Fr" : "Friday Fr Fri", + "Sa" : "Saturday Sat Sa", + "Su" : "Sunday Su Sun", + "T" : "TBA" + } + +def timeparse(time): + """ + Parse the time into numbers + """ + if len(time) == 7: + hour = int(time[0:2]) + minutes = int(time[3:5]) + half = time[5:7] + else: + hour = int(time[0]) + minutes = int(time[2:4]) + half = time[4:6] + if half == "PM": + if hour < 12: + hour = hour + 12 + + return (str(hour), str(minutes), half) + +class Class(object): + def __init__(self, dept, title, sections): + self.title = title.encode("UTF-8") + self.sections = sections + self.dept = dept + + def __repr__(self): + return repr((self.title, self.sections)) + + def __iter__(self): + return iter((self.title, sec) for sec in self.sections) + + def hasCode(self): + splitted = self.title.strip().split(" ") + return ((len(splitted) >= 2) and + (splitted[0].upper() == splitted[0]) and + (splitted[1].upper() == splitted[1])) + + @property + def code(self): + if self.hasCode(): + return self.title.strip().split(" ")[1].strip() + return False + + @property + def books(self): + if self.dept and self.code: + return textbookInfo(self.dept, self.code, withPrices=True) + return False + +@total_ordering +class Section(dict): + def __init__(self, time, loc, prof, sem): + self.time = time.encode("UTF-8") + self.loc = loc.encode("UTF-8") + self.prof = prof.encode("UTF-8") + self.sem = sem.encode("UTF-8") + self._date = False + self._day = False + + @property + def date(self): + if self.time != "TBA": + day, start, _, end = self.time.split() + + if self._day: + assert len(self._day) == 2 + day = self._day + else: + day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)] + + self._date = (day, timeparse(start), timeparse(end)) + + return self._date + + return self.time + + @property + def day(self): + return self.date[0] + + @property + def start(self): + return self.date[1][0] + self.date[1][1] + + def __repr__(self): + return (""" + Time = %s, Location = %s, Instructor = %s, Semester Running = %s + """ % (self.date, self.loc, self.prof, self.sem)) + def __gt__(self, x): + if isinstance(self.day, list): + raise NotImplementedError + + if (self.date == "TBA" or + x.date == "TBA"): + return False + + return ((days[self.day] > days[x.day]) or + ((self.day == x.day) and + (self.start > x.start))) + + def __eq__(self, x): + return (x.date == self.date and + x.prof == self.prof and + x.loc == self.loc and + x.sem == self.sem) + + +def getStateNum(html): + """ + Get the state num from Mosaic + This is unique to each requester + """ + parsed = lxh.fromstring(html) + return parsed.xpath(".//input[@name=\"ICStateNum\"]")[0].value + +def parseSection(section): + cols = section.xpath(".//td") + assert len(cols) == 4 + time, loc, prof, sem = [col.text_content().encode("UTF-8").strip() for col in cols] + + classinfo = Section(time, loc, prof, sem) + return classinfo + +def getSectionInfo(table): + trs = table.xpath(".//tr") + for tr in trs: + if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]): + yield parseSection(tr) + +def parseColumns(subject, html): + parsed = lxh.fromstring(html) + + classInfo = (list(getSectionInfo(table)) for table in + islice((table for table in parsed.xpath(".//table") + if table.xpath("@id") and + search(r"ICField[0-9]+\$scroll", table.xpath("@id")[0])), 1, sys.maxint)) + + classNames = ((subject, span.text_content().strip()) for span in parsed.xpath(".//span") + if span.xpath("@id") and + search(r"DERIVED_CLSRCH_DESCR", span.xpath("@id")[0])) + + return zip(classNames, classInfo) + +def getCodes(html): + parsed = lxh.fromstring(html) + + return (code.text_content().encode("UTF-8") for code in + parsed.xpath("//span") + if code.xpath("@id") and + search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", code.xpath("@id")[0])) + +class MosReq(object): + def __init__(self, semester): + self.semester = semester + s = requests.Session() + resp = s.get(baseurl, allow_redirects=True, headers=custom_headers).content + + # Let the server set some cookies before doing the searching + cookies = {} + for key, val in s.cookies.iteritems(): + cookies[key] = val + self.cookies = cookies + self.statenum = False + self.codes_ = [] + + def getlist(self, subject): + sys.stderr.write("Getting " + subject + "\n") + first_req = requests.get(searchurl, cookies=self.cookies).content + # for some reason Mosaic wants us to request it twice, ?????????????????? + self.statenum = getStateNum(first_req) + first_req = requests.post(searchurl, + data=payload.format(self.statenum, subject, self.semester), + cookies=self.cookies, + allow_redirects=False, + headers=custom_headers).content + # we make a first request to get the ICStateNum in case it thinks there are too many results + try: + self.statenum = getStateNum(first_req) + except IndexError: + pass + if "Your search will return over" in first_req: + + return requests.post(searchurl, + data=payload2.format(self.statenum, self.semester), + cookies=self.cookies, + allow_redirects=False, + headers=custom_headers).content + else: + return first_req + + def classes(self, subject): + return list(parseColumns(subject, self.getlist(subject))) + + def getCodes(self, letter): + sys.stderr.write("Getting letter " + letter + "\n") + first_req = requests.get(searchurl, cookies=self.cookies).content + self.statenum = getStateNum(first_req) + + self.statenum = getStateNum(requests.post(searchurl, + data=courseCodes1.format(self.statenum, self.semester), + cookies=self.cookies, + headers=custom_headers).content) + + return getCodes(requests.post(searchurl, + data=courseCodes2.format(self.statenum, letter, self.semester), + cookies=self.cookies, + allow_redirects=False, + headers=custom_headers).content) + @property + def codes(self): + if not self.codes_: + self.codes_ = list(chain.from_iterable( + map((lambda l: + self.getCodes(chr(l))), + xrange(65, 91)))) + return self.codes_ + +def request(codes, lists, semester): + requester = MosReq(semester) + while not codes.empty(): + code = codes.get() + try: + lists.put(requester.classes(code)) + except: + codes.task_done() + return + codes.task_done() + + +class CourseInfo(object): + def __init__(self, threadcount, semester): + self._codes = False + self.threadcount = threadcount + self.semester = semester + + @property + def codes(self): + if not self._codes: + req = MosReq(self.semester) + self._codes = req.codes + return self._codes + + def classes(self): + qcodes = q.Queue() + for code in self.codes: + qcodes.put(code) + lists = q.Queue() + threads = [] + thread = None + for i in xrange(self.threadcount): + thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester)) + threads.append(thread) + thread.start() + qcodes.join() + for t in threads: + t.join() + + sections = [] + while not lists.empty(): + sections.append(lists.get()) + + for cl in chain.from_iterable(sections): + new_sections = [] + for sec in cl[1]: + if len(sec.day) > 1: + for day in sec.day: + new_sections.append(copy.deepcopy(sec)) + new_sections[-1]._day = day + else: + sec._day = sec.day[0] + new_sections.append(sec) + yield Class(cl[0][0], sub("\xa0+", "", cl[0][1]), sorted(new_sections)) + +def getCourses(semester, threadcount=10): + return CourseInfo(threadcount, semester).classes() + +def allCourses(): + return chain.from_iterable( + (getCourses(sem, threadcount=10) + for sem in (fall, winter, spring_summer))) + +#for course in allCourses(): + #sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books)) + #print course.sections diff --git a/src/mcmaster/site.py b/src/mcmaster/site.py new file mode 100644 index 0000000..42c07aa --- /dev/null +++ b/src/mcmaster/site.py @@ -0,0 +1,9 @@ +from oersearch import Search +from classes import getCourses +from sylla import getTextbooks + +mcmasterSearch = Search("McMaster") + +mcmasterSearch.setup(getCourses) + +mcmasterSearch.run() diff --git a/src/mcmaster/sylla.py b/src/mcmaster/sylla.py new file mode 100755 index 0000000..6347e70 --- /dev/null +++ b/src/mcmaster/sylla.py @@ -0,0 +1,117 @@ +#! /usr/bin/python2 + +from sys import argv +from itertools import chain, islice, izip_longest, izip as zip +from re import search, sub +from functools import total_ordering +from re import sub + +import datetime as dt +import lxml.html as lxh +import requests + +# Purpose of this module is to download and parse syllabi from various departments +# In order to be corellated with individual courses + +class Price(object): + def __init__(self, amnt, status): + self.dollars = float(amnt[1:]) + self.status = status + + def __repr__(self): + return "$%s %s" % (repr(self.dollars), self.status) + + +class Book(object): + def __init__(self, title, price): + self.title = title + self.price = price + + def __repr__(self): + return '["%s", "%s"]' % (self.title, repr(self.price)) + + +def grouper(n, iterable, fillvalue=None): + "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" + args = [iter(iterable)] * n + return izip_longest(fillvalue=fillvalue, *args) + +searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1" + +def normalize(word): + if len(word) > 1: + return ("%s%s" % + (word[0].upper(), + "".join(word[1:]).lower())) + return word + +def parseAuthor(author): + split = author.split(" ") + if len(split) <= 1: + return author + lastname = split[0] + firstname = split[1] + return "%s %s" % (firstname, lastname) + +def normwords(phrase): + words = phrase.split(" ") + return " ".join(map(normalize, words)) + +def books(dept, code, withPrices): + """ + Snatch me up a book title or three + """ + req = searchUrl % (dept, code) + + html = requests.get(req).text + + parsed = lxh.fromstring(html) + + pricelist = prices(parsed) + + for div in parsed.xpath(".//div"): + if (div.attrib.has_key("id") and + "prodDesc" in div.attrib["id"]): + + textbook = div.text_content() + author = sub(r',', '', + "".join( + (div.getparent() + .xpath(".//span[@class='inline']") + [0].text_content() + .split(":")[1:])).strip()) + price = pricelist.pop() + if withPrices: + yield (normwords(textbook), normwords(author), repr(price)) + else: + yield (normwords(textbook), normwords(author)) + +def prices(html): + """ + Get the prices from a search result page + """ + ps = [ + p.getparent().text_content().split()[0] + for p in html.xpath("//p/input[@type='checkbox']") + ] + + try: + amts, stats = zip(*list(reversed(list(grouper(2, ps))))) + return map(Price, amts, stats) + except ValueError: + return [] + +def textbookInfo(dept, code, withPrices=False): + """ + Return all the textbooks for a course + """ + return list(books(dept, code, withPrices)) + +def humanities(): + """ + Download humanities syllabi + """ + return [] + +# Example, getting the course info for Personality Theory (PSYCH = Department, 2B03 = Course code) +# print list(courseInfo("PSYCH", "2B03"))