From 83eec084680584c0ecf50cb316bc3cbe1075c3c8 Mon Sep 17 00:00:00 2001 From: wes Date: Mon, 20 Jun 2016 04:31:42 -0400 Subject: [PATCH] remove old dir --- mcmaster/__init__.py | 0 mcmaster/classes.py | 349 ------------------------------------------- mcmaster/site.py | 9 -- mcmaster/sylla.py | 117 --------------- 4 files changed, 475 deletions(-) delete mode 100644 mcmaster/__init__.py delete mode 100755 mcmaster/classes.py delete mode 100644 mcmaster/site.py delete mode 100755 mcmaster/sylla.py diff --git a/mcmaster/__init__.py b/mcmaster/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/mcmaster/classes.py b/mcmaster/classes.py deleted file mode 100755 index 54687df..0000000 --- a/mcmaster/classes.py +++ /dev/null @@ -1,349 +0,0 @@ -#! /usr/bin/python2 - -from sys import argv -from itertools import chain, islice, izip as zip -from re import search, sub -from functools import total_ordering - -from sylla import textbookInfo -from collections import MutableMapping - -import datetime as dt -import lxml.html as lxh -import requests -import sys -import copy - -fall = "2159" -spring_summer = "2165" -winter = "2161" - -# threading stuff -import Queue as q -import threading as thd - -baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL" - -searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL" - -custom_headers = { - "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0", - "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8", - } - -courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}" - -courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}" - -payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}" - -payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}" - - -year = dt.date.today().year -month = dt.date.today().month - -days = { - "Mo" : 0, - "Tu" : 1, - "We" : 2, - "Th" : 3, - "Fr" : 4, - "Sa" : 5, - "Su" : 6 - } - -day_descs = { - "Mo" : "Monday Mon Mo", - "Tu" : "Tuesday Tues Tu Tue", - "We" : "Wednesday Wed We", - "Th" : "Thursday Th Thurs", - "Fr" : "Friday Fr Fri", - "Sa" : "Saturday Sat Sa", - "Su" : "Sunday Su Sun", - "T" : "TBA" - } - -def timeparse(time): - """ - Parse the time into numbers - """ - if len(time) == 7: - hour = int(time[0:2]) - minutes = int(time[3:5]) - half = time[5:7] - else: - hour = int(time[0]) - minutes = int(time[2:4]) - half = time[4:6] - if half == "PM": - if hour < 12: - hour = hour + 12 - - return (str(hour), str(minutes), half) - -class Class(object): - def __init__(self, dept, title, sections): - self.title = title.encode("UTF-8") - self.sections = sections - self.dept = dept - - def __repr__(self): - return repr((self.title, self.sections)) - - def __iter__(self): - return iter((self.title, sec) for sec in self.sections) - - def hasCode(self): - splitted = self.title.strip().split(" ") - return ((len(splitted) >= 2) and - (splitted[0].upper() == splitted[0]) and - (splitted[1].upper() == splitted[1])) - - @property - def code(self): - if self.hasCode(): - return self.title.strip().split(" ")[1].strip() - return False - - @property - def books(self): - if self.dept and self.code: - return textbookInfo(self.dept, self.code, withPrices=True) - return False - -@total_ordering -class Section(dict): - def __init__(self, time, loc, prof, sem): - self.time = time.encode("UTF-8") - self.loc = loc.encode("UTF-8") - self.prof = prof.encode("UTF-8") - self.sem = sem.encode("UTF-8") - self._date = False - self._day = False - - @property - def date(self): - if self.time != "TBA": - day, start, _, end = self.time.split() - - if self._day: - assert len(self._day) == 2 - day = self._day - else: - day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)] - - self._date = (day, timeparse(start), timeparse(end)) - - return self._date - - return self.time - - @property - def day(self): - return self.date[0] - - @property - def start(self): - return self.date[1][0] + self.date[1][1] - - def __repr__(self): - return (""" - Time = %s, Location = %s, Instructor = %s, Semester Running = %s - """ % (self.date, self.loc, self.prof, self.sem)) - def __gt__(self, x): - if isinstance(self.day, list): - raise NotImplementedError - - if (self.date == "TBA" or - x.date == "TBA"): - return False - - return ((days[self.day] > days[x.day]) or - ((self.day == x.day) and - (self.start > x.start))) - - def __eq__(self, x): - return (x.date == self.date and - x.prof == self.prof and - x.loc == self.loc and - x.sem == self.sem) - - -def getStateNum(html): - """ - Get the state num from Mosaic - This is unique to each requester - """ - parsed = lxh.fromstring(html) - return parsed.xpath(".//input[@name=\"ICStateNum\"]")[0].value - -def parseSection(section): - cols = section.xpath(".//td") - assert len(cols) == 4 - time, loc, prof, sem = [col.text_content().encode("UTF-8").strip() for col in cols] - - classinfo = Section(time, loc, prof, sem) - return classinfo - -def getSectionInfo(table): - trs = table.xpath(".//tr") - for tr in trs: - if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]): - yield parseSection(tr) - -def parseColumns(subject, html): - parsed = lxh.fromstring(html) - - classInfo = (list(getSectionInfo(table)) for table in - islice((table for table in parsed.xpath(".//table") - if table.xpath("@id") and - search(r"ICField[0-9]+\$scroll", table.xpath("@id")[0])), 1, sys.maxint)) - - classNames = ((subject, span.text_content().strip()) for span in parsed.xpath(".//span") - if span.xpath("@id") and - search(r"DERIVED_CLSRCH_DESCR", span.xpath("@id")[0])) - - return zip(classNames, classInfo) - -def getCodes(html): - parsed = lxh.fromstring(html) - - return (code.text_content().encode("UTF-8") for code in - parsed.xpath("//span") - if code.xpath("@id") and - search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", code.xpath("@id")[0])) - -class MosReq(object): - def __init__(self, semester): - self.semester = semester - s = requests.Session() - resp = s.get(baseurl, allow_redirects=True, headers=custom_headers).content - - # Let the server set some cookies before doing the searching - cookies = {} - for key, val in s.cookies.iteritems(): - cookies[key] = val - self.cookies = cookies - self.statenum = False - self.codes_ = [] - - def getlist(self, subject): - sys.stderr.write("Getting " + subject + "\n") - first_req = requests.get(searchurl, cookies=self.cookies).content - # for some reason Mosaic wants us to request it twice, ?????????????????? - self.statenum = getStateNum(first_req) - first_req = requests.post(searchurl, - data=payload.format(self.statenum, subject, self.semester), - cookies=self.cookies, - allow_redirects=False, - headers=custom_headers).content - # we make a first request to get the ICStateNum in case it thinks there are too many results - try: - self.statenum = getStateNum(first_req) - except IndexError: - pass - if "Your search will return over" in first_req: - - return requests.post(searchurl, - data=payload2.format(self.statenum, self.semester), - cookies=self.cookies, - allow_redirects=False, - headers=custom_headers).content - else: - return first_req - - def classes(self, subject): - return list(parseColumns(subject, self.getlist(subject))) - - def getCodes(self, letter): - sys.stderr.write("Getting letter " + letter + "\n") - first_req = requests.get(searchurl, cookies=self.cookies).content - self.statenum = getStateNum(first_req) - - self.statenum = getStateNum(requests.post(searchurl, - data=courseCodes1.format(self.statenum, self.semester), - cookies=self.cookies, - headers=custom_headers).content) - - return getCodes(requests.post(searchurl, - data=courseCodes2.format(self.statenum, letter, self.semester), - cookies=self.cookies, - allow_redirects=False, - headers=custom_headers).content) - @property - def codes(self): - if not self.codes_: - self.codes_ = list(chain.from_iterable( - map((lambda l: - self.getCodes(chr(l))), - xrange(65, 91)))) - return self.codes_ - -def request(codes, lists, semester): - requester = MosReq(semester) - while not codes.empty(): - code = codes.get() - try: - lists.put(requester.classes(code)) - except: - codes.task_done() - return - codes.task_done() - - -class CourseInfo(object): - def __init__(self, threadcount, semester): - self._codes = False - self.threadcount = threadcount - self.semester = semester - - @property - def codes(self): - if not self._codes: - req = MosReq(self.semester) - self._codes = req.codes - return self._codes - - def classes(self): - qcodes = q.Queue() - for code in self.codes: - qcodes.put(code) - lists = q.Queue() - threads = [] - thread = None - for i in xrange(self.threadcount): - thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester)) - threads.append(thread) - thread.start() - qcodes.join() - for t in threads: - t.join() - - sections = [] - while not lists.empty(): - sections.append(lists.get()) - - for cl in chain.from_iterable(sections): - new_sections = [] - for sec in cl[1]: - if len(sec.day) > 1: - for day in sec.day: - new_sections.append(copy.deepcopy(sec)) - new_sections[-1]._day = day - else: - sec._day = sec.day[0] - new_sections.append(sec) - yield Class(cl[0][0], sub("\xa0+", "", cl[0][1]), sorted(new_sections)) - -def getCourses(semester, threadcount=10): - return CourseInfo(threadcount, semester).classes() - -def allCourses(): - return chain.from_iterable( - (getCourses(sem, threadcount=10) - for sem in (fall, winter, spring_summer))) - -#for course in allCourses(): - #sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books)) - #print course.sections diff --git a/mcmaster/site.py b/mcmaster/site.py deleted file mode 100644 index 42c07aa..0000000 --- a/mcmaster/site.py +++ /dev/null @@ -1,9 +0,0 @@ -from oersearch import Search -from classes import getCourses -from sylla import getTextbooks - -mcmasterSearch = Search("McMaster") - -mcmasterSearch.setup(getCourses) - -mcmasterSearch.run() diff --git a/mcmaster/sylla.py b/mcmaster/sylla.py deleted file mode 100755 index 6347e70..0000000 --- a/mcmaster/sylla.py +++ /dev/null @@ -1,117 +0,0 @@ -#! /usr/bin/python2 - -from sys import argv -from itertools import chain, islice, izip_longest, izip as zip -from re import search, sub -from functools import total_ordering -from re import sub - -import datetime as dt -import lxml.html as lxh -import requests - -# Purpose of this module is to download and parse syllabi from various departments -# In order to be corellated with individual courses - -class Price(object): - def __init__(self, amnt, status): - self.dollars = float(amnt[1:]) - self.status = status - - def __repr__(self): - return "$%s %s" % (repr(self.dollars), self.status) - - -class Book(object): - def __init__(self, title, price): - self.title = title - self.price = price - - def __repr__(self): - return '["%s", "%s"]' % (self.title, repr(self.price)) - - -def grouper(n, iterable, fillvalue=None): - "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" - args = [iter(iterable)] * n - return izip_longest(fillvalue=fillvalue, *args) - -searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1" - -def normalize(word): - if len(word) > 1: - return ("%s%s" % - (word[0].upper(), - "".join(word[1:]).lower())) - return word - -def parseAuthor(author): - split = author.split(" ") - if len(split) <= 1: - return author - lastname = split[0] - firstname = split[1] - return "%s %s" % (firstname, lastname) - -def normwords(phrase): - words = phrase.split(" ") - return " ".join(map(normalize, words)) - -def books(dept, code, withPrices): - """ - Snatch me up a book title or three - """ - req = searchUrl % (dept, code) - - html = requests.get(req).text - - parsed = lxh.fromstring(html) - - pricelist = prices(parsed) - - for div in parsed.xpath(".//div"): - if (div.attrib.has_key("id") and - "prodDesc" in div.attrib["id"]): - - textbook = div.text_content() - author = sub(r',', '', - "".join( - (div.getparent() - .xpath(".//span[@class='inline']") - [0].text_content() - .split(":")[1:])).strip()) - price = pricelist.pop() - if withPrices: - yield (normwords(textbook), normwords(author), repr(price)) - else: - yield (normwords(textbook), normwords(author)) - -def prices(html): - """ - Get the prices from a search result page - """ - ps = [ - p.getparent().text_content().split()[0] - for p in html.xpath("//p/input[@type='checkbox']") - ] - - try: - amts, stats = zip(*list(reversed(list(grouper(2, ps))))) - return map(Price, amts, stats) - except ValueError: - return [] - -def textbookInfo(dept, code, withPrices=False): - """ - Return all the textbooks for a course - """ - return list(books(dept, code, withPrices)) - -def humanities(): - """ - Download humanities syllabi - """ - return [] - -# Example, getting the course info for Personality Theory (PSYCH = Department, 2B03 = Course code) -# print list(courseInfo("PSYCH", "2B03"))