5 changed files with 475 additions and 0 deletions
@ -0,0 +1,349 @@ |
|||||
|
#! /usr/bin/python2 |
||||
|
|
||||
|
from sys import argv |
||||
|
from itertools import chain, islice, izip as zip |
||||
|
from re import search, sub |
||||
|
from functools import total_ordering |
||||
|
|
||||
|
from sylla import textbookInfo |
||||
|
from collections import MutableMapping |
||||
|
|
||||
|
import datetime as dt |
||||
|
import lxml.html as lxh |
||||
|
import requests |
||||
|
import sys |
||||
|
import copy |
||||
|
|
||||
|
# Term identifiers.  One of these is passed around as ``semester`` and ends
# up in the CLASS_SRCH_WRK2_STRM field of the search payloads.
fall = "2159"
spring_summer = "2165"
winter = "2161"
||||
|
|
||||
|
# threading stuff |
||||
|
import Queue as q |
||||
|
import threading as thd |
||||
|
|
||||
|
# Page fetched once per MosReq so that the server hands out its cookies.
baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"

# Endpoint every search request (GET for the state number, POST for the
# actual queries) is sent to.
searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"

# Headers sent with the initial GET and with every POST.
custom_headers = {
    "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0",
    "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8",
}

# First POST of a subject-code lookup.  Placeholders: {0} state number,
# {1} semester.
courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"

# Second POST of a subject-code lookup.  Placeholders: {0} state number,
# {1} the letter being listed, {2} semester.
courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}"

# Follow-up POST sent when the class search answers with its "Your search
# will return over ..." warning.  Placeholders: {0} state number,
# {1} semester.
payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"

# Class search for one subject.  Placeholders: {0} state number,
# {1} subject code, {2} semester.
payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}"
||||
|
|
||||
|
|
||||
|
# Current calendar year and month, evaluated once when the module is
# imported.
year = dt.date.today().year
month = dt.date.today().month

# Two-letter day code -> position in the week (Monday is 0).  Used to order
# sections by day.
days = {
    "Mo" : 0,
    "Tu" : 1,
    "We" : 2,
    "Th" : 3,
    "Fr" : 4,
    "Sa" : 5,
    "Su" : 6
}

# Day code -> space-separated spellings of that day.  "T" is the first
# character of "TBA", which is what a section without a schedule reports as
# its day.
day_descs = {
    "Mo" : "Monday Mon Mo",
    "Tu" : "Tuesday Tues Tu Tue",
    "We" : "Wednesday Wed We",
    "Th" : "Thursday Th Thurs",
    "Fr" : "Friday Fr Fri",
    "Sa" : "Saturday Sat Sa",
    "Su" : "Sunday Su Sun",
    "T" : "TBA"
}
||||
|
|
||||
|
def timeparse(time):
    """
    Parse a 12-hour clock string into 24-hour components.

    ``time`` is expected to look like ``"9:30AM"`` or ``"10:30PM"``: the
    hour, a colon, the minutes and a two-letter AM/PM suffix.

    Returns a ``(hour, minutes, half)`` tuple of strings.  ``hour`` is on
    the 24-hour clock, ``minutes`` carries no zero padding (``"05"``
    comes back as ``"5"``) and ``half`` is the suffix exactly as given.

    Raises ValueError if the hour or the minutes are not integers.
    """
    clock, half = time[:-2], time[-2:]
    hour_text, _, minute_text = clock.partition(":")
    hour = int(hour_text)
    minutes = int(minute_text)

    if half == "PM" and hour < 12:
        hour += 12
    elif half == "AM" and hour == 12:
        # 12:xxAM is the first hour of the day, not noon.
        hour = 0

    return (str(hour), str(minutes), half)
||||
|
|
||||
|
class Class(object):
    """
    A course: its department, its title and the sections it is offered in.

    ``title`` is stored UTF-8 encoded.  Iterating a Class yields one
    ``(title, section)`` pair per section.
    """

    def __init__(self, dept, title, sections):
        self.dept = dept
        self.sections = sections
        self.title = title.encode("UTF-8")

    def __repr__(self):
        summary = (self.title, self.sections)
        return repr(summary)

    def __iter__(self):
        for sec in self.sections:
            yield (self.title, sec)

    def hasCode(self):
        """
        Tell whether the title starts with two space-separated words that
        contain no lowercase letters (e.g. a department and a course code).
        """
        words = self.title.strip().split(" ")
        if len(words) < 2:
            return False
        return all(word == word.upper() for word in words[:2])

    @property
    def code(self):
        """
        The second word of the title, or False when hasCode() is false.
        """
        if not self.hasCode():
            return False
        words = self.title.strip().split(" ")
        return words[1].strip()

    @property
    def books(self):
        """
        Textbooks (with prices) looked up for this course, or False when
        the department or the course code is missing.
        """
        if not (self.dept and self.code):
            return False
        return textbookInfo(self.dept, self.code, withPrices=True)
||||
|
|
||||
|
@total_ordering
class Section(dict):
    """
    One scheduled offering of a class: when, where, who and which term.

    The constructor arguments are kept as UTF-8 byte strings on ``time``,
    ``loc``, ``prof`` and ``sem``.  ``time`` is either the literal "TBA" or
    four whitespace-separated fields: a run of two-letter day codes, the
    start time, a separator and the end time.

    Every comparison operator is written out explicitly.  On Python 2
    ``dict`` already provides ``__lt__``/``__le__``/``__ge__``/``__ne__``,
    so ``total_ordering`` fills in nothing and the inherited dict
    comparisons (which see two empty dicts) would otherwise decide the
    result of ``sorted()`` and ``!=``.
    """

    def __init__(self, time, loc, prof, sem):
        self.time = self._utf8(time)
        self.loc = self._utf8(loc)
        self.prof = self._utf8(prof)
        self.sem = self._utf8(sem)
        # Last tuple computed by ``date``; False until it is first read.
        self._date = False
        # Assigned from outside to pin the section to one two-letter day.
        self._day = False

    @staticmethod
    def _utf8(text):
        """
        Encode text as UTF-8, leaving already-encoded bytes untouched.

        Re-encoding a byte string on Python 2 first decodes it as ASCII and
        raises UnicodeDecodeError for any non-ASCII character.
        """
        if isinstance(text, bytes):
            return text
        return text.encode("UTF-8")

    @property
    def date(self):
        """
        ``(day, start, end)`` for a scheduled section, else the "TBA" text.

        ``day`` is ``_day`` when that has been set and otherwise the list
        of two-letter codes found in the time string; ``start`` and ``end``
        are timeparse() tuples.
        """
        if self.time == "TBA":
            return self.time

        day, start, _, end = self.time.split()
        if self._day:
            assert len(self._day) == 2
            day = self._day
        else:
            day = [day[n:n + 2] for n in range(0, len(day) - 1, 2)]

        self._date = (day, timeparse(start), timeparse(end))
        return self._date

    @property
    def day(self):
        # For a "TBA" section this is the first character of that text.
        return self.date[0]

    @property
    def start(self):
        """
        Start hour and minutes concatenated as text (no zero padding).
        """
        return self.date[1][0] + self.date[1][1]

    def _start_key(self):
        """
        Start time as an ``(hour, minutes)`` pair of ints.

        The text form cannot be compared directly: "930" sorts after
        "1030" as a string.
        """
        hour, minutes = self.date[1][0], self.date[1][1]
        return (int(hour), int(minutes))

    def __repr__(self):
        return ("""
        Time = %s, Location = %s, Instructor = %s, Semester Running = %s
        """ % (self.date, self.loc, self.prof, self.sem))

    def __gt__(self, x):
        """
        True when this section falls later in the week than ``x``.

        Raises NotImplementedError while this section still spans a list
        of days.  A "TBA" section on either side compares as not greater.
        """
        if isinstance(self.day, list):
            raise NotImplementedError

        if self.date == "TBA" or x.date == "TBA":
            return False

        if self.day != x.day:
            return days[self.day] > days[x.day]
        return self._start_key() > x._start_key()

    def __eq__(self, x):
        return (x.date == self.date and
                x.prof == self.prof and
                x.loc == self.loc and
                x.sem == self.sem)

    def __ne__(self, x):
        return not self == x

    def __lt__(self, x):
        return x > self

    def __ge__(self, x):
        return self > x or self == x

    def __le__(self, x):
        return x > self or self == x
||||
|
|
||||
|
|
||||
|
def getStateNum(html):
    """
    Pull the ICStateNum value out of a Mosaic page.

    Mosaic hands every requester its own state number, and it has to be
    echoed back in the next request.

    Raises IndexError when the page has no input named ICStateNum.
    """
    document = lxh.fromstring(html)
    state_inputs = document.xpath('.//input[@name="ICStateNum"]')
    return state_inputs[0].value
||||
|
|
||||
|
def parseSection(section):
    """
    Build a Section from one row of a results table.

    ``section`` is an lxml element whose ``<td>`` descendants are, in
    order, the time, location, instructor and semester cells.

    Raises AssertionError if the row does not have exactly four cells.
    """
    cols = section.xpath(".//td")
    assert len(cols) == 4

    # Section.__init__ does the UTF-8 encoding.  Encoding here as well made
    # Python 2 decode the bytes as ASCII before re-encoding them, which
    # raises UnicodeDecodeError for any non-ASCII name or room.
    time, loc, prof, sem = [col.text_content().strip() for col in cols]

    return Section(time, loc, prof, sem)
||||
|
|
||||
|
def getSectionInfo(table):
    """
    Yield a parsed Section for every row of ``table`` whose ``id``
    attribute contains "SSR_CLSRCH".
    """
    for row in table.xpath(".//tr"):
        row_ids = row.xpath("@id")
        if not row_ids:
            continue
        if search(r"SSR_CLSRCH", row_ids[0]):
            yield parseSection(row)
||||
|
|
||||
|
def parseColumns(subject, html):
    """
    Pair every class heading on a results page with its sections.

    Returns a lazy iterator of ``((subject, heading), [Section, ...])``.
    Headings are the spans whose id contains "DERIVED_CLSRCH_DESCR"; the
    section lists come from the tables whose id matches
    ``ICField<n>$scroll``, skipping the first such table.
    """
    parsed = lxh.fromstring(html)

    def id_matches(element, pattern):
        # True when the element has an id and the pattern occurs in it.
        ids = element.xpath("@id")
        return ids and search(pattern, ids[0])

    scroll_tables = (table for table in parsed.xpath(".//table")
                     if id_matches(table, r"ICField[0-9]+\$scroll"))
    classInfo = (list(getSectionInfo(table))
                 for table in islice(scroll_tables, 1, None))

    classNames = ((subject, span.text_content().strip())
                  for span in parsed.xpath(".//span")
                  if id_matches(span, r"DERIVED_CLSRCH_DESCR"))

    return zip(classNames, classInfo)
||||
|
|
||||
|
def getCodes(html):
    """
    Return a generator of the subject codes listed on a page.

    A code is the UTF-8 encoded text of any span whose id matches
    ``SSR_CLSRCH_SUBJ_SUBJECT$<n>``.  The page is parsed right away; the
    spans are filtered as the generator is consumed.
    """
    spans = lxh.fromstring(html).xpath("//span")

    def subject_codes():
        for span in spans:
            span_ids = span.xpath("@id")
            if span_ids and search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", span_ids[0]):
                yield span.text_content().encode("UTF-8")

    return subject_codes()
||||
|
|
||||
|
class MosReq(object):
    """
    Client for the Mosaic class search, bound to one semester.

    Creating an instance fetches ``baseurl`` once so the server can set its
    cookies; every later request replays those cookies.

    NOTE(review): none of the requests below pass a timeout, so a stalled
    server blocks the calling thread for as long as the connection stays
    open.
    """

    def __init__(self, semester):
        self.semester = semester

        # Let the server set some cookies before doing the searching.  The
        # session is only needed for this one request, so it is closed as
        # soon as the cookies have been copied out of it.
        with requests.Session() as s:
            s.get(baseurl, allow_redirects=True, headers=custom_headers)
            self.cookies = dict(s.cookies.iteritems())

        self.statenum = False
        self.codes_ = []

    def getlist(self, subject):
        """
        Return the raw results page listing the classes of ``subject``.
        """
        sys.stderr.write("Getting " + subject + "\n")
        landing = requests.get(searchurl, cookies=self.cookies).content
        # for some reason Mosaic wants us to request it twice
        self.statenum = getStateNum(landing)
        first_req = requests.post(searchurl,
                                  data=payload.format(self.statenum, subject, self.semester),
                                  cookies=self.cookies,
                                  allow_redirects=False,
                                  headers=custom_headers).content
        # we make a first request to get the ICStateNum in case it thinks
        # there are too many results
        try:
            self.statenum = getStateNum(first_req)
        except IndexError:
            # The reply carries no state number; keep the previous one.
            pass

        if "Your search will return over" not in first_req:
            return first_req

        # Mosaic asked for confirmation of a large result set: send the
        # follow-up payload to get the actual listing.
        return requests.post(searchurl,
                             data=payload2.format(self.statenum, self.semester),
                             cookies=self.cookies,
                             allow_redirects=False,
                             headers=custom_headers).content

    def classes(self, subject):
        """
        Return the parsed ``((subject, heading), sections)`` pairs for
        ``subject`` as a list.
        """
        return list(parseColumns(subject, self.getlist(subject)))

    def getCodes(self, letter):
        """
        Return a generator of the subject codes listed under ``letter``.
        """
        sys.stderr.write("Getting letter " + letter + "\n")
        landing = requests.get(searchurl, cookies=self.cookies).content
        self.statenum = getStateNum(landing)

        self.statenum = getStateNum(requests.post(searchurl,
                                                  data=courseCodes1.format(self.statenum, self.semester),
                                                  cookies=self.cookies,
                                                  headers=custom_headers).content)

        return getCodes(requests.post(searchurl,
                                      data=courseCodes2.format(self.statenum, letter, self.semester),
                                      cookies=self.cookies,
                                      allow_redirects=False,
                                      headers=custom_headers).content)

    @property
    def codes(self):
        """
        Every subject code from A to Z, fetched on first use and cached.
        """
        if not self.codes_:
            self.codes_ = list(chain.from_iterable(
                self.getCodes(letter)
                for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
        return self.codes_
||||
|
|
||||
|
def request(codes, lists, semester):
    """
    Worker loop: fetch the classes of every subject code left in a queue.

    codes    -- Queue of subject codes, shared between all workers.
    lists    -- Queue that receives one list of parsed classes per code.
    semester -- term identifier handed to MosReq.

    A subject that fails is reported on stderr and skipped.  Returning on
    the first failure would leave the remaining codes unprocessed, and once
    every worker had returned a ``codes.join()`` in the caller would never
    complete.
    """
    requester = MosReq(semester)
    while True:
        try:
            # Never block here: another worker may take the last code
            # between an emptiness check and the get(), which would leave
            # this thread waiting forever.
            code = codes.get_nowait()
        except q.Empty:
            return

        try:
            lists.put(requester.classes(code))
        except Exception as err:
            sys.stderr.write("Skipping %s: %r\n" % (code, err))
        finally:
            codes.task_done()
||||
|
|
||||
|
|
||||
|
class CourseInfo(object):
    """
    Downloads every class of one semester using a pool of worker threads.
    """

    def __init__(self, threadcount, semester):
        # Cached subject codes; False until ``codes`` is first read.
        self._codes = False
        self.threadcount = threadcount
        self.semester = semester

    @property
    def codes(self):
        """
        The semester's subject codes, fetched on first use and cached.
        """
        if not self._codes:
            self._codes = MosReq(self.semester).codes
        return self._codes

    def classes(self):
        """
        Generate a Class for every course found in the semester.

        All downloading happens when the first item is requested.  A
        section that meets on several days is split into one copy per day
        before the sections of a class are sorted.
        """
        qcodes = q.Queue()
        for code in self.codes:
            qcodes.put(code)
        lists = q.Queue()

        threads = [thd.Thread(group=None, target=request,
                              args=(qcodes, lists, self.semester))
                   for _ in range(self.threadcount)]
        for t in threads:
            t.start()

        # Waiting for the workers is enough: each one has delivered its
        # results by the time it ends.  Joining the queue as well blocks
        # forever whenever the workers stop before the queue is drained.
        for t in threads:
            t.join()

        sections = []
        while not lists.empty():
            sections.append(lists.get())

        for (dept, title), parsed_sections in chain.from_iterable(sections):
            new_sections = []
            for sec in parsed_sections:
                if len(sec.day) > 1:
                    for day in sec.day:
                        clone = copy.deepcopy(sec)
                        clone._day = day
                        new_sections.append(clone)
                else:
                    sec._day = sec.day[0]
                    new_sections.append(sec)
            # The heading has its non-breaking spaces stripped first.
            yield Class(dept, sub("\xa0+", "", title), sorted(new_sections))
||||
|
|
||||
|
def getCourses(semester, threadcount=10):
    """
    Return a generator of every Class in ``semester``, downloaded with
    ``threadcount`` worker threads.
    """
    info = CourseInfo(threadcount, semester)
    return info.classes()
||||
|
|
||||
|
def allCourses():
    """
    Return a lazy iterator over the classes of the fall, winter and
    spring/summer terms, in that order.
    """
    semesters = (fall, winter, spring_summer)
    per_semester = (getCourses(sem, threadcount=10) for sem in semesters)
    return chain.from_iterable(per_semester)
||||
|
|
||||
|
#for course in allCourses(): |
||||
|
#sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books)) |
||||
|
#print course.sections |
@ -0,0 +1,9 @@ |
|||||
|
from oersearch import Search |
||||
|
from classes import getCourses |
||||
|
from sylla import getTextbooks |
||||
|
|
||||
|
# Create the "McMaster" search, hand it getCourses through setup() and run
# it.
# NOTE(review): the `getTextbooks` import above is never used in this
# script, and the textbook module added in the same change defines no such
# name -- confirm the import does not fail at start-up.
mcmasterSearch = Search("McMaster")

mcmasterSearch.setup(getCourses)

mcmasterSearch.run()
@ -0,0 +1,117 @@ |
|||||
|
#! /usr/bin/python2 |
||||
|
|
||||
|
from sys import argv |
||||
|
from itertools import chain, islice, izip_longest, izip as zip |
||||
|
from re import search, sub |
||||
|
from functools import total_ordering |
||||
|
from re import sub |
||||
|
|
||||
|
import datetime as dt |
||||
|
import lxml.html as lxh |
||||
|
import requests |
||||
|
|
||||
|
# The purpose of this module is to download and parse syllabi from various
# departments so that they can be correlated with individual courses.
||||
|
|
||||
|
class Price(object):
    """
    A dollar amount together with the status text scraped next to it.

    ``amnt`` is the amount as text with a leading currency symbol, for
    example "$12.50"; it is stored as a float on ``dollars``.

    Raises ValueError if the amount is not a number.
    """

    def __init__(self, amnt, status):
        # Drop the currency symbol and any thousands separators so that
        # "$1,234.50" parses instead of raising ValueError.
        self.dollars = float(amnt.strip().lstrip("$").replace(",", ""))
        self.status = status

    def __repr__(self):
        return "$%s %s" % (repr(self.dollars), self.status)
||||
|
|
||||
|
|
||||
|
class Book(object):
    """
    A textbook title paired with its price.
    """

    def __init__(self, title, price):
        self.title, self.price = title, price

    def __repr__(self):
        # Rendered as a two-element list of quoted strings.
        fields = (self.title, repr(self.price))
        return '["%s", "%s"]' % fields
||||
|
|
||||
|
|
||||
|
def grouper(n, iterable, fillvalue=None):
    """
    Collect items into tuples of length ``n``, padding the last one.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # The same iterator repeated n times: each output tuple pulls n
    # consecutive items from it.
    shared = iter(iterable)
    columns = [shared for _ in range(n)]
    return izip_longest(*columns, fillvalue=fillvalue)
||||
|
|
||||
|
searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1" |
||||
|
|
||||
|
def normalize(word):
    """
    Capitalise a word: first character upper case, the rest lower case.

    Words of one character or fewer are returned unchanged.
    """
    if len(word) <= 1:
        return word
    head, tail = word[0], word[1:]
    return head.upper() + tail.lower()
||||
|
|
||||
|
def parseAuthor(author):
    """
    Turn "Lastname Firstname" into "Firstname Lastname".

    Only the first two space-separated parts are kept; a value without a
    space is returned as it is.
    """
    parts = author.split(" ")
    if len(parts) > 1:
        lastname, firstname = parts[0], parts[1]
        return "%s %s" % (firstname, lastname)
    return author
||||
|
|
||||
|
def normwords(phrase):
    """
    Apply normalize() to every space-separated word of ``phrase``.
    """
    return " ".join(normalize(word) for word in phrase.split(" "))
||||
|
|
||||
|
def books(dept, code, withPrices):
    """
    Snatch me up a book title or three

    Requests the listing for one course and yields a tuple per book found:
    ``(title, author, price)`` when ``withPrices`` is true, otherwise
    ``(title, author)``.  Title and author are run through normwords();
    the price is the repr() of a Price, or of None when the page offered
    fewer prices than books.
    """
    html = requests.get(searchUrl % (dept, code)).text

    parsed = lxh.fromstring(html)

    pricelist = prices(parsed)

    for div in parsed.xpath(".//div"):
        # Books are the divs whose id mentions "prodDesc".
        if "prodDesc" not in div.get("id", ""):
            continue

        textbook = div.text_content()

        # The author is whatever follows the first colon of the first
        # "inline" span next to the description, with commas removed.
        spans = div.getparent().xpath(".//span[@class='inline']")
        if spans:
            author = "".join(spans[0].text_content().split(":")[1:])
            author = sub(r',', '', author.strip())
        else:
            author = ""

        # A price is consumed for every book, even when it is not reported,
        # so that later books keep lining up with their own price.  Running
        # out of prices must not abort the whole listing.
        price = pricelist.pop() if pricelist else None

        if withPrices:
            yield (normwords(textbook), normwords(author), repr(price))
        else:
            yield (normwords(textbook), normwords(author))
||||
|
|
||||
|
def prices(html):
    """
    Get the prices from a search result page

    Takes the first word of the text of every ``<p>`` that directly holds
    a checkbox input, pairs consecutive words up as (amount, status) and
    returns the resulting Price objects in reverse page order, so that
    ``pop()`` hands them out first to last.  Returns an empty list when
    there are none or when an amount cannot be parsed.
    """
    words = []
    for box in html.xpath("//p/input[@type='checkbox']"):
        words.append(box.getparent().text_content().split()[0])

    pairs = list(grouper(2, words))
    pairs.reverse()

    try:
        return [Price(amount, status) for amount, status in pairs]
    except ValueError:
        return []
||||
|
|
||||
|
def textbookInfo(dept, code, withPrices=False):
    """
    Return all the textbooks for a course as a list of books() tuples.
    """
    found = []
    found.extend(books(dept, code, withPrices))
    return found
||||
|
|
||||
|
def humanities():
    """
    Download humanities syllabi

    Not implemented yet: always returns an empty list.
    """
    syllabi = []
    return syllabi
||||
|
|
||||
|
# Example: getting the textbook info for Personality Theory
# (PSYCH = department, 2B03 = course code)
# print textbookInfo("PSYCH", "2B03")
Loading…
Reference in new issue