Browse Source

move mcmaster directory

master
wes 8 years ago
parent
commit
bca93d226c
  1. 0
      src/__init__.py
  2. 0
      src/mcmaster/__init__.py
  3. 349
      src/mcmaster/classes.py
  4. 9
      src/mcmaster/site.py
  5. 117
      src/mcmaster/sylla.py

0
src/__init__.py

0
src/mcmaster/__init__.py

349
src/mcmaster/classes.py

@ -0,0 +1,349 @@
#! /usr/bin/python2
from sys import argv
from itertools import chain, islice, izip as zip
from re import search, sub
from functools import total_ordering
from sylla import textbookInfo
from collections import MutableMapping
import datetime as dt
import lxml.html as lxh
import requests
import sys
import copy
fall = "2159"
spring_summer = "2165"
winter = "2161"
# threading stuff
import Queue as q
import threading as thd
baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
searchurl = "https://csprd.mcmaster.ca/psc/prcsprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
custom_headers = {
"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0",
"Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8",
}
courseCodes1 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_SUBJ_SRCH%240&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=5tq9x%2Fjt42mf62Sh5z%2BrjxT0gT15kiIyQ2cecCSmRB4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
courseCodes2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=SSR_CLSRCH_WRK2_SSR_ALPHANUM_{1}&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=vIUgl6ZXw045S07EPbQw4RDzv7NmKCDdJFdT4CTRQNM%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={2}"
payload2 = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=%23ICSave&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&CLASS_SRCH_WRK2_STRM$45$={1}"
payload = "ICAJAX=1&ICNAVTYPEDROPDOWN=1&ICType=Panel&ICElementNum=0&ICStateNum={0}&ICAction=CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH&ICXPos=0&ICYPos=0&ResponsetoDiffFrame=-1&TargetFrameName=None&FacetPath=None&ICFocus=&ICSaveWarningFilter=0&ICChanged=-1&ICResubmit=0&ICSID=aWx3w6lJ6d2wZui6hwRVSEnzsPgCA3afYJEFBLLkxe4%3D&ICActionPrompt=false&ICFind=&ICAddCount=&ICAPPCLSDATA=&SSR_CLSRCH_WRK_SUBJECT$75$$0={1}&CLASS_SRCH_WRK2_STRM$45$={2}"
year = dt.date.today().year
month = dt.date.today().month
days = {
"Mo" : 0,
"Tu" : 1,
"We" : 2,
"Th" : 3,
"Fr" : 4,
"Sa" : 5,
"Su" : 6
}
day_descs = {
"Mo" : "Monday Mon Mo",
"Tu" : "Tuesday Tues Tu Tue",
"We" : "Wednesday Wed We",
"Th" : "Thursday Th Thurs",
"Fr" : "Friday Fr Fri",
"Sa" : "Saturday Sat Sa",
"Su" : "Sunday Su Sun",
"T" : "TBA"
}
def timeparse(time):
"""
Parse the time into numbers
"""
if len(time) == 7:
hour = int(time[0:2])
minutes = int(time[3:5])
half = time[5:7]
else:
hour = int(time[0])
minutes = int(time[2:4])
half = time[4:6]
if half == "PM":
if hour < 12:
hour = hour + 12
return (str(hour), str(minutes), half)
class Class(object):
def __init__(self, dept, title, sections):
self.title = title.encode("UTF-8")
self.sections = sections
self.dept = dept
def __repr__(self):
return repr((self.title, self.sections))
def __iter__(self):
return iter((self.title, sec) for sec in self.sections)
def hasCode(self):
splitted = self.title.strip().split(" ")
return ((len(splitted) >= 2) and
(splitted[0].upper() == splitted[0]) and
(splitted[1].upper() == splitted[1]))
@property
def code(self):
if self.hasCode():
return self.title.strip().split(" ")[1].strip()
return False
@property
def books(self):
if self.dept and self.code:
return textbookInfo(self.dept, self.code, withPrices=True)
return False
@total_ordering
class Section(dict):
def __init__(self, time, loc, prof, sem):
self.time = time.encode("UTF-8")
self.loc = loc.encode("UTF-8")
self.prof = prof.encode("UTF-8")
self.sem = sem.encode("UTF-8")
self._date = False
self._day = False
@property
def date(self):
if self.time != "TBA":
day, start, _, end = self.time.split()
if self._day:
assert len(self._day) == 2
day = self._day
else:
day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)]
self._date = (day, timeparse(start), timeparse(end))
return self._date
return self.time
@property
def day(self):
return self.date[0]
@property
def start(self):
return self.date[1][0] + self.date[1][1]
def __repr__(self):
return ("""
Time = %s, Location = %s, Instructor = %s, Semester Running = %s
""" % (self.date, self.loc, self.prof, self.sem))
def __gt__(self, x):
if isinstance(self.day, list):
raise NotImplementedError
if (self.date == "TBA" or
x.date == "TBA"):
return False
return ((days[self.day] > days[x.day]) or
((self.day == x.day) and
(self.start > x.start)))
def __eq__(self, x):
return (x.date == self.date and
x.prof == self.prof and
x.loc == self.loc and
x.sem == self.sem)
def getStateNum(html):
"""
Get the state num from Mosaic
This is unique to each requester
"""
parsed = lxh.fromstring(html)
return parsed.xpath(".//input[@name=\"ICStateNum\"]")[0].value
def parseSection(section):
cols = section.xpath(".//td")
assert len(cols) == 4
time, loc, prof, sem = [col.text_content().encode("UTF-8").strip() for col in cols]
classinfo = Section(time, loc, prof, sem)
return classinfo
def getSectionInfo(table):
trs = table.xpath(".//tr")
for tr in trs:
if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]):
yield parseSection(tr)
def parseColumns(subject, html):
parsed = lxh.fromstring(html)
classInfo = (list(getSectionInfo(table)) for table in
islice((table for table in parsed.xpath(".//table")
if table.xpath("@id") and
search(r"ICField[0-9]+\$scroll", table.xpath("@id")[0])), 1, sys.maxint))
classNames = ((subject, span.text_content().strip()) for span in parsed.xpath(".//span")
if span.xpath("@id") and
search(r"DERIVED_CLSRCH_DESCR", span.xpath("@id")[0]))
return zip(classNames, classInfo)
def getCodes(html):
parsed = lxh.fromstring(html)
return (code.text_content().encode("UTF-8") for code in
parsed.xpath("//span")
if code.xpath("@id") and
search(r"SSR_CLSRCH_SUBJ_SUBJECT\$[0-9]+", code.xpath("@id")[0]))
class MosReq(object):
def __init__(self, semester):
self.semester = semester
s = requests.Session()
resp = s.get(baseurl, allow_redirects=True, headers=custom_headers).content
# Let the server set some cookies before doing the searching
cookies = {}
for key, val in s.cookies.iteritems():
cookies[key] = val
self.cookies = cookies
self.statenum = False
self.codes_ = []
def getlist(self, subject):
sys.stderr.write("Getting " + subject + "\n")
first_req = requests.get(searchurl, cookies=self.cookies).content
# for some reason Mosaic wants us to request it twice, ??????????????????
self.statenum = getStateNum(first_req)
first_req = requests.post(searchurl,
data=payload.format(self.statenum, subject, self.semester),
cookies=self.cookies,
allow_redirects=False,
headers=custom_headers).content
# we make a first request to get the ICStateNum in case it thinks there are too many results
try:
self.statenum = getStateNum(first_req)
except IndexError:
pass
if "Your search will return over" in first_req:
return requests.post(searchurl,
data=payload2.format(self.statenum, self.semester),
cookies=self.cookies,
allow_redirects=False,
headers=custom_headers).content
else:
return first_req
def classes(self, subject):
return list(parseColumns(subject, self.getlist(subject)))
def getCodes(self, letter):
sys.stderr.write("Getting letter " + letter + "\n")
first_req = requests.get(searchurl, cookies=self.cookies).content
self.statenum = getStateNum(first_req)
self.statenum = getStateNum(requests.post(searchurl,
data=courseCodes1.format(self.statenum, self.semester),
cookies=self.cookies,
headers=custom_headers).content)
return getCodes(requests.post(searchurl,
data=courseCodes2.format(self.statenum, letter, self.semester),
cookies=self.cookies,
allow_redirects=False,
headers=custom_headers).content)
@property
def codes(self):
if not self.codes_:
self.codes_ = list(chain.from_iterable(
map((lambda l:
self.getCodes(chr(l))),
xrange(65, 91))))
return self.codes_
def request(codes, lists, semester):
requester = MosReq(semester)
while not codes.empty():
code = codes.get()
try:
lists.put(requester.classes(code))
except:
codes.task_done()
return
codes.task_done()
class CourseInfo(object):
def __init__(self, threadcount, semester):
self._codes = False
self.threadcount = threadcount
self.semester = semester
@property
def codes(self):
if not self._codes:
req = MosReq(self.semester)
self._codes = req.codes
return self._codes
def classes(self):
qcodes = q.Queue()
for code in self.codes:
qcodes.put(code)
lists = q.Queue()
threads = []
thread = None
for i in xrange(self.threadcount):
thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester))
threads.append(thread)
thread.start()
qcodes.join()
for t in threads:
t.join()
sections = []
while not lists.empty():
sections.append(lists.get())
for cl in chain.from_iterable(sections):
new_sections = []
for sec in cl[1]:
if len(sec.day) > 1:
for day in sec.day:
new_sections.append(copy.deepcopy(sec))
new_sections[-1]._day = day
else:
sec._day = sec.day[0]
new_sections.append(sec)
yield Class(cl[0][0], sub("\xa0+", "", cl[0][1]), sorted(new_sections))
def getCourses(semester, threadcount=10):
return CourseInfo(threadcount, semester).classes()
def allCourses():
return chain.from_iterable(
(getCourses(sem, threadcount=10)
for sem in (fall, winter, spring_summer)))
#for course in allCourses():
#sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books))
#print course.sections

9
src/mcmaster/site.py

@ -0,0 +1,9 @@
from oersearch import Search
from classes import getCourses
from sylla import getTextbooks
mcmasterSearch = Search("McMaster")
mcmasterSearch.setup(getCourses)
mcmasterSearch.run()

117
src/mcmaster/sylla.py

@ -0,0 +1,117 @@
#! /usr/bin/python2
from sys import argv
from itertools import chain, islice, izip_longest, izip as zip
from re import search, sub
from functools import total_ordering
from re import sub
import datetime as dt
import lxml.html as lxh
import requests
# Purpose of this module is to download and parse syllabi from various departments
# In order to be corellated with individual courses
class Price(object):
def __init__(self, amnt, status):
self.dollars = float(amnt[1:])
self.status = status
def __repr__(self):
return "$%s %s" % (repr(self.dollars), self.status)
class Book(object):
def __init__(self, title, price):
self.title = title
self.price = price
def __repr__(self):
return '["%s", "%s"]' % (self.title, repr(self.price))
def grouper(n, iterable, fillvalue=None):
"grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
args = [iter(iterable)] * n
return izip_longest(fillvalue=fillvalue, *args)
searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1"
def normalize(word):
if len(word) > 1:
return ("%s%s" %
(word[0].upper(),
"".join(word[1:]).lower()))
return word
def parseAuthor(author):
split = author.split(" ")
if len(split) <= 1:
return author
lastname = split[0]
firstname = split[1]
return "%s %s" % (firstname, lastname)
def normwords(phrase):
words = phrase.split(" ")
return " ".join(map(normalize, words))
def books(dept, code, withPrices):
"""
Snatch me up a book title or three
"""
req = searchUrl % (dept, code)
html = requests.get(req).text
parsed = lxh.fromstring(html)
pricelist = prices(parsed)
for div in parsed.xpath(".//div"):
if (div.attrib.has_key("id") and
"prodDesc" in div.attrib["id"]):
textbook = div.text_content()
author = sub(r',', '',
"".join(
(div.getparent()
.xpath(".//span[@class='inline']")
[0].text_content()
.split(":")[1:])).strip())
price = pricelist.pop()
if withPrices:
yield (normwords(textbook), normwords(author), repr(price))
else:
yield (normwords(textbook), normwords(author))
def prices(html):
"""
Get the prices from a search result page
"""
ps = [
p.getparent().text_content().split()[0]
for p in html.xpath("//p/input[@type='checkbox']")
]
try:
amts, stats = zip(*list(reversed(list(grouper(2, ps)))))
return map(Price, amts, stats)
except ValueError:
return []
def textbookInfo(dept, code, withPrices=False):
"""
Return all the textbooks for a course
"""
return list(books(dept, code, withPrices))
def humanities():
"""
Download humanities syllabi
"""
return []
# Example, getting the course info for Personality Theory (PSYCH = Department, 2B03 = Course code)
# print list(courseInfo("PSYCH", "2B03"))
Loading…
Cancel
Save