Browse Source

revert to python 2 for class scraper because of strange bugs in html parsing
master
wes 9 years ago
parent
commit
9ef3047301
  1. 29
      src/mcmaster/classes.py
  2. 8
      src/mcmaster/sylla.py

29
src/mcmaster/classes.py

@ -1,7 +1,7 @@
#! /usr/bin/python3 #! /usr/bin/python2
from sys import argv from sys import argv
from itertools import chain, islice from itertools import chain, islice, izip as zip
from re import search, sub from re import search, sub
from functools import total_ordering from functools import total_ordering
@ -18,13 +18,8 @@ fall = "2169"
spring_summer = "2175" spring_summer = "2175"
winter = "2171" winter = "2171"
def parseSem(sem):
if sem == "TBA":
return "TBA"
return sem[0] + sem[2:4] + sem[6]
# threading stuff # threading stuff
import queue as q import Queue as q
import threading as thd import threading as thd
baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL" baseurl = "https://applicants.mcmaster.ca/psp/prepprd/EMPLOYEE/PSFT_LS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL"
@ -129,12 +124,10 @@ class Section(dict):
@property @property
def sem(self): def sem(self):
if parseSem(self._sem) == fall: if self._sem == fall:
return "Fall" return "Fall"
elif parseSem(self._sem) == winter: elif self._sem == winter:
return "Winter" return "Winter"
elif parseSem(self._sem) == "TBA":
return "TBA"
else: else:
return "Spring/Summer" return "Spring/Summer"
@ -147,7 +140,7 @@ class Section(dict):
assert len(self._day) == 2 assert len(self._day) == 2
day = self._day day = self._day
else: else:
day = [day[n:n+2] for n in range(0, len(day)-1, 2)] day = [day[n:n+2] for n in xrange(0, len(day)-1, 2)]
self._date = (day, timeparse(start), timeparse(end)) self._date = (day, timeparse(start), timeparse(end))
@ -238,7 +231,7 @@ class MosReq(object):
# Let the server set some cookies before doing the searching # Let the server set some cookies before doing the searching
cookies = {} cookies = {}
for key, val in s.cookies.items(): for key, val in s.cookies.iteritems():
cookies[key] = val cookies[key] = val
self.cookies = cookies self.cookies = cookies
self.statenum = False self.statenum = False
@ -293,7 +286,7 @@ class MosReq(object):
self.codes_ = list(chain.from_iterable( self.codes_ = list(chain.from_iterable(
map((lambda l: map((lambda l:
self.getCodes(chr(l))), self.getCodes(chr(l))),
range(65, 91)))) xrange(65, 91))))
return self.codes_ return self.codes_
def request(codes, lists, semester): def request(codes, lists, semester):
@ -327,7 +320,7 @@ class CourseInfo(object):
lists = q.Queue() lists = q.Queue()
threads = [] threads = []
thread = None thread = None
for i in range(self.threadcount): for i in xrange(self.threadcount):
thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester)) thread = thd.Thread(group=None, target=request, args=(qcodes, lists, self.semester))
threads.append(thread) threads.append(thread)
thread.start() thread.start()
@ -361,5 +354,5 @@ def allCourses():
if __name__ == "__main__": if __name__ == "__main__":
for course in allCourses(): for course in allCourses():
sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, course.books)) sys.stdout.write("%s, %s, %s, %s\n" % (course.title, course.code, course.dept, list(chain.from_iterable(course.books))))
print(course.sections) print course.sections

8
src/mcmaster/sylla.py

@ -1,7 +1,7 @@
#! /usr/bin/python3 #! /usr/bin/python2
from sys import argv from sys import argv
from itertools import chain, islice, zip_longest from itertools import chain, islice, izip_longest, izip as zip
from re import search, sub from re import search, sub
from functools import total_ordering from functools import total_ordering
from re import sub from re import sub
@ -34,7 +34,7 @@ class Book(object):
def grouper(n, iterable, fillvalue=None): def grouper(n, iterable, fillvalue=None):
"grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
args = [iter(iterable)] * n args = [iter(iterable)] * n
return zip_longest(fillvalue=fillvalue, *args) return izip_longest(fillvalue=fillvalue, *args)
searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1" searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1"
@ -70,7 +70,7 @@ def books(dept, code, withPrices):
pricelist = prices(parsed) pricelist = prices(parsed)
for div in parsed.xpath(".//div"): for div in parsed.xpath(".//div"):
if ("id" in div.attrib and if (div.attrib.has_key("id") and
"prodDesc" in div.attrib["id"]): "prodDesc" in div.attrib["id"]):
textbook = div.text_content() textbook = div.text_content()

Loading…
Cancel
Save