Browse Source

minor refactoring

master
wes 8 years ago
parent
commit
5a512a6e05
  1. 25
      crawler/classes.py

25
crawler/classes.py

@ -65,7 +65,6 @@ def parse_semester(sem):
except IndexError: except IndexError:
return sem return sem
def timeparse(time): def timeparse(time):
""" """
Parse the time into numbers Parse the time into numbers
@ -96,20 +95,18 @@ class Class(object):
def __iter__(self): def __iter__(self):
return iter((self.title, sec) for sec in self.sections) return iter((self.title, sec) for sec in self.sections)
def hasCode(self): @property
def code(self):
""" """
Heuristic for checking if a course has a code associated with it Heuristic for checking if a course has a code associated with it
Checks if it has more than two words and if they start with uppercase letters Checks if it has more than two words and if they start with uppercase letters
""" """
splitted = self.title.strip().split(" ") splitted = self.title.strip().split(" ")
return ((len(splitted) >= 2) and if ((len(splitted) >= 2) and
(splitted[0].upper() == splitted[0]) and (splitted[0].upper() == splitted[0]) and
(splitted[1].upper() == splitted[1])) (splitted[1].upper() == splitted[1])):
return splitted
@property
def code(self):
if self.hasCode():
return self.title.strip().split(" ")[1].strip()
return False return False
@property @property
@ -194,6 +191,7 @@ class Section(dict):
return (""" return ("""
Time = %s, Location = %s, Instructor = %s, Semester Running = %s Time = %s, Location = %s, Instructor = %s, Semester Running = %s
""" % (self.date, self.loc, self.prof, self.sem)) """ % (self.date, self.loc, self.prof, self.sem))
def __gt__(self, x): def __gt__(self, x):
if isinstance(self.day, list): if isinstance(self.day, list):
raise NotImplementedError raise NotImplementedError
@ -230,12 +228,18 @@ def parseSection(section):
return classinfo return classinfo
def getSectionInfo(table): def getSectionInfo(table):
"""
Extract section information from the parsed course table
"""
trs = table.xpath(".//tr") trs = table.xpath(".//tr")
for tr in trs: for tr in trs:
if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]): if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]):
yield parseSection(tr) yield parseSection(tr)
def parseColumns(subject, html): def parseColumns(subject, html):
"""
Extract class columns
"""
parsed = lxh.fromstring(html) parsed = lxh.fromstring(html)
classInfo = (list(getSectionInfo(table)) for table in classInfo = (list(getSectionInfo(table)) for table in
@ -250,6 +254,9 @@ def parseColumns(subject, html):
return list(zip(classNames, classInfo)) return list(zip(classNames, classInfo))
def getCodes(html): def getCodes(html):
"""
Get department course codes
"""
parsed = lxh.fromstring(html) parsed = lxh.fromstring(html)
return (code.text_content().encode("UTF-8") for code in return (code.text_content().encode("UTF-8") for code in

Loading…
Cancel
Save