From 5a512a6e05b0886552bc0bccf6163e6ab31678fa Mon Sep 17 00:00:00 2001 From: wes Date: Wed, 17 May 2017 14:10:47 -0400 Subject: [PATCH] minor refactoring --- crawler/classes.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/crawler/classes.py b/crawler/classes.py index 8d889c1..57c7bb7 100755 --- a/crawler/classes.py +++ b/crawler/classes.py @@ -65,7 +65,6 @@ def parse_semester(sem): except IndexError: return sem - def timeparse(time): """ Parse the time into numbers @@ -96,20 +95,18 @@ class Class(object): def __iter__(self): return iter((self.title, sec) for sec in self.sections) - def hasCode(self): + @property + def code(self): """ Heuristic for checking if a course has a code associated with it Checks if it has more than two words and if they start with uppercase letters """ splitted = self.title.strip().split(" ") - return ((len(splitted) >= 2) and - (splitted[0].upper() == splitted[0]) and - (splitted[1].upper() == splitted[1])) + if ((len(splitted) >= 2) and + (splitted[0].upper() == splitted[0]) and + (splitted[1].upper() == splitted[1])): + return splitted - @property - def code(self): - if self.hasCode(): - return self.title.strip().split(" ")[1].strip() return False @property @@ -194,6 +191,7 @@ class Section(dict): return (""" Time = %s, Location = %s, Instructor = %s, Semester Running = %s """ % (self.date, self.loc, self.prof, self.sem)) + def __gt__(self, x): if isinstance(self.day, list): raise NotImplementedError @@ -230,12 +228,18 @@ def parseSection(section): return classinfo def getSectionInfo(table): + """ + Extract section information from the parsed course table + """ trs = table.xpath(".//tr") for tr in trs: if tr.xpath("@id") and search(r"SSR_CLSRCH", tr.xpath("@id")[0]): yield parseSection(tr) def parseColumns(subject, html): + """ + Extract class columns + """ parsed = lxh.fromstring(html) classInfo = (list(getSectionInfo(table)) for table in @@ -250,6 +254,9 @@ def parseColumns(subject, html): return list(zip(classNames, classInfo)) def getCodes(html): + """ + Get department course codes + """ parsed = lxh.fromstring(html) return (code.text_content().encode("UTF-8") for code in