TextbookEngine/mcmaster/sylla.py


								#! /usr/bin/python2


								from sys import argv

								from itertools import chain, islice, izip_longest, izip as zip

								from re import search, sub

								from functools import total_ordering

								from re import sub


								import datetime as dt

								import lxml.html as lxh

								import requests


								# The purpose of this module is to download and parse syllabi from various
								# departments so that they can be correlated with individual courses.


								class Price(object):
								    """A dollar amount together with the status text found beside it."""

								    def __init__(self, amnt, status):
								        """
								        Parse a price string and remember its status.

								        amnt   -- price text; the first character is skipped (presumably a
								                  "$" sign -- confirm against the store page) and the rest is
								                  parsed as a float, e.g. "$12.50" or "$1,234.50"
								        status -- stored unchanged and shown by repr()

								        Raises ValueError when the remainder of amnt is not a number.
								        """
								        # float() rejects thousands separators, so drop them before parsing;
								        # otherwise one "$1,234.50" entry makes the whole price parse fail.
								        self.dollars = float(amnt[1:].replace(",", ""))
								        self.status = status

								    def __repr__(self):
								        """Render as "$<dollars> <status>", e.g. "$12.5 NEW"."""
								        return "$%s %s" % (repr(self.dollars), self.status)


								class Book(object):
								    """Pairs a book title with its price."""

								    def __init__(self, title, price):
								        """Store title and price exactly as given."""
								        self.price = price
								        self.title = title

								    def __repr__(self):
								        """Render as a two-element, JSON-like list of quoted strings."""
								        fields = (self.title, repr(self.price))
								        return '["%s", "%s"]' % fields


								def grouper(n, iterable, fillvalue=None):
								    """
								    Collect items into tuples of length n, padding the last with fillvalue.

								    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
								    """
								    # The very same iterator is handed over n times, so each output tuple
								    # consumes n consecutive items from it.
								    source = iter(iterable)
								    columns = (source,) * n
								    return izip_longest(*columns, fillvalue=fillvalue)


								# Campus store textbook search URL template. The two %s slots (wsDEPTG1 and
								# wsCOURSEG1) are filled with (dept, code) by books().
								searchUrl = "https://campusstore.mcmaster.ca/cgi-mcm/ws/txsub.pl?wsDEPTG1=%s&wsDEPTDESC1=&wsCOURSEG1=%s&crit_cnt=1"


								def normalize(word):
								    """
								    Capitalize one word: first character upper-cased, the rest lower-cased.

								    Words of length 0 or 1 are returned untouched.
								    """
								    if len(word) <= 1:
								        return word
								    head = word[0].upper()
								    tail = "".join(word[1:]).lower()
								    return "%s%s" % (head, tail)


								def parseAuthor(author):
								    """
								    Swap the first two space-separated words of an author string.

								    "Last First ..." becomes "First Last"; any further words are dropped.
								    A string without a space is returned unchanged.
								    """
								    parts = author.split(" ")
								    if len(parts) > 1:
								        return "%s %s" % (parts[1], parts[0])
								    return author


								def normwords(phrase):
								    """
								    Apply normalize() to every single-space-separated word of phrase and
								    join the results back together with single spaces.
								    """
								    return " ".join(normalize(piece) for piece in phrase.split(" "))


								def books(dept, code, withPrices):
								    """
								    Yield the textbooks the campus store lists for one course.

								    The search page for (dept, code) is fetched through searchUrl and every
								    <div> whose id contains "prodDesc" is treated as one book entry.

								    dept, code -- substituted into searchUrl
								    withPrices -- when true, each result carries a third element: repr() of
								                  the next Price from prices(), or None when no price is
								                  left for that entry

								    Yields (title, author) or (title, author, price) tuples; title and
								    author are passed through normwords().
								    """
								    html = requests.get(searchUrl % (dept, code)).text
								    parsed = lxh.fromstring(html)

								    # Prices are only parsed when asked for, so a page whose prices cannot be
								    # read no longer breaks the plain (title, author) listing. list() keeps
								    # .pop() available whatever sequence type prices() hands back.
								    pricelist = list(prices(parsed)) if withPrices else []

								    for div in parsed.xpath(".//div"):
								        if "prodDesc" not in div.attrib.get("id", ""):
								            continue

								        textbook = div.text_content()

								        # The first inline <span> under the entry's parent holds the author
								        # after a ":" label: keep what follows the first colon (later colons
								        # are dropped), trim it, then remove commas.
								        label = (div.getparent()
								                 .xpath(".//span[@class='inline']")
								                 [0].text_content())
								        author = "".join(label.split(":")[1:]).strip().replace(",", "")

								        if withPrices:
								            # prices() returns the list reversed, so pop() hands prices out
								            # in page order; entries beyond the parsed prices get None.
								            price = repr(pricelist.pop()) if pricelist else None
								            yield (normwords(textbook), normwords(author), price)
								        else:
								            yield (normwords(textbook), normwords(author))


								def prices(html):
								    """
								    Get the prices from a search result page

								    html -- parsed lxml tree of the search result page

								    For every checkbox <input> that is a direct child of a <p>, the first
								    whitespace-separated token of that <p>'s text is collected. Consecutive
								    tokens are paired as (amount, status) and turned into Price objects.

								    Returns a list of Price objects in reverse page order, so that
								    list.pop() hands them out in page order. Returns [] when there are no
								    tokens or when any amount cannot be parsed.
								    """
								    tokens = [
								        box.getparent().text_content().split()[0]
								        for box in html.xpath("//p/input[@type='checkbox']")
								    ]

								    pairs = list(grouper(2, tokens))
								    pairs.reverse()

								    try:
								        # Built as a concrete list so callers can pop() from it.
								        return [Price(amount, status) for amount, status in pairs]
								    except ValueError:
								        # Best effort: an unparsable amount means no prices at all.
								        return []


								def textbookInfo(dept, code, withPrices=False):
								    """
								    Collect every textbook entry books() yields for a course into a list.
								    """
								    entries = books(dept, code, withPrices)
								    return list(entries)


								def humanities():
								    """
								    Placeholder for downloading humanities syllabi.

								    Currently always returns an empty list.
								    """
								    syllabi = []
								    return syllabi


								# Example: getting the textbook info for Personality Theory
								# (PSYCH = department, 2B03 = course code)
								# print textbookInfo("PSYCH", "2B03")