Browse Source

Move code that downloads courses to separate file

master
wes 9 years ago
parent
commit
487459572f
  1. 62
      src/database.py
  2. 26
      src/goasearch.py

62
src/database.py

@ -1,62 +0,0 @@
#! /usr/bin/python2
from sys import argv
from hashlib import sha1
def truncate(docid):
    """
    Truncate a document id to 12 digits.

    The document ID should be based on a hash of unique identifiers.

    The id is converted with str() and only its first 12 characters are
    kept, so a leading '-' sign counts toward those 12 characters. The
    kept prefix is converted back with int(), which raises ValueError
    when the prefix is not a valid integer literal.
    """
    leading = str(docid)[:12]
    return int(leading)
def createResource(textbookInfo, course, dept, coursecode, docid):
    """
    Create (or update) the resource document associated with a course.

    The saved document has four fields:
      "_id"         -- str(truncate(docid))
      "textbooks"   -- textbookInfo(dept, coursecode), both stripped
      "coursetitle" -- "<dept> <coursecode>", both stripped
      "courseinfo"  -- the course argument, stored as given

    If localdb already lists revisions for the id, the "_rev" of the
    first listed revision is copied into the document before saving.

    Returns whatever localdb.save returns, or None when the save raises
    ResourceConflict (a message is printed in that case).

    Example of a course record (presumably the shape of the course
    argument -- the original docstring did not say which value it
    illustrates):
        {
            'books': [],
            'dept': 'COLLAB',
            'code': '2C03',
            'sections': [
                {
                    'prof': 'Lisa Pender',
                    'sem': '2015/09/08 - 2015/12/08',
                    'day': 'Mo'
                },
                {
                    'prof': 'Staff',
                    'sem': '2015/09/08 - 2015/12/08',
                    'day': 'Th'
                }
            ],
            'title': 'COLLAB 2C03 - Sociology I'
        }
    """
    # NOTE(review): localdb and ResourceConflict are not defined or
    # imported in the visible part of this file -- confirm where they
    # are supposed to come from.
    department = dept.strip()
    code = coursecode.strip()
    textbooks = textbookInfo(department, code)

    # We truncate the id so we can have nicer looking URLs,
    # since the id will be used to point to the resource page for that course
    _id = str(truncate(docid))
    fields = {
        "_id": _id,
        "textbooks": textbooks,
        "coursetitle": "%s %s" % (department, code),
        "courseinfo": course,
    }
    try:
        revisions = list(localdb.revisions(_id))
        if revisions:
            # Carry over the revision marker of the first listed revision
            # so the save addresses the document that already exists.
            fields["_rev"] = dict(revisions[0])["_rev"]
        return localdb.save(fields)
    except ResourceConflict:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Resource for %s already exists, not creating a new one" % (docid,))

26
src/goasearch.py

@ -1,14 +1,18 @@
#! /usr/bin/python2
from search import indexListing
from textbookExceptions import UnIndexable
from mcmaster.classes import allCourses, classToJSON, indexListing
from itertools import imap
# predictive data
# switch to elasticsearch's prediction
import database
import predictions
class GOASearch(object):
    """Search entry point; currently holds no state and defines no behaviour."""

    def __init__(self):
        # __init__ must return None: returning self made every
        # GOASearch() call raise TypeError.
        pass
# Make sure the search index exists before any course is indexed.
# NOTE(review): createIndex is not among the imports visible in this view;
# if it really is missing, the resulting NameError is caught and printed by
# the handler below and the script carries on -- confirm.
try:
    print("Trying to create the index if it does not exist already")
    createIndex("oersearch")
except Exception as e:
    # Best effort: report the failure and continue with indexing.
    print(e)

print("Downloading course info")
# Convert each course with classToJSON lazily and index it one at a time.
for c in imap(classToJSON, allCourses()):
    try:
        print(indexListing(c))
    except UnIndexable as e:
        # A listing that cannot be indexed is reported and skipped.
        print(e)

Loading…
Cancel
Save