You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
113 lines
2.9 KiB
113 lines
2.9 KiB
#! /usr/bin/python3
|
|
from functools import reduce
|
|
from operator import or_, and_
|
|
from hashlib import sha1
|
|
from sys import exit, stderr
|
|
from json import loads
|
|
from itertools import chain
|
|
from syslog import syslog
|
|
|
|
import elasticsearch
|
|
from elasticsearch_dsl import FacetedSearch, Search, Q
|
|
from elasticsearch_dsl.aggs import Terms, DateHistogram
|
|
|
|
from textbookExceptions import UnIndexable
|
|
|
|
# Generic Elasticsearch client for now (no connection options are passed, so the library defaults apply)
|
|
es = elasticsearch.Elasticsearch()
|
|
|
|
def summarize(text):
    """
    Shorten text to at most its first six space-separated words.

    Text containing more than six words (splitting on single spaces) is
    cut down to the first six and suffixed with ".."; anything shorter is
    returned unchanged.
    """
    # Splitting at most six times is enough: a seventh piece means there
    # are more than six words, and the first six pieces are unaffected.
    pieces = text.split(" ", 6)
    if len(pieces) <= 6:
        return text
    return " ".join(pieces[:6]) + ".."
|
|
|
|
def search(field):
    """
    Build a match-query factory bound to one field.

    Returns a one-argument function that takes a search term and returns
    a "match" Q object keyed on the given field.
    """
    def match_on_field(term):
        # The field name is only known at runtime, so it is passed as a
        # keyword argument through dict unpacking.
        return Q("match", **{field: term})

    return match_on_field
|
|
|
|
def filterSem(term):
    """
    Build a "terms" query on "sections.sem" for a single value.

    The term is wrapped in a one-element list before being handed to Q.
    """
    sem_clause = {"sections.sem": [term]}
    return Q("terms", **sem_clause)
|
|
|
|
def filterSections(secs):
    """
    Get rid of tutorial sections.

    Tutorials almost always list "Staff" as the instructor, so every
    section whose prof contains "Staff" is dropped.  This is only a
    heuristic.

    Returns the remaining sections as a list, or False when none remain.
    """
    kept = []
    for section in secs.sections:
        if "Staff" in section.prof:
            continue
        kept.append(section)
    return kept if kept else False
|
|
|
|
def search_courses(terms):
    """
    Run a search for courses.

    terms maps field names to search terms.  Entries with an empty term,
    or with a field that has no entry in `searchers`, are ignored.  The
    remaining queries are AND-ed together and run against the
    "course_test" index, limited to the first 100 hits.

    Returns a list of dicts, one per hit that still has at least one
    section after filterSections.  Each dict is the first surviving
    section's to_dict() extended with:
      - "title": the hit's title
      - "dept":  the hit's dept, only when it is not contained in the title
      - "books": a list of booktitle/bookauthor/bookprice dicts, or ""
                 when the hit has no "books" field
    An empty list is returned when no usable query terms were given.
    """
    syslog(repr(terms))

    # A list of all the queries we want to run
    qs = [
        searchers[field](term)
        for field, term in terms.items()
        if term and field in searchers
    ]

    if not qs:
        # No queries = no results
        return []

    # Reduce joins all of the queries into one query.  It searches for
    # the conjunction of all of them, so each query counts equally.
    q = reduce(and_, qs)

    # only return up to 100 results for now
    s = Search(using=es, index="course_test").query(q)[0:100]

    hits = s.execute()
    syslog(repr(hits))

    results = []
    for hit in hits:
        # Filter each hit exactly once; the result is needed both to
        # decide whether to keep the hit and to pick its section.
        sections = filterSections(hit)
        if not sections:
            # Only tutorial ("Staff") sections: skip this course
            continue

        course = sections[0].to_dict()
        course["title"] = hit.title

        # Skip the dept when the title already contains it
        if hit["dept"] not in course["title"]:
            course["dept"] = hit.dept

        if "books" in hit:
            course["books"] = [
                {
                    "booktitle": book["title"],
                    "bookauthor": book["author"],
                    "bookprice": book["price"],
                }
                for book in hit.books
            ]
        else:
            # Callers receive an empty string (not a list) when there
            # are no books; kept as-is for compatibility.
            course["books"] = ""

        results.append(course)

    return results
|
|
|
|
# Maps each supported search field to the function that builds its query.
# Every field uses a match query on itself, except "sem", which uses
# filterSem.
searchers = {
    field: search(field)
    for field in ("title", "loc", "time", "prof", "day")
}
searchers["sem"] = filterSem
|
|
|