stock_scraper/wsbfin/wsbfin.py


								import praw

								import yfinance as yf

								from os import environ

								from collections import defaultdict

								from queue import Queue as queue

								from re import search


								# Module-wide Reddit client used by the scraping code below. The client id,
								# client secret and password are read from the CLIENT_ID, SECRET and PASSWORD
								# environment variables; environ.get() passes None for any that are unset.
								reddit = praw.Reddit(
								    username="weskerfoot",
								    password=environ.get("PASSWORD"),
								    client_id=environ.get("CLIENT_ID"),
								    client_secret=environ.get("SECRET"),
								    user_agent="testscript by u/weskerfoot",
								)


								# Bare words that are never counted as tickers; writing one as a cashtag
								# (e.g. "$DD") bypasses this list.
								# NOTE(review): "WBS" may be a typo for "WSB" -- confirm before changing.
								ignored = {"DD", "USA", "WBS", "FD"}

								# Running mention count per symbol (stored without the "$" prefix). Membership
								# also marks a symbol as already validated, so it is not looked up again.
								symbols = defaultdict(int)


								def normalize_symbol(text):
								    """
								    Try to extract a stock symbol from ``text``, count it, and return it.

								    Only the first candidate in ``text`` is considered: a standalone run of
								    2-5 uppercase ASCII letters, optionally written as a cashtag (``$GME``).
								    A bare word listed in ``ignored`` is rejected; the cashtag form bypasses
								    that list. A symbol not yet present in ``symbols`` is checked with
								    yfinance before it is counted.

								    Returns the symbol without its ``$`` prefix after incrementing its entry
								    in ``symbols``, or ``None`` when ``text`` is empty, no candidate is
								    found, the candidate is ignored, or the yfinance lookup raises.
								    """
								    if not text:
								        return None

								    # Word boundaries keep fragments of longer words ("AMAZING" -> "AMAZI")
								    # from being mistaken for tickers.
								    match = search(r"\$?\b[A-Z]{2,5}\b", text)
								    if match is None:
								        return None

								    token = match.group(0)
								    explicit = token.startswith("$")
								    sym = token[1:] if explicit else token

								    # Check the ignore list first so that a bare ignored word is not counted
								    # merely because its cashtag form was validated earlier.
								    if not explicit and sym in ignored:
								        return None

								    # Compare the stripped symbol: keys in `symbols` never carry the "$".
								    if sym not in symbols:
								        try:
								            # NOTE(review): relies on `.info` raising for unknown tickers --
								            # confirm against the installed yfinance version.
								            yf.Ticker(sym).info
								        except Exception:
								            # Best-effort validation: a failed lookup means "not a symbol".
								            # KeyboardInterrupt/SystemExit are deliberately not caught.
								            return None

								    symbols[sym] += 1
								    return sym


								# TODO: use a Bloom filter to skip submissions, comments, etc. that have already been seen.

								# TODO: store everything in a database, and persist the Bloom filter in the database too.


								# TODO: for each stock symbol mentioned in a comment, count the number of replies and use that count to weight the mention.

								# TODO: store raw numbers for the current day; once the day has elapsed, compress them into a single row holding an array of the most-mentioned stocks in sorted order.


								def submissions(sr):
								    """
								    Stream submissions from subreddit ``sr`` and process their replies.

								    For every entry of ``submission.comments`` that has a ``replies``
								    attribute, each reply that has a ``body`` is passed to
								    ``normalize_symbol`` and the result is yielded. Entries lacking those
								    attributes are skipped (presumably PRAW "load more" placeholders --
								    verify). The bodies of the ``submission.comments`` entries themselves
								    are not examined.
								    """
								    feed = reddit.subreddit(sr).stream.submissions()
								    for post in feed:
								        for parent in post.comments:
								            if hasattr(parent, "replies"):
								                for child in parent.replies:
								                    if not hasattr(child, "body"):
								                        continue
								                    yield normalize_symbol(child.body)


								# Drive the scraper: pull results from the "wallstreetbets" stream and print
								# the running tally in `symbols` after each one. The yielded value is unused.
								for _ in submissions("wallstreetbets"):
								    print(symbols)