import praw
import yfinance as yf
from os import environ
from collections import defaultdict
from queue import Queue as queue
from re import search

# Reddit client; credentials are read from the environment.
reddit = praw.Reddit(
    client_id=environ.get("CLIENT_ID"),
    client_secret=environ.get("SECRET"),
    password=environ.get("PASSWORD"),
    user_agent="testscript by u/weskerfoot",
    username="weskerfoot",
)

# Upper-case words that match the symbol pattern but are skipped unless they
# are written with an explicit "$" prefix.
ignored = {"DD", "USA", "WBS", "FD"}

# Mention count per symbol, keyed by the symbol WITHOUT its "$" prefix.
symbols = defaultdict(int)

# An optional "$" followed by two to five upper-case ASCII letters.
_SYMBOL_PATTERN = r"\$?[A-Z]{2,5}"


def normalize_symbol(text):
    """
    Find the first stock-symbol-like token in ``text`` and count it.

    The candidate is the first run of two to five upper-case letters,
    optionally prefixed with "$". The "$" is stripped before the candidate is
    looked up or stored, so "$GME" and "GME" share one counter.

    A candidate listed in ``ignored`` is skipped unless it carried the "$"
    prefix. A candidate already present in ``symbols`` is counted without
    another yfinance lookup. Any other candidate is counted only if the
    yfinance lookup completes without raising.

    The result is recorded in the module-level ``symbols`` mapping; the
    function itself always returns None.
    """
    found = search(_SYMBOL_PATTERN, text)
    if found is None:
        return None

    token = found.group(0)
    explicit = token.startswith("$")
    sym = token[1:] if explicit else token

    # Check the ignore list before the cache so that a bare ignored word
    # stays ignored even after its "$"-prefixed form has been counted.
    if sym in ignored and not explicit:
        return None

    # Already validated earlier: count it without hitting the network again.
    if sym in symbols:
        symbols[sym] += 1
        return None

    try:
        # NOTE(review): this relies on the lookup raising for unknown
        # symbols -- confirm against the installed yfinance version.
        yf.Ticker(sym).info
    except Exception:
        # Best effort: a failed lookup means "not a symbol". Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        return None

    symbols[sym] += 1
    return None


# TODO:
# - use a bloom filter to skip already-seen submissions/comments/etc.
# - store everything in a database, including the bloom filter itself
# - get the stock symbol mentioned in a comment -> count its number of
#   replies and use that to weight the mention
# - store raw numbers for the current day; after the day has elapsed,
#   compress them into one row holding an array of the most mentioned
#   stocks in sorted order


def submissions(sr):
    """
    Stream submissions from subreddit ``sr`` and process comment replies.

    For every reply (to a top-level comment) that has a ``body``, the body is
    passed to ``normalize_symbol`` and that call's result is yielded, so the
    generator yields once per processed reply.
    """
    for submission in reddit.subreddit(sr).stream.submissions():
        for comment in submission.comments:
            # Skip entries in the comment listing that carry no replies
            # attribute.
            if not hasattr(comment, "replies"):
                continue
            for reply in comment.replies:
                if hasattr(reply, "body"):
                    yield normalize_symbol(reply.body)


# Drive the stream; print the running totals after every processed reply.
for _ in submissions("wallstreetbets"):
    print(symbols)