import praw
import yfinance as yf
from os import environ
from collections import defaultdict
from queue import Queue as queue
from re import search

# Reddit API client shared by the rest of the script. The client id, secret
# and password are read from the environment (environ.get yields None for an
# unset variable); the user agent and username are hard-coded.
reddit = praw.Reddit(
    client_id=environ.get("CLIENT_ID"),
    client_secret=environ.get("SECRET"),
    password=environ.get("PASSWORD"),
    user_agent="testscript by u/weskerfoot",
    username="weskerfoot",
)


# Upper-case words that match the ticker pattern but are skipped by
# normalize_symbol unless they are written with a leading "$".
ignored = {"DD", "USA", "WBS", "FD"}

# Running mention count per ticker symbol (stored without the "$" prefix);
# unseen keys default to 0.
symbols = defaultdict(int)

def normalize_symbol(text):
    """
    Try to extract a stock symbol from a word, record the mention, and return it.

    The candidate is the first run of 2-5 upper-case ASCII letters found in
    ``text``, optionally preceded by "$". A candidate that is already a key
    of ``symbols`` is counted straight away. Otherwise it is counted only if
    it is not listed in ``ignored`` (a leading "$" overrides the ignore list)
    and the yfinance lookup completes without raising.

    Returns the symbol without its "$" prefix when a mention was counted,
    otherwise None.
    """
    # r"\$?[A-Z]{2,5}" already matches everything r"\$[A-Z]{2,5}" would, so a
    # single search covers both the bare and the "$"-prefixed spelling.
    result = search(r"\$?[A-Z]{2,5}", text)
    if not result:
        return None

    matched = result.group(0)
    explicit = matched.startswith("$")
    sym = matched[1:] if explicit else matched

    # Keys in `symbols` never carry the "$", so test the stripped form; this
    # lets "$XYZ" reuse an earlier successful lookup instead of querying
    # yfinance again.
    if sym in symbols:
        symbols[sym] += 1
        return sym

    if sym in ignored and not explicit:
        return None

    try:
        # NOTE(review): assumes accessing .info raises for an unknown ticker;
        # confirm against the yfinance version in use.
        yf.Ticker(sym).info
    except Exception:
        # Best effort: a failed lookup means the word is not counted.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        return None

    symbols[sym] += 1
    return sym

# TODO: use a bloom filter to skip submissions/comments/etc. that have already been seen.
# TODO: store everything in a database, and store the bloom filter in the database too.

# TODO: for each stock symbol mentioned in a comment, count the number of replies and use that count to weight the mention.
# TODO: store raw numbers for the current day; once the day has elapsed, compress them into one row holding an array of the most-mentioned stocks in sorted order.

def submissions(sr):
    """
    Stream submissions from subreddit ``sr`` and scan the replies to their
    comments.

    For every item in a submission's ``comments`` that has a ``replies``
    attribute, each reply that has a ``body`` attribute is passed to
    normalize_symbol, and that call's return value is yielded. The bodies of
    the comments themselves are not scanned, only their replies.
    """
    for post in reddit.subreddit(sr).stream.submissions():
        # Skip entries of the comment collection that expose no replies.
        threads = (entry for entry in post.comments if hasattr(entry, "replies"))
        for thread in threads:
            for response in thread.replies:
                if not hasattr(response, "body"):
                    continue
                yield normalize_symbol(response.body)

# Drive the generator: the yielded values are not used here; the running
# tally in `symbols` is printed after every processed reply.
for _ in submissions("wallstreetbets"):
    print(symbols)