Scrapes stock symbols from r/wallstreetbets on Reddit.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

63 lines
1.9 KiB

import praw
import yfinance as yf
from os import environ
from collections import defaultdict
from queue import Queue as queue
from re import search
# Module-wide Reddit client. The client id, secret and password are read from
# the environment (CLIENT_ID, SECRET, PASSWORD); the username and user agent
# are fixed in code.
reddit = praw.Reddit(
    client_id=environ.get("CLIENT_ID"),
    client_secret=environ.get("SECRET"),
    password=environ.get("PASSWORD"),
    user_agent="testscript by u/weskerfoot",
    username="weskerfoot",
)
# Bare (un-prefixed) matches of these tokens are not looked up as tickers;
# a "$"-prefixed match bypasses this list.
ignored = {"DD", "USA", "WBS", "FD"}

# Running mention count per accepted symbol; keys are stored without "$".
symbols = defaultdict(int)


def normalize_symbol(text):
    """
    Find the first ticker-like token in *text* and count it in ``symbols``.

    A candidate is the first run of 2-5 uppercase ASCII letters in *text*,
    optionally prefixed with "$". The "$" is stripped before the candidate
    is compared against or stored in ``symbols``.

    * A candidate already present in ``symbols`` is counted immediately.
    * A bare candidate listed in ``ignored`` is skipped.
    * Any other candidate is counted only if the yfinance lookup does not
      raise.

    Always returns None; the outcome is recorded in ``symbols``.
    """
    match = search(r"\$?[A-Z]{2,5}", text)
    if match is None:
        return None

    token = match.group(0)
    explicit = token.startswith("$")
    sym = token[1:] if explicit else token

    # Compare the stripped form: keys in ``symbols`` never carry "$", so
    # checking the raw token would miss every "$"-prefixed repeat mention
    # and trigger a fresh lookup each time.
    if sym in symbols:
        symbols[sym] += 1
        return None

    if sym in ignored and not explicit:
        return None

    try:
        # Used purely as a validity probe: the lookup is expected to raise
        # for unknown tickers.
        # NOTE(review): confirm this holds for the installed yfinance version.
        yf.Ticker(sym).info
    except Exception:
        # Best effort: an unknown symbol or a failed lookup is simply not
        # counted. KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

    symbols[sym] += 1
    return None
# TODO: use a Bloom filter to skip submissions/comments that have already been seen.
# TODO: store everything in a database, including the Bloom filter itself.
# TODO: for each stock symbol mentioned in a comment, count the number of replies and use that count to weight the mention.
# TODO: store raw numbers for the current day; once the day has elapsed, compress them into a single row holding an array of the most-mentioned stocks in sorted order.
def submissions(sr):
    """
    Stream submissions from subreddit *sr* and scan the replies to their comments.

    For every reply (to a comment of a streamed submission) that has a
    ``body``, the body is passed to ``normalize_symbol`` and that call's
    result is yielded. Comments without a ``replies`` attribute and replies
    without a ``body`` attribute are skipped.
    """
    stream = reddit.subreddit(sr).stream.submissions()
    for post in stream:
        for top_level in post.comments:
            if hasattr(top_level, "replies"):
                for child in top_level.replies:
                    if not hasattr(child, "body"):
                        continue
                    yield normalize_symbol(child.body)
# Drive the generator: each yielded item corresponds to one scanned reply, so
# the running symbol counts are printed after every reply. The yielded value
# itself is not used.
for _ in submissions("wallstreetbets"):
    print(symbols)