scrapes stock symbols from wallstreetbets on reddit
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

104 lines
3.3 KiB

import datetime
import praw
import yfinance as yf
import wsbfin.ingest.model
from collections import defaultdict
from os import environ
from queue import Queue as queue
from re import search
from sqlalchemy import Column, Text, Integer, Date
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# SQLite database file in the working directory; echo=True makes SQLAlchemy
# log the SQL statements it emits.
engine = create_engine("sqlite:///stocks.db", echo=True)

# Single module-wide ORM session bound to the engine above.
session = sessionmaker(bind=engine)()

# Reddit client. The client id, secret and password are read from the
# environment (CLIENT_ID, SECRET, PASSWORD); environ.get yields None for any
# variable that is not set.
reddit = praw.Reddit(
    client_id=environ.get("CLIENT_ID"),
    client_secret=environ.get("SECRET"),
    password=environ.get("PASSWORD"),
    user_agent="testscript by u/weskerfoot",
    username="weskerfoot",
)
# FINRA bonds
# Seeking Alpha
# SEC filings
# OpenInsider
# Moody's
# Twitter
# WhaleWisdom
# StockCharts
# tag with industries!
# All-caps words that match the ticker pattern but should not be treated as
# stock symbols unless they are explicitly written with a "$" prefix.
ignored = {"DD", "USA", "WSB", "FD", "TV", "GOAT", "YOLO"}
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()


class Stock(Base):
    """ORM model for one row of the ``stocks`` table."""

    __tablename__ = "stocks"

    id = Column(Integer, primary_key=True)  # primary key
    symbol = Column(Text)  # ticker symbol
    name = Column(Text)  # company name as reported by the lookup
    count = Column(Integer)  # mentions recorded for this symbol on `day`
    day = Column(Date)  # calendar day the count belongs to

    def __repr__(self):
        """Return the row as ``"<name>, <symbol>, <day>"``."""
        fields = (self.name, self.symbol, self.day)
        return ", ".join(str(field) for field in fields)
def get_stock_info(text):
    """
    Try to extract a stock symbol from a piece of text, and return info about it.

    A candidate is a run of 2-5 upper-case letters, optionally prefixed with
    ``$``, that is not embedded in a longer alphanumeric word (so the first
    five letters of ``HELLOWORLD`` are not a candidate). Bare candidates that
    appear in ``ignored`` are skipped and scanning continues; a ``$`` prefix
    marks the candidate as an explicit ticker and bypasses ``ignored``.
    Only the first acceptable candidate is looked up.

    Returns the value of ``yf.Ticker(symbol).info`` for that candidate, or
    ``None`` when ``text`` is empty/``None``, contains no acceptable
    candidate, or the lookup raises ``KeyError``.
    """
    # Function-scope import: the module itself only imports ``search``.
    from re import finditer

    if not text:
        return None

    # The lookarounds reject matches glued to other letters or digits.
    candidate_pattern = r"(?<![A-Za-z0-9])\$?[A-Z]{2,5}(?![A-Za-z0-9])"

    for match in finditer(candidate_pattern, text):
        token = match.group(0)
        if token.startswith("$"):
            # Explicit "$TICKER" form: always accepted, prefix stripped.
            sym = token[1:]
        elif token in ignored:
            # Common all-caps word; keep looking for a real symbol.
            continue
        else:
            sym = token

        try:
            return yf.Ticker(sym).info
        except KeyError:  # XXX what exceptions can yf throw?
            return None

    return None
# Use a Bloom filter to skip submissions/comments/etc. that have already been seen.
# Store everything in the database, and store the Bloom filter in the database too.
# For each stock symbol mentioned in a comment, count the number of replies and
# use that count to weight the mention.
# Store raw numbers for the current day; after the current day has elapsed,
# compress them into one row holding an array of the most-mentioned stocks in
# sorted order.
def submissions(sr):
    """
    Stream submissions from subreddit ``sr`` and tally stock mentions per day.

    For each streamed submission, every reply to every entry of
    ``submission.comments`` is passed to ``get_stock_info``. When that yields
    info containing a symbol, the ``Stock`` row for that symbol and today's
    date is incremented, or a new row with a count of 1 is added if none
    exists. The session is committed after each recorded mention.

    The function works purely through side effects (printing the symbol and
    writing to the database) and returns nothing.
    """
    for submission in reddit.subreddit(sr).stream.submissions():
        for comment in submission.comments:
            # Not every entry exposes replies; skip the ones that do not.
            if not hasattr(comment, "replies"):
                continue

            for reply in comment.replies:
                if not hasattr(reply, "body"):
                    continue

                info = get_stock_info(reply.body)
                if not info:
                    continue

                symbol = info.get("symbol")
                if not symbol:
                    # The lookup returned data without a symbol, so there is
                    # nothing meaningful to key the row on.
                    continue
                print(symbol)

                # Compute the day once so lookup and insert cannot disagree,
                # and pass a plain date because the column type is Date.
                today = datetime.date.today()

                stock = (
                    session.query(Stock)
                    .filter_by(symbol=symbol, day=today)
                    .one_or_none()
                )
                if stock:
                    stock.count = stock.count + 1
                else:
                    session.add(
                        Stock(
                            symbol=symbol,
                            name=info.get("longName"),
                            count=1,
                            day=today,
                        )
                    )
                session.commit()
if __name__ == "__main__":
    # Make sure the "stocks" table exists before the first query;
    # create_all() only creates tables that are missing, so it is safe to
    # run on every start.
    Base.metadata.create_all(engine)

    # submissions() returns nothing and does its work as a side effect, so
    # call it directly instead of iterating over its return value.
    submissions("wallstreetbets")