Browse Source

Construct Message instances for each message in a convo

pull/72/head
Wesley Kerfoot 6 years ago
parent
commit
7697af2481
  1. 36
      deletefb/tools/conversations.py
  2. 9
      deletefb/types.py

36
deletefb/tools/conversations.py

@ -1,11 +1,14 @@
from .archive import archiver from .archive import archiver
from ..types import Conversation from ..types import Conversation, Message
from .common import SELENIUM_EXCEPTIONS, logger, click_button from .common import SELENIUM_EXCEPTIONS, logger, click_button
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
from pendulum import now from pendulum import now
from json import loads
import lxml.html as lxh
LOG = logger(__name__) LOG = logger(__name__)
@ -65,6 +68,28 @@ def get_conversations(driver):
return conversations return conversations
def get_convo_images(driver):
"""
Gets all links to images in a messenger conversation
Removes duplicates
"""
for img in set(lxh.fromstring(driver.page_source).xpath("//img")):
yield img.get("src")
def get_convo_messages(driver):
"""
Gets all messages in a conversation
"""
for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"):
data_store = loads(msg.get("data-store"))
msg_text = msg.text_content()
yield Message(
name=data_store.get("author"),
content=msg_text,
timestamp=data_store.get("timestamp")
)
def archive_conversation(driver, convo): def archive_conversation(driver, convo):
print(convo) print(convo)
@ -79,6 +104,7 @@ def archive_conversation(driver, convo):
LOG.exception("Could not load more messages") LOG.exception("Could not load more messages")
return return
# Expand conversation until we've reached the beginning
while True: while True:
try: try:
see_older = driver.find_element_by_xpath("//*[contains(text(), 'See Older Messages')]") see_older = driver.find_element_by_xpath("//*[contains(text(), 'See Older Messages')]")
@ -92,7 +118,11 @@ def archive_conversation(driver, convo):
click_button(driver, see_older) click_button(driver, see_older)
except SELENIUM_EXCEPTIONS: except SELENIUM_EXCEPTIONS:
continue continue
driver.find_element_by_xpath("html").save_screenshot("%s.png" % convo.name)
#for img in get_convo_images(driver):
#print(img)
convo.messages = list(get_convo_messages(driver))
def delete_conversations(driver, year=None): def delete_conversations(driver, year=None):
""" """
@ -110,8 +140,8 @@ def delete_conversations(driver, year=None):
if year and convo.timestamp: if year and convo.timestamp:
if convo.timestamp.year == int(year): if convo.timestamp.year == int(year):
archive_conversation(driver, convo) archive_conversation(driver, convo)
print(convo.messages)
# Otherwise we're looking at all convos # Otherwise we're looking at all convos
elif not year: elif not year:
archive_conversation(driver, convo) archive_conversation(driver, convo)

9
deletefb/types.py

@ -36,6 +36,15 @@ class Conversation:
url = attr.ib() url = attr.ib()
name = attr.ib() name = attr.ib()
timestamp = attr.ib(converter=convert_timestamp) timestamp = attr.ib(converter=convert_timestamp)
messages = attr.ib(default=[])
@attr.s
class Message:
name = attr.ib()
content = attr.ib()
# Remove the last 3 digits from FB's timestamps. They are not standard.
timestamp = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3])))
@attr.s @attr.s
class Page: class Page:

Loading…
Cancel
Save