From 7697af2481598bc4df0bfa7c0969b5797fe481b6 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sun, 28 Jul 2019 12:22:58 -0400 Subject: [PATCH] Construct Message instances for each message in a convo --- deletefb/tools/conversations.py | 36 ++++++++++++++++++++++++++++++--- deletefb/types.py | 9 +++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 253b281..c6f57fb 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,11 +1,14 @@ from .archive import archiver -from ..types import Conversation +from ..types import Conversation, Message from .common import SELENIUM_EXCEPTIONS, logger, click_button from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.action_chains import ActionChains from pendulum import now +from json import loads + +import lxml.html as lxh LOG = logger(__name__) @@ -65,6 +68,28 @@ def get_conversations(driver): return conversations +def get_convo_images(driver): + """ + Gets all links to images in a messenger conversation + Removes duplicates + """ + for img in set(lxh.fromstring(driver.page_source).xpath("//img")): + yield img.get("src") + +def get_convo_messages(driver): + """ + Gets all messages in a conversation + """ + + for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"): + data_store = loads(msg.get("data-store")) + msg_text = msg.text_content() + + yield Message( + name=data_store.get("author"), + content=msg_text, + timestamp=data_store.get("timestamp") + ) def archive_conversation(driver, convo): print(convo) @@ -79,6 +104,7 @@ def archive_conversation(driver, convo): LOG.exception("Could not load more messages") return + # Expand conversation until we've reached the beginning while True: try: see_older = driver.find_element_by_xpath("//*[contains(text(), 'See Older Messages')]") @@ -92,7 +118,11 @@ def archive_conversation(driver, convo): click_button(driver, see_older) except SELENIUM_EXCEPTIONS: continue - driver.find_element_by_xpath("html").save_screenshot("%s.png" % convo.name) + + #for img in get_convo_images(driver): + #print(img) + + convo.messages = list(get_convo_messages(driver)) def delete_conversations(driver, year=None): """ @@ -110,8 +140,8 @@ def delete_conversations(driver, year=None): if year and convo.timestamp: if convo.timestamp.year == int(year): archive_conversation(driver, convo) + print(convo.messages) # Otherwise we're looking at all convos elif not year: archive_conversation(driver, convo) - diff --git a/deletefb/types.py b/deletefb/types.py index b04e6ab..0e589d7 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -36,6 +36,15 @@ class Conversation: url = attr.ib() name = attr.ib() timestamp = attr.ib(converter=convert_timestamp) + messages = attr.ib(default=[]) + +@attr.s +class Message: + name = attr.ib() + content = attr.ib() + + # Remove the last 3 digits from FB's timestamps. They are not standard. + timestamp = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) @attr.s class Page: