Use cattrs to serialize conversations in archive

6 years ago · 01647262c3
5 changed files with 43 additions and 31 deletions
--- a/deletefb/deletefb.py
+++ b/deletefb/deletefb.py
@@ -4,7 +4,7 @@ from .tools.config import settings
 from .tools.likes import unlike_pages
 from .tools.login import login
 from .tools.wall import delete_posts
-from .tools.conversations import delete_conversations
+from .tools.conversations import traverse_conversations
 from .tools.comments import delete_comments
 import argparse
@@ -119,7 +119,7 @@ def run_delete():
        delete_comments(driver, args.profile_url)
    elif args.mode == "conversations":
-        delete_conversations(driver, year=args.year)
+        traverse_conversations(driver, year=args.year)
    else:
        print("Please enter a valid mode")
--- a/deletefb/tools/archive.py
+++ b/deletefb/tools/archive.py
@@ -1,13 +1,21 @@
 from .config import settings
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
 import attr
 import cattr
 import json
 TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
 # Used to avoid duplicates in the log
 from pybloom_live import BloomFilter
 cattr.register_unstructure_hook(
    datetime, lambda dt: datetime.strftime(dt, format=TIME_FORMAT)
 )
 def make_filter():
    return BloomFilter(
        capacity=settings["MAX_POSTS"],
@@ -30,7 +38,7 @@ class Archive:
        print("Archiving {0}".format(content))
        if content.name not in self._bloom_filter:
-            self.archive_file.write(json.dumps(attr.asdict(content)) + "\n")
+            self.archive_file.write(json.dumps(cattr.unstructure(content)) + "\n")
            self._bloom_filter.add(content.name)
        return
--- a/deletefb/tools/conversations.py
+++ b/deletefb/tools/conversations.py
@@ -68,17 +68,9 @@ def get_conversations(driver):
    return conversations
-def get_convo_images(driver):
+def parse_conversation(driver):
    """
-    Gets all links to images in a messenger conversation
+    Extracts all messages in a conversation
    Removes duplicates
    """
    for img in set(lxh.fromstring(driver.page_source).xpath("//img")):
        yield img.get("src")
 def get_convo_messages(driver):
    """
    Gets all messages in a conversation
    """
    for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"):
@@ -91,8 +83,10 @@ def get_convo_messages(driver):
                date=data_store.get("timestamp")
              )
-def archive_conversation(driver, convo):
+def get_messages(driver, convo):
-    print(convo)
+    """
    Get all of the messages for a given conversation
    """
    driver.get(convo.url)
    wait = WebDriverWait(driver, 20)
@@ -119,12 +113,16 @@ def archive_conversation(driver, convo):
        except SELENIUM_EXCEPTIONS:
            continue
-    #for img in get_convo_images(driver):
+    return list(parse_conversation(driver))
        #print(img)
-    convo.messages = list(get_convo_messages(driver))
+def delete_conversation(driver, convo):
    """
    Deletes a conversation
    """
-def delete_conversations(driver, year=None):
+    return
 def traverse_conversations(driver, year=None):
    """
    Remove all conversations within a specified range
    """
@@ -133,15 +131,18 @@ def delete_conversations(driver, year=None):
    convos = get_conversations(driver)
-    for convo in convos:
+    with archiver("conversations") as archive_convo:
-        # If the year is set and there is a date
+        for convo in convos:
-        # Then we want to only look at convos from this year
+            # If the year is set and there is a date
            # Then we want to only look at convos from this year
            if year and convo.date:
                if convo.date.year == int(year):
                    convo.messages = get_messages(driver, convo)
                    archive_convo.archive(convo)
-        if year and convo.date:
+            # Otherwise we're looking at all convos
-            if convo.date.year == int(year):
+            elif not year:
-                archive_conversation(driver, convo)
+                convo.messages = get_messages(driver, convo)
-                print(convo.messages)
+                archive_convo.archive(convo)
        # Otherwise we're looking at all convos
        elif not year:
            archive_conversation(driver, convo)
--- a/deletefb/types.py
+++ b/deletefb/types.py
@@ -2,6 +2,8 @@ import attr
 import uuid
 import pendulum
 from datetime import datetime
 def convert_date(text):
    """
    Tries to parse a date into a DateTime instance
@@ -35,7 +37,7 @@ class Comment:
 class Conversation:
    url = attr.ib()
    name = attr.ib()
-    date = attr.ib(converter=convert_date)
+    date : datetime = attr.ib(converter=convert_date)
    messages = attr.ib(default=[])
 @attr.s
@@ -44,7 +46,7 @@ class Message:
    content = attr.ib()
    # Remove the last 3 digits from FB's dates. They are not standard.
-    date = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3])))
+    date : datetime = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3])))
 @attr.s
 class Page:
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,7 @@ setuptools.setup(
        "requests",
        "pybloom-live",
        "attrs",
        "cattrs",
        "lxml",
        "pendulum"
    ],