diff --git a/deletefb/deletefb.py b/deletefb/deletefb.py index c2f8c2e..21c5f47 100755 --- a/deletefb/deletefb.py +++ b/deletefb/deletefb.py @@ -4,7 +4,7 @@ from .tools.config import settings from .tools.likes import unlike_pages from .tools.login import login from .tools.wall import delete_posts -from .tools.conversations import delete_conversations +from .tools.conversations import traverse_conversations from .tools.comments import delete_comments import argparse @@ -119,7 +119,7 @@ def run_delete(): delete_comments(driver, args.profile_url) elif args.mode == "conversations": - delete_conversations(driver, year=args.year) + traverse_conversations(driver, year=args.year) else: print("Please enter a valid mode") diff --git a/deletefb/tools/archive.py b/deletefb/tools/archive.py index 1bc7364..1352da0 100644 --- a/deletefb/tools/archive.py +++ b/deletefb/tools/archive.py @@ -1,13 +1,21 @@ from .config import settings from contextlib import contextmanager from pathlib import Path +from datetime import datetime import attr +import cattr import json +TIME_FORMAT = "%Y-%m-%d %H:%M:%S" + # Used to avoid duplicates in the log from pybloom_live import BloomFilter +cattr.register_unstructure_hook( + datetime, lambda dt: datetime.strftime(dt, format=TIME_FORMAT) +) + def make_filter(): return BloomFilter( capacity=settings["MAX_POSTS"], @@ -30,7 +38,7 @@ class Archive: print("Archiving {0}".format(content)) if content.name not in self._bloom_filter: - self.archive_file.write(json.dumps(attr.asdict(content)) + "\n") + self.archive_file.write(json.dumps(cattr.unstructure(content)) + "\n") self._bloom_filter.add(content.name) return diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 19d82ab..1c1c557 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -68,17 +68,9 @@ def get_conversations(driver): return conversations -def get_convo_images(driver): +def parse_conversation(driver): """ - Gets all links to images in a messenger conversation - Removes duplicates - """ - for img in set(lxh.fromstring(driver.page_source).xpath("//img")): - yield img.get("src") - -def get_convo_messages(driver): - """ - Gets all messages in a conversation + Extracts all messages in a conversation """ for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"): @@ -91,8 +83,10 @@ def get_convo_messages(driver): date=data_store.get("timestamp") ) -def archive_conversation(driver, convo): - print(convo) +def get_messages(driver, convo): + """ + Get all of the messages for a given conversation + """ driver.get(convo.url) wait = WebDriverWait(driver, 20) @@ -119,12 +113,16 @@ def archive_conversation(driver, convo): except SELENIUM_EXCEPTIONS: continue - #for img in get_convo_images(driver): - #print(img) + return list(parse_conversation(driver)) - convo.messages = list(get_convo_messages(driver)) +def delete_conversation(driver, convo): + """ + Deletes a conversation + """ -def delete_conversations(driver, year=None): + return + +def traverse_conversations(driver, year=None): """ Remove all conversations within a specified range """ @@ -133,15 +131,18 @@ def delete_conversations(driver, year=None): convos = get_conversations(driver) - for convo in convos: - # If the year is set and there is a date - # Then we want to only look at convos from this year + with archiver("conversations") as archive_convo: + for convo in convos: + # If the year is set and there is a date + # Then we want to only look at convos from this year + + if year and convo.date: + if convo.date.year == int(year): + convo.messages = get_messages(driver, convo) + archive_convo.archive(convo) - if year and convo.date: - if convo.date.year == int(year): - archive_conversation(driver, convo) - print(convo.messages) + # Otherwise we're looking at all convos + elif not year: + convo.messages = get_messages(driver, convo) + archive_convo.archive(convo) - # Otherwise we're looking at all convos - elif not year: - archive_conversation(driver, convo) diff --git a/deletefb/types.py b/deletefb/types.py index 16a5c60..43ee24c 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -2,6 +2,8 @@ import attr import uuid import pendulum +from datetime import datetime + def convert_date(text): """ Tries to parse a date into a DateTime instance @@ -35,7 +37,7 @@ class Comment: class Conversation: url = attr.ib() name = attr.ib() - date = attr.ib(converter=convert_date) + date : datetime = attr.ib(converter=convert_date) messages = attr.ib(default=[]) @attr.s @@ -44,7 +46,7 @@ class Message: content = attr.ib() # Remove the last 3 digits from FB's dates. They are not standard. - date = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) + date : datetime = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) @attr.s class Page: diff --git a/setup.py b/setup.py index 0e5681f..5edd7de 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ setuptools.setup( "requests", "pybloom-live", "attrs", + "cattrs", "lxml", "pendulum" ],