diff --git a/deletefb/deletefb.log b/deletefb/deletefb.log
new file mode 100644
index 0000000..e69de29
diff --git a/deletefb/tools/archive.py b/deletefb/tools/archive.py
index 1352da0..72a6e01 100644
--- a/deletefb/tools/archive.py
+++ b/deletefb/tools/archive.py
@@ -6,6 +6,7 @@ from datetime import datetime
 import attr
 import cattr
 import json
+import typing
 
 TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
 
@@ -38,7 +39,10 @@ class Archive:
         print("Archiving {0}".format(content))
 
         if content.name not in self._bloom_filter:
-            self.archive_file.write(json.dumps(cattr.unstructure(content)) + "\n")
+            self.archive_file.write(json.dumps(cattr.unstructure(content),
+                                               indent=4,
+                                               sort_keys=True) + "\n")
+
             self._bloom_filter.add(content.name)
         return
 
diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py
index 1c1c557..003924c 100644
--- a/deletefb/tools/conversations.py
+++ b/deletefb/tools/conversations.py
@@ -83,9 +83,22 @@ def parse_conversation(driver):
             date=data_store.get("timestamp")
         )
 
-def get_messages(driver, convo):
+def get_images(driver):
     """
-    Get all of the messages for a given conversation
+    Yield each distinct image link (img src) found on the current page
+    Skips images that have no src and keeps first-seen page order
+    """
+    seen = set()
+    for img in lxh.fromstring(driver.page_source).xpath("//img"):
+        src = img.get("src")
+        if src and src not in seen:
+            seen.add(src)
+            yield src
+
+def get_convo(driver, convo):
+    """
+    Get all of the messages/images for a given conversation
+    Returns a tuple of (list of messages, list of image links)
     """
     driver.get(convo.url)
 
@@ -113,7 +126,9 @@ def get_messages(driver, convo):
         except SELENIUM_EXCEPTIONS:
             continue
 
-    return list(parse_conversation(driver))
+    messages = list(parse_conversation(driver))
+    image_links = list(get_images(driver))
+    return (messages, image_links)
 
 def delete_conversation(driver, convo):
     """
@@ -122,6 +137,16 @@ def delete_conversation(driver, convo):
     return
 
 
+def extract_convo(driver, convo):
+    """
+    Extract messages and image links from a conversation
+    Stores them on the given convo and returns that same instance
+    """
+    convo.messages, convo.image_links = get_convo(driver, convo)
+
+    return convo
+
+
 def traverse_conversations(driver, year=None):
     """
     Remove all conversations within a specified range
@@ -138,11 +163,11 @@ def traverse_conversations(driver, year=None):
 
             if year and convo.date:
                 if convo.date.year == int(year):
-                    convo.messages = get_messages(driver, convo)
+                    extract_convo(driver, convo)
                     archive_convo.archive(convo)
 
             # Otherwise we're looking at all convos
             elif not year:
-                convo.messages = get_messages(driver, convo)
+                extract_convo(driver, convo)
                 archive_convo.archive(convo)
 
diff --git a/deletefb/types.py b/deletefb/types.py
index 43ee24c..c1a0148 100644
--- a/deletefb/types.py
+++ b/deletefb/types.py
@@ -39,6 +39,7 @@ class Conversation:
     name = attr.ib()
     date : datetime = attr.ib(converter=convert_date)
-    messages = attr.ib(default=[])
+    messages = attr.ib(factory=list)
+    image_links = attr.ib(factory=list)
 
 @attr.s
 class Message: