From 6e8970b23f0b3a1cc24ef1b8aab4f3077a83a484 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Wed, 3 Jul 2019 19:25:26 -0400 Subject: [PATCH 01/21] Stubs for removing messages and comments, new type for convos --- deletefb/tools/comments.py | 15 +++++++++++++++ deletefb/tools/messages.py | 15 +++++++++++++++ deletefb/types.py | 6 ++++++ 3 files changed, 36 insertions(+) create mode 100644 deletefb/tools/comments.py create mode 100644 deletefb/tools/messages.py diff --git a/deletefb/tools/comments.py b/deletefb/tools/comments.py new file mode 100644 index 0000000..a167845 --- /dev/null +++ b/deletefb/tools/comments.py @@ -0,0 +1,15 @@ +from .archive import archiver +from ..types import Comment +from .common import SELENIUM_EXCEPTIONS, logger, click_button +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + +LOG = logger(__name__) + +def delete_comments(driver, profile_url): + """ + Remove all comments on posts + """ + + driver.get("{0}/allactivity?privacy_source=activity_log&category_key=commentscluster".format(profile_url)) diff --git a/deletefb/tools/messages.py b/deletefb/tools/messages.py new file mode 100644 index 0000000..be4a745 --- /dev/null +++ b/deletefb/tools/messages.py @@ -0,0 +1,15 @@ +from .archive import archiver +from ..types import Conversation +from .common import SELENIUM_EXCEPTIONS, logger, click_button +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + +LOG = logger(__name__) + +def delete_comments(driver): + """ + Remove all conversations within a specified range + """ + + driver.get("https://www.facebook.com/messages/t/" diff --git a/deletefb/types.py b/deletefb/types.py index a771c67..8cae509 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -23,6 +23,12 @@ class Comment: date = attr.ib(factory=timestamp_now) name = attr.ib(factory=lambda: uuid.uuid4().hex) +@attr.s +class Conversation: + recipient = attr.ib() + last_message_time = attr.ib(factory=timestamp_now) + name = attr.ib() + @attr.s class Page: name = attr.ib() -- 2.30.2 From fbc18058bdf4c0e4e0d5298c915ca9ac47dd3498 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Wed, 3 Jul 2019 19:33:57 -0400 Subject: [PATCH 02/21] Clean up new modules --- deletefb/deletefb.py | 11 ++++++++++- deletefb/tools/{messages.py => conversations.py} | 4 ++-- deletefb/types.py | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) rename deletefb/tools/{messages.py => conversations.py} (81%) diff --git a/deletefb/deletefb.py b/deletefb/deletefb.py index 6688f4c..ec82c95 100755 --- a/deletefb/deletefb.py +++ b/deletefb/deletefb.py @@ -4,6 +4,8 @@ from .tools.config import settings from .tools.likes import unlike_pages from .tools.login import login from .tools.wall import delete_posts +from .tools.conversations import delete_conversations +from .tools.comments import delete_comments import argparse import getpass @@ -21,7 +23,7 @@ def run_delete(): default="wall", dest="mode", type=str, - choices=["wall", "unlike_pages"], + choices=["wall", "unlike_pages", "comments", "conversations"], help="The mode you want to run in. Default is `wall' which deletes wall posts" ) @@ -112,6 +114,13 @@ def run_delete(): elif args.mode == "unlike_pages": unlike_pages(driver, args.profile_url) + + elif args.mode == "comments": + delete_comments(driver, args.profile_url) + + elif args.mode == "conversations": + delete_conversations(driver) + else: print("Please enter a valid mode") sys.exit(1) diff --git a/deletefb/tools/messages.py b/deletefb/tools/conversations.py similarity index 81% rename from deletefb/tools/messages.py rename to deletefb/tools/conversations.py index be4a745..a1870f0 100644 --- a/deletefb/tools/messages.py +++ b/deletefb/tools/conversations.py @@ -7,9 +7,9 @@ from selenium.webdriver.support.ui import WebDriverWait LOG = logger(__name__) -def delete_comments(driver): +def delete_conversations(driver): """ Remove all conversations within a specified range """ - driver.get("https://www.facebook.com/messages/t/" + driver.get("https://www.facebook.com/messages/t/") diff --git a/deletefb/types.py b/deletefb/types.py index 8cae509..44672bb 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -26,8 +26,8 @@ class Comment: @attr.s class Conversation: recipient = attr.ib() - last_message_time = attr.ib(factory=timestamp_now) name = attr.ib() + last_message_time = attr.ib(factory=timestamp_now) @attr.s class Page: -- 2.30.2 From 3939c8642dd1abe498893c08b02613ff072f71a1 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Wed, 3 Jul 2019 20:17:04 -0400 Subject: [PATCH 03/21] (almost) working list of convo URLs --- deletefb/tools/comments.py | 2 ++ deletefb/tools/conversations.py | 19 +++++++++++++++++++ deletefb/tools/wall.py | 1 - 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/deletefb/tools/comments.py b/deletefb/tools/comments.py index a167845..ebf9de7 100644 --- a/deletefb/tools/comments.py +++ b/deletefb/tools/comments.py @@ -13,3 +13,5 @@ def delete_comments(driver, profile_url): """ driver.get("{0}/allactivity?privacy_source=activity_log&category_key=commentscluster".format(profile_url)) + + wait = WebDriverWait(driver, 20) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index a1870f0..cb023ae 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -4,12 +4,31 @@ from .common import SELENIUM_EXCEPTIONS, logger, click_button from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.common.action_chains import ActionChains LOG = logger(__name__) +def get_conversation_list(driver): + """ + Get a list of conversations + """ + + actions = ActionChains(driver) + + convos = driver.find_elements_by_xpath("//ul[@aria-label=\"Conversation list\"]/li") + + for convo in convos: + actions.move_to_element(convo).perform() + yield convo.find_element_by_xpath("//a") + def delete_conversations(driver): """ Remove all conversations within a specified range """ driver.get("https://www.facebook.com/messages/t/") + + wait = WebDriverWait(driver, 20) + + for convo_url in get_conversation_list(driver): + print(convo_url.get_property("data-href")) diff --git a/deletefb/tools/wall.py b/deletefb/tools/wall.py index 890bae0..1d9d232 100644 --- a/deletefb/tools/wall.py +++ b/deletefb/tools/wall.py @@ -42,7 +42,6 @@ def delete_posts(driver, post_content_element = driver.find_element_by_class_name(post_content_sel) post_content_ts = driver.find_element_by_class_name(post_timestamp_sel) - # Archive the post archive_wall_post.archive( Post( -- 2.30.2 From d17919f6752500b425d91ef6435ab47579367317 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Fri, 5 Jul 2019 03:57:08 -0400 Subject: [PATCH 04/21] Gathering conversations --- deletefb/tools/common.py | 16 ++++++++----- deletefb/tools/conversations.py | 40 +++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/deletefb/tools/common.py b/deletefb/tools/common.py index 7f6364a..5c14118 100644 --- a/deletefb/tools/common.py +++ b/deletefb/tools/common.py @@ -2,7 +2,8 @@ from os.path import isfile from selenium.common.exceptions import ( NoSuchElementException, StaleElementReferenceException, - TimeoutException + TimeoutException, + JavascriptException ) import json @@ -19,13 +20,18 @@ SELENIUM_EXCEPTIONS = ( def click_button(driver, el): """ Click a button using Javascript - Args: - driver: seleniumrequests.Chrome Driver instance - Returns: - None """ driver.execute_script("arguments[0].click();", el) +def scroll_to(driver, el): + """ + Scroll an element into view, using JS + """ + try: + driver.execute_script("arguments[0].scrollIntoView();", el) + except SELENIUM_EXCEPTIONS: + return + def logger(name): """ Args: diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index cb023ae..45ebb7b 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,6 +1,6 @@ from .archive import archiver from ..types import Conversation -from .common import SELENIUM_EXCEPTIONS, logger, click_button +from .common import SELENIUM_EXCEPTIONS, logger, scroll_to from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait @@ -8,18 +8,44 @@ from selenium.webdriver.common.action_chains import ActionChains LOG = logger(__name__) -def get_conversation_list(driver): +def get_conversation_list(driver, offset=0): """ Get a list of conversations """ actions = ActionChains(driver) - convos = driver.find_elements_by_xpath("//ul[@aria-label=\"Conversation list\"]/li") + convos = driver.find_elements_by_xpath("//ul[@aria-label=\"Conversation list\"]/li/div/a[@role=\"link\"]") - for convo in convos: + for convo in convos[offset:]: actions.move_to_element(convo).perform() - yield convo.find_element_by_xpath("//a") + yield convo + actions.move_to_element(current_convo).perform() + +def get_all_conversations(driver): + conversation_urls = set() + + current_convo = None + + while True: + l = len(conversation_urls) + + for convo in get_conversation_list(driver, offset=l): + url = convo.get_attribute("data-href") + conversation_urls.add(url) + current_convo = convo + + if current_convo: + scroll_to(driver, current_convo) + + print(l) + print(len(conversation_urls)) + if len(conversation_urls) == l: + # no more conversations left + break + + return list(conversation_urls) + def delete_conversations(driver): """ @@ -30,5 +56,5 @@ def delete_conversations(driver): wait = WebDriverWait(driver, 20) - for convo_url in get_conversation_list(driver): - print(convo_url.get_property("data-href")) + for convo_url in get_all_conversations(driver): + print(convo_url) -- 2.30.2 From 65e6286234750a214f730e3d24606e862a21c51f Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 13 Jul 2019 20:12:26 -0400 Subject: [PATCH 05/21] Iterating through all conversations working! --- deletefb/tools/conversations.py | 59 +++++++++++++++------------------ 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 45ebb7b..e51790e 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,60 +1,53 @@ from .archive import archiver from ..types import Conversation -from .common import SELENIUM_EXCEPTIONS, logger, scroll_to +from .common import SELENIUM_EXCEPTIONS, logger, scroll_to, click_button from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.action_chains import ActionChains +from time import sleep LOG = logger(__name__) -def get_conversation_list(driver, offset=0): +def get_conversations(driver): """ Get a list of conversations """ actions = ActionChains(driver) - convos = driver.find_elements_by_xpath("//ul[@aria-label=\"Conversation list\"]/li/div/a[@role=\"link\"]") - - for convo in convos[offset:]: - actions.move_to_element(convo).perform() - yield convo - actions.move_to_element(current_convo).perform() - -def get_all_conversations(driver): - conversation_urls = set() + wait = WebDriverWait(driver, 20) - current_convo = None + try: + wait.until( + EC.presence_of_element_located((By.XPATH, "//div[@id=\"threadlist_rows\"]")) + ) + except SELENIUM_EXCEPTIONS: + LOG.exception("No conversations") + return while True: - l = len(conversation_urls) - - for convo in get_conversation_list(driver, offset=l): - url = convo.get_attribute("data-href") - conversation_urls.add(url) - current_convo = convo - - if current_convo: - scroll_to(driver, current_convo) - - print(l) - print(len(conversation_urls)) - if len(conversation_urls) == l: - # no more conversations left + for convo in driver.find_elements_by_xpath("//a"): + url = convo.get_attribute("href") + if url and "messages/read" in url: + yield url + + try: + next_url = driver.find_element_by_id("see_older_threads").find_element_by_xpath("a").get_attribute("href") + except SELENIUM_EXCEPTIONS: break - - return list(conversation_urls) - + if not next_url: + break + driver.get(next_url) def delete_conversations(driver): """ Remove all conversations within a specified range """ - driver.get("https://www.facebook.com/messages/t/") + driver.get("https://mobile.facebook.com/messages/?pageNum=1&selectable&see_older_newer=1") - wait = WebDriverWait(driver, 20) + convos = list(get_conversations(driver)) - for convo_url in get_all_conversations(driver): - print(convo_url) + for convo in convos: + driver.get(convo) -- 2.30.2 From 723d90981d55b99cc01ee1252d65f7a19908ad81 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Tue, 16 Jul 2019 23:24:05 -0400 Subject: [PATCH 06/21] Try parsing conversation timestamps, if they exist --- deletefb/tools/common.py | 5 +++++ deletefb/tools/conversations.py | 15 ++++++++++----- deletefb/types.py | 1 + 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/deletefb/tools/common.py b/deletefb/tools/common.py index 5c14118..301d5f0 100644 --- a/deletefb/tools/common.py +++ b/deletefb/tools/common.py @@ -5,11 +5,13 @@ from selenium.common.exceptions import ( TimeoutException, JavascriptException ) +from arrow.parser import ParserError import json import logging import logging.config import os +import arrow SELENIUM_EXCEPTIONS = ( NoSuchElementException, @@ -32,6 +34,9 @@ def scroll_to(driver, el): except SELENIUM_EXCEPTIONS: return +def parse_ts(text): + return arrow.get(text, "DD/M/YYYY") + def logger(name): """ Args: diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index e51790e..7ea3ee5 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,11 +1,10 @@ from .archive import archiver from ..types import Conversation -from .common import SELENIUM_EXCEPTIONS, logger, scroll_to, click_button +from .common import SELENIUM_EXCEPTIONS, logger, parse_ts, ParserError from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.action_chains import ActionChains -from time import sleep LOG = logger(__name__) @@ -30,10 +29,16 @@ def get_conversations(driver): for convo in driver.find_elements_by_xpath("//a"): url = convo.get_attribute("href") if url and "messages/read" in url: - yield url - + try: + print(parse_ts(convo.find_element_by_xpath("../../..//abbr").text)) + except ParserError: + print("Failed to parse timestamp") + continue try: - next_url = driver.find_element_by_id("see_older_threads").find_element_by_xpath("a").get_attribute("href") + next_url = (driver.find_element_by_id("see_older_threads"). + find_element_by_xpath("a"). + get_attribute("href")) + except SELENIUM_EXCEPTIONS: break if not next_url: diff --git a/deletefb/types.py b/deletefb/types.py index 44672bb..429f3ba 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -25,6 +25,7 @@ class Comment: @attr.s class Conversation: + url = attr.ib() recipient = attr.ib() name = attr.ib() last_message_time = attr.ib(factory=timestamp_now) -- 2.30.2 From aa9e3672c348aebc53e301722df76c5f2d7d662e Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 20 Jul 2019 15:06:56 -0400 Subject: [PATCH 07/21] Package up conversations into its own type now --- deletefb/tools/conversations.py | 17 +++++++++++++++-- deletefb/types.py | 3 +-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 7ea3ee5..fb4a992 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -28,12 +28,23 @@ def get_conversations(driver): while True: for convo in driver.find_elements_by_xpath("//a"): url = convo.get_attribute("href") + timestamp = None + if url and "messages/read" in url: try: - print(parse_ts(convo.find_element_by_xpath("../../..//abbr").text)) + timestamp = parse_ts(convo.find_element_by_xpath("../../..//abbr").text) except ParserError: print("Failed to parse timestamp") continue + + conversation_name = convo.find_element_by_xpath("../../../div/div/header/h3").text.strip() + + assert(conversation_name) + assert(url) + + yield Conversation(url=url, + name=conversation_name, + timestamp=timestamp) try: next_url = (driver.find_element_by_id("see_older_threads"). find_element_by_xpath("a"). @@ -55,4 +66,6 @@ def delete_conversations(driver): convos = list(get_conversations(driver)) for convo in convos: - driver.get(convo) + print(convo.url) + print(convo.name) + print(convo.timestamp) diff --git a/deletefb/types.py b/deletefb/types.py index 429f3ba..3d0c2c4 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -26,9 +26,8 @@ class Comment: @attr.s class Conversation: url = attr.ib() - recipient = attr.ib() name = attr.ib() - last_message_time = attr.ib(factory=timestamp_now) + timestamp = attr.ib(default=None) @attr.s class Page: -- 2.30.2 From 0355ebcc66d2d57da750e8791691b659db32041b Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 20 Jul 2019 16:05:50 -0400 Subject: [PATCH 08/21] Refactor timestamp parsing --- deletefb/tools/common.py | 6 +----- deletefb/tools/conversations.py | 34 +++++++++++++++++++-------------- deletefb/types.py | 24 +++++++++++++---------- requirements.txt | 12 ++++++++++++ setup.py | 3 ++- 5 files changed, 49 insertions(+), 30 deletions(-) diff --git a/deletefb/tools/common.py b/deletefb/tools/common.py index 301d5f0..18c74cd 100644 --- a/deletefb/tools/common.py +++ b/deletefb/tools/common.py @@ -5,13 +5,12 @@ from selenium.common.exceptions import ( TimeoutException, JavascriptException ) -from arrow.parser import ParserError import json import logging import logging.config import os -import arrow +import pendulum SELENIUM_EXCEPTIONS = ( NoSuchElementException, @@ -34,9 +33,6 @@ def scroll_to(driver, el): except SELENIUM_EXCEPTIONS: return -def parse_ts(text): - return arrow.get(text, "DD/M/YYYY") - def logger(name): """ Args: diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index fb4a992..75f13d0 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,6 +1,6 @@ from .archive import archiver from ..types import Conversation -from .common import SELENIUM_EXCEPTIONS, logger, parse_ts, ParserError +from .common import SELENIUM_EXCEPTIONS, logger from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait @@ -25,26 +25,32 @@ def get_conversations(driver): LOG.exception("No conversations") return + # This function *cannot* be a generator + # Otherwise elements will become stale + conversations = [] + while True: for convo in driver.find_elements_by_xpath("//a"): url = convo.get_attribute("href") + timestamp = None if url and "messages/read" in url: - try: - timestamp = parse_ts(convo.find_element_by_xpath("../../..//abbr").text) - except ParserError: - print("Failed to parse timestamp") - continue + timestamp = convo.find_element_by_xpath("../../..//abbr").text conversation_name = convo.find_element_by_xpath("../../../div/div/header/h3").text.strip() assert(conversation_name) assert(url) - yield Conversation(url=url, - name=conversation_name, - timestamp=timestamp) + conversations.append( + Conversation( + url=url, + timestamp=timestamp, + name=conversation_name + ) + ) + try: next_url = (driver.find_element_by_id("see_older_threads"). find_element_by_xpath("a"). @@ -56,16 +62,16 @@ def get_conversations(driver): break driver.get(next_url) -def delete_conversations(driver): + return conversations + +def delete_conversations(driver, older_than=None): """ Remove all conversations within a specified range """ driver.get("https://mobile.facebook.com/messages/?pageNum=1&selectable&see_older_newer=1") - convos = list(get_conversations(driver)) + convos = get_conversations(driver) for convo in convos: - print(convo.url) - print(convo.name) - print(convo.timestamp) + print(convo) diff --git a/deletefb/types.py b/deletefb/types.py index 3d0c2c4..d2d8f7e 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -1,35 +1,39 @@ import attr import uuid -import datetime +import pendulum -def timestamp_now(): - """ - Returns: a timestamp for this instant, in ISO 8601 format - """ - return datetime.datetime.isoformat(datetime.datetime.now()) +def convert_timestamp(text): + try: + return pendulum.from_format(text, "DD/M/YYYY") + except ValueError: + try: + return (pendulum.from_format(text, "DD MMM") + .set(year=pendulum.now().year)) + except ValueError: + return None # Data type definitions of posts and comments @attr.s class Post: content = attr.ib() comments = attr.ib(default=[]) - date = attr.ib(factory=timestamp_now) + date = attr.ib(factory=pendulum.now) name = attr.ib(factory=lambda: uuid.uuid4().hex) @attr.s class Comment: commenter = attr.ib() content = attr.ib() - date = attr.ib(factory=timestamp_now) + date = attr.ib(factory=pendulum.now) name = attr.ib(factory=lambda: uuid.uuid4().hex) @attr.s class Conversation: url = attr.ib() name = attr.ib() - timestamp = attr.ib(default=None) + timestamp = attr.ib(converter=convert_timestamp) @attr.s class Page: name = attr.ib() - date = attr.ib(factory=timestamp_now) + date = attr.ib(factory=pendulum.now) diff --git a/requirements.txt b/requirements.txt index 0780145..eb788e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,25 @@ attrs==19.1.0 bitarray==0.9.3 +bleach==3.1.0 certifi==2018.11.29 chardet==3.0.4 +docutils==0.14 idna==2.8 +pendulum==2.0.5 +pkginfo==1.5.0.1 pybloom-live==3.0.0 +Pygments==2.4.2 +python-dateutil==2.8.0 +pytzdata==2019.2 +readme-renderer==24.0 requests==2.22.0 requests-file==1.4.3 +requests-toolbelt==0.9.1 selenium==3.141.0 selenium-requests==1.3 six==1.12.0 tldextract==2.2.0 +tqdm==4.32.2 +twine==1.13.0 urllib3==1.25.2 +webencodings==0.5.1 diff --git a/setup.py b/setup.py index a9b692d..fc1cef0 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,8 @@ setuptools.setup( "selenium-requests", "requests", "pybloom-live", - "attrs" + "attrs", + "pendulum" ], classifiers= [ "Programming Language :: Python :: 3", -- 2.30.2 From b74dd81e0d15ecfabdcaed56505671379feba45e Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 20 Jul 2019 17:05:00 -0400 Subject: [PATCH 09/21] Support for filtering conversations by year --- deletefb/deletefb.py | 6 +++--- deletefb/tools/conversations.py | 15 +++++++++++++-- deletefb/types.py | 4 ++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/deletefb/deletefb.py b/deletefb/deletefb.py index ec82c95..c2f8c2e 100755 --- a/deletefb/deletefb.py +++ b/deletefb/deletefb.py @@ -93,8 +93,8 @@ def run_delete(): settings["ARCHIVE"] = not args.archive_off - if args.year and args.mode != "wall": - parser.error("The --year option is only supported in wall mode") + if args.year and args.mode not in ("wall", "conversations"): + parser.error("The --year option is not supported in this mode") args_user_password = args.password or getpass.getpass('Enter your password: ') @@ -119,7 +119,7 @@ def run_delete(): delete_comments(driver, args.profile_url) elif args.mode == "conversations": - delete_conversations(driver) + delete_conversations(driver, year=args.year) else: print("Please enter a valid mode") diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 75f13d0..4f39a38 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -5,6 +5,7 @@ from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.action_chains import ActionChains +from pendulum import now LOG = logger(__name__) @@ -64,7 +65,7 @@ def get_conversations(driver): return conversations -def delete_conversations(driver, older_than=None): +def delete_conversations(driver, year=None): """ Remove all conversations within a specified range """ @@ -74,4 +75,14 @@ def delete_conversations(driver, older_than=None): convos = get_conversations(driver) for convo in convos: - print(convo) + # If the year is set and there is a timestamp + # Then we want to only look at convos from this year + + if year and convo.timestamp: + if convo.timestamp.year == int(year): + print(convo) + + # Otherwise we're looking at all convos + elif not year: + print(convo) + diff --git a/deletefb/types.py b/deletefb/types.py index d2d8f7e..b04e6ab 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -3,6 +3,10 @@ import uuid import pendulum def convert_timestamp(text): + """ + Tries to parse a timestamp into a DateTime instance + Returns `None` if it cannot be parsed + """ try: return pendulum.from_format(text, "DD/M/YYYY") except ValueError: -- 2.30.2 From 015f57a17f59ef617217c94992247bde8da6ddcc Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 27 Jul 2019 13:23:15 -0400 Subject: [PATCH 10/21] Can now get entire conversations --- deletefb/tools/conversations.py | 35 ++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 4f39a38..253b281 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,6 +1,6 @@ from .archive import archiver from ..types import Conversation -from .common import SELENIUM_EXCEPTIONS, logger +from .common import SELENIUM_EXCEPTIONS, logger, click_button from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait @@ -65,6 +65,35 @@ def get_conversations(driver): return conversations + +def archive_conversation(driver, convo): + print(convo) + driver.get(convo.url) + + wait = WebDriverWait(driver, 20) + try: + wait.until( + EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'See Older Messages')]")) + ) + except SELENIUM_EXCEPTIONS: + LOG.exception("Could not load more messages") + return + + while True: + try: + see_older = driver.find_element_by_xpath("//*[contains(text(), 'See Older Messages')]") + except SELENIUM_EXCEPTIONS: + break + + if not see_older: + break + + try: + click_button(driver, see_older) + except SELENIUM_EXCEPTIONS: + continue + driver.find_element_by_xpath("html").save_screenshot("%s.png" % convo.name) + def delete_conversations(driver, year=None): """ Remove all conversations within a specified range @@ -80,9 +109,9 @@ def delete_conversations(driver, year=None): if year and convo.timestamp: if convo.timestamp.year == int(year): - print(convo) + archive_conversation(driver, convo) # Otherwise we're looking at all convos elif not year: - print(convo) + archive_conversation(driver, convo) -- 2.30.2 From d3064257e0898b4c4336cf8a61d1320122a9e7e0 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 27 Jul 2019 13:37:10 -0400 Subject: [PATCH 11/21] Depend on lxml --- requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index eb788e5..24001f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ certifi==2018.11.29 chardet==3.0.4 docutils==0.14 idna==2.8 +lxml==4.4.0 pendulum==2.0.5 pkginfo==1.5.0.1 pybloom-live==3.0.0 diff --git a/setup.py b/setup.py index fc1cef0..0e5681f 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ setuptools.setup( "requests", "pybloom-live", "attrs", + "lxml", "pendulum" ], classifiers= [ -- 2.30.2 From 7697af2481598bc4df0bfa7c0969b5797fe481b6 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sun, 28 Jul 2019 12:22:58 -0400 Subject: [PATCH 12/21] Construct Message instances for each message in a convo --- deletefb/tools/conversations.py | 36 ++++++++++++++++++++++++++++++--- deletefb/types.py | 9 +++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 253b281..c6f57fb 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,11 +1,14 @@ from .archive import archiver -from ..types import Conversation +from ..types import Conversation, Message from .common import SELENIUM_EXCEPTIONS, logger, click_button from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.action_chains import ActionChains from pendulum import now +from json import loads + +import lxml.html as lxh LOG = logger(__name__) @@ -65,6 +68,28 @@ def get_conversations(driver): return conversations +def get_convo_images(driver): + """ + Gets all links to images in a messenger conversation + Removes duplicates + """ + for img in set(lxh.fromstring(driver.page_source).xpath("//img")): + yield img.get("src") + +def get_convo_messages(driver): + """ + Gets all messages in a conversation + """ + + for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"): + data_store = loads(msg.get("data-store")) + msg_text = msg.text_content() + + yield Message( + name=data_store.get("author"), + content=msg_text, + timestamp=data_store.get("timestamp") + ) def archive_conversation(driver, convo): print(convo) @@ -79,6 +104,7 @@ def archive_conversation(driver, convo): LOG.exception("Could not load more messages") return + # Expand conversation until we've reached the beginning while True: try: see_older = driver.find_element_by_xpath("//*[contains(text(), 'See Older Messages')]") @@ -92,7 +118,11 @@ def archive_conversation(driver, convo): click_button(driver, see_older) except SELENIUM_EXCEPTIONS: continue - driver.find_element_by_xpath("html").save_screenshot("%s.png" % convo.name) + + #for img in get_convo_images(driver): + #print(img) + + convo.messages = list(get_convo_messages(driver)) def delete_conversations(driver, year=None): """ @@ -110,8 +140,8 @@ def delete_conversations(driver, year=None): if year and convo.timestamp: if convo.timestamp.year == int(year): archive_conversation(driver, convo) + print(convo.messages) # Otherwise we're looking at all convos elif not year: archive_conversation(driver, convo) - diff --git a/deletefb/types.py b/deletefb/types.py index b04e6ab..0e589d7 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -36,6 +36,15 @@ class Conversation: url = attr.ib() name = attr.ib() timestamp = attr.ib(converter=convert_timestamp) + messages = attr.ib(default=[]) + +@attr.s +class Message: + name = attr.ib() + content = attr.ib() + + # Remove the last 3 digits from FB's timestamps. They are not standard. + timestamp = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) @attr.s class Page: -- 2.30.2 From b2e4a92e82e3a98288bac766ffd3f3e945d99112 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sun, 28 Jul 2019 19:01:03 -0400 Subject: [PATCH 13/21] Rename `timestamp` to `date` to be consistent --- deletefb/tools/conversations.py | 14 +++++++------- deletefb/types.py | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index c6f57fb..19d82ab 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -37,11 +37,11 @@ def get_conversations(driver): for convo in driver.find_elements_by_xpath("//a"): url = convo.get_attribute("href") - timestamp = None + date = None if url and "messages/read" in url: - timestamp = convo.find_element_by_xpath("../../..//abbr").text + date = convo.find_element_by_xpath("../../..//abbr").text conversation_name = convo.find_element_by_xpath("../../../div/div/header/h3").text.strip() assert(conversation_name) @@ -50,7 +50,7 @@ def get_conversations(driver): conversations.append( Conversation( url=url, - timestamp=timestamp, + date=date, name=conversation_name ) ) @@ -88,7 +88,7 @@ def get_convo_messages(driver): yield Message( name=data_store.get("author"), content=msg_text, - timestamp=data_store.get("timestamp") + date=data_store.get("timestamp") ) def archive_conversation(driver, convo): @@ -134,11 +134,11 @@ def delete_conversations(driver, year=None): convos = get_conversations(driver) for convo in convos: - # If the year is set and there is a timestamp + # If the year is set and there is a date # Then we want to only look at convos from this year - if year and convo.timestamp: - if convo.timestamp.year == int(year): + if year and convo.date: + if convo.date.year == int(year): archive_conversation(driver, convo) print(convo.messages) diff --git a/deletefb/types.py b/deletefb/types.py index 0e589d7..16a5c60 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -2,9 +2,9 @@ import attr import uuid import pendulum -def convert_timestamp(text): +def convert_date(text): """ - Tries to parse a timestamp into a DateTime instance + Tries to parse a date into a DateTime instance Returns `None` if it cannot be parsed """ try: @@ -35,7 +35,7 @@ class Comment: class Conversation: url = attr.ib() name = attr.ib() - timestamp = attr.ib(converter=convert_timestamp) + date = attr.ib(converter=convert_date) messages = attr.ib(default=[]) @attr.s @@ -43,8 +43,8 @@ class Message: name = attr.ib() content = attr.ib() - # Remove the last 3 digits from FB's timestamps. They are not standard. - timestamp = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) + # Remove the last 3 digits from FB's dates. They are not standard. + date = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) @attr.s class Page: -- 2.30.2 From 01647262c31cbeac01b92032fa83d4f6019e7649 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Mon, 29 Jul 2019 00:39:06 -0400 Subject: [PATCH 14/21] Use cattrs to serialize conversations in archive --- deletefb/deletefb.py | 4 +-- deletefb/tools/archive.py | 10 ++++++- deletefb/tools/conversations.py | 53 +++++++++++++++++---------------- deletefb/types.py | 6 ++-- setup.py | 1 + 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/deletefb/deletefb.py b/deletefb/deletefb.py index c2f8c2e..21c5f47 100755 --- a/deletefb/deletefb.py +++ b/deletefb/deletefb.py @@ -4,7 +4,7 @@ from .tools.config import settings from .tools.likes import unlike_pages from .tools.login import login from .tools.wall import delete_posts -from .tools.conversations import delete_conversations +from .tools.conversations import traverse_conversations from .tools.comments import delete_comments import argparse @@ -119,7 +119,7 @@ def run_delete(): delete_comments(driver, args.profile_url) elif args.mode == "conversations": - delete_conversations(driver, year=args.year) + traverse_conversations(driver, year=args.year) else: print("Please enter a valid mode") diff --git a/deletefb/tools/archive.py b/deletefb/tools/archive.py index 1bc7364..1352da0 100644 --- a/deletefb/tools/archive.py +++ b/deletefb/tools/archive.py @@ -1,13 +1,21 @@ from .config import settings from contextlib import contextmanager from pathlib import Path +from datetime import datetime import attr +import cattr import json +TIME_FORMAT = "%Y-%m-%d %H:%M:%S" + # Used to avoid duplicates in the log from pybloom_live import BloomFilter +cattr.register_unstructure_hook( + datetime, lambda dt: datetime.strftime(dt, format=TIME_FORMAT) +) + def make_filter(): return BloomFilter( capacity=settings["MAX_POSTS"], @@ -30,7 +38,7 @@ class Archive: print("Archiving {0}".format(content)) if content.name not in self._bloom_filter: - self.archive_file.write(json.dumps(attr.asdict(content)) + "\n") + self.archive_file.write(json.dumps(cattr.unstructure(content)) + "\n") self._bloom_filter.add(content.name) return diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 19d82ab..1c1c557 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -68,17 +68,9 @@ def get_conversations(driver): return conversations -def get_convo_images(driver): +def parse_conversation(driver): """ - Gets all links to images in a messenger conversation - Removes duplicates - """ - for img in set(lxh.fromstring(driver.page_source).xpath("//img")): - yield img.get("src") - -def get_convo_messages(driver): - """ - Gets all messages in a conversation + Extracts all messages in a conversation """ for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"): @@ -91,8 +83,10 @@ def get_convo_messages(driver): date=data_store.get("timestamp") ) -def archive_conversation(driver, convo): - print(convo) +def get_messages(driver, convo): + """ + Get all of the messages for a given conversation + """ driver.get(convo.url) wait = WebDriverWait(driver, 20) @@ -119,12 +113,16 @@ def archive_conversation(driver, convo): except SELENIUM_EXCEPTIONS: continue - #for img in get_convo_images(driver): - #print(img) + return list(parse_conversation(driver)) - convo.messages = list(get_convo_messages(driver)) +def delete_conversation(driver, convo): + """ + Deletes a conversation + """ -def delete_conversations(driver, year=None): + return + +def traverse_conversations(driver, year=None): """ Remove all conversations within a specified range """ @@ -133,15 +131,18 @@ def delete_conversations(driver, year=None): convos = get_conversations(driver) - for convo in convos: - # If the year is set and there is a date - # Then we want to only look at convos from this year + with archiver("conversations") as archive_convo: + for convo in convos: + # If the year is set and there is a date + # Then we want to only look at convos from this year + + if year and convo.date: + if convo.date.year == int(year): + convo.messages = get_messages(driver, convo) + archive_convo.archive(convo) - if year and convo.date: - if convo.date.year == int(year): - archive_conversation(driver, convo) - print(convo.messages) + # Otherwise we're looking at all convos + elif not year: + convo.messages = get_messages(driver, convo) + archive_convo.archive(convo) - # Otherwise we're looking at all convos - elif not year: - archive_conversation(driver, convo) diff --git a/deletefb/types.py b/deletefb/types.py index 16a5c60..43ee24c 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -2,6 +2,8 @@ import attr import uuid import pendulum +from datetime import datetime + def convert_date(text): """ Tries to parse a date into a DateTime instance @@ -35,7 +37,7 @@ class Comment: class Conversation: url = attr.ib() name = attr.ib() - date = attr.ib(converter=convert_date) + date : datetime = attr.ib(converter=convert_date) messages = attr.ib(default=[]) @attr.s @@ -44,7 +46,7 @@ class Message: content = attr.ib() # Remove the last 3 digits from FB's dates. They are not standard. - date = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) + date : datetime = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) @attr.s class Page: diff --git a/setup.py b/setup.py index 0e5681f..5edd7de 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ setuptools.setup( "requests", "pybloom-live", "attrs", + "cattrs", "lxml", "pendulum" ], -- 2.30.2 From 3b46b987636d4b0d31d14e85ccc1283f082221a1 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Fri, 2 Aug 2019 01:46:39 -0400 Subject: [PATCH 15/21] Cleaning up README --- README.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 856e1f3..f847b48 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Personally, I did this so I would feel less attached to my Facebook profile ## Installation You have several options to run it. -1) Install from PyPI with `pip3 install --user delete-facebook-posts` +1) Install from PyPI with `pip3 install --user delete-facebook-posts` (recommended) 2) Clone this repo and run `pip3 install --user .` or do `pip3 install --user git+https://github.com/weskerfoot/DeleteFB.git` 3) Set up a Python virtualenv, activate it, and run `pip3 install -r requirements.txt`, then you can just run `python -m deletefb.deletefb` in the DeleteFB directory. @@ -62,14 +62,7 @@ git+https://github.com/weskerfoot/DeleteFB.git` * You may also pass in a code by using the `-F` argument, e.g. `-F 111111`. ## Delete By Year -* The tool supports passing the `--year` flag in order to delete wall posts by - year. E.g. `-Y 2010` would delete posts from the year 2010. It is incompatible with any mode other than `wall`. - -## Unlike Pages -* You may use `-M unlike_pages` to unlike all of your pages. The names of the - pages will be archived (unless archival is turned off), and this option - conflicts with the year option. This will only unlike your *pages* that you - have liked. It will *not* unlike anything else (like books or movies). +* The tool supports passing the `--year` flag in order to delete/archive by year. E.g. `-Y 2010` would only affect posts from 2010. ## Archival * The tool will archive everything being deleted by default in `.log` files. -- 2.30.2 From 9ea47f3e46a5d94e4513c0f72bdf246254e79c2b Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Fri, 2 Aug 2019 01:56:40 -0400 Subject: [PATCH 16/21] Add contribution guidelines --- CONTRIBUTING.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ce5fde5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,13 @@ +### How to contribute + +## Dependencies +If you are adding any new dependencies, please make sure that both `requirements.txt` and `setup.py` have been updated. Please read [this](https://caremad.io/posts/2013/07/setup-vs-requirement/) if you are confused about the difference between `requirements.txt` and the `install_requires` section. + +## Virtualenv +Always develop with virtualenv, as well as test with `pip install --user .`. This helps make sure implicit dependencies aren't accidentally introduced, and makes sure the average user will be more likely to run it without issues. + +## Pull requests +Feel free to make a pull request! Make sure to give a brief overview of what you did, and why you think it is useful. If you are fixing a specific bug or resolving an issue, then make sure to reference it in your PR. + +## Coding style +Try to be consistent with the existing codebase as much as possible. Things should be modularized. Don't repeat yourself if possible, but don't add needless complexity. Straightforward is often better than clever and optimized. -- 2.30.2 From 5377b48850997a5c852f7e3f0171fcb14cbe5d17 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Fri, 2 Aug 2019 02:32:29 -0400 Subject: [PATCH 17/21] Prettify archives, log image links in convos --- deletefb/deletefb.log | 0 deletefb/tools/archive.py | 6 ++++- deletefb/tools/conversations.py | 39 ++++++++++++++++++++++++++++----- deletefb/types.py | 1 + 4 files changed, 40 insertions(+), 6 deletions(-) create mode 100644 deletefb/deletefb.log diff --git a/deletefb/deletefb.log b/deletefb/deletefb.log new file mode 100644 index 0000000..e69de29 diff --git a/deletefb/tools/archive.py b/deletefb/tools/archive.py index 1352da0..72a6e01 100644 --- a/deletefb/tools/archive.py +++ b/deletefb/tools/archive.py @@ -6,6 +6,7 @@ from datetime import datetime import attr import cattr import json +import typing TIME_FORMAT = "%Y-%m-%d %H:%M:%S" @@ -38,7 +39,10 @@ class Archive: print("Archiving {0}".format(content)) if content.name not in self._bloom_filter: - self.archive_file.write(json.dumps(cattr.unstructure(content)) + "\n") + self.archive_file.write(json.dumps(cattr.unstructure(content), + indent=4, + sort_keys=True) + "\n") + self._bloom_filter.add(content.name) return diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 1c1c557..003924c 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -83,9 +83,18 @@ def parse_conversation(driver): date=data_store.get("timestamp") ) -def get_messages(driver, convo): +def get_images(driver): """ - Get all of the messages for a given conversation + Gets all links to images in a messenger conversation + Removes duplicates + """ + for img in set(lxh.fromstring(driver.page_source).xpath("//img")): + yield img.get("src") + +def get_convo(driver, convo): + """ + Get all of the messages/images for a given conversation + Returns a list of messages and a list of image links """ driver.get(convo.url) @@ -113,7 +122,9 @@ def get_messages(driver, convo): except SELENIUM_EXCEPTIONS: continue - return list(parse_conversation(driver)) + messages = list(parse_conversation(driver)) + image_links = list(set(get_images(driver))) + return (messages, image_links) def delete_conversation(driver, convo): """ @@ -122,6 +133,24 @@ def delete_conversation(driver, convo): return +def extract_convo(driver, convo): + """ + Extract messages and image links from a conversation + Return a new Conversation instance + """ + result = get_convo(driver, convo) + + if not result: + return None + + messages, image_links = result + + convo.messages = messages + convo.image_links = image_links + + return convo + + def traverse_conversations(driver, year=None): """ Remove all conversations within a specified range @@ -138,11 +167,11 @@ def traverse_conversations(driver, year=None): if year and convo.date: if convo.date.year == int(year): - convo.messages = get_messages(driver, convo) + extract_convo(driver, convo) archive_convo.archive(convo) # Otherwise we're looking at all convos elif not year: - convo.messages = get_messages(driver, convo) + extract_convo(driver, convo) archive_convo.archive(convo) diff --git a/deletefb/types.py b/deletefb/types.py index 43ee24c..c1a0148 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -39,6 +39,7 @@ class Conversation: name = attr.ib() date : datetime = attr.ib(converter=convert_date) messages = attr.ib(default=[]) + image_links = attr.ib(default=[]) @attr.s class Message: -- 2.30.2 From 81262fe4d7f5e760817e29ebb01738f2e6135827 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Fri, 2 Aug 2019 03:16:41 -0400 Subject: [PATCH 18/21] Add timestamps to archive files --- deletefb/tools/archive.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/deletefb/tools/archive.py b/deletefb/tools/archive.py index 72a6e01..052a05a 100644 --- a/deletefb/tools/archive.py +++ b/deletefb/tools/archive.py @@ -2,6 +2,7 @@ from .config import settings from contextlib import contextmanager from pathlib import Path from datetime import datetime +from time import time import attr import cattr @@ -36,7 +37,7 @@ class Archive: """ Archive an object """ - print("Archiving {0}".format(content)) + print("Archiving content") if content.name not in self._bloom_filter: self.archive_file.write(json.dumps(cattr.unstructure(content), @@ -50,7 +51,7 @@ class Archive: def archiver(archive_type): archive_file = open( - (Path(".") / Path(archive_type).name).with_suffix(".log"), + (Path(".") / Path(archive_type).name).with_suffix(".log.{0}".format(time())), mode="ta", buffering=1 ) -- 2.30.2 From e1c7e822f3b0e94ba036d10f7d6ce3921931bfee Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sat, 3 Aug 2019 23:11:37 -0400 Subject: [PATCH 19/21] Refactoring, almost support deleting --- deletefb/tools/archive.py | 4 +++- deletefb/tools/common.py | 14 ++++++++++++ deletefb/tools/conversations.py | 40 +++++++++++++-------------------- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/deletefb/tools/archive.py b/deletefb/tools/archive.py index 052a05a..1fca7c3 100644 --- a/deletefb/tools/archive.py +++ b/deletefb/tools/archive.py @@ -37,7 +37,9 @@ class Archive: """ Archive an object """ - print("Archiving content") + + if hasattr(content, 'name'): + print("Archiving {0}".format(content.name)) if content.name not in self._bloom_filter: self.archive_file.write(json.dumps(cattr.unstructure(content), diff --git a/deletefb/tools/common.py b/deletefb/tools/common.py index 18c74cd..de0d679 100644 --- a/deletefb/tools/common.py +++ b/deletefb/tools/common.py @@ -1,4 +1,7 @@ from os.path import isfile +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.common.by import By from selenium.common.exceptions import ( NoSuchElementException, StaleElementReferenceException, @@ -52,6 +55,17 @@ def logger(name): logging.config.dictConfig(config["logging"]) return logging.getLogger(name) + +def wait_xpath(driver, expr): + """ + Takes an XPath expression, and waits at most 20 seconds until it exists + """ + wait = WebDriverWait(driver, 20) + try: + wait.until(EC.presence_of_element_located((By.XPATH, expr))) + except SELENIUM_EXCEPTIONS: + return + NO_CHROME_DRIVER = """ You need to install the chromedriver for Selenium\n Please see this link https://github.com/weskerfoot/DeleteFB#how-to-use-it\n diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 003924c..d94923d 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,12 +1,10 @@ from .archive import archiver from ..types import Conversation, Message -from .common import SELENIUM_EXCEPTIONS, logger, click_button -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait +from .common import SELENIUM_EXCEPTIONS, logger, click_button, wait_xpath from selenium.webdriver.common.action_chains import ActionChains from pendulum import now from json import loads +from time import sleep import lxml.html as lxh @@ -17,17 +15,7 @@ def get_conversations(driver): Get a list of conversations """ - actions = ActionChains(driver) - - wait = WebDriverWait(driver, 20) - - try: - wait.until( - EC.presence_of_element_located((By.XPATH, "//div[@id=\"threadlist_rows\"]")) - ) - except SELENIUM_EXCEPTIONS: - LOG.exception("No conversations") - return + wait_xpath(driver, "//div[@id=\"threadlist_rows\"]") # This function *cannot* be a generator # Otherwise elements will become stale @@ -98,14 +86,7 @@ def get_convo(driver, convo): """ driver.get(convo.url) - wait = WebDriverWait(driver, 20) - try: - wait.until( - EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'See Older Messages')]")) - ) - except SELENIUM_EXCEPTIONS: - LOG.exception("Could not load more messages") - return + wait_xpath(driver, "//*[contains(text(), 'See Older Messages')]") # Expand conversation until we've reached the beginning while True: @@ -131,6 +112,12 @@ def delete_conversation(driver, convo): Deletes a conversation """ + actions = ActionChains(driver) + + delete_button = driver.find_element_by_xpath("//select/option[contains(text(), 'Delete')]") + + actions.move_to_element(delete_button).click().perform() + return def extract_convo(driver, convo): @@ -150,8 +137,7 @@ def extract_convo(driver, convo): return convo - -def traverse_conversations(driver, year=None): +def traverse_conversations(driver, year=None, delete=False): """ Remove all conversations within a specified range """ @@ -169,9 +155,13 @@ def traverse_conversations(driver, year=None): if convo.date.year == int(year): extract_convo(driver, convo) archive_convo.archive(convo) + if delete: + delete_conversation(driver, convo) # Otherwise we're looking at all convos elif not year: extract_convo(driver, convo) archive_convo.archive(convo) + if delete: + delete_conversation(driver, convo) -- 2.30.2 From c91bec3367f52e3dbbf931a007c99a4a8f522823 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sun, 4 Aug 2019 16:25:22 -0400 Subject: [PATCH 20/21] Conversation deletion almost working --- deletefb/tools/conversations.py | 34 +++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index d94923d..4c7c638 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -1,7 +1,9 @@ from .archive import archiver from ..types import Conversation, Message from .common import SELENIUM_EXCEPTIONS, logger, click_button, wait_xpath +from .config import settings from selenium.webdriver.common.action_chains import ActionChains +from selenium.webdriver.support.ui import Select from pendulum import now from json import loads from time import sleep @@ -114,10 +116,22 @@ def delete_conversation(driver, convo): actions = ActionChains(driver) - delete_button = driver.find_element_by_xpath("//select/option[contains(text(), 'Delete')]") + menu_select = Select(driver.find_element_by_xpath("//select/option[contains(text(), 'Delete')]/..")) + + for i, option in enumerate(menu_select.options): + print(option.text) + if option.text.strip() == "Delete": + menu_select.select_by_index(i) + + + wait_xpath(driver, "//h2[contains(text(), 'Delete conversation')]") + + delete_button = driver.find_element_by_xpath("//a[contains(text(), 'Delete')][@role='button']") actions.move_to_element(delete_button).click().perform() + sleep(10000) + return def extract_convo(driver, convo): @@ -137,7 +151,7 @@ def extract_convo(driver, convo): return convo -def traverse_conversations(driver, year=None, delete=False): +def traverse_conversations(driver, year=None): """ Remove all conversations within a specified range """ @@ -154,14 +168,18 @@ def traverse_conversations(driver, year=None, delete=False): if year and convo.date: if convo.date.year == int(year): extract_convo(driver, convo) - archive_convo.archive(convo) - if delete: - delete_conversation(driver, convo) + + if settings["ARCHIVE"]: + archive_convo.archive(convo) + + delete_conversation(driver, convo) # Otherwise we're looking at all convos elif not year: extract_convo(driver, convo) - archive_convo.archive(convo) - if delete: - delete_conversation(driver, convo) + + if settings["ARCHIVE"]: + archive_convo.archive(convo) + + delete_conversation(driver, convo) -- 2.30.2 From 06da87fba61f78ca67abadcd9230f2982c255ec1 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Sun, 4 Aug 2019 19:44:31 -0400 Subject: [PATCH 21/21] Deletion of conversations working --- deletefb/tools/conversations.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py index 4c7c638..bcbd089 100644 --- a/deletefb/tools/conversations.py +++ b/deletefb/tools/conversations.py @@ -6,7 +6,6 @@ from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.support.ui import Select from pendulum import now from json import loads -from time import sleep import lxml.html as lxh @@ -119,19 +118,14 @@ def delete_conversation(driver, convo): menu_select = Select(driver.find_element_by_xpath("//select/option[contains(text(), 'Delete')]/..")) for i, option in enumerate(menu_select.options): - print(option.text) if option.text.strip() == "Delete": menu_select.select_by_index(i) - + break wait_xpath(driver, "//h2[contains(text(), 'Delete conversation')]") - delete_button = driver.find_element_by_xpath("//a[contains(text(), 'Delete')][@role='button']") - actions.move_to_element(delete_button).click().perform() - sleep(10000) - return def extract_convo(driver, convo): -- 2.30.2