diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ce5fde5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,13 @@ +# How to contribute + +## Dependencies +If you are adding any new dependencies, please make sure that both `requirements.txt` and `setup.py` have been updated. Please read [this article](https://caremad.io/posts/2013/07/setup-vs-requirement/) if you are unsure about the difference between `requirements.txt` and the `install_requires` section of `setup.py`. + +## Virtualenv +Always develop inside a virtualenv, and also test an install with `pip install --user .`. This helps make sure implicit dependencies aren't accidentally introduced, and makes it more likely that the average user can run the tool without issues. + +## Pull requests +Feel free to make a pull request! Make sure to give a brief overview of what you did, and why you think it is useful. If you are fixing a specific bug or resolving an issue, then make sure to reference it in your PR. + +## Coding style +Try to be consistent with the existing codebase as much as possible. Things should be modularized. Don't repeat yourself if possible, but don't add needless complexity. Straightforward is often better than clever and optimized. diff --git a/README.md b/README.md index 856e1f3..f847b48 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Personally, I did this so I would feel less attached to my Facebook profile ## Installation You have several options to run it. -1) Install from PyPI with `pip3 install --user delete-facebook-posts` +1) Install from PyPI with `pip3 install --user delete-facebook-posts` (recommended) 2) Clone this repo and run `pip3 install --user .` or do `pip3 install --user git+https://github.com/weskerfoot/DeleteFB.git` 3) Set up a Python virtualenv, activate it, and run `pip3 install -r requirements.txt`, then you can just run `python -m deletefb.deletefb` in the DeleteFB directory. 
@@ -62,14 +62,7 @@ git+https://github.com/weskerfoot/DeleteFB.git` * You may also pass in a code by using the `-F` argument, e.g. `-F 111111`. ## Delete By Year -* The tool supports passing the `--year` flag in order to delete wall posts by - year. E.g. `-Y 2010` would delete posts from the year 2010. It is incompatible with any mode other than `wall`. - -## Unlike Pages -* You may use `-M unlike_pages` to unlike all of your pages. The names of the - pages will be archived (unless archival is turned off), and this option - conflicts with the year option. This will only unlike your *pages* that you - have liked. It will *not* unlike anything else (like books or movies). +* The tool supports passing the `--year` flag in order to delete/archive by year. E.g. `-Y 2010` would only affect posts or conversations from 2010. It is only supported in the `wall` and `conversations` modes. ## Archival * The tool will archive everything being deleted by default in `.log` files. diff --git a/deletefb/deletefb.log b/deletefb/deletefb.log new file mode 100644 index 0000000..e69de29 diff --git a/deletefb/deletefb.py b/deletefb/deletefb.py index 6688f4c..21c5f47 100755 --- a/deletefb/deletefb.py +++ b/deletefb/deletefb.py @@ -4,6 +4,8 @@ from .tools.config import settings from .tools.likes import unlike_pages from .tools.login import login from .tools.wall import delete_posts +from .tools.conversations import traverse_conversations +from .tools.comments import delete_comments import argparse import getpass @@ -21,7 +23,7 @@ def run_delete(): default="wall", dest="mode", type=str, - choices=["wall", "unlike_pages"], + choices=["wall", "unlike_pages", "comments", "conversations"], help="The mode you want to run in. 
Default is `wall' which deletes wall posts" ) @@ -91,8 +93,8 @@ def run_delete(): settings["ARCHIVE"] = not args.archive_off - if args.year and args.mode != "wall": - parser.error("The --year option is only supported in wall mode") + if args.year and args.mode not in ("wall", "conversations"): + parser.error("The --year option is not supported in this mode") args_user_password = args.password or getpass.getpass('Enter your password: ') @@ -112,6 +114,13 @@ def run_delete(): elif args.mode == "unlike_pages": unlike_pages(driver, args.profile_url) + + elif args.mode == "comments": + delete_comments(driver, args.profile_url) + + elif args.mode == "conversations": + traverse_conversations(driver, year=args.year) + else: print("Please enter a valid mode") sys.exit(1) diff --git a/deletefb/tools/archive.py b/deletefb/tools/archive.py index 7a54cd0..718ee65 100644 --- a/deletefb/tools/archive.py +++ b/deletefb/tools/archive.py @@ -1,13 +1,23 @@ from .config import settings from contextlib import contextmanager from pathlib import Path +from datetime import datetime +from time import time import attr +import cattr import json +import typing + +TIME_FORMAT = "%Y-%m-%d %H:%M:%S" # Used to avoid duplicates in the log from pybloom_live import BloomFilter +cattr.register_unstructure_hook( + datetime, lambda dt: datetime.strftime(dt, format=TIME_FORMAT) +) + def make_filter(): return BloomFilter( capacity=settings["MAX_POSTS"], @@ -27,10 +37,15 @@ class Archive: """ Archive an object """ - print("Archiving {0}".format(content)) + + if hasattr(content, 'name'): + print("Archiving {0}".format(content.name)) if content.name not in self._bloom_filter: - self.archive_file.write(json.dumps(attr.asdict(content)) + "\n") + self.archive_file.write(json.dumps(cattr.unstructure(content), + indent=4, + sort_keys=True) + "\n") + self._bloom_filter.add(content.name) return @@ -38,7 +53,7 @@ class Archive: def archiver(archive_type): archive_file = open( - str((Path(".") / 
Path(archive_type).name).with_suffix(".log")), + str((Path(".") / Path(archive_type).name).with_suffix(".log.{0}".format(time()))), mode="ta", buffering=1 ) diff --git a/deletefb/tools/comments.py b/deletefb/tools/comments.py new file mode 100644 index 0000000..ebf9de7 --- /dev/null +++ b/deletefb/tools/comments.py @@ -0,0 +1,17 @@ +from .archive import archiver +from ..types import Comment +from .common import SELENIUM_EXCEPTIONS, logger, click_button +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + +LOG = logger(__name__) + +def delete_comments(driver, profile_url): + """ + Remove all comments on posts + """ + + driver.get("{0}/allactivity?privacy_source=activity_log&category_key=commentscluster".format(profile_url)) + + wait = WebDriverWait(driver, 20) diff --git a/deletefb/tools/common.py b/deletefb/tools/common.py index 7f6364a..de0d679 100644 --- a/deletefb/tools/common.py +++ b/deletefb/tools/common.py @@ -1,14 +1,19 @@ from os.path import isfile +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.common.by import By from selenium.common.exceptions import ( NoSuchElementException, StaleElementReferenceException, - TimeoutException + TimeoutException, + JavascriptException ) import json import logging import logging.config import os +import pendulum SELENIUM_EXCEPTIONS = ( NoSuchElementException, @@ -19,13 +24,18 @@ SELENIUM_EXCEPTIONS = ( def click_button(driver, el): """ Click a button using Javascript - Args: - driver: seleniumrequests.Chrome Driver instance - Returns: - None """ driver.execute_script("arguments[0].click();", el) +def scroll_to(driver, el): + """ + Scroll an element into view, using JS + """ + try: + driver.execute_script("arguments[0].scrollIntoView();", el) + except SELENIUM_EXCEPTIONS: + return + def logger(name): """ Args: 
@@ -45,6 +55,17 @@ def logger(name): logging.config.dictConfig(config["logging"]) return logging.getLogger(name) + +def wait_xpath(driver, expr): + """ + Takes an XPath expression, and waits at most 20 seconds until it exists + """ + wait = WebDriverWait(driver, 20) + try: + wait.until(EC.presence_of_element_located((By.XPATH, expr))) + except SELENIUM_EXCEPTIONS: + return + NO_CHROME_DRIVER = """ You need to install the chromedriver for Selenium\n Please see this link https://github.com/weskerfoot/DeleteFB#how-to-use-it\n diff --git a/deletefb/tools/conversations.py b/deletefb/tools/conversations.py new file mode 100644 index 0000000..bcbd089 --- /dev/null +++ b/deletefb/tools/conversations.py @@ -0,0 +1,179 @@ +from .archive import archiver +from ..types import Conversation, Message +from .common import SELENIUM_EXCEPTIONS, logger, click_button, wait_xpath +from .config import settings +from selenium.webdriver.common.action_chains import ActionChains +from selenium.webdriver.support.ui import Select +from pendulum import now +from json import loads + +import lxml.html as lxh + +LOG = logger(__name__) + +def get_conversations(driver): + """ + Get a list of conversations + """ + + wait_xpath(driver, "//div[@id=\"threadlist_rows\"]") + + # This function *cannot* be a generator + # Otherwise elements will become stale + conversations = [] + + while True: + for convo in driver.find_elements_by_xpath("//a"): + url = convo.get_attribute("href") + + date = None + + if url and "messages/read" in url: + + date = convo.find_element_by_xpath("../../..//abbr").text + conversation_name = convo.find_element_by_xpath("../../../div/div/header/h3").text.strip() + + assert(conversation_name) + assert(url) + + conversations.append( + Conversation( + url=url, + date=date, + name=conversation_name + ) + ) + + try: + next_url = (driver.find_element_by_id("see_older_threads"). + find_element_by_xpath("a"). 
+ get_attribute("href")) + + except SELENIUM_EXCEPTIONS: + break + if not next_url: + break + driver.get(next_url) + + return conversations + +def parse_conversation(driver): + """ + Extracts all messages in a conversation + """ + + for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"): + data_store = loads(msg.get("data-store")) + msg_text = msg.text_content() + + yield Message( + name=data_store.get("author"), + content=msg_text, + date=data_store.get("timestamp") + ) + +def get_images(driver): + """ + Gets all links to images in a messenger conversation + Removes duplicates + """ + for img in set(lxh.fromstring(driver.page_source).xpath("//img")): + yield img.get("src") + +def get_convo(driver, convo): + """ + Get all of the messages/images for a given conversation + Returns a list of messages and a list of image links + """ + driver.get(convo.url) + + wait_xpath(driver, "//*[contains(text(), 'See Older Messages')]") + + # Expand conversation until we've reached the beginning + while True: + try: + see_older = driver.find_element_by_xpath("//*[contains(text(), 'See Older Messages')]") + except SELENIUM_EXCEPTIONS: + break + + if not see_older: + break + + try: + click_button(driver, see_older) + except SELENIUM_EXCEPTIONS: + continue + + messages = list(parse_conversation(driver)) + image_links = list(set(get_images(driver))) + return (messages, image_links) + +def delete_conversation(driver, convo): + """ + Deletes a conversation + """ + + actions = ActionChains(driver) + + menu_select = Select(driver.find_element_by_xpath("//select/option[contains(text(), 'Delete')]/..")) + + for i, option in enumerate(menu_select.options): + if option.text.strip() == "Delete": + menu_select.select_by_index(i) + break + + wait_xpath(driver, "//h2[contains(text(), 'Delete conversation')]") + delete_button = driver.find_element_by_xpath("//a[contains(text(), 'Delete')][@role='button']") + actions.move_to_element(delete_button).click().perform() + + 
return + +def extract_convo(driver, convo): + """ + Extract messages and image links from a conversation + Return a new Conversation instance + """ + result = get_convo(driver, convo) + + if not result: + return None + + messages, image_links = result + + convo.messages = messages + convo.image_links = image_links + + return convo + +def traverse_conversations(driver, year=None): + """ + Remove all conversations within a specified range + """ + + driver.get("https://mobile.facebook.com/messages/?pageNum=1&selectable&see_older_newer=1") + + convos = get_conversations(driver) + + with archiver("conversations") as archive_convo: + for convo in convos: + # If the year is set and there is a date + # Then we want to only look at convos from this year + + if year and convo.date: + if convo.date.year == int(year): + extract_convo(driver, convo) + + if settings["ARCHIVE"]: + archive_convo.archive(convo) + + delete_conversation(driver, convo) + + # Otherwise we're looking at all convos + elif not year: + extract_convo(driver, convo) + + if settings["ARCHIVE"]: + archive_convo.archive(convo) + + delete_conversation(driver, convo) + diff --git a/deletefb/tools/wall.py b/deletefb/tools/wall.py index 890bae0..1d9d232 100644 --- a/deletefb/tools/wall.py +++ b/deletefb/tools/wall.py @@ -42,7 +42,6 @@ def delete_posts(driver, post_content_element = driver.find_element_by_class_name(post_content_sel) post_content_ts = driver.find_element_by_class_name(post_timestamp_sel) - # Archive the post archive_wall_post.archive( Post( diff --git a/deletefb/types.py b/deletefb/types.py index a771c67..c1a0148 100644 --- a/deletefb/types.py +++ b/deletefb/types.py @@ -1,29 +1,55 @@ import attr import uuid -import datetime +import pendulum -def timestamp_now(): +from datetime import datetime + +def convert_date(text): """ - Returns: a timestamp for this instant, in ISO 8601 format + Tries to parse a date into a DateTime instance + Returns `None` if it cannot be parsed """ - return 
datetime.datetime.isoformat(datetime.datetime.now()) + try: + return pendulum.from_format(text, "DD/M/YYYY") + except ValueError: + try: + return (pendulum.from_format(text, "DD MMM") + .set(year=pendulum.now().year)) + except ValueError: + return None # Data type definitions of posts and comments @attr.s class Post: content = attr.ib() comments = attr.ib(default=[]) - date = attr.ib(factory=timestamp_now) + date = attr.ib(factory=pendulum.now) name = attr.ib(factory=lambda: uuid.uuid4().hex) @attr.s class Comment: commenter = attr.ib() content = attr.ib() - date = attr.ib(factory=timestamp_now) + date = attr.ib(factory=pendulum.now) name = attr.ib(factory=lambda: uuid.uuid4().hex) +@attr.s +class Conversation: + url = attr.ib() + name = attr.ib() + date : datetime = attr.ib(converter=convert_date) + messages = attr.ib(default=[]) + image_links = attr.ib(default=[]) + +@attr.s +class Message: + name = attr.ib() + content = attr.ib() + + # Remove the last 3 digits from FB's dates. They are not standard. 
+ date : datetime = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) + @attr.s class Page: name = attr.ib() - date = attr.ib(factory=timestamp_now) + date = attr.ib(factory=pendulum.now) diff --git a/requirements.txt b/requirements.txt index 0780145..24001f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,26 @@ attrs==19.1.0 bitarray==0.9.3 +bleach==3.1.0 certifi==2018.11.29 chardet==3.0.4 +docutils==0.14 idna==2.8 +lxml==4.4.0 +pendulum==2.0.5 +pkginfo==1.5.0.1 pybloom-live==3.0.0 +Pygments==2.4.2 +python-dateutil==2.8.0 +pytzdata==2019.2 +readme-renderer==24.0 requests==2.22.0 requests-file==1.4.3 +requests-toolbelt==0.9.1 selenium==3.141.0 selenium-requests==1.3 six==1.12.0 tldextract==2.2.0 +tqdm==4.32.2 +twine==1.13.0 urllib3==1.25.2 +webencodings==0.5.1 diff --git a/setup.py b/setup.py index c05d8a3..04fd8dc 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,10 @@ setuptools.setup( "selenium-requests", "requests", "pybloom-live", - "attrs" + "attrs", + "cattrs", + "lxml", + "pendulum" ], classifiers= [ "Programming Language :: Python :: 3",