Browse Source

Merge pull request #72 from weskerfoot/delete-convos

Archiving and deleting messenger conversations
pull/80/head
Wesley Kerfoot 5 years ago
committed by GitHub
parent
commit
7359747ae4
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 13
      CONTRIBUTING.md
  2. 11
      README.md
  3. 0
      deletefb/deletefb.log
  4. 15
      deletefb/deletefb.py
  5. 21
      deletefb/tools/archive.py
  6. 17
      deletefb/tools/comments.py
  7. 31
      deletefb/tools/common.py
  8. 179
      deletefb/tools/conversations.py
  9. 1
      deletefb/tools/wall.py
  10. 40
      deletefb/types.py
  11. 13
      requirements.txt
  12. 5
      setup.py

13
CONTRIBUTING.md

@ -0,0 +1,13 @@
# How to contribute
## Dependencies
If you are adding any new dependencies, please make sure that both `requirements.txt` and `setup.py` have been updated. Please read [this](https://caremad.io/posts/2013/07/setup-vs-requirement/) if you are confused about the difference between `requirements.txt` and the `install_requires` section.
## Virtualenv
Always develop inside a virtualenv, and also test with `pip install --user .`. This helps make sure implicit dependencies aren't accidentally introduced, and makes it more likely that the average user can run it without issues.
## Pull requests
Feel free to make a pull request! Make sure to give a brief overview of what you did, and why you think it is useful. If you are fixing a specific bug or resolving an issue, then make sure to reference it in your PR.
## Coding style
Try to be consistent with the existing codebase as much as possible. Things should be modularized. Don't repeat yourself if possible, but don't add needless complexity. Straightforward is often better than clever and optimized.

11
README.md

@ -19,7 +19,7 @@ Personally, I did this so I would feel less attached to my Facebook profile
## Installation
You have several options to run it.
1) Install from PyPI with `pip3 install --user delete-facebook-posts`
1) Install from PyPI with `pip3 install --user delete-facebook-posts` (recommended)
2) Clone this repo and run `pip3 install --user .` or do `pip3 install --user
git+https://github.com/weskerfoot/DeleteFB.git`
3) Set up a Python virtualenv, activate it, and run `pip3 install -r requirements.txt`, then you can just run `python -m deletefb.deletefb` in the DeleteFB directory.
@ -62,14 +62,7 @@ git+https://github.com/weskerfoot/DeleteFB.git`
* You may also pass in a code by using the `-F` argument, e.g. `-F 111111`.
## Delete By Year
* The tool supports passing the `--year` flag in order to delete wall posts by
year. E.g. `-Y 2010` would delete posts from the year 2010. It is incompatible with any mode other than `wall`.
## Unlike Pages
* You may use `-M unlike_pages` to unlike all of your pages. The names of the
pages will be archived (unless archival is turned off), and this option
conflicts with the year option. This will only unlike your *pages* that you
have liked. It will *not* unlike anything else (like books or movies).
* The tool supports passing the `--year` flag in order to delete/archive by year. E.g. `-Y 2010` would only affect posts from 2010.
## Archival
* The tool will archive everything being deleted by default in `.log` files.

0
deletefb/deletefb.log

15
deletefb/deletefb.py

@ -4,6 +4,8 @@ from .tools.config import settings
from .tools.likes import unlike_pages
from .tools.login import login
from .tools.wall import delete_posts
from .tools.conversations import traverse_conversations
from .tools.comments import delete_comments
import argparse
import getpass
@ -21,7 +23,7 @@ def run_delete():
default="wall",
dest="mode",
type=str,
choices=["wall", "unlike_pages"],
choices=["wall", "unlike_pages", "comments", "conversations"],
help="The mode you want to run in. Default is `wall' which deletes wall posts"
)
@ -91,8 +93,8 @@ def run_delete():
settings["ARCHIVE"] = not args.archive_off
if args.year and args.mode != "wall":
parser.error("The --year option is only supported in wall mode")
if args.year and args.mode not in ("wall", "conversations"):
parser.error("The --year option is not supported in this mode")
args_user_password = args.password or getpass.getpass('Enter your password: ')
@ -112,6 +114,13 @@ def run_delete():
elif args.mode == "unlike_pages":
unlike_pages(driver, args.profile_url)
elif args.mode == "comments":
delete_comments(driver, args.profile_url)
elif args.mode == "conversations":
traverse_conversations(driver, year=args.year)
else:
print("Please enter a valid mode")
sys.exit(1)

21
deletefb/tools/archive.py

@ -1,13 +1,23 @@
from .config import settings
from contextlib import contextmanager
from pathlib import Path
from datetime import datetime
from time import time
import attr
import cattr
import json
import typing
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Used to avoid duplicates in the log
from pybloom_live import BloomFilter
cattr.register_unstructure_hook(
datetime, lambda dt: datetime.strftime(dt, format=TIME_FORMAT)
)
def make_filter():
return BloomFilter(
capacity=settings["MAX_POSTS"],
@ -27,10 +37,15 @@ class Archive:
"""
Archive an object
"""
print("Archiving {0}".format(content))
if hasattr(content, 'name'):
print("Archiving {0}".format(content.name))
if content.name not in self._bloom_filter:
self.archive_file.write(json.dumps(attr.asdict(content)) + "\n")
self.archive_file.write(json.dumps(cattr.unstructure(content),
indent=4,
sort_keys=True) + "\n")
self._bloom_filter.add(content.name)
return
@ -38,7 +53,7 @@ class Archive:
def archiver(archive_type):
archive_file = open(
str((Path(".") / Path(archive_type).name).with_suffix(".log")),
str((Path(".") / Path(archive_type).name).with_suffix(".log.{0}".format(time()))),
mode="ta",
buffering=1
)

17
deletefb/tools/comments.py

@ -0,0 +1,17 @@
from .archive import archiver
from ..types import Comment
from .common import SELENIUM_EXCEPTIONS, logger, click_button
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
LOG = logger(__name__)
def delete_comments(driver, profile_url):
    """
    Open the comments section of the activity log of a profile

    Only the navigation is done here: the activity log page filtered on
    comments is loaded and a 20 second WebDriverWait is created.

    Args:
        driver: driver instance used to load the page
        profile_url: base URL of the profile, the activity log path is
                     appended to it
    """
    activity_log_url = (
        "{0}/allactivity?privacy_source=activity_log&category_key=commentscluster"
    ).format(profile_url)
    driver.get(activity_log_url)

    # Currently unused in this function
    wait = WebDriverWait(driver, 20)

31
deletefb/tools/common.py

@ -1,14 +1,19 @@
from os.path import isfile
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import (
NoSuchElementException,
StaleElementReferenceException,
TimeoutException
TimeoutException,
JavascriptException
)
import json
import logging
import logging.config
import os
import pendulum
SELENIUM_EXCEPTIONS = (
NoSuchElementException,
@ -19,13 +24,18 @@ SELENIUM_EXCEPTIONS = (
def click_button(driver, el):
    """
    Click an element by running a Javascript snippet through the driver

    Args:
        driver: seleniumrequests.Chrome Driver instance
        el: the element to click, handed to the script as arguments[0]

    Returns:
        None
    """
    click_script = "arguments[0].click();"
    driver.execute_script(click_script, el)
def scroll_to(driver, el):
    """
    Bring an element into view by calling scrollIntoView on it from JS

    This is best effort: if running the script raises one of
    SELENIUM_EXCEPTIONS, the error is swallowed and None is returned.

    Args:
        driver: driver instance used to run the script
        el: the element to scroll to, handed to the script as arguments[0]
    """
    scroll_script = "arguments[0].scrollIntoView();"
    try:
        driver.execute_script(scroll_script, el)
    except SELENIUM_EXCEPTIONS:
        pass
def logger(name):
"""
Args:
@ -45,6 +55,17 @@ def logger(name):
logging.config.dictConfig(config["logging"])
return logging.getLogger(name)
def wait_xpath(driver, expr):
    """
    Wait for an element matching an XPath expression to be present

    The wait lasts at most 20 seconds. Nothing is returned, and any of
    SELENIUM_EXCEPTIONS raised while waiting is swallowed, so callers cannot
    tell from this function whether the element showed up.

    Args:
        driver: driver instance to wait on
        expr: the XPath expression to look for
    """
    waiter = WebDriverWait(driver, 20)
    locator = (By.XPATH, expr)
    try:
        waiter.until(EC.presence_of_element_located(locator))
    except SELENIUM_EXCEPTIONS:
        pass
NO_CHROME_DRIVER = """
You need to install the chromedriver for Selenium\n
Please see this link https://github.com/weskerfoot/DeleteFB#how-to-use-it\n

179
deletefb/tools/conversations.py

@ -0,0 +1,179 @@
from .archive import archiver
from ..types import Conversation, Message
from .common import SELENIUM_EXCEPTIONS, logger, click_button, wait_xpath
from .config import settings
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from pendulum import now
from json import loads
import lxml.html as lxh
LOG = logger(__name__)
def get_conversations(driver):
    """
    Collect every conversation listed in the thread list

    The thread list is walked page by page: after a page has been scanned,
    the link inside the "see_older_threads" element is followed, until that
    element cannot be found or its link has no href.

    Args:
        driver: driver instance, expected to be on the thread list page

    Returns:
        A list of Conversation instances built from the url, the date text
        and the name found for each thread
    """
    wait_xpath(driver, "//div[@id=\"threadlist_rows\"]")

    # Results are collected eagerly rather than yielded: with a generator
    # the elements would become stale
    found = []

    while True:
        for link in driver.find_elements_by_xpath("//a"):
            url = link.get_attribute("href")

            # Only anchors that point at a message thread are conversations
            if not url or "messages/read" not in url:
                continue

            date = link.find_element_by_xpath("../../..//abbr").text
            title = link.find_element_by_xpath("../../../div/div/header/h3")
            conversation_name = title.text.strip()

            assert conversation_name
            assert url

            found.append(
                Conversation(url=url, date=date, name=conversation_name)
            )

        try:
            older_threads = driver.find_element_by_id("see_older_threads")
            next_url = (older_threads.find_element_by_xpath("a")
                        .get_attribute("href"))
        except SELENIUM_EXCEPTIONS:
            break

        if not next_url:
            break

        driver.get(next_url)

    return found
def parse_conversation(driver):
    """
    Yield a Message for each message found in the current page source

    Every node matching //div[@class='msg']/div is expected to carry a JSON
    "data-store" attribute. Its "author" and "timestamp" entries become the
    name and date of the Message, the text content of the node becomes the
    content.

    Args:
        driver: driver instance whose page_source is parsed
    """
    page = lxh.fromstring(driver.page_source)

    for node in page.xpath("//div[@class='msg']/div"):
        meta = loads(node.get("data-store"))
        yield Message(
            name=meta.get("author"),
            content=node.text_content(),
            date=meta.get("timestamp")
        )
def get_images(driver):
    """
    Gets all links to images in a messenger conversation
    Removes duplicates

    Each distinct link is yielded once, in the order the images appear in
    the page. Images that have no "src" attribute are skipped.

    Args:
        driver: driver instance whose page_source is parsed

    Yields:
        The value of the "src" attribute of an image
    """
    seen = set()

    for img in lxh.fromstring(driver.page_source).xpath("//img"):
        src = img.get("src")

        # Deduplicate on the link itself: the parsed elements are all
        # distinct objects, so a set of elements would not remove anything
        if src is None or src in seen:
            continue

        seen.add(src)
        yield src
def get_convo(driver, convo):
    """
    Load a conversation and collect its messages and image links

    The conversation page is opened, then "See Older Messages" is clicked
    for as long as such an element can be found, so that the page is
    expanded before it is parsed.

    Args:
        driver: driver instance used to browse the conversation
        convo: the conversation to load, its url attribute is opened

    Returns:
        A tuple (messages, image_links), both of them lists
    """
    older_xpath = "//*[contains(text(), 'See Older Messages')]"

    driver.get(convo.url)
    wait_xpath(driver, older_xpath)

    # Keep expanding until the "See Older Messages" element is gone
    while True:
        try:
            older_link = driver.find_element_by_xpath(older_xpath)
        except SELENIUM_EXCEPTIONS:
            break

        if not older_link:
            break

        try:
            click_button(driver, older_link)
        except SELENIUM_EXCEPTIONS:
            # A failed click is not fatal, the element is looked up again
            pass

    messages = list(parse_conversation(driver))
    unique_links = set(get_images(driver))

    return messages, list(unique_links)
def delete_conversation(driver, convo):
    """
    Delete the conversation shown on the page the driver has loaded

    The "Delete" entry of the select menu is chosen, then, once the
    "Delete conversation" heading has been waited for, the confirmation
    button is clicked.

    Args:
        driver: driver instance, the function acts on its current page
        convo: not used by this function
    """
    chain = ActionChains(driver)

    select_el = driver.find_element_by_xpath(
        "//select/option[contains(text(), 'Delete')]/.."
    )
    menu_select = Select(select_el)

    # Pick the first option whose label is exactly "Delete"
    for index, option in enumerate(menu_select.options):
        if option.text.strip() == "Delete":
            menu_select.select_by_index(index)
            break

    wait_xpath(driver, "//h2[contains(text(), 'Delete conversation')]")

    confirm_button = driver.find_element_by_xpath(
        "//a[contains(text(), 'Delete')][@role='button']"
    )
    chain.move_to_element(confirm_button).click().perform()
def extract_convo(driver, convo):
    """
    Fill a conversation with its messages and image links

    The data comes from get_convo and is stored on the conversation that was
    passed in, so the caller's object is modified in place.

    Args:
        driver: driver instance used to browse the conversation
        convo: the conversation to fill

    Returns:
        The same conversation object, or None if get_convo returned
        something falsy
    """
    fetched = get_convo(driver, convo)

    if fetched:
        convo.messages, convo.image_links = fetched
        return convo

    return None
def traverse_conversations(driver, year=None):
    """
    Extract, archive and delete conversations, optionally for a single year

    Args:
        driver: driver instance used to browse the conversations
        year: if set, only conversations whose date falls in that year are
              handled; conversations without a date are then left alone.
              If not set, every conversation is handled.
    """
    driver.get("https://mobile.facebook.com/messages/?pageNum=1&selectable&see_older_newer=1")

    convos = get_conversations(driver)

    with archiver("conversations") as archive_convo:
        for convo in convos:
            if year:
                # A year was asked for: skip the conversations that have no
                # date, as well as the ones from any other year
                if not convo.date or convo.date.year != int(year):
                    continue

            extract_convo(driver, convo)

            if settings["ARCHIVE"]:
                archive_convo.archive(convo)

            delete_conversation(driver, convo)

1
deletefb/tools/wall.py

@ -42,7 +42,6 @@ def delete_posts(driver,
post_content_element = driver.find_element_by_class_name(post_content_sel)
post_content_ts = driver.find_element_by_class_name(post_timestamp_sel)
# Archive the post
archive_wall_post.archive(
Post(

40
deletefb/types.py

@ -1,29 +1,55 @@
import attr
import uuid
import datetime
import pendulum
def timestamp_now():
from datetime import datetime
def convert_date(text):
"""
Returns: a timestamp for this instant, in ISO 8601 format
Tries to parse a date into a DateTime instance
Returns `None` if it cannot be parsed
"""
return datetime.datetime.isoformat(datetime.datetime.now())
try:
return pendulum.from_format(text, "DD/M/YYYY")
except ValueError:
try:
return (pendulum.from_format(text, "DD MMM")
.set(year=pendulum.now().year))
except ValueError:
return None
# Data type definitions of posts and comments
@attr.s
class Post:
content = attr.ib()
comments = attr.ib(default=[])
date = attr.ib(factory=timestamp_now)
date = attr.ib(factory=pendulum.now)
name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s
class Comment:
commenter = attr.ib()
content = attr.ib()
date = attr.ib(factory=timestamp_now)
date = attr.ib(factory=pendulum.now)
name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s
class Conversation:
    """
    A messenger conversation

    url and name are stored as given, date goes through convert_date.
    messages and image_links start out empty.
    """
    url = attr.ib()
    name = attr.ib()
    date: datetime = attr.ib(converter=convert_date)
    # factory=list builds a fresh list per instance; a literal [] default
    # would be one single list shared by every Conversation
    messages = attr.ib(factory=list)
    image_links = attr.ib(factory=list)
def _from_fb_timestamp(raw):
    """
    Turn one of FB's timestamps into a pendulum DateTime

    The last 3 digits are removed before parsing, FB's dates are not
    standard (presumably they are in milliseconds, to be confirmed).
    """
    return pendulum.from_timestamp(int(str(raw)[0:-3]))


@attr.s
class Message:
    """
    A single message of a conversation

    name and content are stored as given, date is converted from one of
    FB's timestamps.
    """
    name = attr.ib()
    content = attr.ib()
    # optional() lets a missing timestamp through as date=None instead of
    # failing while trying to parse the text "None"
    date: datetime = attr.ib(
        converter=attr.converters.optional(_from_fb_timestamp)
    )
@attr.s
class Page:
name = attr.ib()
date = attr.ib(factory=timestamp_now)
date = attr.ib(factory=pendulum.now)

13
requirements.txt

@ -1,13 +1,26 @@
attrs==19.1.0
bitarray==0.9.3
bleach==3.1.0
certifi==2018.11.29
chardet==3.0.4
docutils==0.14
idna==2.8
lxml==4.4.0
pendulum==2.0.5
pkginfo==1.5.0.1
pybloom-live==3.0.0
Pygments==2.4.2
python-dateutil==2.8.0
pytzdata==2019.2
readme-renderer==24.0
requests==2.22.0
requests-file==1.4.3
requests-toolbelt==0.9.1
selenium==3.141.0
selenium-requests==1.3
six==1.12.0
tldextract==2.2.0
tqdm==4.32.2
twine==1.13.0
urllib3==1.25.2
webencodings==0.5.1

5
setup.py

@ -24,7 +24,10 @@ setuptools.setup(
"selenium-requests",
"requests",
"pybloom-live",
"attrs"
"attrs",
"cattrs",
"lxml",
"pendulum"
],
classifiers= [
"Programming Language :: Python :: 3",

Loading…
Cancel
Save