Browse Source

Merge pull request #53 from weskerfoot/refactor

Refactor wall module & refactor archiver
pull/63/head
Wesley Kerfoot 6 years ago
committed by GitHub
parent
commit
beaa848a13
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 13
      deletefb/deletefb.py
  2. 54
      deletefb/tools/archive.py
  3. 61
      deletefb/tools/common.py
  4. 22
      deletefb/tools/likes.py
  5. 8
      deletefb/tools/login.py
  6. 36
      deletefb/tools/wall.py
  7. 29
      deletefb/types.py

13
deletefb/deletefb.py

@ -1,17 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
from .tools.common import logger
from .tools.config import settings
from .tools.likes import unlike_pages
from .tools.login import login
from .tools.wall import delete_posts
import argparse import argparse
import getpass import getpass
import json
import os
import sys import sys
from .tools.config import settings
from .tools.common import logger
from .tools.login import login
from .tools.wall import delete_posts
from .tools.likes import unlike_pages
LOG = logger("deletefb") LOG = logger("deletefb")
def run_delete(): def run_delete():

54
deletefb/tools/archive.py

@ -0,0 +1,54 @@
from .config import settings
from contextlib import contextmanager
from pathlib import Path
import attr
import json
# Used to avoid duplicates in the log
from pybloom_live import BloomFilter
def make_filter():
    """
    Build a fresh, empty Bloom filter.

    Returns:
        A BloomFilter whose capacity is settings["MAX_POSTS"] and whose
        error rate is 0.001
    """
    filter_options = {
        "capacity": settings["MAX_POSTS"],
        "error_rate": 0.001,
    }
    return BloomFilter(**filter_options)
@attr.s
class Archive:
    """
    Writes objects to a file as one JSON document per line, skipping any
    object whose `name` has already been recorded in the Bloom filter.
    """
    # Label supplied by the caller for this archive
    archive_type = attr.ib()

    # We give the Archive class a file handle
    archive_file = attr.ib()

    # Remembers the names that were already written
    _bloom_filter = attr.ib(factory=make_filter)

    def archive(self, content):
        """
        Archive an object

        Args:
            content: an attrs instance exposing a `name` attribute

        Returns:
            None
        """
        print("Archiving {0}".format(content))

        already_recorded = content.name in self._bloom_filter
        if already_recorded:
            return

        serialized = json.dumps(attr.asdict(content))
        self.archive_file.write(serialized + "\n")
        self._bloom_filter.add(content.name)
@contextmanager
def archiver(archive_type):
    """
    Context manager that yields an Archive writing to a ".log" file in
    the current working directory.

    Only the final path component of `archive_type` is used for the file
    name, and any suffix it already carries is replaced by ".log". The
    file is opened in text append mode with line buffering.

    Args:
        archive_type: str The kind of content being archived, e.g. "wall"

    Yields:
        An Archive instance bound to the opened log file
    """
    log_path = (Path(".") / Path(archive_type).name).with_suffix(".log")

    # Using `with` means the handle is released on every exit path,
    # including a failure while the Archive itself is being constructed
    # (previously the file stayed open in that case).
    with open(log_path, mode="ta", buffering=1) as archive_file:
        yield Archive(
            archive_type=archive_type,
            archive_file=archive_file
        )

61
deletefb/tools/common.py

@ -1,21 +1,15 @@
import json from os.path import isfile
import logging
import logging.config
import os
import time
from .config import settings
# Used to avoid duplicates in the log
from pybloom_live import BloomFilter
from os.path import abspath, relpath, split, isfile
from selenium.common.exceptions import ( from selenium.common.exceptions import (
NoSuchElementException, NoSuchElementException,
StaleElementReferenceException, StaleElementReferenceException,
TimeoutException TimeoutException
) )
import json
import logging
import logging.config
import os
SELENIUM_EXCEPTIONS = ( SELENIUM_EXCEPTIONS = (
NoSuchElementException, NoSuchElementException,
StaleElementReferenceException, StaleElementReferenceException,
@ -42,8 +36,7 @@ def logger(name):
""" """
# Make sure the path always points to the correct directory # Make sure the path always points to the correct directory
config_path = os.path.dirname( config_path = os.path.dirname(os.path.realpath(__file__)) + "/../logging_conf.json"
os.path.realpath(__file__)) + "/../logging_conf.json"
if not isfile(config_path): # called from file (deletefb.py) if not isfile(config_path): # called from file (deletefb.py)
os.chdir("..") os.chdir("..")
@ -52,46 +45,6 @@ def logger(name):
logging.config.dictConfig(config["logging"]) logging.config.dictConfig(config["logging"])
return logging.getLogger(name) return logging.getLogger(name)
def archiver(category):
    """
    Open a log file for a category and build a function that appends
    de-duplicated entries to it.

    Args:
        category: str The category of logs you want to log

    Returns:
        (log_file_handle, log) where `log(content, timestamp=False)`
        writes one JSON line per previously unseen `content`. The caller
        is responsible for closing the file handle.
    """
    base_name = split(category)[-1]
    log_path = "{0}.log".format(abspath(relpath(base_name, ".")))

    log_file = open(log_path, mode="ta", buffering=1)

    seen = BloomFilter(
        capacity=settings["MAX_POSTS"],
        error_rate=0.001
    )

    def log(content, timestamp=False):
        # Nothing to do when archiving is switched off or when this
        # content was already archived
        if not settings["ARCHIVE"] or content in seen:
            return

        record = json.dumps({
            "category" : category,
            "content" : content,
            "timestamp" : timestamp
        })

        log_file.write("{0}\n".format(record))
        seen.add(content)

    return (log_file, log)
NO_CHROME_DRIVER = """ NO_CHROME_DRIVER = """
You need to install the chromedriver for Selenium\n You need to install the chromedriver for Selenium\n
Please see this link https://github.com/weskerfoot/DeleteFB#how-to-use-it\n Please see this link https://github.com/weskerfoot/DeleteFB#how-to-use-it\n

22
deletefb/tools/likes.py

@ -1,9 +1,9 @@
from .archive import archiver
from ..types import Page
from .common import SELENIUM_EXCEPTIONS, logger, click_button
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from .common import SELENIUM_EXCEPTIONS, archiver, logger, click_button
LOG = logger(__name__) LOG = logger(__name__)
@ -41,8 +41,6 @@ def get_page_links(driver):
""" """
pages = driver.find_elements_by_xpath("//li//div/div/a[contains(@class, 'lfloat')]") pages = driver.find_elements_by_xpath("//li//div/div/a[contains(@class, 'lfloat')]")
actions = ActionChains(driver)
return [page.get_attribute("href").replace("www", "mobile") for page in pages] return [page.get_attribute("href").replace("www", "mobile") for page in pages]
def unlike_page(driver, url, archive=None): def unlike_page(driver, url, archive=None):
@ -64,8 +62,6 @@ def unlike_page(driver, url, archive=None):
wait = WebDriverWait(driver, 20) wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
try: try:
wait.until( wait.until(
EC.presence_of_element_located((By.XPATH, "//*[text()='Liked']")) EC.presence_of_element_located((By.XPATH, "//*[text()='Liked']"))
@ -89,7 +85,7 @@ def unlike_page(driver, url, archive=None):
click_button(driver, unlike_button) click_button(driver, unlike_button)
if archive: if archive:
archive(url) archive(Page(name=url))
def unlike_pages(driver, profile_url): def unlike_pages(driver, profile_url):
""" """
@ -102,21 +98,17 @@ def unlike_pages(driver, profile_url):
None None
""" """
like_log, archive_likes = archiver("likes") with archiver("likes") as archive_likes:
load_likes(driver, profile_url) load_likes(driver, profile_url)
urls = get_page_links(driver) urls = get_page_links(driver)
while urls: while urls:
for url in urls: for url in urls:
unlike_page(driver, url, archive=archive_likes) unlike_page(driver, url, archive=archive_likes.archive)
try: try:
load_likes(driver, profile_url) load_likes(driver, profile_url)
urls = get_page_links(driver) urls = get_page_links(driver)
except SELENIUM_EXCEPTIONS: except SELENIUM_EXCEPTIONS:
# We're done # We're done
break break
# Explicitly close the log file when we're done with it
like_log.close()

8
deletefb/tools/login.py

@ -1,12 +1,10 @@
import time from .common import NO_CHROME_DRIVER
import sys
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from seleniumrequests import Chrome from seleniumrequests import Chrome
from .common import NO_CHROME_DRIVER import sys
import time
def login(user_email_address, def login(user_email_address,
user_password, user_password,

36
deletefb/tools/wall.py

@ -1,8 +1,10 @@
import time from ..types import Post
from .archive import archiver
from .common import SELENIUM_EXCEPTIONS, click_button
from .config import settings
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
from .config import settings import time
from .common import SELENIUM_EXCEPTIONS, archiver, click_button
# Used as a threshold to avoid running forever # Used as a threshold to avoid running forever
MAX_POSTS = settings["MAX_POSTS"] MAX_POSTS = settings["MAX_POSTS"]
@ -30,8 +32,9 @@ def delete_posts(driver,
post_content_sel = "userContent" post_content_sel = "userContent"
post_timestamp_sel = "timestampContent" post_timestamp_sel = "timestampContent"
wall_log, archive_wall_post = archiver("wall") button_types = ["FeedDeleteOption", "HIDE_FROM_TIMELINE", "UNTAG"]
with archiver("wall") as archive_wall_post:
while True: while True:
try: try:
timeline_element = driver.find_element_by_class_name(post_button_sel) timeline_element = driver.find_element_by_class_name(post_button_sel)
@ -39,7 +42,14 @@ def delete_posts(driver,
post_content_element = driver.find_element_by_class_name(post_content_sel) post_content_element = driver.find_element_by_class_name(post_content_sel)
post_content_ts = driver.find_element_by_class_name(post_timestamp_sel) post_content_ts = driver.find_element_by_class_name(post_timestamp_sel)
archive_wall_post(post_content_element.text, timestamp=post_content_ts.text)
# Archive the post
archive_wall_post.archive(
Post(
content=post_content_element.text,
date=post_content_ts.text
)
)
actions = ActionChains(driver) actions = ActionChains(driver)
actions.move_to_element(timeline_element).click().perform() actions.move_to_element(timeline_element).click().perform()
@ -47,13 +57,18 @@ def delete_posts(driver,
menu = driver.find_element_by_css_selector("#globalContainer > div.uiContextualLayerPositioner.uiLayer > div") menu = driver.find_element_by_css_selector("#globalContainer > div.uiContextualLayerPositioner.uiLayer > div")
actions.move_to_element(menu).perform() actions.move_to_element(menu).perform()
delete_button = None
for button_type in button_types:
try: try:
delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"FeedDeleteOption\"]") delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"{0}\"]".format(button_type))
except SELENIUM_EXCEPTIONS: break
try:
delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"HIDE_FROM_TIMELINE\"]")
except SELENIUM_EXCEPTIONS: except SELENIUM_EXCEPTIONS:
delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"UNTAG\"]") continue
if not delete_button:
print("Could not find anything to delete")
break
actions.move_to_element(delete_button).click().perform() actions.move_to_element(delete_button).click().perform()
confirmation_button = driver.find_element_by_class_name("layerConfirm") confirmation_button = driver.find_element_by_class_name("layerConfirm")
@ -64,7 +79,6 @@ def delete_posts(driver,
continue continue
else: else:
break break
wall_log.close()
# Required to sleep the thread for a bit after using JS to click this button # Required to sleep the thread for a bit after using JS to click this button
time.sleep(5) time.sleep(5)

29
deletefb/types.py

@ -0,0 +1,29 @@
import attr
import uuid
import datetime
def timestamp_now():
    """
    Returns: the current time, as reported by datetime.datetime.now(),
    formatted as an ISO 8601 string
    """
    current_moment = datetime.datetime.now()
    return current_moment.isoformat()
# Data type definitions of posts and comments
@attr.s
class Post:
    """
    A post, as recorded by the archiver.
    """
    # Text of the post
    content = attr.ib()

    # Comments on the post (presumably Comment instances -- confirm with
    # callers). Built by a factory so that each Post owns its own list;
    # a literal `default=[]` would be one list shared by every instance.
    comments = attr.ib(factory=list)

    # Defaults to the moment the object is created
    date = attr.ib(factory=timestamp_now)

    # Defaults to a random hex identifier
    name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s
class Comment:
    """
    A comment, identified by a generated name.
    """
    # Who wrote the comment
    commenter = attr.ib()

    # Text of the comment
    content = attr.ib()

    # Defaults to the moment the object is created
    date = attr.ib(default=attr.Factory(timestamp_now))

    # Defaults to a random hex identifier
    name = attr.ib(default=attr.Factory(lambda: uuid.uuid4().hex))
@attr.s
class Page:
    """
    A page, identified by the name the caller supplies.
    """
    # Identifier of the page, chosen by the caller
    name = attr.ib()

    # Defaults to the moment the object is created
    date = attr.ib(default=attr.Factory(timestamp_now))
Loading…
Cancel
Save