Merge pull request #53 from weskerfoot/refactor

Refactor wall module & refactor archiver
6 years ago · beaa848a13
7 changed files with 161 additions and 124 deletions
--- a/deletefb/deletefb.py
+++ b/deletefb/deletefb.py
@ -1,17 +1,14 @@
 #!/usr/bin/env python
 from .tools.common import logger
 from .tools.config import settings
 from .tools.likes import unlike_pages
 from .tools.login import login
 from .tools.wall import delete_posts
 import argparse
 import getpass
 import json
 import os
 import sys
 from .tools.config import settings
 from .tools.common import logger
 from .tools.login import login
 from .tools.wall import delete_posts
 from .tools.likes import unlike_pages
 LOG = logger("deletefb")
 def run_delete():
--- a/deletefb/tools/archive.py
+++ b/deletefb/tools/archive.py
@ -0,0 +1,54 @@
 from .config import settings
 from contextlib import contextmanager
 from pathlib import Path
 import attr
 import json
 # Used to avoid duplicates in the log
 from pybloom_live import BloomFilter
 def make_filter():
     """
     Build a new, empty Bloom filter for de-duplicating archived entries.

     Returns:
         A BloomFilter whose capacity is the MAX_POSTS setting and whose
         false-positive rate is 0.1%
     """
     max_entries = settings["MAX_POSTS"]
     return BloomFilter(capacity=max_entries, error_rate=0.001)
@attr.s
class Archive:
    """
    Writes archived objects to a file as one JSON document per line,
    skipping any object whose name has already been written.
    """
    # Label for the kind of content being archived (e.g. "wall" or "likes")
    archive_type = attr.ib()

    # We give the Archive class a file handle
    archive_file = attr.ib()

    # Remembers the names already written so they are not logged twice.
    # A Bloom filter can report false positives, so an entry may very
    # occasionally be skipped even though it was never written.
    _bloom_filter = attr.ib(factory=make_filter)

    def archive(self, content):
        """
        Archive an object

        Args:
            content: an attrs instance with a `name` attribute; it is
                     serialised with attr.asdict and written as JSON

        Returns:
            None
        """
        # Honour the ARCHIVE setting, as the archiver in tools.common did:
        # when archiving is switched off nothing is printed or written.
        if not settings["ARCHIVE"]:
            return

        print("Archiving {0}".format(content))

        if content.name in self._bloom_filter:
            # This was already archived
            return

        self.archive_file.write(json.dumps(attr.asdict(content)) + "\n")
        self._bloom_filter.add(content.name)
@contextmanager
def archiver(archive_type):
    """
    Context manager that yields an Archive backed by a log file.

    The log file lives in the current directory, is named after the final
    path component of `archive_type` with a ".log" suffix, and is opened
    in line-buffered text append mode.

    Args:
        archive_type: str The category of content being archived

    Yields:
        An Archive instance; call its `archive` method to log content
    """
    log_path = (Path(".") / Path(archive_type).name).with_suffix(".log")

    # Opening the file with `with` guarantees the handle is closed even if
    # building the Archive itself fails, not only when the caller's block
    # raises.
    with open(log_path, mode="ta", buffering=1) as archive_file:
        yield Archive(
            archive_type=archive_type,
            archive_file=archive_file
        )
--- a/deletefb/tools/common.py
+++ b/deletefb/tools/common.py
@ -1,21 +1,15 @@
-import json
+from os.path import isfile
 import logging
 import logging.config
 import os
 import time
 from .config import settings
 # Used to avoid duplicates in the log
 from pybloom_live import BloomFilter
 from os.path import abspath, relpath, split, isfile
 from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException
 )
 import json
 import logging
 import logging.config
 import os
 SELENIUM_EXCEPTIONS = (
    NoSuchElementException,
    StaleElementReferenceException,
@ -42,8 +36,7 @@ def logger(name):
    """
    # Make sure the path always points to the correct directory
-    config_path = os.path.dirname(
+    config_path = os.path.dirname(os.path.realpath(__file__)) + "/../logging_conf.json"
                    os.path.realpath(__file__)) + "/../logging_conf.json"
    if not isfile(config_path):  # called from file (deletefb.py)
        os.chdir("..")
@ -52,46 +45,6 @@ def logger(name):
        logging.config.dictConfig(config["logging"])
    return logging.getLogger(name)
 def archiver(category):
     """
     Open `<category>.log` for appending and build a function that logs
     content to it, skipping content that was already logged.

     Args:
         category: str The category of logs you want to log

     Returns:
         (log_file_handle, log) where `log(content, timestamp=False)`
         writes one JSON line per new piece of content. The caller is
         responsible for closing the file handle.
     """
     # Only the last path component of the category names the log file
     base_name = split(category)[-1]
     log_path = "{0}.log".format(abspath(relpath(base_name, ".")))
     log_file = open(log_path, mode="ta", buffering=1)

     seen = BloomFilter(capacity=settings["MAX_POSTS"], error_rate=0.001)

     def log(content, timestamp=False):
         # Nothing to do when archiving is disabled or this content
         # was already archived
         if not settings["ARCHIVE"] or content in seen:
             return

         record = {
             "category" : category,
             "content" : content,
             "timestamp" : timestamp
         }
         line = json.dumps(record)
         log_file.write("{0}\n".format(line))
         seen.add(content)

     return (log_file, log)
 NO_CHROME_DRIVER = """
 You need to install the chromedriver for Selenium\n
 Please see this link https://github.com/weskerfoot/DeleteFB#how-to-use-it\n
--- a/deletefb/tools/likes.py
+++ b/deletefb/tools/likes.py
@ -1,9 +1,9 @@
 from .archive import archiver
 from ..types import Page
 from .common import SELENIUM_EXCEPTIONS, logger, click_button
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
-
+from selenium.webdriver.support.ui import WebDriverWait
 from .common import SELENIUM_EXCEPTIONS, archiver, logger, click_button
 LOG = logger(__name__)
@ -41,8 +41,6 @@ def get_page_links(driver):
    """
    pages = driver.find_elements_by_xpath("//li//div/div/a[contains(@class, 'lfloat')]")
    actions = ActionChains(driver)
    return [page.get_attribute("href").replace("www", "mobile") for page in pages]
 def unlike_page(driver, url, archive=None):
@ -64,8 +62,6 @@ def unlike_page(driver, url, archive=None):
    wait = WebDriverWait(driver, 20)
    actions = ActionChains(driver)
    try:
        wait.until(
            EC.presence_of_element_located((By.XPATH, "//*[text()='Liked']"))
@ -89,7 +85,7 @@ def unlike_page(driver, url, archive=None):
    click_button(driver, unlike_button)
    if archive:
-        archive(url)
+        archive(Page(name=url))
 def unlike_pages(driver, profile_url):
    """
@ -102,21 +98,17 @@ def unlike_pages(driver, profile_url):
        None
    """
-    like_log, archive_likes = archiver("likes")
+    with archiver("likes") as archive_likes:
        load_likes(driver, profile_url)
        urls = get_page_links(driver)
        while urls:
            for url in urls:
-            unlike_page(driver, url, archive=archive_likes)
+                unlike_page(driver, url, archive=archive_likes.archive)
            try:
                load_likes(driver, profile_url)
                urls = get_page_links(driver)
            except SELENIUM_EXCEPTIONS:
                # We're done
                break
    # Explicitly close the log file when we're done with it
    like_log.close()
--- a/deletefb/tools/login.py
+++ b/deletefb/tools/login.py
@ -1,12 +1,10 @@
-import time
+from .common import NO_CHROME_DRIVER
 import sys
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.chrome.options import Options
 from seleniumrequests import Chrome
-from .common import NO_CHROME_DRIVER
+import sys
-
+import time
 def login(user_email_address,
          user_password,
--- a/deletefb/tools/wall.py
+++ b/deletefb/tools/wall.py
@ -1,8 +1,10 @@
-import time
+from ..types import Post
 from .archive import archiver
 from .common import SELENIUM_EXCEPTIONS, click_button
 from .config import settings
 from selenium.webdriver.common.action_chains import ActionChains
-from .config import settings
+import time
 from .common import SELENIUM_EXCEPTIONS, archiver, click_button
 # Used as a threshold to avoid running forever
 MAX_POSTS = settings["MAX_POSTS"]
@ -30,8 +32,9 @@ def delete_posts(driver,
        post_content_sel = "userContent"
        post_timestamp_sel = "timestampContent"
-        wall_log, archive_wall_post = archiver("wall")
+        button_types = ["FeedDeleteOption", "HIDE_FROM_TIMELINE", "UNTAG"]
        with archiver("wall") as archive_wall_post:
            while True:
                try:
                    timeline_element = driver.find_element_by_class_name(post_button_sel)
@ -39,7 +42,14 @@ def delete_posts(driver,
                    post_content_element = driver.find_element_by_class_name(post_content_sel)
                    post_content_ts = driver.find_element_by_class_name(post_timestamp_sel)
-                archive_wall_post(post_content_element.text, timestamp=post_content_ts.text)
+
                    # Archive the post
                    archive_wall_post.archive(
                        Post(
                            content=post_content_element.text,
                            date=post_content_ts.text
                        )
                    )
                    actions = ActionChains(driver)
                    actions.move_to_element(timeline_element).click().perform()
@ -47,13 +57,18 @@ def delete_posts(driver,
                    menu = driver.find_element_by_css_selector("#globalContainer > div.uiContextualLayerPositioner.uiLayer > div")
                    actions.move_to_element(menu).perform()
                    delete_button = None
                    for button_type in button_types:
                        try:
-                    delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"FeedDeleteOption\"]")
+                            delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"{0}\"]".format(button_type))
-                except SELENIUM_EXCEPTIONS:
+                            break
                    try:
                        delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"HIDE_FROM_TIMELINE\"]")
                        except SELENIUM_EXCEPTIONS:
-                        delete_button = menu.find_element_by_xpath("//a[@data-feed-option-name=\"UNTAG\"]")
+                            continue
                    if not delete_button:
                        print("Could not find anything to delete")
                        break
                    actions.move_to_element(delete_button).click().perform()
                    confirmation_button = driver.find_element_by_class_name("layerConfirm")
@ -64,7 +79,6 @@ def delete_posts(driver,
                    continue
                else:
                    break
        wall_log.close()
            # Required to sleep the thread for a bit after using JS to click this button
            time.sleep(5)
--- a/deletefb/types.py
+++ b/deletefb/types.py
@ -0,0 +1,29 @@
 import attr
 import uuid
 import datetime
 def timestamp_now():
     """
     Returns: the current local date and time as an ISO 8601 string
     """
     current_time = datetime.datetime.now()
     return current_time.isoformat()
 # Data type definitions of posts and comments
@attr.s
class Post:
    """
    A post to be archived.

    Only `content` is required; the other fields are filled in
    automatically when omitted.
    """
    content = attr.ib()
    # factory=list gives each Post its own list; a literal [] default
    # would be a single list object shared by every instance.
    comments = attr.ib(factory=list)
    # Defaults to the time the object is created
    date = attr.ib(factory=timestamp_now)
    # Defaults to a random hex identifier
    name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s
class Comment:
    """
    A comment, identified by its author and its text.

    `date` and `name` are generated at construction time when omitted.
    """
    commenter = attr.ib()
    content = attr.ib()
    # Defaults to the time the object is created
    date = attr.ib(default=attr.Factory(timestamp_now))
    # Defaults to a random hex identifier
    name = attr.ib(default=attr.Factory(lambda: uuid.uuid4().hex))
@attr.s
class Page:
    """
    A page, identified by `name`.

    `date` is generated at construction time when omitted.
    """
    name = attr.ib()
    # Defaults to the time the object is created
    date = attr.ib(default=attr.Factory(timestamp_now))