Browse Source

Bump version and refactor to use Bloom filters to avoid duplicate log entries

pull/51/head
Wesley Kerfoot 6 years ago
parent
commit
2580f4594e
  1. 14
      deletefb/tools/common.py
  2. 3
      deletefb/tools/config.py
  3. 14
      deletefb/tools/likes.py
  4. 3
      deletefb/tools/wall.py
  5. 5
      setup.py

14
deletefb/tools/common.py

@ -6,6 +6,9 @@ import time
from .config import settings from .config import settings
# Used to avoid duplicates in the log
from pybloom_live import BloomFilter
from os.path import abspath, relpath, split, isfile from os.path import abspath, relpath, split, isfile
from selenium.common.exceptions import ( from selenium.common.exceptions import (
NoSuchElementException, NoSuchElementException,
@ -59,10 +62,19 @@ def archiver(category):
log_file = open(log_path, mode="ta", buffering=1) log_file = open(log_path, mode="ta", buffering=1)
bfilter = BloomFilter(
capacity=settings["MAX_POSTS"],
error_rate=0.001
)
def log(content, timestamp=False): def log(content, timestamp=False):
if not settings["ARCHIVE"]: if not settings["ARCHIVE"]:
return return
if content in bfilter:
# This was already archived
return
structured_content = { structured_content = {
"category" : category, "category" : category,
"content" : content, "content" : content,
@ -71,6 +83,8 @@ def archiver(category):
log_file.write("{0}\n".format(json.dumps(structured_content))) log_file.write("{0}\n".format(json.dumps(structured_content)))
bfilter.add(content)
return (log_file, log) return (log_file, log)

3
deletefb/tools/config.py

@ -1,3 +1,4 @@
  settings = {
- "ARCHIVE" : True
+ "ARCHIVE" : True,
+ "MAX_POSTS" : 5000
  }

14
deletefb/tools/likes.py

@ -47,19 +47,23 @@ def get_page_links(driver):
return [page.get_attribute("href").replace("www", "mobile") for page in pages] return [page.get_attribute("href").replace("www", "mobile") for page in pages]
- def unlike_page(driver, url):
+ def unlike_page(driver, url, archive=None):
""" """
Unlikes a page given the URL to it Unlikes a page given the URL to it
Args: Args:
driver: seleniumrequests.Chrome Driver instance driver: seleniumrequests.Chrome Driver instance
url: url string pointing to a page url: url string pointing to a page
archive: archiver instance
Returns: Returns:
None None
""" """
driver.get(url) driver.get(url)
print(url)
print("Unliking {0}".format(url))
wait = WebDriverWait(driver, 30) wait = WebDriverWait(driver, 30)
actions = ActionChains(driver) actions = ActionChains(driver)
@ -82,6 +86,8 @@ def unlike_page(driver, url):
click_button(driver, unlike_button) click_button(driver, unlike_button)
if archive:
archive(url)
def unlike_pages(driver, profile_url): def unlike_pages(driver, profile_url):
""" """
@ -102,9 +108,9 @@ def unlike_pages(driver, profile_url):
while urls: while urls:
for url in urls: for url in urls:
- unlike_page(driver, url)
+ unlike_page(driver, url, archive=archive_likes)
load_likes(driver, profile_url)
try: try:
load_likes(driver, profile_url)
urls = get_page_links(driver) urls = get_page_links(driver)
except SELENIUM_EXCEPTIONS: except SELENIUM_EXCEPTIONS:
# We're done # We're done

3
deletefb/tools/wall.py

@ -1,10 +1,11 @@
import time import time
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
from .config import settings
from .common import SELENIUM_EXCEPTIONS, archiver, click_button from .common import SELENIUM_EXCEPTIONS, archiver, click_button
# Used as a threshold to avoid running forever # Used as a threshold to avoid running forever
- MAX_POSTS = 15000
+ MAX_POSTS = settings["MAX_POSTS"]
def delete_posts(driver, def delete_posts(driver,
user_profile_url, user_profile_url,

5
setup.py

@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup( setuptools.setup(
name="delete-facebook-posts", name="delete-facebook-posts",
- version="1.1.1",
+ version="1.1.2",
author="Wesley Kerfoot", author="Wesley Kerfoot",
author_email="wes@wesk.tech", author_email="wes@wesk.tech",
description="A Selenium Script to Delete Facebook Posts", description="A Selenium Script to Delete Facebook Posts",
@ -16,7 +16,8 @@ setuptools.setup(
  install_requires = [
  "selenium",
  "selenium-requests",
- "requests"
+ "requests",
+ "pybloom-live"
  ],
classifiers= [ classifiers= [
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",

Loading…
Cancel
Save