Browse Source

Bump version and refactor to use Bloom filters to avoid archiving duplicates

pull/51/head
Wesley Kerfoot 5 years ago
parent
commit
2580f4594e
  1. 14
      deletefb/tools/common.py
  2. 3
      deletefb/tools/config.py
  3. 14
      deletefb/tools/likes.py
  4. 3
      deletefb/tools/wall.py
  5. 5
      setup.py

14
deletefb/tools/common.py

@ -6,6 +6,9 @@ import time
from .config import settings
# Used to avoid duplicates in the log
from pybloom_live import BloomFilter
from os.path import abspath, relpath, split, isfile
from selenium.common.exceptions import (
NoSuchElementException,
@ -59,10 +62,19 @@ def archiver(category):
log_file = open(log_path, mode="ta", buffering=1)
bfilter = BloomFilter(
capacity=settings["MAX_POSTS"],
error_rate=0.001
)
def log(content, timestamp=False):
if not settings["ARCHIVE"]:
return
if content in bfilter:
# This was already archived
return
structured_content = {
"category" : category,
"content" : content,
@ -71,6 +83,8 @@ def archiver(category):
log_file.write("{0}\n".format(json.dumps(structured_content)))
bfilter.add(content)
return (log_file, log)

3
deletefb/tools/config.py

@ -1,3 +1,4 @@
settings = {
"ARCHIVE" : True
"ARCHIVE" : True,
"MAX_POSTS" : 5000
}

14
deletefb/tools/likes.py

@ -47,19 +47,23 @@ def get_page_links(driver):
return [page.get_attribute("href").replace("www", "mobile") for page in pages]
def unlike_page(driver, url):
def unlike_page(driver, url, archive=None):
"""
Unlikes a page given the URL to it
Args:
driver: seleniumrequests.Chrome Driver instance
url: url string pointing to a page
archive: archiver instance
Returns:
None
"""
driver.get(url)
print(url)
print("Unliking {0}".format(url))
wait = WebDriverWait(driver, 30)
actions = ActionChains(driver)
@ -82,6 +86,8 @@ def unlike_page(driver, url):
click_button(driver, unlike_button)
if archive:
archive(url)
def unlike_pages(driver, profile_url):
"""
@ -102,9 +108,9 @@ def unlike_pages(driver, profile_url):
while urls:
for url in urls:
unlike_page(driver, url)
load_likes(driver, profile_url)
unlike_page(driver, url, archive=archive_likes)
try:
load_likes(driver, profile_url)
urls = get_page_links(driver)
except SELENIUM_EXCEPTIONS:
# We're done

3
deletefb/tools/wall.py

@ -1,10 +1,11 @@
import time
from selenium.webdriver.common.action_chains import ActionChains
from .config import settings
from .common import SELENIUM_EXCEPTIONS, archiver, click_button
# Used as a threshold to avoid running forever
MAX_POSTS = 15000
MAX_POSTS = settings["MAX_POSTS"]
def delete_posts(driver,
user_profile_url,

5
setup.py

@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup(
name="delete-facebook-posts",
version="1.1.1",
version="1.1.2",
author="Wesley Kerfoot",
author_email="wes@wesk.tech",
description="A Selenium Script to Delete Facebook Posts",
@ -16,7 +16,8 @@ setuptools.setup(
install_requires = [
"selenium",
"selenium-requests",
"requests"
"requests",
"pybloom-live"
],
classifiers= [
"Programming Language :: Python :: 3",

Loading…
Cancel
Save