Browse Source

Archive wall posts, include timestamp

pull/31/head
Wesley Kerfoot 6 years ago
parent
commit
643929d3dd
  1. 11
      deletefb/tools/common.py
  2. 16
      deletefb/tools/wall.py

11
deletefb/tools/common.py

@ -25,12 +25,17 @@ def archiver(category):
call archiver like archive("some content") call archiver like archive("some content")
""" """
log_path = abspath(relpath(split(category)[-1], ".")) log_path = "{0}.log".format(abspath(relpath(split(category)[-1], ".")))
log_file = open(log_path, mode="wt", buffering=1) log_file = open(log_path, mode="wt", buffering=1)
def log(content): def log(content, timestamp=False):
structured_content = {"category" : category, "content" : content} structured_content = {
"category" : category,
"content" : content,
"timestamp" : timestamp
}
log_file.write("{0}\n".format(dumps(structured_content))) log_file.write("{0}\n".format(dumps(structured_content)))
return (log_file, log) return (log_file, log)

16
deletefb/tools/wall.py

@ -1,9 +1,10 @@
import time import time
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
from .common import SELENIUM_EXCEPTIONS from .common import SELENIUM_EXCEPTIONS, archiver
MAX_POSTS = 5000 # Used as a threshold to avoid running forever
MAX_POSTS = 15000
def delete_posts(driver, def delete_posts(driver,
user_profile_url, user_profile_url,
@ -17,12 +18,23 @@ def delete_posts(driver,
driver.get(user_profile_url) driver.get(user_profile_url)
wall_log, archive_wall_post = archiver("wall")
for _ in range(MAX_POSTS): for _ in range(MAX_POSTS):
post_button_sel = "_4xev" post_button_sel = "_4xev"
post_content_sel = "_5_jv"
post_timestamp_sel = "timestamp"
while True: while True:
try: try:
timeline_element = driver.find_element_by_class_name(post_button_sel) timeline_element = driver.find_element_by_class_name(post_button_sel)
post_content_element = driver.find_element_by_class_name(post_content_sel)
post_content_ts = driver.find_element_by_class_name(post_timestamp_sel)
archive_wall_post(post_content_element.text, timestamp=post_content_ts.text)
actions = ActionChains(driver) actions = ActionChains(driver)
actions.move_to_element(timeline_element).click().perform() actions.move_to_element(timeline_element).click().perform()

Loading…
Cancel
Save