Browse Source

Use cattrs to serialize conversations in archive

pull/72/head
Wesley Kerfoot 6 years ago
parent
commit
01647262c3
  1. 4
      deletefb/deletefb.py
  2. 10
      deletefb/tools/archive.py
  3. 53
      deletefb/tools/conversations.py
  4. 6
      deletefb/types.py
  5. 1
      setup.py

4
deletefb/deletefb.py

@@ -4,7 +4,7 @@ from .tools.config import settings
from .tools.likes import unlike_pages from .tools.likes import unlike_pages
from .tools.login import login from .tools.login import login
from .tools.wall import delete_posts from .tools.wall import delete_posts
from .tools.conversations import delete_conversations from .tools.conversations import traverse_conversations
from .tools.comments import delete_comments from .tools.comments import delete_comments
import argparse import argparse
@@ -119,7 +119,7 @@ def run_delete():
delete_comments(driver, args.profile_url) delete_comments(driver, args.profile_url)
elif args.mode == "conversations": elif args.mode == "conversations":
delete_conversations(driver, year=args.year) traverse_conversations(driver, year=args.year)
else: else:
print("Please enter a valid mode") print("Please enter a valid mode")

10
deletefb/tools/archive.py

@@ -1,13 +1,21 @@
from .config import settings from .config import settings
from contextlib import contextmanager from contextlib import contextmanager
from pathlib import Path from pathlib import Path
from datetime import datetime
import attr import attr
import cattr
import json import json
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Used to avoid duplicates in the log # Used to avoid duplicates in the log
from pybloom_live import BloomFilter from pybloom_live import BloomFilter
cattr.register_unstructure_hook(
datetime, lambda dt: datetime.strftime(dt, format=TIME_FORMAT)
)
def make_filter(): def make_filter():
return BloomFilter( return BloomFilter(
capacity=settings["MAX_POSTS"], capacity=settings["MAX_POSTS"],
@@ -30,7 +38,7 @@ class Archive:
print("Archiving {0}".format(content)) print("Archiving {0}".format(content))
if content.name not in self._bloom_filter: if content.name not in self._bloom_filter:
self.archive_file.write(json.dumps(attr.asdict(content)) + "\n") self.archive_file.write(json.dumps(cattr.unstructure(content)) + "\n")
self._bloom_filter.add(content.name) self._bloom_filter.add(content.name)
return return

53
deletefb/tools/conversations.py

@@ -68,17 +68,9 @@ def get_conversations(driver):
return conversations return conversations
def get_convo_images(driver): def parse_conversation(driver):
""" """
Gets all links to images in a messenger conversation Extracts all messages in a conversation
Removes duplicates
"""
for img in set(lxh.fromstring(driver.page_source).xpath("//img")):
yield img.get("src")
def get_convo_messages(driver):
"""
Gets all messages in a conversation
""" """
for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"): for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"):
@@ -91,8 +83,10 @@ def get_convo_messages(driver):
date=data_store.get("timestamp") date=data_store.get("timestamp")
) )
def archive_conversation(driver, convo): def get_messages(driver, convo):
print(convo) """
Get all of the messages for a given conversation
"""
driver.get(convo.url) driver.get(convo.url)
wait = WebDriverWait(driver, 20) wait = WebDriverWait(driver, 20)
@@ -119,12 +113,16 @@ def archive_conversation(driver, convo):
except SELENIUM_EXCEPTIONS: except SELENIUM_EXCEPTIONS:
continue continue
#for img in get_convo_images(driver): return list(parse_conversation(driver))
#print(img)
convo.messages = list(get_convo_messages(driver)) def delete_conversation(driver, convo):
"""
Deletes a conversation
"""
def delete_conversations(driver, year=None): return
def traverse_conversations(driver, year=None):
""" """
Remove all conversations within a specified range Remove all conversations within a specified range
""" """
@@ -133,15 +131,18 @@ def delete_conversations(driver, year=None):
convos = get_conversations(driver) convos = get_conversations(driver)
for convo in convos: with archiver("conversations") as archive_convo:
# If the year is set and there is a date for convo in convos:
# Then we want to only look at convos from this year # If the year is set and there is a date
# Then we want to only look at convos from this year
if year and convo.date:
if convo.date.year == int(year):
convo.messages = get_messages(driver, convo)
archive_convo.archive(convo)
if year and convo.date: # Otherwise we're looking at all convos
if convo.date.year == int(year): elif not year:
archive_conversation(driver, convo) convo.messages = get_messages(driver, convo)
print(convo.messages) archive_convo.archive(convo)
# Otherwise we're looking at all convos
elif not year:
archive_conversation(driver, convo)

6
deletefb/types.py

@@ -2,6 +2,8 @@ import attr
import uuid import uuid
import pendulum import pendulum
from datetime import datetime
def convert_date(text): def convert_date(text):
""" """
Tries to parse a date into a DateTime instance Tries to parse a date into a DateTime instance
@@ -35,7 +37,7 @@ class Comment:
class Conversation: class Conversation:
url = attr.ib() url = attr.ib()
name = attr.ib() name = attr.ib()
date = attr.ib(converter=convert_date) date : datetime = attr.ib(converter=convert_date)
messages = attr.ib(default=[]) messages = attr.ib(default=[])
@attr.s @attr.s
@@ -44,7 +46,7 @@ class Message:
content = attr.ib() content = attr.ib()
# Remove the last 3 digits from FB's dates. They are not standard. # Remove the last 3 digits from FB's dates. They are not standard.
date = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3]))) date : datetime = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3])))
@attr.s @attr.s
class Page: class Page:

1
setup.py

@@ -25,6 +25,7 @@ setuptools.setup(
"requests", "requests",
"pybloom-live", "pybloom-live",
"attrs", "attrs",
"cattrs",
"lxml", "lxml",
"pendulum" "pendulum"
], ],

Loading…
Cancel
Save