Browse Source

Use cattrs to serialize conversations in archive

pull/72/head
Wesley Kerfoot 5 years ago
parent
commit
01647262c3
  1. 4
      deletefb/deletefb.py
  2. 10
      deletefb/tools/archive.py
  3. 53
      deletefb/tools/conversations.py
  4. 6
      deletefb/types.py
  5. 1
      setup.py

4
deletefb/deletefb.py

@ -4,7 +4,7 @@ from .tools.config import settings
from .tools.likes import unlike_pages
from .tools.login import login
from .tools.wall import delete_posts
from .tools.conversations import delete_conversations
from .tools.conversations import traverse_conversations
from .tools.comments import delete_comments
import argparse
@ -119,7 +119,7 @@ def run_delete():
delete_comments(driver, args.profile_url)
elif args.mode == "conversations":
delete_conversations(driver, year=args.year)
traverse_conversations(driver, year=args.year)
else:
print("Please enter a valid mode")

10
deletefb/tools/archive.py

@ -1,13 +1,21 @@
from .config import settings
from contextlib import contextmanager
from pathlib import Path
from datetime import datetime
import attr
import cattr
import json
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Used to avoid duplicates in the log
from pybloom_live import BloomFilter
# Register a global cattr unstructure hook for datetime values: each one is
# rendered as a string using TIME_FORMAT, so that the output of
# cattr.unstructure() contains plain strings instead of datetime objects.
cattr.register_unstructure_hook(
datetime, lambda dt: datetime.strftime(dt, format=TIME_FORMAT)
)
def make_filter():
return BloomFilter(
capacity=settings["MAX_POSTS"],
@ -30,7 +38,7 @@ class Archive:
print("Archiving {0}".format(content))
if content.name not in self._bloom_filter:
self.archive_file.write(json.dumps(attr.asdict(content)) + "\n")
self.archive_file.write(json.dumps(cattr.unstructure(content)) + "\n")
self._bloom_filter.add(content.name)
return

53
deletefb/tools/conversations.py

@ -68,17 +68,9 @@ def get_conversations(driver):
return conversations
def get_convo_images(driver):
def parse_conversation(driver):
"""
Gets all links to images in a messenger conversation
Removes duplicates
"""
for img in set(lxh.fromstring(driver.page_source).xpath("//img")):
yield img.get("src")
def get_convo_messages(driver):
"""
Gets all messages in a conversation
Extracts all messages in a conversation
"""
for msg in lxh.fromstring(driver.page_source).xpath("//div[@class='msg']/div"):
@ -91,8 +83,10 @@ def get_convo_messages(driver):
date=data_store.get("timestamp")
)
def archive_conversation(driver, convo):
print(convo)
def get_messages(driver, convo):
"""
Get all of the messages for a given conversation
"""
driver.get(convo.url)
wait = WebDriverWait(driver, 20)
@ -119,12 +113,16 @@ def archive_conversation(driver, convo):
except SELENIUM_EXCEPTIONS:
continue
#for img in get_convo_images(driver):
#print(img)
return list(parse_conversation(driver))
convo.messages = list(get_convo_messages(driver))
def delete_conversation(driver, convo):
"""
Deletes a conversation
"""
def delete_conversations(driver, year=None):
return
def traverse_conversations(driver, year=None):
"""
Remove all conversations within a specified range
"""
@ -133,15 +131,18 @@ def delete_conversations(driver, year=None):
convos = get_conversations(driver)
for convo in convos:
# If the year is set and there is a date
# Then we want to only look at convos from this year
with archiver("conversations") as archive_convo:
for convo in convos:
# If the year is set and there is a date
# Then we want to only look at convos from this year
if year and convo.date:
if convo.date.year == int(year):
convo.messages = get_messages(driver, convo)
archive_convo.archive(convo)
if year and convo.date:
if convo.date.year == int(year):
archive_conversation(driver, convo)
print(convo.messages)
# Otherwise we're looking at all convos
elif not year:
convo.messages = get_messages(driver, convo)
archive_convo.archive(convo)
# Otherwise we're looking at all convos
elif not year:
archive_conversation(driver, convo)

6
deletefb/types.py

@ -2,6 +2,8 @@ import attr
import uuid
import pendulum
from datetime import datetime
def convert_date(text):
"""
Tries to parse a date into a DateTime instance
@ -35,7 +37,7 @@ class Comment:
class Conversation:
url = attr.ib()
name = attr.ib()
date = attr.ib(converter=convert_date)
date : datetime = attr.ib(converter=convert_date)
messages = attr.ib(default=[])
@attr.s
@ -44,7 +46,7 @@ class Message:
content = attr.ib()
# FB's dates carry 3 extra trailing digits (presumably milliseconds - TODO confirm).
# Drop them so the remaining value can be parsed as a timestamp in seconds.
date = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3])))
date : datetime = attr.ib(converter=lambda t: pendulum.from_timestamp(int(str(t)[0:-3])))
@attr.s
class Page:

1
setup.py

@ -25,6 +25,7 @@ setuptools.setup(
"requests",
"pybloom-live",
"attrs",
"cattrs",
"lxml",
"pendulum"
],

Loading…
Cancel
Save