Browse Source

Refactor timestamp parsing

pull/72/head
Wesley Kerfoot 5 years ago
parent
commit
0355ebcc66
  1. 6
      deletefb/tools/common.py
  2. 34
      deletefb/tools/conversations.py
  3. 24
      deletefb/types.py
  4. 12
      requirements.txt
  5. 3
      setup.py

6
deletefb/tools/common.py

@ -5,13 +5,12 @@ from selenium.common.exceptions import (
TimeoutException,
JavascriptException
)
from arrow.parser import ParserError
import json
import logging
import logging.config
import os
import arrow
import pendulum
SELENIUM_EXCEPTIONS = (
NoSuchElementException,
@ -34,9 +33,6 @@ def scroll_to(driver, el):
except SELENIUM_EXCEPTIONS:
return
def parse_ts(text):
    """
    Parse a date string with arrow.

    Args:
        text: date text laid out as "DD/M/YYYY"

    Returns: the result of arrow.get for text under that format
    """
    date_format = "DD/M/YYYY"
    return arrow.get(text, date_format)
def logger(name):
"""
Args:

34
deletefb/tools/conversations.py

@ -1,6 +1,6 @@
from .archive import archiver
from ..types import Conversation
from .common import SELENIUM_EXCEPTIONS, logger, parse_ts, ParserError
from .common import SELENIUM_EXCEPTIONS, logger
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
@ -25,26 +25,32 @@ def get_conversations(driver):
LOG.exception("No conversations")
return
# This function *cannot* be a generator
# Otherwise elements will become stale
conversations = []
while True:
for convo in driver.find_elements_by_xpath("//a"):
url = convo.get_attribute("href")
timestamp = None
if url and "messages/read" in url:
try:
timestamp = parse_ts(convo.find_element_by_xpath("../../..//abbr").text)
except ParserError:
print("Failed to parse timestamp")
continue
timestamp = convo.find_element_by_xpath("../../..//abbr").text
conversation_name = convo.find_element_by_xpath("../../../div/div/header/h3").text.strip()
assert(conversation_name)
assert(url)
yield Conversation(url=url,
name=conversation_name,
timestamp=timestamp)
conversations.append(
Conversation(
url=url,
timestamp=timestamp,
name=conversation_name
)
)
try:
next_url = (driver.find_element_by_id("see_older_threads").
find_element_by_xpath("a").
@ -56,16 +62,16 @@ def get_conversations(driver):
break
driver.get(next_url)
def delete_conversations(driver):
return conversations
def delete_conversations(driver, older_than=None):
"""
Remove all conversations within a specified range
"""
driver.get("https://mobile.facebook.com/messages/?pageNum=1&selectable&see_older_newer=1")
convos = list(get_conversations(driver))
convos = get_conversations(driver)
for convo in convos:
print(convo.url)
print(convo.name)
print(convo.timestamp)
print(convo)

24
deletefb/types.py

@ -1,35 +1,39 @@
import attr
import uuid
import datetime
import pendulum
def timestamp_now():
    """
    Returns: the current date and time (as given by datetime.now())
    rendered as an ISO 8601 string
    """
    current = datetime.datetime.now()
    return current.isoformat()
def convert_timestamp(text):
    """
    Convert date text into a pendulum datetime.

    Two layouts are tried in order: a full date ("DD/M/YYYY"), then a
    day-and-month form ("DD MMM") whose year is set to the current year.

    Args:
        text: the raw date string; None or an empty string is accepted

    Returns: the parsed pendulum datetime, or None when text is missing
    or matches neither layout
    """
    # Missing input is not parseable text. Return early so it never
    # reaches pendulum, where None would fail with an error other than
    # the ValueError handled below.
    if not text:
        return None

    try:
        return pendulum.from_format(text, "DD/M/YYYY")
    except ValueError:
        pass

    try:
        # "DD MMM" carries no year, so pin it to the current one.
        return (pendulum.from_format(text, "DD MMM")
                .set(year=pendulum.now().year))
    except ValueError:
        return None
# Data type definitions of posts and comments
@attr.s
class Post:
    """
    Data type for a post.
    """
    content = attr.ib()
    # factory=list builds a fresh list per instance; default=[] would
    # hand the very same list object to every Post.
    comments = attr.ib(factory=list)
    # Defaults to pendulum.now() at construction time.
    date = attr.ib(factory=pendulum.now)
    # Defaults to a random uuid4 hex string.
    name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s
class Comment:
    """
    Data type for a comment.
    """
    commenter = attr.ib()
    content = attr.ib()
    # Defaults to pendulum.now() at construction time.
    date = attr.ib(factory=pendulum.now)
    # Defaults to a random uuid4 hex string.
    name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s
class Conversation:
    """
    Data type for a conversation.
    """
    url = attr.ib()
    name = attr.ib()
    # Required argument; the supplied value is passed through
    # convert_timestamp before being stored.
    timestamp = attr.ib(converter=convert_timestamp)
@attr.s
class Page:
    """
    Data type for a page.
    """
    name = attr.ib()
    # Defaults to pendulum.now() at construction time.
    date = attr.ib(factory=pendulum.now)

12
requirements.txt

@ -1,13 +1,25 @@
attrs==19.1.0
bitarray==0.9.3
bleach==3.1.0
certifi==2018.11.29
chardet==3.0.4
docutils==0.14
idna==2.8
pendulum==2.0.5
pkginfo==1.5.0.1
pybloom-live==3.0.0
Pygments==2.4.2
python-dateutil==2.8.0
pytzdata==2019.2
readme-renderer==24.0
requests==2.22.0
requests-file==1.4.3
requests-toolbelt==0.9.1
selenium==3.141.0
selenium-requests==1.3
six==1.12.0
tldextract==2.2.0
tqdm==4.32.2
twine==1.13.0
urllib3==1.25.2
webencodings==0.5.1

3
setup.py

@ -24,7 +24,8 @@ setuptools.setup(
"selenium-requests",
"requests",
"pybloom-live",
"attrs"
"attrs",
"pendulum"
],
classifiers= [
"Programming Language :: Python :: 3",

Loading…
Cancel
Save