Browse Source

Refactor timestamp parsing

pull/72/head
Wesley Kerfoot 6 years ago
parent
commit
0355ebcc66
  1. 6
      deletefb/tools/common.py
  2. 34
      deletefb/tools/conversations.py
  3. 24
      deletefb/types.py
  4. 12
      requirements.txt
  5. 3
      setup.py

6
deletefb/tools/common.py

@ -5,13 +5,12 @@ from selenium.common.exceptions import (
TimeoutException, TimeoutException,
JavascriptException JavascriptException
) )
from arrow.parser import ParserError
import json import json
import logging import logging
import logging.config import logging.config
import os import os
import arrow import pendulum
SELENIUM_EXCEPTIONS = ( SELENIUM_EXCEPTIONS = (
NoSuchElementException, NoSuchElementException,
@ -34,9 +33,6 @@ def scroll_to(driver, el):
except SELENIUM_EXCEPTIONS: except SELENIUM_EXCEPTIONS:
return return
def parse_ts(text):
return arrow.get(text, "DD/M/YYYY")
def logger(name): def logger(name):
""" """
Args: Args:

34
deletefb/tools/conversations.py

@ -1,6 +1,6 @@
from .archive import archiver from .archive import archiver
from ..types import Conversation from ..types import Conversation
from .common import SELENIUM_EXCEPTIONS, logger, parse_ts, ParserError from .common import SELENIUM_EXCEPTIONS, logger
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
@ -25,26 +25,32 @@ def get_conversations(driver):
LOG.exception("No conversations") LOG.exception("No conversations")
return return
# This function *cannot* be a generator
# Otherwise elements will become stale
conversations = []
while True: while True:
for convo in driver.find_elements_by_xpath("//a"): for convo in driver.find_elements_by_xpath("//a"):
url = convo.get_attribute("href") url = convo.get_attribute("href")
timestamp = None timestamp = None
if url and "messages/read" in url: if url and "messages/read" in url:
try:
timestamp = parse_ts(convo.find_element_by_xpath("../../..//abbr").text)
except ParserError:
print("Failed to parse timestamp")
continue
timestamp = convo.find_element_by_xpath("../../..//abbr").text
conversation_name = convo.find_element_by_xpath("../../../div/div/header/h3").text.strip() conversation_name = convo.find_element_by_xpath("../../../div/div/header/h3").text.strip()
assert(conversation_name) assert(conversation_name)
assert(url) assert(url)
yield Conversation(url=url, conversations.append(
name=conversation_name, Conversation(
timestamp=timestamp) url=url,
timestamp=timestamp,
name=conversation_name
)
)
try: try:
next_url = (driver.find_element_by_id("see_older_threads"). next_url = (driver.find_element_by_id("see_older_threads").
find_element_by_xpath("a"). find_element_by_xpath("a").
@ -56,16 +62,16 @@ def get_conversations(driver):
break break
driver.get(next_url) driver.get(next_url)
def delete_conversations(driver): return conversations
def delete_conversations(driver, older_than=None):
""" """
Remove all conversations within a specified range Remove all conversations within a specified range
""" """
driver.get("https://mobile.facebook.com/messages/?pageNum=1&selectable&see_older_newer=1") driver.get("https://mobile.facebook.com/messages/?pageNum=1&selectable&see_older_newer=1")
convos = list(get_conversations(driver)) convos = get_conversations(driver)
for convo in convos: for convo in convos:
print(convo.url) print(convo)
print(convo.name)
print(convo.timestamp)

24
deletefb/types.py

@ -1,35 +1,39 @@
import attr import attr
import uuid import uuid
import datetime import pendulum
def timestamp_now(): def convert_timestamp(text):
""" try:
Returns: a timestamp for this instant, in ISO 8601 format return pendulum.from_format(text, "DD/M/YYYY")
""" except ValueError:
return datetime.datetime.isoformat(datetime.datetime.now()) try:
return (pendulum.from_format(text, "DD MMM")
.set(year=pendulum.now().year))
except ValueError:
return None
# Data type definitions of posts and comments # Data type definitions of posts and comments
@attr.s @attr.s
class Post: class Post:
content = attr.ib() content = attr.ib()
comments = attr.ib(default=[]) comments = attr.ib(default=[])
date = attr.ib(factory=timestamp_now) date = attr.ib(factory=pendulum.now)
name = attr.ib(factory=lambda: uuid.uuid4().hex) name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s @attr.s
class Comment: class Comment:
commenter = attr.ib() commenter = attr.ib()
content = attr.ib() content = attr.ib()
date = attr.ib(factory=timestamp_now) date = attr.ib(factory=pendulum.now)
name = attr.ib(factory=lambda: uuid.uuid4().hex) name = attr.ib(factory=lambda: uuid.uuid4().hex)
@attr.s @attr.s
class Conversation: class Conversation:
url = attr.ib() url = attr.ib()
name = attr.ib() name = attr.ib()
timestamp = attr.ib(default=None) timestamp = attr.ib(converter=convert_timestamp)
@attr.s @attr.s
class Page: class Page:
name = attr.ib() name = attr.ib()
date = attr.ib(factory=timestamp_now) date = attr.ib(factory=pendulum.now)

12
requirements.txt

@ -1,13 +1,25 @@
attrs==19.1.0 attrs==19.1.0
bitarray==0.9.3 bitarray==0.9.3
bleach==3.1.0
certifi==2018.11.29 certifi==2018.11.29
chardet==3.0.4 chardet==3.0.4
docutils==0.14
idna==2.8 idna==2.8
pendulum==2.0.5
pkginfo==1.5.0.1
pybloom-live==3.0.0 pybloom-live==3.0.0
Pygments==2.4.2
python-dateutil==2.8.0
pytzdata==2019.2
readme-renderer==24.0
requests==2.22.0 requests==2.22.0
requests-file==1.4.3 requests-file==1.4.3
requests-toolbelt==0.9.1
selenium==3.141.0 selenium==3.141.0
selenium-requests==1.3 selenium-requests==1.3
six==1.12.0 six==1.12.0
tldextract==2.2.0 tldextract==2.2.0
tqdm==4.32.2
twine==1.13.0
urllib3==1.25.2 urllib3==1.25.2
webencodings==0.5.1

3
setup.py

@ -24,7 +24,8 @@ setuptools.setup(
"selenium-requests", "selenium-requests",
"requests", "requests",
"pybloom-live", "pybloom-live",
"attrs" "attrs",
"pendulum"
], ],
classifiers= [ classifiers= [
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",

Loading…
Cancel
Save