[tor-commits] [gettor/master] Add twitter service with parsing
hiro at torproject.org
hiro at torproject.org
Tue Oct 15 14:02:16 UTC 2019
commit 2082fa2b00c3611649f160e27754064f0459f025
Author: hiro <hiro at torproject.org>
Date: Thu Oct 10 16:18:07 2019 +0200
Add twitter service with parsing
---
gettor.conf.json.example | 5 +-
gettor/main.py | 10 +++
gettor/parse/twitter.py | 148 ++++++++++++++++++++++++++++++
gettor/services/twitter/twitterdm.py | 168 +++++++++++++++++++++++++++++++----
gettor/utils/twitter.py | 61 +++++++++++++
requirements.txt | 2 +-
share/locale/es.json | 2 +-
tests/conftests.py | 3 +
tests/test_twitter.py | 36 ++++++++
9 files changed, 415 insertions(+), 20 deletions(-)
diff --git a/gettor.conf.json.example b/gettor.conf.json.example
index 5c06e37..76450ce 100644
--- a/gettor.conf.json.example
+++ b/gettor.conf.json.example
@@ -3,6 +3,7 @@
"dbname": "/srv/gettor.torproject.org/home/gettor/gettor.db",
"email_parser_logfile": "/srv/gettor.torproject.org/home/gettor/log/email_parser.log",
"email_requests_limit": 30,
+ "twitter_requests_limit": 1,
"sendmail_interval": 10,
"sendmail_addr": "gettor at torproject.org",
"sendmail_host": "localhost",
@@ -11,5 +12,7 @@
"consumer_secret": "",
"access_key": "",
"access_secret": "",
- "twitter_handle": "get_tor"
+ "twitter_handle": "get_tor",
+ "twitter_messages_endpoint": "https://api.twitter.com/1.1/direct_messages/events/list.json",
+ "twitter_new_message_endpoint": "https://api.twitter.com/1.1/direct_messages/events/new.json"
}
diff --git a/gettor/main.py b/gettor/main.py
index 98a2de3..f914deb 100644
--- a/gettor/main.py
+++ b/gettor/main.py
@@ -19,6 +19,7 @@ from .utils import options
from .services import BaseService
from .services.email.sendmail import Sendmail
+from .services.twitter.twitterdm import Twitterdm
def run(gettor, app):
"""
@@ -36,3 +37,12 @@ def run(gettor, app):
gettor.addService(sendmail_service)
gettor.setServiceParent(app)
+
+
+ twitter_service = BaseService(
+ "twitterdm", twitterdm.get_interval(), twitterdm
+ )
+
+ gettor.addService(twitter_service)
+
+ gettor.setServiceParent(app)
diff --git a/gettor/parse/twitter.py b/gettor/parse/twitter.py
new file mode 100644
index 0000000..ea7ad81
--- /dev/null
+++ b/gettor/parse/twitter.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of GetTor, a Tor Browser distribution system.
+#
+# :authors: isra <hiro at torproject.org>
+# see also AUTHORS file
+#
+# :copyright: (c) 2008-2014, The Tor Project, Inc.
+# (c) 2019, Hiro
+#
+# :license: This is Free Software. See LICENSE for license information.
+
+from __future__ import absolute_import
+
+import re
+import dkim
+import hashlib
+
+from datetime import datetime
+import configparser
+
+from twisted.python import log
+from twisted.internet import defer
+from twisted.enterprise import adbapi
+
+from ..utils.db import SQLite3
+from ..utils import strings
+
+
+class TwitterParser(object):
+    """
+    Class for parsing twitter message requests.
+
+    A parsed request is a dict with the keys `id`, `command`, `platform`,
+    `language` and `service`.
+    """
+
+    def __init__(self, settings, twitter_id=None):
+        """
+        Constructor.
+
+        :param settings: configuration object; it is read through `get()`
+        for the `platforms`, `twitter_requests_limit` and `dbname` entries.
+        :param twitter_id: identifier of the message being parsed (optional,
+        only stored on the instance).
+        """
+        self.settings = settings
+        self.twitter_id = twitter_id
+
+
+    def build_request(self, msg_text, twitter_id, languages, platforms):
+        """
+        Build a request out of the words found in a message.
+
+        Words are matched case-insensitively. A known language sets the
+        request language, a known platform turns the request into a `links`
+        request for that platform, and the word `help` turns it into a
+        `help` request and stops the scan.
+
+        :param msg_text (str): text of the message; may be empty or None.
+        :param twitter_id (str): identifier stored as the request `id`.
+        :param languages (list): supported language codes.
+        :param platforms (list): supported platform names.
+
+        :return: dict with the request. `command` and `platform` stay None
+        when no command was found; `language` defaults to `en`.
+        """
+        request = {
+            "id": twitter_id,
+            "command": None,
+            "platform": None,
+            "language": "en",
+            "service": "twitter"
+        }
+
+        if msg_text:
+            for word in re.split(r"\s+", msg_text.strip()):
+                word = word.lower()
+                if word in languages:
+                    request["language"] = word
+                if word in platforms:
+                    request["command"] = "links"
+                    request["platform"] = word
+                if word == "help":
+                    # A help request wins over anything else in the message.
+                    request["command"] = "help"
+                    break
+
+        return request
+
+
+    def parse(self, msg, twitter_id):
+        """
+        Parse message content and look for commands to process the
+        request. Current commands are:
+
+        - links: request links for download.
+        - help: help request.
+
+        :param msg (str): incoming message text.
+        :param twitter_id (str): identifier of the request; only its
+        SHA-256 digest is written to the log.
+
+        :return dict with the request id and command (`links` or `help`).
+        """
+
+        log.msg("Building twitter message from string.", system="twitter parser")
+
+        platforms = self.settings.get("platforms")
+        languages = [*strings.get_locales().keys()]
+
+        hid = hashlib.sha256(twitter_id.encode('utf-8'))
+        log.msg(
+            "Request from {}".format(hid.hexdigest()), system="twitter parser"
+        )
+
+        request = self.build_request(msg, twitter_id, languages, platforms)
+
+        return request
+
+
+    @defer.inlineCallbacks
+    def parse_callback(self, request):
+        """
+        Callback invoked when the message has been parsed. Requests that
+        carry a command are stored in the database with status `ONHOLD`,
+        unless the sender went over `twitter_requests_limit`.
+
+        :param (dict) request: the built request based on message's content.
+        It contains the `id`, `command`, `platform`, `language` and
+        `service` fields.
+
+        :return: deferred that fires once the database queries are done.
+        """
+        twitter_requests_limit = self.settings.get("twitter_requests_limit")
+        log.msg(
+            "Found request for {}.".format(request['command']),
+            system="twitter parser"
+        )
+
+        if request["command"]:
+            now_str = datetime.now().strftime("%Y%m%d%H%M%S")
+            dbname = self.settings.get("dbname")
+            conn = SQLite3(dbname)
+
+            hid = hashlib.sha256(request['id'].encode('utf-8'))
+            # check limits first
+            num_requests = yield conn.get_num_requests(
+                id=hid.hexdigest(), service=request['service']
+            )
+
+            if num_requests[0][0] > twitter_requests_limit:
+                # hexdigest must be called: formatting the bound method
+                # would log its repr instead of the digest.
+                log.msg(
+                    "Discarded. Too many requests from {}.".format(
+                        hid.hexdigest()
+                    ), system="twitter parser"
+                )
+
+            else:
+                # Wait for the insert so that a failure is reported
+                # through this deferred instead of being dropped.
+                yield conn.new_request(
+                    id=request['id'],
+                    command=request['command'],
+                    platform=request['platform'],
+                    language=request['language'],
+                    service=request['service'],
+                    date=now_str,
+                    status="ONHOLD",
+                )
+
+    def parse_errback(self, error):
+        """
+        Errback if we don't/can't parse the message's content.
+
+        :param error: failure details; they are only logged.
+        """
+        log.msg(
+            "Error while parsing twitter message content: {}.".format(error),
+            system="twitter parser"
+        )
diff --git a/gettor/services/twitter/twitterdm.py b/gettor/services/twitter/twitterdm.py
index 7f8543e..b346946 100644
--- a/gettor/services/twitter/twitterdm.py
+++ b/gettor/services/twitter/twitterdm.py
@@ -15,10 +15,13 @@ from __future__ import absolute_import
import gettext
import hashlib
+
import configparser
from twisted.internet import defer
+from ...parse.twitter import TwitterParser
+from ...utils.twitter import Twitter
from ...utils.db import SQLite3 as DB
from ...utils.commons import log
from ...utils import strings
@@ -31,17 +34,14 @@ class Twitterdm(object):
"""
Constructor. It opens and stores a connection to the database.
:dbname: reads from configs
+
"""
self.settings = settings
dbname = self.settings.get("dbname")
- consumer_key = self.settings.get("consumer_key")
- consumer_secret = self.settings.get("consumer_secret")
- access_key = self.settings.get("access_key")
- access_secret = self.settings.get("access_secret")
- twitter_handle = self.settings.get("twitter_handle")
-
+ self.twitter = Twitter(settings)
self.conn = DB(dbname)
+
def get_interval(self):
"""
Get time interval for service periodicity.
@@ -59,6 +59,7 @@ class Twitterdm(object):
"""
log.info("Message sent successfully.")
+
def twitter_errback(self, error):
"""
Errback if we don't/can't send the message.
@@ -67,25 +68,158 @@ class Twitterdm(object):
raise Error("{}".format(error))
- def twitter_msg_list(self):
-
-
-
- def twitterdm(self):
+ def twitterdm(self, twitter_id, message):
"""
Send a twitter message for each message received. It creates a plain
text message, and sends it via twitter APIs
- :param twitter_handle (str): email address of the recipient.
- :param text (str): subject of the message.
+ :param twitter_id (str): twitter_id of the recipient.
+ :param message (str): text of the message.
:return: deferred whose callback/errback will handle the API execution
details.
"""
- log.debug("Retrieve list of messages")
-
- log.debug("Creating message")
+ return self.twitter.post_message(
+ twitter_id, message
+ ).addCallback(self.twitterdm_callback).addErrback(self.twitterdm_errback)
+    @defer.inlineCallbacks
+    def get_new(self):
+        """
+        Get new requests to process. This will define the `main loop` of
+        the Twitter service.
+
+        Incoming direct messages are fetched and handed to a TwitterParser.
+        Then the requests found `ONHOLD` in the database are answered: help
+        requests if there are any, otherwise links requests.
+
+        :return: deferred that fires when the pending requests have been
+        processed.
+        """
+        # Local import: json is not among the imports visible at the top
+        # of this module.
+        import json
+
+        log.debug("Retrieve list of messages")
+        data = self.twitter.twitter_data()
+
+        # The reply may come without `events` (e.g. an API error); treat
+        # that as "no messages" instead of failing with a KeyError.
+        for e in data.get('events') or []:
+
+            # The parser needs a string id (it hashes it), and both ids are
+            # needed again when the reply is sent, so they travel together
+            # as a JSON string.
+            message_id = json.dumps({
+                'id': e['id'],
+                'twitter_handle': e['message_create']['sender_id']
+            })
+
+            log.debug("Parsing message")
+            tp = TwitterParser(self.settings, message_id)
+            yield defer.maybeDeferred(
+                tp.parse, e['message_create']['message_data']['text'],
+                message_id
+            ).addCallback(tp.parse_callback).addErrback(tp.parse_errback)
+
+        # Manage help and links messages separately
+        help_requests = yield self.conn.get_requests(
+            status="ONHOLD", command="help", service="twitter"
+        )
+
+        link_requests = yield self.conn.get_requests(
+            status="ONHOLD", command="links", service="twitter"
+        )
+
+        if help_requests:
+            strings.load_strings("en")
+            try:
+                log.info("Got new help request.")
+
+                for request in help_requests:
+                    # request[0] is the JSON string built above.
+                    ids = json.loads(request[0])
+                    twitter_id = ids['twitter_handle']
+                    date = request[5]
+
+                    hid = hashlib.sha256(twitter_id.encode('utf-8'))
+                    log.info(
+                        "Sending help message to {}.".format(
+                            hid.hexdigest()
+                        )
+                    )
+
+                    yield self.twitterdm(
+                        twitter_id=twitter_id,
+                        message=strings._("help_body")
+                    )
+
+                    yield self.conn.update_stats(
+                        command="help", platform='', language='en',
+                        service="twitter"
+                    )
+
+                    yield self.conn.update_request(
+                        id=request[0], hid=hid.hexdigest(), status="SENT",
+                        service="twitter", date=date
+                    )
+
+            except Error as e:
+                log.info("Error sending twitter message: {}.".format(e))
+
+        elif link_requests:
+            try:
+                log.info("Got new links request.")
+
+                for request in link_requests:
+                    ids = json.loads(request[0])
+                    twitter_id = ids['twitter_handle']
+                    date = request[5]
+                    platform = request[2]
+                    language = request[3]
+
+                    if not language:
+                        language = 'en'
+
+                    locales = strings.get_locales()
+
+                    strings.load_strings(language)
+                    locale = locales[language]['locale']
+
+                    log.info("Getting links for {}.".format(platform))
+                    links = yield self.conn.get_links(
+                        platform=platform, language=locale, status="ACTIVE"
+                    )
+
+                    if not links:
+                        # Nothing to send, and no file name to put in the
+                        # message body; leave the request on hold.
+                        log.info("No links found for {}.".format(platform))
+                        continue
+
+                    # build message
+                    link_msg = None
+                    for link in links:
+                        provider = link[5]
+                        version = link[4]
+                        arch = link[3]
+                        url = link[0]
+                        file_name = link[7]
+                        sig_url = url + ".asc"
+
+                        link_str = "Tor Browser {} for {}-{}-{} ({}): {}\n".format(
+                            version, platform, locale, arch, provider, url
+                        )
+
+                        link_str += "Signature file: {}\n".format(sig_url)
+
+                        if link_msg:
+                            link_msg = "{}\n{}".format(link_msg, link_str)
+                        else:
+                            link_msg = link_str
+
+                    # The file name of the last link is the one reported.
+                    body_msg = strings._("links_body").format(
+                        platform, link_msg, file_name
+                    )
+
+                    hid = hashlib.sha256(twitter_id.encode('utf-8'))
+                    log.info(
+                        "Sending links to {}.".format(
+                            hid.hexdigest()
+                        )
+                    )
+
+                    yield self.twitterdm(
+                        twitter_id=twitter_id,
+                        message=body_msg
+                    )
+
+                    yield self.conn.update_stats(
+                        command="links", platform=platform, language=locale,
+                        service="twitter"
+                    )
+
+                    yield self.conn.update_request(
+                        id=request[0], hid=hid.hexdigest(), status="SENT",
+                        service="twitter", date=date
+                    )
+
+            except Error as e:
+                log.info("Error sending message: {}.".format(e))
+        else:
+            log.debug("No pending twitter requests. Keep waiting.")
diff --git a/gettor/utils/twitter.py b/gettor/utils/twitter.py
new file mode 100644
index 0000000..2e3968b
--- /dev/null
+++ b/gettor/utils/twitter.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of GetTor, a Tor Browser distribution system.
+#
+# :authors: isra <hiro at torproject.org>
+# see also AUTHORS file
+#
+# :copyright: (c) 2008-2014, The Tor Project, Inc.
+# (c) 2019, Hiro
+#
+# :license: This is Free Software. See LICENSE for license information.
+
+from requests_oauthlib import OAuth1Session
+import json
+
+class Twitter(object):
+    """
+    Class for sending twitter commands via the API.
+
+    It wraps an OAuth1 session built from the credentials found in the
+    settings and talks to the two configured direct message endpoints.
+    """
+    def __init__(self, settings):
+        """
+        Constructor.
+
+        :param settings: configuration object; it is read through `get()`
+        for the OAuth credentials (`consumer_key`, `consumer_secret`,
+        `access_key`, `access_secret`) and for the
+        `twitter_messages_endpoint` and `twitter_new_message_endpoint` URLs.
+        """
+        self.settings = settings
+
+        consumer_key = self.settings.get("consumer_key")
+        consumer_secret = self.settings.get("consumer_secret")
+        access_key = self.settings.get("access_key")
+        access_secret = self.settings.get("access_secret")
+
+        self.twitter_messages_endpoint = self.settings.get("twitter_messages_endpoint")
+        self.twitter_new_message_endpoint = self.settings.get("twitter_new_message_endpoint")
+        self.twitter_client = self.twitter_oauth(consumer_key, consumer_secret, access_key, access_secret)
+
+    def twitter_oauth(self, consumer_key, consumer_secret, access_key, access_secret):
+        """
+        Build the OAuth1 session used for every API call.
+
+        :param consumer_key (str): application key.
+        :param consumer_secret (str): application secret.
+        :param access_key (str): resource owner key.
+        :param access_secret (str): resource owner secret.
+
+        :return: the OAuth1Session.
+        """
+        tw_client = OAuth1Session(client_key=consumer_key,
+                            client_secret=consumer_secret,
+                            resource_owner_key=access_key,
+                            resource_owner_secret=access_secret)
+        return tw_client
+
+
+    def twitter_data(self):
+        """
+        Fetch the direct message events from the messages endpoint.
+
+        :return: the decoded JSON body of the reply.
+        """
+        data = self.twitter_client.get(self.twitter_messages_endpoint)
+        return data.json()
+
+
+    def post_message(self, twitter_id, text):
+        """
+        Send a direct message.
+
+        :param twitter_id (str): id of the recipient.
+        :param text (str): text of the message.
+
+        :return: the response of the new message endpoint.
+        """
+        message = {
+            "event": {
+                "type": "message_create",
+                "message_create": {
+                    "target": {"recipient_id": twitter_id },
+                    "message_data": {"text": text }
+                }
+            }
+        }
+
+        # The event is a nested structure: it has to go out as a JSON body.
+        # Passed positionally it would be form-encoded instead.
+        data = self.twitter_client.post(
+            self.twitter_new_message_endpoint, json=message
+        )
+        return data
diff --git a/requirements.txt b/requirements.txt
index a6150bc..fc786a5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,5 @@ service_identity==18.1.0
pydkim==0.3
pyopenssl==19.0.0
dnspython==1.16.0
-validate_email==1.3
twisted==19.2.1
+requests_oauthlib==1.0.0
diff --git a/share/locale/es.json b/share/locale/es.json
index 8ed6722..4d18f61 100644
--- a/share/locale/es.json
+++ b/share/locale/es.json
@@ -7,7 +7,7 @@
"help_config": "Custom config file location (optional)",
"smtp_links_subject": "[GetTor] Links for your request",
"smtp_mirrors_subject": "[GetTor] Mirrors",
- "smtp_help_subject": "[GetTor] Help",
+ "smtp_help_subject": "[GetTor] Ayuda",
"smtp_unsupported_locale_subject": "[GetTor] Unsupported locale",
"smtp_unsupported_locale_msg": "The locale you requested '{}' is not supported.",
"smtp_vlinks_msg": "You requested Tor Browser for {}.\n\nYou will need only one of the links below to download the bundle. If a link does not work for you, try the next one.\n\n{}\n\nShould you have issues with any of the links above you can access the following Google Drive folder: https://drive.google.com/open?id=13CADQTsCwrGsIID09YQbNz2DfRMUoxUU\n\n Download the file: {}\n\n \n--\nGetTor",
diff --git a/tests/conftests.py b/tests/conftests.py
index 1f73f21..f5194a5 100644
--- a/tests/conftests.py
+++ b/tests/conftests.py
@@ -4,8 +4,11 @@ from __future__ import unicode_literals
from gettor.utils import options
from gettor.utils import strings
+from gettor.utils import twitter
from gettor.services.email import sendmail
+from gettor.services.twitter import twitterdm
from gettor.parse.email import EmailParser, AddressError, DKIMError
+from gettor.parse.twitter import TwitterParser
from email import message_from_string
from email.utils import parseaddr
diff --git a/tests/test_twitter.py b/tests/test_twitter.py
new file mode 100644
index 0000000..fe155cc
--- /dev/null
+++ b/tests/test_twitter.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+import pytest
+from twisted.trial import unittest
+from twisted.internet import defer, reactor
+from twisted.internet import task
+
+from . import conftests
+
+class TwitterTests(unittest.TestCase):
+    """Tests for the twitter client and the twitter message parser."""
+
+    # Fail any tests which take longer than 15 seconds.
+    timeout = 15
+
+    def setUp(self):
+        """Load the settings and build a twitter client from them."""
+        settings = conftests.options.parse_settings()
+        self.settings = settings
+        self.tw_client = conftests.twitter.Twitter(settings)
+
+
+    def tearDown(self):
+        """Only report that the test is over."""
+        print("tearDown()")
+
+
+    def test_load_messages(self):
+        """The list of messages comes with a non-empty `events` entry."""
+        events = self.tw_client.twitter_data()['events']
+        assert events
+
+
+    def test_parse_tweet(self):
+        """A message naming a platform becomes a links request."""
+        event = {
+            'type': 'message_create',
+            'id': '1178649287208689669',
+            'created_timestamp': '1569846862972',
+            'message_create': {
+                'target': {'recipient_id': '2514714800'},
+                'sender_id': '1467062174',
+                'message_data': {
+                    'text': 'windows 10',
+                    'entities': {
+                        'hashtags': [],
+                        'symbols': [],
+                        'user_mentions': [],
+                        'urls': []
+                    }
+                }
+            }
+        }
+        created = event['message_create']
+        message_id = { 'id': event['id'], 'twitter_handle': created['sender_id'] }
+        text = created['message_data']['text']
+
+        expected = {
+            'command': 'links',
+            'id': "{'id': '1178649287208689669', 'twitter_handle': '1467062174'}",
+            'language': 'en',
+            'platform': 'windows',
+            'service': 'twitter'
+        }
+
+        parser = conftests.TwitterParser(self.settings, message_id)
+        # The parser wants the id as a string.
+        self.assertEqual(parser.parse(text, str(message_id)), expected)
+
+
+if __name__ == "__main__":
+ unittest.main()
More information about the tor-commits
mailing list