[tor-commits] [gettor/master] Add twitter service with parsing

hiro at torproject.org hiro at torproject.org
Tue Oct 15 14:02:16 UTC 2019


commit 2082fa2b00c3611649f160e27754064f0459f025
Author: hiro <hiro at torproject.org>
Date:   Thu Oct 10 16:18:07 2019 +0200

    Add twitter service with parsing
---
 gettor.conf.json.example             |   5 +-
 gettor/main.py                       |  10 +++
 gettor/parse/twitter.py              | 148 ++++++++++++++++++++++++++++++
 gettor/services/twitter/twitterdm.py | 168 +++++++++++++++++++++++++++++++----
 gettor/utils/twitter.py              |  61 +++++++++++++
 requirements.txt                     |   2 +-
 share/locale/es.json                 |   2 +-
 tests/conftests.py                   |   3 +
 tests/test_twitter.py                |  36 ++++++++
 9 files changed, 415 insertions(+), 20 deletions(-)

diff --git a/gettor.conf.json.example b/gettor.conf.json.example
index 5c06e37..76450ce 100644
--- a/gettor.conf.json.example
+++ b/gettor.conf.json.example
@@ -3,6 +3,7 @@
   "dbname": "/srv/gettor.torproject.org/home/gettor/gettor.db",
   "email_parser_logfile": "/srv/gettor.torproject.org/home/gettor/log/email_parser.log",
   "email_requests_limit": 30,
+  "twitter_requests_limit": 1,
   "sendmail_interval": 10,
   "sendmail_addr": "gettor at torproject.org",
   "sendmail_host": "localhost",
@@ -11,5 +12,7 @@
   "consumer_secret": "",
   "access_key": "",
   "access_secret": "",
-  "twitter_handle": "get_tor"
+  "twitter_handle": "get_tor",
+  "twitter_messages_endpoint": "https://api.twitter.com/1.1/direct_messages/events/list.json",
+  "twitter_new_message_endpoint": "https://api.twitter.com/1.1/direct_messages/events/new.json"
 }
diff --git a/gettor/main.py b/gettor/main.py
index 98a2de3..f914deb 100644
--- a/gettor/main.py
+++ b/gettor/main.py
@@ -19,6 +19,7 @@ from .utils import options
 
 from .services import BaseService
 from .services.email.sendmail import Sendmail
+from .services.twitter.twitterdm import Twitterdm
 
 def run(gettor, app):
     """
@@ -36,3 +37,12 @@ def run(gettor, app):
     gettor.addService(sendmail_service)
 
     gettor.setServiceParent(app)
+
+
+    twitter_service = BaseService(
+        "twitterdm", twitterdm.get_interval(), twitterdm
+    )
+
+    gettor.addService(twitter_service)
+
+    gettor.setServiceParent(app)
diff --git a/gettor/parse/twitter.py b/gettor/parse/twitter.py
new file mode 100644
index 0000000..ea7ad81
--- /dev/null
+++ b/gettor/parse/twitter.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of GetTor, a Tor Browser distribution system.
+#
+# :authors: isra <hiro at torproject.org>
+#           see also AUTHORS file
+#
+# :copyright:   (c) 2008-2014, The Tor Project, Inc.
+#               (c) 2019, Hiro
+#
+# :license: This is Free Software. See LICENSE for license information.
+
+from __future__ import absolute_import
+
+import re
+import dkim
+import hashlib
+
+from datetime import datetime
+import configparser
+
+from twisted.python import log
+from twisted.internet import defer
+from twisted.enterprise import adbapi
+
+from ..utils.db import SQLite3
+from ..utils import strings
+
+
+class TwitterParser(object):
+    """Class for parsing twitter message requests."""
+
+    def __init__(self, settings, twitter_id=None):
+        """
+        Constructor.
+        """
+        self.settings = settings
+        self.twitter_id = twitter_id
+
+
+    def build_request(self, msg_text, twitter_id, languages, platforms):
+
+        request = {
+            "id": twitter_id,
+            "command": None,
+            "platform": None,
+            "language": "en",
+            "service": "twitter"
+        }
+
+        if msg_text:
+            for word in re.split(r"\s+", msg_text.strip()):
+                if word.lower() in languages:
+                    request["language"] = word.lower()
+                if word.lower() in platforms:
+                    request["command"] = "links"
+                    request["platform"] = word.lower()
+                if word.lower() == "help":
+                    request["command"] = "help"
+                    break
+
+        return request
+
+
+    def parse(self, msg, twitter_id):
+        """
+        Parse message content. Prevent service flooding. Finally, look for
+        commands to process the request. Current commands are:
+
+            - links: request links for download.
+            - help: help request.
+
+        :param msg (str): incoming message as string.
+
+        :return dict with twitter id and command (`links` or `help`).
+        """
+
+        log.msg("Building twitter message from string.", system="twitter parser")
+
+        platforms = self.settings.get("platforms")
+        languages = [*strings.get_locales().keys()]
+
+        hid = hashlib.sha256(twitter_id.encode('utf-8'))
+        log.msg(
+            "Request from {}".format(hid.hexdigest()), system="twitter parser"
+        )
+
+        request = self.build_request(msg, twitter_id, languages, platforms)
+
+        return request
+
+
+    @defer.inlineCallbacks
+    def parse_callback(self, request):
+        """
+        Callback invoked when the message has been parsed. It stores the
+        obtained information in the database for further processing by the
+        Twitterdm service.
+
+        :param (dict) request: the built request based on message's content.
+        It contains the `id` and `command` fields.
+
+        :return: deferred whose callback/errback will log database query
+        execution details.
+        """
+        twitter_requests_limit = self.settings.get("twitter_requests_limit")
+        log.msg(
+            "Found request for {}.".format(request['command']),
+            system="twitter parser"
+        )
+
+        if request["command"]:
+            now_str = datetime.now().strftime("%Y%m%d%H%M%S")
+            dbname = self.settings.get("dbname")
+            conn = SQLite3(dbname)
+
+            hid = hashlib.sha256(request['id'].encode('utf-8'))
+            # check limits first
+            num_requests = yield conn.get_num_requests(
+                id=hid.hexdigest(), service=request['service']
+            )
+
+            if num_requests[0][0] > twitter_requests_limit:
+                log.msg(
+                    "Discarded. Too many requests from {}.".format(
+                        hid.hexdigest
+                    ), system="twitter parser"
+            )
+
+            else:
+                conn.new_request(
+                    id=request['id'],
+                    command=request['command'],
+                    platform=request['platform'],
+                    language=request['language'],
+                    service=request['service'],
+                    date=now_str,
+                    status="ONHOLD",
+                )
+
+    def parse_errback(self, error):
+        """
+        Errback if we don't/can't parse the message's content.
+        """
+        log.msg(
+            "Error while parsing twitter message content: {}.".format(error),
+            system="twitter parser"
+        )
diff --git a/gettor/services/twitter/twitterdm.py b/gettor/services/twitter/twitterdm.py
index 7f8543e..b346946 100644
--- a/gettor/services/twitter/twitterdm.py
+++ b/gettor/services/twitter/twitterdm.py
@@ -15,10 +15,13 @@ from __future__ import absolute_import
 import gettext
 import hashlib
 
+
 import configparser
 
 from twisted.internet import defer
 
+from ...parse.twitter import TwitterParser
+from ...utils.twitter import Twitter
 from ...utils.db import SQLite3 as DB
 from ...utils.commons import log
 from ...utils import strings
@@ -31,17 +34,14 @@ class Twitterdm(object):
         """
         Constructor. It opens and stores a connection to the database.
         :dbname: reads from configs
+
         """
         self.settings = settings
         dbname = self.settings.get("dbname")
-        consumer_key = self.settings.get("consumer_key")
-        consumer_secret = self.settings.get("consumer_secret")
-        access_key = self.settings.get("access_key")
-        access_secret = self.settings.get("access_secret")
-        twitter_handle = self.settings.get("twitter_handle")
-
+        self.twitter = Twitter(settings)
         self.conn = DB(dbname)
 
+
     def get_interval(self):
         """
         Get time interval for service periodicity.
@@ -59,6 +59,7 @@ class Twitterdm(object):
         """
         log.info("Message sent successfully.")
 
+
     def twitter_errback(self, error):
         """
         Errback if we don't/can't send the message.
@@ -67,25 +68,158 @@ class Twitterdm(object):
         raise Error("{}".format(error))
 
 
-    def twitter_msg_list(self):
-
-
-
-    def twitterdm(self):
+    def twitterdm(self, twitter_id, message):
         """
         Send a twitter message for each message received. It creates a plain
         text message, and sends it via twitter APIs
 
-        :param twitter_handle (str): email address of the recipient.
-        :param text (str): subject of the message.
+        :param twitter_id (str): twitter_id of the recipient.
+        :param message (str): text of the message.
 
         :return: deferred whose callback/errback will handle the API execution
         details.
         """
 
-        log.debug("Retrieve list of messages")
-
-        log.debug("Creating message")
+        return self.twitter.post_message(
+            twitter_id, message
+        ).addCallback(self.twitterdm_callback).addErrback(self.twitterdm_errback)
 
+    @defer.inlineCallbacks
+    def get_new(self):
+        """
+        Get new requests to process. This will define the `main loop` of
+        the Twitter service.
+        """
 
-        log.debug("Calling twitter APIs.")
+        log.debug("Retrieve list of messages")
+        data = self.twitter.twitter_data()
+
+        for e in data['events']:
+
+            message_id = { 'id': e['id'], 'twitter_handle': e['message_create']['sender_id'] }
+
+            log.debug("Parsing message")
+            tp = TwitterParser(settings, message_id)
+            yield defer.maybeDeferred(
+                tp.parse, e['message_create']['message_data']['text'], message_id
+            ).addCallback(tp.parse_callback).addErrback(tp.parse_errback)
+
+        # Manage help and links messages separately
+        help_requests = yield self.conn.get_requests(
+            status="ONHOLD", command="help", service="twitter"
+        )
+
+        link_requests = yield self.conn.get_requests(
+            status="ONHOLD", command="links", service="twtter"
+        )
+
+        if help_requests:
+            strings.load_strings("en")
+            try:
+                log.info("Got new help request.")
+
+                for request in help_requests:
+                    ids = json.load(request[0])
+                    message_id = ids['id']
+                    twitter_id = ids['twitter_handle']
+                    date = request[5]
+
+                    hid = hashlib.sha256(twitter_id.encode('utf-8'))
+                    log.info(
+                        "Sending help message to {}.".format(
+                            hid.hexdigest()
+                        )
+                    )
+
+                    yield self.twitterdm(
+                        twitter_id=twitter_id,
+                        body=strings._("help_body")
+                    )
+
+                    yield self.conn.update_stats(
+                        command="help", platform='', language='en',
+                        service="twitter"
+                    )
+
+                    yield self.conn.update_request(
+                        id=request[0], hid=hid.hexdigest(), status="SENT",
+                        service="twitter", date=date
+                    )
+
+            except Error as e:
+                log.info("Error sending twitter message: {}.".format(e))
+
+        elif link_requests:
+            try:
+                log.info("Got new links request.")
+
+                for request in link_requests:
+                    ids = json.load(request[0])
+                    message_id = ids['id']
+                    twitter_id = ids['twitter_handle']
+                    date = request[5]
+                    platform = request[2]
+                    language = request[3]
+
+                    if not language:
+                        language = 'en'
+
+                    locales = strings.get_locales()
+
+                    strings.load_strings(language)
+                    locale = locales[language]['locale']
+
+                    log.info("Getting links for {}.".format(platform))
+                    links = yield self.conn.get_links(
+                        platform=platform, language=locale, status="ACTIVE"
+                    )
+
+                    # build message
+                    link_msg = None
+                    for link in links:
+                        provider = link[5]
+                        version = link[4]
+                        arch = link[3]
+                        url = link[0]
+                        file = link[7]
+                        sig_url = url + ".asc"
+
+                        link_str = "Tor Browser {} for {}-{}-{} ({}): {}\n".format(
+                            version, platform, locale, arch, provider, url
+                        )
+
+                        link_str += "Signature file: {}\n".format(sig_url)
+
+                        if link_msg:
+                            link_msg = "{}\n{}".format(link_msg, link_str)
+                        else:
+                            link_msg = link_str
+
+                    body_msg = strings._("links_body").format(platform, link_msg, file)
+
+                    hid = hashlib.sha256(twitter_id.encode('utf-8'))
+                    log.info(
+                        "Sending links to {}.".format(
+                            hid.hexdigest()
+                        )
+                    )
+
+                    yield self.twitterdm(
+                        email_addr=twitter_id,
+                        body=body_msg
+                    )
+
+                    yield self.conn.update_stats(
+                        command="links", platform=platform, language=locale,
+                        service="twitter"
+                    )
+
+                    yield self.conn.update_request(
+                        id=request[0], hid=hid.hexdigest(), status="SENT",
+                        service="twitter", date=date
+                    )
+
+            except Error as e:
+                log.info("Error sending message: {}.".format(e))
+        else:
+            log.debug("No pending twitter requests. Keep waiting.")
diff --git a/gettor/utils/twitter.py b/gettor/utils/twitter.py
new file mode 100644
index 0000000..2e3968b
--- /dev/null
+++ b/gettor/utils/twitter.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of GetTor, a Tor Browser distribution system.
+#
+# :authors: isra <hiro at torproject.org>
+#           see also AUTHORS file
+#
+# :copyright:   (c) 2008-2014, The Tor Project, Inc.
+#               (c) 2019, Hiro
+#
+# :license: This is Free Software. See LICENSE for license information.
+
+from requests_oauthlib import OAuth1Session
+import json
+
+class Twitter(object):
+    """
+    Class for sending twitter commands via the API.
+    """
+    def __init__(self, settings):
+        """
+        Constructor.
+
+        """
+        self.settings = settings
+
+        consumer_key = self.settings.get("consumer_key")
+        consumer_secret = self.settings.get("consumer_secret")
+        access_key = self.settings.get("access_key")
+        access_secret = self.settings.get("access_secret")
+        twitter_handle = self.settings.get("twitter_handle")
+
+        self.twitter_messages_endpoint = self.settings.get("twitter_messages_endpoint")
+        self.twitter_new_message_endpoint = self.settings.get("twitter_new_message_endpoint")
+        self.twitter_client = self.twitter_oauth(consumer_key, consumer_secret, access_key, access_secret)
+
+    def twitter_oauth(self, consumer_key, consumer_secret, access_key, access_secret):
+        tw_client = OAuth1Session(client_key=consumer_key,
+                               client_secret=consumer_secret,
+                               resource_owner_key=access_key,
+                               resource_owner_secret=access_secret)
+        return tw_client
+
+
+    def twitter_data(self):
+        data = self.twitter_client.get(self.twitter_messages_endpoint)
+        return data.json()
+
+
+    def post_message(self, twitter_id, text):
+        message = {
+            "event": {
+                "type": "message_create",
+                "message_create": {
+                    "target": {"recipient_id": twitter_id },
+                    "message_data": {"text": text }
+                }
+            }
+        }
+
+        data = self.twitter_client.post(self.twitter_new_message_endpoint, message)
diff --git a/requirements.txt b/requirements.txt
index a6150bc..fc786a5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,5 @@ service_identity==18.1.0
 pydkim==0.3
 pyopenssl==19.0.0
 dnspython==1.16.0
-validate_email==1.3
 twisted==19.2.1
+requests_oauthlib==1.0.0
diff --git a/share/locale/es.json b/share/locale/es.json
index 8ed6722..4d18f61 100644
--- a/share/locale/es.json
+++ b/share/locale/es.json
@@ -7,7 +7,7 @@
   "help_config": "Custom config file location (optional)",
   "smtp_links_subject": "[GetTor] Links for your request",
   "smtp_mirrors_subject": "[GetTor] Mirrors",
-  "smtp_help_subject": "[GetTor] Help",
+  "smtp_help_subject": "[GetTor] Ayuda",
   "smtp_unsupported_locale_subject": "[GetTor] Unsupported locale",
   "smtp_unsupported_locale_msg": "The locale you requested '{}' is not supported.",
   "smtp_vlinks_msg": "You requested Tor Browser for {}.\n\nYou will need only one of the links below to download the bundle. If a link does not work for you, try the next one.\n\n{}\n\nShould you have issues with any of the links above you can access the following Google Drive folder: https://drive.google.com/open?id=13CADQTsCwrGsIID09YQbNz2DfRMUoxUU\n\n Download the file: {}\n\n \n--\nGetTor",
diff --git a/tests/conftests.py b/tests/conftests.py
index 1f73f21..f5194a5 100644
--- a/tests/conftests.py
+++ b/tests/conftests.py
@@ -4,8 +4,11 @@ from __future__ import unicode_literals
 
 from gettor.utils import options
 from gettor.utils import strings
+from gettor.utils import twitter
 from gettor.services.email import sendmail
+from gettor.services.twitter import twitterdm
 from gettor.parse.email import EmailParser, AddressError, DKIMError
+from gettor.parse.twitter import TwitterParser
 
 from email import message_from_string
 from email.utils import parseaddr
diff --git a/tests/test_twitter.py b/tests/test_twitter.py
new file mode 100644
index 0000000..fe155cc
--- /dev/null
+++ b/tests/test_twitter.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+import pytest
+from twisted.trial import unittest
+from twisted.internet import defer, reactor
+from twisted.internet import task
+
+from . import conftests
+
+class TwitterTests(unittest.TestCase):
+    # Fail any tests which take longer than 15 seconds.
+    timeout = 15
+    def setUp(self):
+        self.settings = conftests.options.parse_settings()
+        self.tw_client = conftests.twitter.Twitter(self.settings)
+
+
+    def tearDown(self):
+        print("tearDown()")
+
+
+    def test_load_messages(self):
+        data = self.tw_client.twitter_data()
+        assert data['events']
+
+
+    def test_parse_tweet(self):
+        e = {'type': 'message_create', 'id': '1178649287208689669', 'created_timestamp': '1569846862972', 'message_create': {'target': {'recipient_id': '2514714800'}, 'sender_id': '1467062174', 'message_data': {'text': 'windows 10', 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}}}}
+        message_id = { 'id': e['id'], 'twitter_handle': e['message_create']['sender_id'] }
+        message = e['message_create']['message_data']['text']
+        tp = conftests.TwitterParser(self.settings, message_id)
+        r = tp.parse(message, str(message_id))
+        self.assertEqual(r, {'command': 'links', 'id': "{'id': '1178649287208689669', 'twitter_handle': '1467062174'}", 'language': 'en', 'platform': 'windows','service': 'twitter'})
+
+
+if __name__ == "__main__":
+    unittest.main()





More information about the tor-commits mailing list