[tor-commits] [bridgedb/master] Support handing out decoy bridges to bots.
phw at torproject.org
phw at torproject.org
Tue Aug 20 16:56:57 UTC 2019
commit 7ceb25e306a5af456c4a4ba1f1f5b2a72d6eb77c
Author: Philipp Winter <phw at nymity.ch>
Date: Wed Aug 14 15:00:59 2019 -0700
Support handing out decoy bridges to bots.
This patch makes it possible to identify bots by inspecting HTTP request
headers. A CSV file, specified by BLACKLISTED_REQUEST_HEADERS_FILE,
contains mappings from request header to a regular expression of the
header's value, e.g.:
Accept-Language,[Kk]lingon
User-Agent,Spa+ce
...
If one of these regular expressions matches a client's request header, we
have probably caught a bot. This patch also lets us respond to bot requests
with a decoy bridge, e.g., to study what the owners of the bot intend to
do with the bridge. Decoy bridges are configured in the CSV file
DECOY_BRIDGES_FILE. The file maps a transport type and its IP address
version to bridge lines, e.g.:
vanillav4,1.2.3.4:1234 FINGERPRINT
obfs4v4,obfs4 1.2.3.4:1234 FINGERPRINT ARGS
...
This fixes <https://bugs.torproject.org/31252>
---
CHANGELOG | 9 +++
bridgedb.conf | 19 ++++++
bridgedb/antibot.py | 123 ++++++++++++++++++++++++++++++++++
bridgedb/distributors/https/server.py | 10 +++
bridgedb/distributors/moat/server.py | 6 ++
bridgedb/main.py | 6 ++
bridgedb/test/test_antibot.py | 108 +++++++++++++++++++++++++++++
7 files changed, 281 insertions(+)
diff --git a/CHANGELOG b/CHANGELOG
index 32e6fe5..03390d6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,15 @@ Changes in version 0.8.0 - YYYY-MM-DD
Use stem instead of leekspin to create test descriptors. We no longer
need to depend on leekspin.
+ * FIXES #31252 https://bugs.torproject.org/31252
+ Add an anti-bot mechanism that allows us to detect bots by matching
+ HTTP request headers against blacklisted patterns. For example, bots may
+ have their Accept-Language set to "Klingon". Blacklisted patterns are
+ configured in BLACKLISTED_REQUEST_HEADERS_FILE. When BridgeDB detects
+ a bot request, it can answer the request with a decoy bridge that's
+ only handed out to bots. Decoy bridges are configured in
+ DECOY_BRIDGES_FILE.
+
Changes in version 0.7.1 - 2019-06-07
* FIXES #28496 https://bugs.torproject.org/28496
diff --git a/bridgedb.conf b/bridgedb.conf
index ba43bb6..a0e00a8 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -301,6 +301,25 @@ PROBING_RESISTANT_TRANSPORTS = ['scramblesuit', 'obfs4']
# menu).
DEFAULT_TRANSPORT = 'obfs4'
+# HTTP headers that suggest that a request was issued by a bot. Each line of
+# the CSV file must contain exactly one comma and have the following format:
+# <HEADER>,<REGEXP>
+# ...
+# For example:
+# Accept-Language,[Kk]lingon
+BLACKLISTED_REQUEST_HEADERS_FILE="blacklisted-request-headers.csv"
+
+# Decoy bridges that we hand out to bots detected by the regular expressions
+# in BLACKLISTED_REQUEST_HEADERS_FILE. Each line of the CSV file must contain
+# exactly one comma and have the following format:
+# <TRANSPORT>v<IP_VERSION>,<BRIDGE_LINE>
+# ...
+# For example:
+# vanillav4,1.2.3.4:1234 0123456789ABCDEF0123456789ABCDEF01234567
+# vanillav6,[::1]:1234 0123456789ABCDEF0123456789ABCDEF01234567
+# obfs4v4,obfs4 1.2.3.4:1234 public-key=... node-id=... iat-mode=...
+DECOY_BRIDGES_FILE="decoy-bridges.csv"
+
#-------------------------------
# Moat Distribution Options \
#------------------------------------------------------------------------------
diff --git a/bridgedb/antibot.py b/bridgedb/antibot.py
new file mode 100644
index 0000000..e724c68
--- /dev/null
+++ b/bridgedb/antibot.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_antibot ; -*-
+# _____________________________________________________________________________
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: please see included AUTHORS file
+# :copyright: (c) 2019, The Tor Project, Inc.
+# (c) 2019, Philipp Winter
+# :license: see LICENSE for licensing information
+# _____________________________________________________________________________
+
+"""Functions for dealing with bot requests."""
+
+import re
+import logging
+
+# Maps transport types and IP version (e.g., "obfs4v4", "vanillav4", or
+# "vanillav6") to lists of bridge lines (e.g., ["1.2.3.4:1234 ..."]).
+DECOY_BRIDGES = {}
+
+# Maps HTTP request headers (e.g., "Accept-Language") to regular expressions
+# that suggest that the request was issued by a bot (e.g., "[Kk]lingon").
+BLACKLISTED_REQUEST_HEADERS = {}
+
+
+def _loadCSV(filename):
+ """Load and return the content of the given CSV file.
+
+ :param str filename: The filename to read.
+ :rtype: dict
+ :returns: A dictionary mapping keys (first column) to values (second
+ column).
+ """
+
+ csv = dict()
+ try:
+ with open(filename) as fh:
+ for line in fh.readlines():
+ if line.count(",") != 1:
+ logging.warning("Line must have exactly one comma: %s" %
+ line)
+ continue
+ key, value = line.split(",")
+ csv[key.strip()] = value.strip()
+ except IOError as err:
+ logging.warning("I/O error while reading from file %s: %s" %
+ (filename, err))
+
+ return csv
+
+
+def loadBlacklistedRequestHeaders(filename):
+ """Load and globally set a dictionary of blacklisted request headers.
+
+ :param str filename: The filename to read.
+ """
+
+ content = _loadCSV(filename)
+ blacklisted = dict()
+ # Turn dictionary values into compiled regular expressions.
+ for header, regexp in content.items():
+ try:
+ blacklisted[header] = re.compile(regexp)
+ except Exception as err:
+ logging.warning("Skipping regexp %s because we couldn't compile "
+ "it: %s" % (regexp, err))
+
+ global BLACKLISTED_REQUEST_HEADERS
+ BLACKLISTED_REQUEST_HEADERS = blacklisted
+
+
+def loadDecoyBridges(filename):
+ """Load and globally set a dictionary of decoy bridges.
+
+ :param str filename: The filename to read.
+ """
+
+ d = _loadCSV(filename)
+ # Turn our bridge lines (which are strings) into lists.
+ decoyBridges = {ttype: [line] for ttype, line in d.items()}
+
+ global DECOY_BRIDGES
+ DECOY_BRIDGES = decoyBridges
+
+
+def getDecoyBridge(transport, ipVersion):
+ """Return a decoy bridge or, if none is available, None.
+
+ :param str transport: The desired transport, e.g., "vanilla" or "obfs4".
+ :param int ipVersion: The IP version, which must be either 4 or 6.
+ :rtype: list
+ :returns: A list of bridge lines or, if we don't have any, None.
+ """
+
+ if ipVersion not in [4, 6]:
+ return None
+
+ logging.info("Returning IPv%d decoy bridge for transport %s." %
+ (ipVersion, transport))
+ return DECOY_BRIDGES.get("%sv%d" % (transport, ipVersion), None)
+
+
+def isRequestFromBot(request):
+ """Determine if the given request is coming from a bot.
+
+ :type request: :api:`twisted.web.http.Request`
+ :param request: A ``Request`` object whose headers are matched against
+ the regular expressions in ``BLACKLISTED_REQUEST_HEADERS``.
+ :rtype: bool
+ :returns: True if the request is coming from a bot and False otherwise.
+ """
+
+ for header, badRegexp in BLACKLISTED_REQUEST_HEADERS.items():
+ value = request.getHeader(header)
+ if value is None:
+ continue
+
+ if badRegexp.search(value) is not None:
+ logging.info("Found bot request. Headers: %s" %
+ request.requestHeaders)
+ return True
+
+ return False
diff --git a/bridgedb/distributors/https/server.py b/bridgedb/distributors/https/server.py
index 732f8bf..e5df7da 100644
--- a/bridgedb/distributors/https/server.py
+++ b/bridgedb/distributors/https/server.py
@@ -53,6 +53,7 @@ from bridgedb import strings
from bridgedb import translations
from bridgedb import txrecaptcha
from bridgedb import metrics
+from bridgedb import antibot
from bridgedb.distributors.common.http import setFQDN
from bridgedb.distributors.common.http import getFQDN
from bridgedb.distributors.common.http import getClientIP
@@ -916,6 +917,15 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource):
bridgeLines = [replaceControlChars(bridge.getBridgeLine(
bridgeRequest, self.includeFingerprints)) for bridge in bridges]
+ if antibot.isRequestFromBot(request):
+ transports = bridgeRequest.transports
+ # Return either a decoy bridge or no bridge.
+ if len(transports) > 2:
+ logging.warning("More than one transport requested")
+ return self.renderAnswer(request)
+ ttype = "vanilla" if len(transports) == 0 else transports[0]
+ return self.renderAnswer(request, antibot.getDecoyBridge(ttype, bridgeRequest.ipVersion))
+
return self.renderAnswer(request, bridgeLines)
def getResponseFormat(self, request):
diff --git a/bridgedb/distributors/moat/server.py b/bridgedb/distributors/moat/server.py
index 73d2423..10096e7 100644
--- a/bridgedb/distributors/moat/server.py
+++ b/bridgedb/distributors/moat/server.py
@@ -41,6 +41,7 @@ from twisted.web.server import Site
from bridgedb import metrics
from bridgedb import captcha
from bridgedb import crypto
+from bridgedb import antibot
from bridgedb.distributors.common.http import setFQDN
from bridgedb.distributors.common.http import getFQDN
from bridgedb.distributors.common.http import getClientIP
@@ -735,6 +736,11 @@ class CaptchaCheckResource(CaptchaResource):
logging.warn(("Not enough bridges of the type specified to "
"fulfill the following request: %s") % bridgeRequest)
+ if antibot.isRequestFromBot(request):
+ ttype = transport or "vanilla"
+ bridgeLines = antibot.getDecoyBridge(ttype,
+ bridgeRequest.ipVersion)
+
# If we have no bridges at all to give to the client, then
# return a JSON API 404 error.
if not bridgeLines:
diff --git a/bridgedb/main.py b/bridgedb/main.py
index 5d9b0c6..94f4921 100644
--- a/bridgedb/main.py
+++ b/bridgedb/main.py
@@ -26,6 +26,7 @@ from bridgedb import proxy
from bridgedb import runner
from bridgedb import util
from bridgedb import metrics
+from bridgedb import antibot
from bridgedb.bridges import MalformedBridgeInfo
from bridgedb.bridges import MissingServerDescriptorDigest
from bridgedb.bridges import ServerDescriptorDigestMismatch
@@ -417,6 +418,11 @@ def run(options, reactor=reactor):
proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
metrics.setProxies(proxies)
+ logging.info("Reloading blacklisted request headers...")
+ antibot.loadBlacklistedRequestHeaders(config.BLACKLISTED_REQUEST_HEADERS_FILE)
+ logging.info("Reloading decoy bridges...")
+ antibot.loadDecoyBridges(config.DECOY_BRIDGES_FILE)
+
logging.info("Reparsing bridge descriptors...")
(hashring,
emailDistributorTmp,
diff --git a/bridgedb/test/test_antibot.py b/bridgedb/test/test_antibot.py
new file mode 100644
index 0000000..1cda86a
--- /dev/null
+++ b/bridgedb/test/test_antibot.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_antibot ; -*-
+# _____________________________________________________________________________
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: please see included AUTHORS file
+# :copyright: (c) 2019, The Tor Project, Inc.
+# (c) 2019, Philipp Winter
+# :license: see LICENSE for licensing information
+# _____________________________________________________________________________
+
+"""Tests for :mod:`bridgedb.antibot`."""
+
+import os
+import tempfile
+
+from twisted.trial import unittest
+from twisted.web.test.requesthelper import DummyRequest
+
+from bridgedb import antibot
+
+
+class AntiBot(unittest.TestCase):
+ """Unittests for :mod:`bridgedb.antibot`."""
+
+ def write_file(self, content):
+ """
+ Write the given content to a temporary file.
+
+ We're responsible for deleting the file once we're done.
+ """
+ fd, filename = tempfile.mkstemp(prefix="bridgedb")
+ fh = os.fdopen(fd, "w")
+ fh.write(content)
+ fh.close()
+ return filename
+
+ def test_load_csv(self):
+ """Load a valid CSV file."""
+ content = "foo,bar\nbar,foo\n"
+ filename = self.write_file(content)
+
+ csv = antibot._loadCSV(filename)
+ self.assertEqual(csv["foo"], "bar")
+ self.assertEqual(csv["bar"], "foo")
+
+ os.unlink(filename)
+
+ def test_load_invalid_csv(self):
+ """Load an invalid CSV file that has two commas in one line."""
+ content = "foo,bar,bad\nbar,foo\n"
+ filename = self.write_file(content)
+
+ csv = antibot._loadCSV(filename)
+ self.assertEqual(len(csv), 1)
+
+ os.unlink(filename)
+
+ def test_load_blacklisted_headers(self):
+ """Load valid blacklisted request headers."""
+ content = "accept-language,[Kk]lingon"
+ filename = self.write_file(content)
+
+ antibot.loadBlacklistedRequestHeaders(filename)
+
+ request = DummyRequest([''])
+ verdict = antibot.isRequestFromBot(request)
+ self.assertFalse(verdict)
+
+ request.requestHeaders.setRawHeaders("accept-language",
+ ["i speak kllingon"])
+ antibot.loadBlacklistedRequestHeaders(filename)
+ verdict = antibot.isRequestFromBot(request)
+ self.assertFalse(verdict)
+
+ request.requestHeaders.setRawHeaders("accept-language",
+ ["i speak klingon"])
+ antibot.loadBlacklistedRequestHeaders(filename)
+ verdict = antibot.isRequestFromBot(request)
+ self.assertTrue(verdict)
+
+ os.unlink(filename)
+
+ def test_load_invalid_blacklisted_headers(self):
+ """Load invalid blacklisted request headers with a broken regexp."""
+ content = "accept-language,[Klingon\nuser-agent,foo*"
+ filename = self.write_file(content)
+
+ antibot.loadBlacklistedRequestHeaders(filename)
+ self.assertEqual(len(antibot.BLACKLISTED_REQUEST_HEADERS), 1)
+
+ os.unlink(filename)
+
+ def test_load_decoy_bridges(self):
+ """Load decoy bridges."""
+ obfs4_line = "obfs4 1.2.3.4:1234 FINGERPRINT FOO BAR"
+ vanilla_line = "1.2.3.4:1234 FINGERPRINT"
+
+ content = "vanillav4,%s\nobfs4v4,%s" % (vanilla_line, obfs4_line)
+ filename = self.write_file(content)
+
+ antibot.loadDecoyBridges(filename)
+ self.assertEqual(antibot.getDecoyBridge("obfs4", 4), [obfs4_line])
+ self.assertEqual(antibot.getDecoyBridge("vanilla", 4), [vanilla_line])
+ self.assertEqual(antibot.getDecoyBridge("vanilla", 6), None)
+ self.assertEqual(antibot.getDecoyBridge("vanilla", 7), None)
+
+ os.unlink(filename)
More information about the tor-commits
mailing list