[tor-commits] [bridgedb/develop] Fuzzy match incoming email addresses.
isis at torproject.org
isis at torproject.org
Thu Jul 24 04:37:11 UTC 2014
commit feea15673d1690ceb1050c9068eaa85de855b637
Author: Isis Lovecruft <isis at torproject.org>
Date: Sun Jul 6 20:37:05 2014 +0000
Fuzzy match incoming email addresses.
* ADD a branch to `bridgedb.email.autoresponder.SMTPAutoresponder.runChecks()`
which calculates the Levenshtein Distance between an incoming email address
and each address in the EMAIL_BLACKLIST setting, in order to fuzzy match the
incoming address against the blacklist.
* FIXES #9385:
https://trac.torproject.org/projects/tor/ticket/9385
---
lib/bridgedb/email/autoresponder.py | 14 ++++++++++++++
lib/bridgedb/email/server.py | 7 +++++++
2 files changed, 21 insertions(+)
diff --git a/lib/bridgedb/email/autoresponder.py b/lib/bridgedb/email/autoresponder.py
index 3674702..8aa004e 100644
--- a/lib/bridgedb/email/autoresponder.py
+++ b/lib/bridgedb/email/autoresponder.py
@@ -38,6 +38,7 @@ from bridgedb.email import request
from bridgedb.email import templates
from bridgedb.parse import addr
from bridgedb.parse.addr import canonicalizeEmailDomain
+from bridgedb.util import levenshteinDistance
from bridgedb import translations
@@ -646,6 +647,19 @@ class SMTPAutoresponder(smtp.SMTPClient):
if not dkim.checkDKIM(self.incoming.message, self.incoming.domainRules):
return False
+ # If fuzzy matching is enabled via the EMAIL_FUZZY_MATCH setting, then
+ # calculate the Levenshtein String Distance (see
+ # :func:`~bridgedb.util.levenshteinDistance`):
+ if self.incoming.context.fuzzyMatch != 0:
+ for blacklistedAddress in self.incoming.context.blacklist:
+ distance = levenshteinDistance(self.incoming.canonicalFromEmail,
+ blacklistedAddress)
+ if distance <= self.incoming.context.fuzzyMatch:
+ logging.info("Fuzzy-matched %s to blacklisted address %s!"
+ % (self.incoming.canonicalFromEmail,
+ blacklistedAddress))
+ return False
+
return True
def send(self, response, retries=0, timeout=30, reaktor=reactor):
diff --git a/lib/bridgedb/email/server.py b/lib/bridgedb/email/server.py
index 4bcfed9..194e74a 100644
--- a/lib/bridgedb/email/server.py
+++ b/lib/bridgedb/email/server.py
@@ -57,6 +57,11 @@ class MailServerContext(object):
:ivar str fromAddr: Use this address in the email :header:`From:`
line for outgoing mail. (default: ``bridges at torproject.org``)
:ivar int nBridges: The number of bridges to send for each email.
+ :ivar list blacklist: A list of blacklisted email addresses, taken from
+ the ``EMAIL_BLACKLIST`` config setting.
+ :ivar int fuzzyMatch: An integer specifying the maximum Levenshtein
+ Distance from an incoming email address to a blacklisted email address
+ for the incoming email to be dropped.
:ivar gpgContext: A ``gpgme.GpgmeContext`` (as created by
:func:`bridgedb.crypto.getGPGContext`), or None if we couldn't create
a proper GPGME context for some reason.
@@ -92,6 +97,8 @@ class MailServerContext(object):
self.domainMap = config.EMAIL_DOMAIN_MAP or {}
self.canon = self.buildCanonicalDomainMap()
self.whitelist = config.EMAIL_WHITELIST or {}
+ self.blacklist = config.EMAIL_BLACKLIST or []
+ self.fuzzyMatch = config.EMAIL_FUZZY_MATCH or 0
self.gpgContext = getGPGContext(config)
More information about the tor-commits mailing list