[tor-commits] [bridgedb/master] Add module bridgedb.safelog with automatic log filters.
isis at torproject.org
isis at torproject.org
Sat Apr 19 17:02:43 UTC 2014
commit 1206c811f73451d9e2a0e4c186ebcfe6a62ecaa3
Author: Isis Lovecruft <isis at torproject.org>
Date: Wed Feb 12 15:24:53 2014 +0000
Add module bridgedb.safelog with automatic log filters.
Include automatic filters for email addresses and IPv4/6
addresses. Things such as bridge fingerprints will still need to be
manually scrubbed by calling `bridgedb.safelog.logSafely()`.
---
lib/bridgedb/safelog.py | 196 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 196 insertions(+)
diff --git a/lib/bridgedb/safelog.py b/lib/bridgedb/safelog.py
new file mode 100644
index 0000000..dac65f8
--- /dev/null
+++ b/lib/bridgedb/safelog.py
@@ -0,0 +1,196 @@
+# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_safelog -*-
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis at torproject.org>
+# :copyright: (c) 2013-2014, Isis Lovecruft
+# (c) 2007-2014, The Tor Project, Inc.
+# :license: 3-Clause BSD, see LICENSE for licensing information
+
+"""Filters for log sanitisation.
+
+The ``Safelog*Filter`` classes within this module can be instantiated and
+added to any :class:`logging.Handler`, in order to transparently filter
+substrings within log messages which match the given ``pattern``. Matching
+substrings may be optionally additionally validated by implementing the
+:meth:`~BaseSafelogFilter.doubleCheck` method before they are finally replaced
+with the ``replacement`` string. For example::
+
+ >>> import io
+ >>> import logging
+ >>> from bridgedb import safelog
+ >>> handler = logging.StreamHandler(io.BytesIO())
+ >>> logger = logging.getLogger()
+ >>> logger.addHandler(handler)
+ >>> handler.addFilter(safelog.SafelogEmailFilter())
+ >>> logger.info("Sent response email to: blackhole at torproject.org")
+
+..
+
+Module Overview:
+~~~~~~~~~~~~~~~~
+::
+ safelog
+ |
+ |_logSafely - Utility for manually sanitising a portion of a log message
+ |
+ |_BaseSafelogFilter - Base class for log message sanitisation filters
+ | |_doubleCheck - Optional stricter validation on matching substrings
+ | |_filter - Determine if some part of a log message should be filtered
+ |
+ |_SafelogEmailFilter - Filter for removing email addresses from logs
+ |_SafelogIPv4Filter - Filter for removing IPv4 addresses from logs
+ |_SafelogIPv6Filter - Filter for removing IPv6 addresses from logs
+::
+"""
+
+import functools
+import logging
+import re
+
+from bridgedb.parse import addr
+
+
+# Module-wide switch: while it is true, logSafely() and
+# BaseSafelogFilter.filter() scrub their input. Change it via setSafeLogging().
+safe_logging = True
+
+
+def setSafeLogging(safe):
+ """Enable or disable automatic filtering of log messages.
+
+ This rebinds the module-level ``safe_logging`` flag, which
+ :func:`logSafely` and :meth:`BaseSafelogFilter.filter` read every time
+ they are called.
+
+ :param bool safe: If ``True``, filter email and IP addresses from log
+ messages automagically. If ``False``, log messages are left unchanged.
+ """
+ global safe_logging
+ safe_logging = safe
+
+def logSafely(string):
+ """Utility for manually sanitising a portion of a log message.
+
+ :param str string: If ``SAFELOGGING`` is enabled, sanitise this **string**
+ by replacing it with ``"[scrubbed]"``. Otherwise, return the
+ **string** unchanged.
+ :rtype: str
+ :returns: ``"[scrubbed]"`` or the original string.
+ """
+ if safe_logging:
+ return "[scrubbed]"
+ return string
+
+
+class BaseSafelogFilter(logging.Filter):
+ """Base class for creating log message sanitisation filters.
+
+ A :class:`BaseSafelogFilter` uses a compiled regex :cvar:`pattern` to
+ match particular items of data in log messages which should be sanitised
+ (if ``SAFELOGGING`` is enabled in :file:`bridgedb.conf`).
+
+ .. note:: The ``pattern`` is used only for string *matching* purposes, and
+ *not* for validation. In other words, a ``pattern`` which matches email
+ addresses should simply match something which appears to be an email
+ address, even though that matching string might not technically be a
+ valid email address vis-á-vis :rfc:`5321`.
+
+ In addition, a ``BaseSafelogFilter`` uses a :cvar:`easyFind`, which is
+ simply a string or character to search for before running checking against
+ the regular expression, to attempt to avoid regexing *everything* which
+ passes through the logger.
+
+ :cvar pattern: A compiled regular expression, whose matches will be
+ scrubbed from log messages and replaced with :cvar:`replacement`.
+ :cvar easyFind: A simpler string to search for before regex matching.
+ :cvar replacement: The string to replace ``pattern`` matches
+ with. (default: ``"[scrubbed]"``)
+ """
+ pattern = re.compile("FILTERME")
+ easyFind = "FILTERME"
+ replacement = "[scrubbed]"
+
+ def doubleCheck(self, match):
+ """Subclasses should override this function to implement any additional
+ substring filtering to decrease the false positive rate, i.e. any
+ additional filtering or validation which is *more* costly than
+ checking against the regular expression, :cvar:`pattern`.
+
+ To use only the :cvar:`pattern` matching in :meth:`filter`, and not
+ use this method, simply do::
+
+ return True
+
+ :param str match: Some portion of the :ivar:`logging.LogRecord.msg`
+ string which has already passed the checks in :meth:`filter`, for
+ which additional validation/checking is required.
+ :rtype: bool
+ :returns: ``True`` if the additional validation passes (in other
+ words, the **match** *should* be filtered), and ``None`` or
+ ``False`` otherwise.
+ """
+ return True
+
+ def filter(self, record):
+ """Filter a log record.
+
+ The log **record** is filtered, and thus sanitised by replacing
+ matching substrings with the :cvar:`replacement` string, if the
+ following checks pass:
+
+ 0. ``SAFELOGGING`` is currently enabled.
+ 1. The ``record.msg`` string contains :cvar:`easyFind`.
+ 2. The ``record.msg`` matches the regular expression,
+ :cvar:`pattern`.
+
+ :type record: :class:`logging.LogRecord`
+ :param record: Basically, anything passed to :func:`logging.log`.
+ """
+ if safe_logging:
+ msg = str(record.msg)
+ if msg.find(self.easyFind) > 0:
+ matches = self.pattern.findall(msg)
+ for match in matches:
+ if self.doubleCheck(match):
+ msg = msg.replace(match, self.replacement)
+ record.msg = msg
+ return record
+
+
+class SafelogEmailFilter(BaseSafelogFilter):
+ """A log filter which removes email addresses from log messages."""
+
+ pattern = re.compile(
+ "([a-zA-Z0-9]+[.+a-zA-Z0-9]*[@]{1}[a-zA-Z0-9]+[.-a-zA-Z0-9]*[.]{1}[a-zA-Z]+)")
+ easyFind = "@"
+
+ @functools.wraps(BaseSafelogFilter.filter)
+ def filter(self, record):
+ return BaseSafelogFilter.filter(self, record)
+
+
+class SafelogIPv4Filter(BaseSafelogFilter):
+ """A log filter which removes IPv4 addresses from log messages."""
+
+ pattern = re.compile("(?:\d{1,3}\.?){4}")
+ easyFind = "."
+
+ def doubleCheck(self, match):
+ """Additional check to ensure that **match** is an IPv4 address."""
+ if addr.isIPv4(match):
+ return True
+
+ @functools.wraps(BaseSafelogFilter.filter)
+ def filter(self, record):
+ return BaseSafelogFilter.filter(self, record)
+
+
+class SafelogIPv6Filter(BaseSafelogFilter):
+ """A log filter which removes IPv6 addresses from log messages."""
+
+ pattern = re.compile("([:]?[a-fA-F0-9:]+[:]+[a-fA-F0-9:]+){1,8}")
+ easyFind = ":"
+
+ def doubleCheck(self, match):
+ """Additional check to ensure that **match** is an IPv6 address."""
+ if addr.isIPv6(match):
+ return True
+
+ @functools.wraps(BaseSafelogFilter.filter)
+ def filter(self, record):
+ return BaseSafelogFilter.filter(self, record)
More information about the tor-commits
mailing list