[tor-commits] [bridgedb/develop] Clean up *.unparseable descriptor files more than 24 hours old.
isis at torproject.org
isis at torproject.org
Thu Jul 28 16:41:11 UTC 2016
commit b6c740f79a4b9263e49dfe9f14314425b496aa40
Author: Isis Lovecruft <isis at torproject.org>
Date: Mon May 2 14:03:21 2016 +0000
Clean up *.unparseable descriptor files more than 24 hours old.
* ADD a config option for a new DELETE_UNPARSEABLE_DESCRIPTORS task, which,
by default, runs once every 24 hours and removes *.unparseable descriptor
files more than 24 hours old.
* ADD file deletion utility `bridgedb.util.deleteFilesOlderThan` and a
scheduled function, `bridgedb.runner.cleanupUnparseableDescriptors`, which
calls the former.
* ADD unittests for `bridgedb.util.deleteFilesOlderThan`.
* FIXES #18237: https://bugs.torproject.org/18237
---
bridgedb.conf | 2 ++
bridgedb/Main.py | 12 ++++++++++++
bridgedb/runner.py | 29 +++++++++++++++++++++++++++++
bridgedb/util.py | 21 +++++++++++++++++++++
test/test_util.py | 31 +++++++++++++++++++++++++++++++
5 files changed, 95 insertions(+)
diff --git a/bridgedb.conf b/bridgedb.conf
index 7805e15..52a8ca7 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -260,6 +260,8 @@ TASKS = {
# scripts/get-exit-list) and add those exit relays to the list of proxies
# loaded from the PROXY_LIST_FILES:
'GET_TOR_EXIT_LIST': 3 * 60 * 60,
+ # Delete *.unparseable descriptor files which are more than 24 hours old:
+ 'DELETE_UNPARSEABLE_DESCRIPTORS': 24 * 60 * 60,
}
# SUPPORTED_TRANSPORTS is a dictionary mapping Pluggable Transport methodnames
diff --git a/bridgedb/Main.py b/bridgedb/Main.py
index b281d21..bf4c213 100644
--- a/bridgedb/Main.py
+++ b/bridgedb/Main.py
@@ -23,6 +23,7 @@ from twisted.internet import task
from bridgedb import crypto
from bridgedb import persistent
from bridgedb import proxy
+from bridgedb import runner
from bridgedb import util
from bridgedb.bridges import MalformedBridgeInfo
from bridgedb.bridges import MissingServerDescriptorDigest
@@ -453,6 +454,17 @@ def run(options, reactor=reactor):
state.proxies,
config.SERVER_PUBLIC_EXTERNAL_IP)
+ if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'):
+ delUnparseableSecs = config.TASKS['DELETE_UNPARSEABLE_DESCRIPTORS']
+ else:
+ delUnparseableSecs = 24 * 60 * 60 # Default to 24 hours
+
+ # We use the directory name of STATUS_FILE, since that directory
+ # is where the *.unparseable descriptor files will be written to.
+ tasks['DELETE_UNPARSEABLE_DESCRIPTORS'] = task.LoopingCall(
+ runner.cleanupUnparseableDescriptors,
+ os.path.dirname(config.STATUS_FILE), delUnparseableSecs)
+
# Schedule all configured repeating tasks:
for name, seconds in config.TASKS.items():
if seconds:
diff --git a/bridgedb/runner.py b/bridgedb/runner.py
index 6ac069f..597b1b2 100644
--- a/bridgedb/runner.py
+++ b/bridgedb/runner.py
@@ -17,12 +17,41 @@
from __future__ import print_function
+import glob
import logging
import sys
import os
from twisted.python import procutils
+from bridgedb import util
+
+
+def cleanupUnparseableDescriptors(directory, seconds):
+ """Delete any ``*.unparseable`` descriptor files in ``directory`` with
+ mtimes more than ``seconds`` ago.
+
+ The :func:`bridgedb.parsers._copyUnparseableDescriptors` function
+ will make copies of any files we attempt to parse which contain
+ unparseable descriptors. This function should run on a timer to
+ clean them up.
+
+ :param str directory: The directory in which to search for unparseable
+ descriptors.
+ :param int seconds: If a file's mtime is more than this number of
+ seconds in the past, it will be deleted.
+ """
+ files = []
+
+ for pattern in ["*.unparseable", "*.unparseable.xz"]:
+ files.extend(glob.glob(os.sep.join([directory, pattern])))
+
+ if files:
+ logging.info("Deleting old unparseable descriptor files...")
+ logging.debug("Considered for deletion: %s" % "\n".join(files))
+
+ deleted = util.deleteFilesOlderThan(files, seconds)
+ logging.info("Deleted %d unparseable descriptor files." % len(deleted))
def find(filename):
"""Find the executable ``filename``.
diff --git a/bridgedb/util.py b/bridgedb/util.py
index 4c558c4..42e4664 100644
--- a/bridgedb/util.py
+++ b/bridgedb/util.py
@@ -18,6 +18,7 @@ import logging
import logging.config
import logging.handlers
import os
+import time
from twisted.python import components
@@ -144,6 +145,26 @@ def configureLogging(cfg):
logging.info("Level: %s", logLevel)
logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis"))
+def deleteFilesOlderThan(files, seconds):
+ """Delete any file in ``files`` with an mtime more than ``seconds`` ago.
+
+ :param list files: A list of paths to files which should be
+ considered for deletion.
+ :param int seconds: If a file's mtime is more than this number of
+ seconds in the past, it will be deleted.
+ :rtype: list
+ :returns: A list of the deleted files.
+ """
+ deleted = []
+ now = int(time.time())
+
+ for fn in files:
+ if (now - os.stat(fn).st_mtime) > seconds:
+ os.unlink(fn)
+ deleted.append(fn)
+
+ return deleted
+
def levenshteinDistance(s1, s2, len1=None, len2=None,
offset1=0, offset2=0, memo=None):
"""Compute the Levenstein Distance between two strings.
diff --git a/test/test_util.py b/test/test_util.py
index da4ddf4..848ce12 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -17,6 +17,7 @@ from __future__ import unicode_literals
import logging
import os
+import time
from twisted.mail.smtp import Address
from twisted.trial import unittest
@@ -71,6 +72,36 @@ class MiscLoggingUtilTests(unittest.TestCase):
util.logging.info("BridgeDB's email address: bridges at torproject.org")
+class FileUtilityTests(unittest.TestCase):
+ """Unittests for `bridgedb.util.deleteFilesOlderThan`."""
+
+ def setUp(self):
+ self._directory = self.id()
+ self.newfile = os.sep.join([self._directory, "newfile"])
+ self.oldfile = os.sep.join([self._directory, "oldfile"])
+ self.testfiles = [self.newfile, self.oldfile]
+ os.mkdir(self._directory)
+
+ now = time.time()
+
+ for fn in self.testfiles:
+ with open(fn, "w") as fd:
+ fd.flush()
+
+ # Change the mtime of the "oldfile" to be two days old:
+ os.utime(self.oldfile, (now, now - (48 * 60 * 60)))
+
+ def test_deleteFilesOlderThan_deletes_old_files(self):
+ """The function should delete appropriate files."""
+ deleted = util.deleteFilesOlderThan(self.testfiles ,24 * 60 * 60)
+ self.assertIn(self.oldfile, deleted)
+
+ def test_deleteFilesOlderThan_keeps_new_files(self):
+ """The function should not delete files newer than the threshold."""
+ deleted = util.deleteFilesOlderThan(self.testfiles ,24 * 60 * 60)
+ self.assertNotIn(self.newfile, deleted)
+
+
class LevenshteinDistanceTests(unittest.TestCase):
"""Unittests for `bridgedb.util.levenshteinDistance."""
More information about the tor-commits
mailing list