[tor-commits] [sbws/master] Refactor to clean v3bw files too
pastly at torproject.org
pastly at torproject.org
Thu Aug 9 14:21:19 UTC 2018
commit 9d6019f0921dcb4090e06d3489d0860d588c9f57
Author: juga0 <juga at riseup.net>
Date: Thu Jul 12 15:46:14 2018 +0000
Refactor to clean v3bw files too
---
sbws/core/cleanup.py | 87 +++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 69 insertions(+), 18 deletions(-)
diff --git a/sbws/core/cleanup.py b/sbws/core/cleanup.py
index 149cdd2..014b85d 100644
--- a/sbws/core/cleanup.py
+++ b/sbws/core/cleanup.py
@@ -10,6 +10,8 @@ import shutil
import logging
import time
+from sbws.util.timestamp import unixts_to_dt_obj
+
log = logging.getLogger(__name__)
@@ -27,17 +29,21 @@ def gen_parser(sub):
p.add_argument('--dry-run', action='store_true',
help='Don\'t actually compress or delete anything')
+ p.add_argument('--v3bw', action='store_true', help='Clean also v3bw files')
+
-def _get_older_files_than(dname, num_days_ago, extensions):
+def _get_older_files_than(dname, time_delta, extensions, is_v3bw=False):
assert os.path.isdir(dname)
- assert isinstance(num_days_ago, int)
+ assert isinstance(time_delta, int)
assert isinstance(extensions, list)
for ext in extensions:
assert isinstance(ext, str)
assert ext[0] == '.'
# Determine oldest allowed date
today = datetime.utcfromtimestamp(time.time())
- oldest_day = today - timedelta(days=num_days_ago)
+ oldest_day = today - timedelta(days=time_delta)
+ if is_v3bw:
+ oldest = today - timedelta(minutes=time_delta)
# Compile a regex that can extract a date from a file name that looks like
# /path/to/foo/YYYY-MM-DD*.extension
extensions = [re.escape(e) for e in extensions]
@@ -50,38 +56,52 @@ def _get_older_files_than(dname, num_days_ago, extensions):
for root, dirs, files in os.walk(dname):
for f in files:
fname = os.path.join(root, f)
- match = regex.match(fname)
- if not match:
- log.debug('Ignoring %s because it doesn\'t look like '
- 'YYYY-MM-DD', fname)
- continue
- d = datetime(*[int(n) for n in match.group(1).split('-')])
- if d < oldest_day:
- yield fname
-
-
-def _remove_rotten_files(datadir, rotten_days, dry_run=True):
+ if is_v3bw: # or (v3bw_ext not in fname)
+ # not forcing files to have correct names just the extension
+ _, ext = os.path.splitext(fname)
+ if ext not in ['.v3bw']:
+ log.debug('Ignoring %s because it doesn\'t have extension '
+ '%s', fname, ext)
+ continue
+ dt = unixts_to_dt_obj(os.path.getmtime(fname))
+ if dt < oldest and os.path.splitext:
+ yield fname
+ else:
+ match = regex.match(fname)
+ if not match:
+ log.debug('Ignoring %s because it doesn\'t look like '
+ 'YYYY-MM-DD', fname)
+ continue
+ d = datetime(*[int(n) for n in match.group(1).split('-')])
+ if d < oldest_day:
+ yield fname
+
+
+def _remove_rotten_files(datadir, rotten_days, dry_run=True, is_v3bw=False):
assert os.path.isdir(datadir)
assert isinstance(rotten_days, int)
# Hold the lock for basically the entire time just in case someone else
# moves files between when we get the list of files and when we try to
# delete them.
+ exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw']
with DirectoryLock(datadir):
- for fname in _get_older_files_than(datadir, rotten_days,
- ['.txt', '.txt.gz']):
+ for fname in _get_older_files_than(datadir, rotten_days, exts,
+ is_v3bw):
log.info('Deleting %s', fname)
if not dry_run:
os.remove(fname)
-def _compress_stale_files(datadir, stale_days, dry_run=True):
+def _compress_stale_files(datadir, stale_days, dry_run=True, is_v3bw=False):
assert os.path.isdir(datadir)
assert isinstance(stale_days, int)
# Hold the lock for basically the entire time just in case someone else
# moves files between when we get the list of files and when we try to
# compress them.
+ exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw']
with DirectoryLock(datadir):
- for fname in _get_older_files_than(datadir, stale_days, ['.txt']):
+ for fname in _get_older_files_than(datadir, stale_days, exts,
+ is_v3bw):
log.info('Compressing %s', fname)
if dry_run:
continue
@@ -92,6 +112,24 @@ def _compress_stale_files(datadir, stale_days, dry_run=True):
os.remove(fname)
+def _check_validity_periods(valid, stale, rotten):
+ if stale - 2 < valid:
+ fail_hard('For safetly, cleanup/stale_* (%d) must be at least 2 '
+ 'days larger than general/data_period or general/valid_ * '
+ '(%d)', stale, valid)
+ if rotten < stale:
+ fail_hard('cleanup/rotten_* (%d) must be the same or larger than '
+ 'cleanup/stale_* (%d)', rotten, stale)
+
+ if stale / 2 < valid:
+ log.warning(
+ 'cleanup/stale_ (%d) is less than twice '
+ 'general/data_period or general/valid_*(%d). '
+ 'For ease of parsing older results '
+ 'if necessary, it is recommended to make stale at least '
+ 'twice the data_period.', stale, valid)
+
+
def main(args, conf):
'''
Main entry point in to the cleanup command.
@@ -126,3 +164,16 @@ def main(args, conf):
_remove_rotten_files(datadir, rotten_days, dry_run=args.dry_run)
_compress_stale_files(datadir, stale_days, dry_run=args.dry_run)
+
+ if args.v3bw:
+ v3bw_dir = conf['paths']['v3bw_dname']
+ if not os.path.isdir(datadir):
+ fail_hard('%s does not exist', v3bw_dir)
+ valid = conf.getint('general', 'valid_mins_v3bw_files')
+ stale = conf.getint('cleanup', 'stale_mins_v3bw_files')
+ rotten = conf.getint('cleanup', 'rotten_mins_v3bw_files')
+ _check_validity_periods(valid, stale, rotten)
+ _remove_rotten_files(v3bw_dir, rotten, dry_run=args.dry_run,
+ is_v3bw=True)
+ _compress_stale_files(v3bw_dir, stale, dry_run=args.dry_run,
+ is_v3bw=True)
More information about the tor-commits mailing list