[tor-commits] [collector/master] Delete files in out/ that are older than 7 weeks.

karsten at torproject.org karsten at torproject.org
Fri Nov 27 16:07:07 UTC 2020


commit 66ddc4d7d996ad2877aea44ea03982f14f069545
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Nov 25 16:09:14 2020 +0100

    Delete files in out/ that are older than 7 weeks.
    
    Fixes #21219.
---
 CHANGELOG.md                                       |  7 +++
 .../bridgedb/BridgedbMetricsProcessor.java         | 38 ++++----------
 .../bridgedescs/SanitizedBridgesWriter.java        | 37 +++++--------
 .../BridgePoolAssignmentsProcessor.java            | 33 ++++--------
 .../collector/exitlists/ExitListDownloader.java    | 27 ++++------
 .../collector/onionperf/OnionPerfDownloader.java   | 37 ++++++-------
 .../collector/persist/PersistenceUtils.java        | 50 +++++++++++++++---
 .../collector/relaydescs/ArchiveWriter.java        | 61 +++++-----------------
 .../snowflake/SnowflakeStatsDownloader.java        | 33 +++++-------
 .../metrics/collector/sync/SyncPersistence.java    |  7 +--
 .../collector/webstats/SanitizeWeblogs.java        | 12 +++--
 .../collector/persist/PersistUtilsTest.java        | 32 ++++++++++++
 12 files changed, 182 insertions(+), 192 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff2e9e7..e292f9a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# Changes in version 1.??.? - 2020-1?-??
+
+ * Medium changes
+  - Clean up descriptors written to the `out/` directory by deleting
+    files that are older than seven weeks.
+
+
 # Changes in version 1.16.1 - 2020-08-16
 
  * Medium changes
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
index 0073ee3..d05aa9c 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
@@ -12,6 +12,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
 import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -23,9 +24,7 @@ import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.time.Instant;
 import java.time.temporal.ChronoUnit;
-import java.util.Arrays;
 import java.util.SortedSet;
-import java.util.Stack;
 import java.util.TreeSet;
 
 public class BridgedbMetricsProcessor extends CollecTorMain {
@@ -127,10 +126,10 @@ public class BridgedbMetricsProcessor extends CollecTorMain {
             descriptor.getClass(), descriptor.getDescriptorFile());
       }
     }
-    logger.info("Cleaning up directory {} containing recent files.",
-        this.recentPathName);
+    logger.info("Cleaning up directories {} and {}.",
+        this.recentPathName, this.outputPathName);
     this.writeProcessedFiles(this.parsedBridgedbMetricsFile, processedFiles);
-    this.cleanUpRsyncDirectory();
+    this.cleanUpDirectories();
     logger.info("Finished processing BridgeDB statistics file(s).");
   }
 
@@ -175,28 +174,13 @@ public class BridgedbMetricsProcessor extends CollecTorMain {
   }
 
   /**
-   * Delete all files from the rsync directory that have not been modified in
-   * the last three days.
+   * Delete files not modified in the last three days from the rsync directory
+   * and files not modified in the last seven weeks from the out directory.
    */
-  public void cleanUpRsyncDirectory() {
-    Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
-    Stack<File> allFiles = new Stack<>();
-    allFiles.add(new File(this.recentPathName));
-    while (!allFiles.isEmpty()) {
-      File file = allFiles.pop();
-      if (file.isDirectory()) {
-        File[] filesInDirectory = file.listFiles();
-        if (null != filesInDirectory) {
-          allFiles.addAll(Arrays.asList(filesInDirectory));
-        }
-      } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
-        try {
-          Files.deleteIfExists(file.toPath());
-        } catch (IOException e) {
-          logger.warn("Unable to delete file {} that is apparently older than "
-              + "three days.", file, e);
-        }
-      }
-    }
+  private void cleanUpDirectories() {
+    PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
   }
 }
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index b8e7f2d..62288ad 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.Configuration;
 import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
 
 import org.apache.commons.codec.DecoderException;
 import org.apache.commons.codec.binary.Base64;
@@ -34,15 +35,15 @@ import java.security.GeneralSecurityException;
 import java.security.SecureRandom;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
-import java.util.Stack;
 import java.util.TreeMap;
 
 /**
@@ -228,7 +229,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 
     this.checkStaleDescriptors();
 
-    this.cleanUpRsyncDirectory();
+    this.cleanUpDirectories();
   }
 
   private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
@@ -1388,27 +1389,15 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     }
   }
 
-  /** Delete all files from the rsync directory that have not been modified
-   * in the last three days, and remove the .tmp extension from newly
-   * written files. */
-  public void cleanUpRsyncDirectory() throws ConfigurationException {
-    long cutOffMillis = System.currentTimeMillis()
-        - 3L * 24L * 60L * 60L * 1000L;
-    Stack<File> allFiles = new Stack<>();
-    allFiles.add(new File(config.getPath(Key.RecentPath).toFile(),
-        BRIDGE_DESCRIPTORS));
-    while (!allFiles.isEmpty()) {
-      File file = allFiles.pop();
-      if (file.isDirectory()) {
-        allFiles.addAll(Arrays.asList(file.listFiles()));
-      } else if (file.lastModified() < cutOffMillis) {
-        file.delete();
-      } else if (file.getName().endsWith(".tmp")) {
-        file.renameTo(new File(file.getParentFile(),
-            file.getName().substring(0,
-            file.getName().lastIndexOf(".tmp"))));
-      }
-    }
+  /**
+   * Delete files not modified in the last three days from the rsync directory
+   * and files not modified in the last seven weeks from the out directory, and
+   * remove the .tmp extension from newly written files. */
+  private void cleanUpDirectories() {
+    PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
   }
 }
 
diff --git a/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java
index ffae262..9961d4c 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -8,6 +8,7 @@ import org.torproject.metrics.collector.conf.Configuration;
 import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
 
 import org.apache.commons.codec.DecoderException;
 import org.apache.commons.codec.binary.Hex;
@@ -24,7 +25,6 @@ import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.time.DateTimeException;
@@ -178,7 +178,7 @@ public class BridgePoolAssignmentsProcessor extends CollecTorMain {
     }
     this.writeProcessedFiles(this.parsedBridgePoolAssignmentsFile,
         processedFiles);
-    this.cleanUpRsyncDirectory();
+    this.cleanUpDirectories();
     logger.info("Finished processing bridge pool assignment file(s).");
   }
 
@@ -363,29 +363,14 @@ public class BridgePoolAssignmentsProcessor extends CollecTorMain {
   }
 
   /**
-   * Delete all files from the rsync directory that have not been modified in
-   * the last three days.
+   * Delete files not modified in the last three days from the rsync directory
+   * and files not modified in the last seven weeks from the out directory.
    */
-  public void cleanUpRsyncDirectory() {
-    Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
-    Stack<File> allFiles = new Stack<>();
-    allFiles.add(new File(this.recentPathName));
-    while (!allFiles.isEmpty()) {
-      File file = allFiles.pop();
-      if (file.isDirectory()) {
-        File[] filesInDirectory = file.listFiles();
-        if (null != filesInDirectory) {
-          allFiles.addAll(Arrays.asList(filesInDirectory));
-        }
-      } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
-        try {
-          Files.deleteIfExists(file.toPath());
-        } catch (IOException e) {
-          logger.warn("Unable to delete file {} that is apparently older than "
-              + "three days.", file, e);
-        }
-      }
-    }
+  public void cleanUpDirectories() {
+    PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
   }
 }
 
diff --git a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
index c6b45da..6b9b791 100644
--- a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
 import org.torproject.metrics.collector.downloader.Downloader;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -24,6 +25,8 @@ import java.io.IOException;
 import java.net.URL;
 import java.nio.file.Paths;
 import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
 import java.util.Arrays;
 import java.util.Date;
 import java.util.SortedSet;
@@ -168,24 +171,16 @@ public class ExitListDownloader extends CollecTorMain {
     }
     logger.info(dumpStats.toString());
 
-    this.cleanUpRsyncDirectory();
+    this.cleanUpDirectories();
   }
 
-  /** Delete all files from the rsync directory that have not been modified
-   * in the last three days. */
-  public void cleanUpRsyncDirectory() {
-    long cutOffMillis = System.currentTimeMillis()
-        - 3L * 24L * 60L * 60L * 1000L;
-    Stack<File> allFiles = new Stack<>();
-    allFiles.add(new File(recentPathName));
-    while (!allFiles.isEmpty()) {
-      File file = allFiles.pop();
-      if (file.isDirectory()) {
-        allFiles.addAll(Arrays.asList(file.listFiles()));
-      } else if (file.lastModified() < cutOffMillis) {
-        file.delete();
-      }
-    }
+  /** Delete files not modified in the last three days from the rsync directory
+   * and files not modified in the last seven weeks from the out directory. */
+  private void cleanUpDirectories() {
+    PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
   }
 }
 
diff --git a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
index d22ac0b..f90bdfe 100644
--- a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
@@ -12,6 +12,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
 import org.torproject.metrics.collector.downloader.Downloader;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
 
 import org.apache.commons.compress.utils.IOUtils;
 import org.slf4j.Logger;
@@ -31,13 +32,13 @@ import java.nio.file.StandardCopyOption;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedSet;
-import java.util.Stack;
 import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -103,7 +104,7 @@ public class OnionPerfDownloader extends CollecTorMain {
       this.downloadFromOnionPerfHost(baseUrl);
     }
     this.writeDownloadedOnionPerfFiles();
-    this.cleanUpRsyncDirectory();
+    this.cleanUpDirectories();
   }
 
   private void readDownloadedOnionPerfFiles() {
@@ -441,21 +442,21 @@ public class OnionPerfDownloader extends CollecTorMain {
     }
   }
 
-  /** Delete all files from the rsync directory that have not been modified
-   * in the last three days. */
-  public void cleanUpRsyncDirectory() throws ConfigurationException {
-    long cutOffMillis = System.currentTimeMillis()
-        - 3L * 24L * 60L * 60L * 1000L;
-    Stack<File> allFiles = new Stack<>();
-    allFiles.add(new File(config.getPath(Key.RecentPath).toFile(), TORPERF));
-    while (!allFiles.isEmpty()) {
-      File file = allFiles.pop();
-      if (file.isDirectory()) {
-        allFiles.addAll(Arrays.asList(file.listFiles()));
-      } else if (file.lastModified() < cutOffMillis) {
-        file.delete();
-      }
-    }
+  /** Delete files not modified in the last three days from the rsync
+   * directories and in the last seven weeks from the out directories. */
+  private void cleanUpDirectories() {
+    PersistenceUtils.cleanDirectory(
+        new File(this.recentDirectory, TORPERF).toPath(),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(
+        new File(this.recentDirectory, ONIONPERF).toPath(),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(
+        new File(this.archiveDirectory, TORPERF).toPath(),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(
+        new File(this.archiveDirectory, ONIONPERF).toPath(),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
   }
 }
 
diff --git a/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java b/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java
index c958aec..e787c39 100644
--- a/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java
+++ b/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java
@@ -18,6 +18,7 @@ import java.nio.file.StandardCopyOption;
 import java.nio.file.StandardOpenOption;
 import java.nio.file.attribute.BasicFileAttributes;
 import java.text.SimpleDateFormat;
+import java.time.Instant;
 import java.util.Date;
 
 public class PersistenceUtils {
@@ -80,33 +81,70 @@ public class PersistenceUtils {
   }
 
   /** Move temporary files to their final location. */
-  public static void cleanDirectory(Path pathToClean) throws IOException {
+  public static void cleanDirectory(Path pathToClean) {
     PersistenceUtils.cleanDirectory(pathToClean, -1L);
   }
 
   /** Clean up the given directory by deleting files that are older than the
    * given cut-off timestamp, and by moving temporary files to their final
    * location. */
-  public static void cleanDirectory(Path pathToClean, long cutOffMillis)
-      throws IOException {
+  public static void cleanDirectory(Path pathToClean, long cutOffMillis) {
+    PersistenceUtils.cleanDirectory(pathToClean, cutOffMillis, null);
+  }
+
+  /** Clean up the given directory, excluding the given subdirectory, by
+   * deleting files older than the given cut-off timestamp, moving temporary
+   * files to their final location, and removing directories left empty. */
+  public static void cleanDirectory(Path pathToClean, long cutOffMillis,
+      Path pathToExclude) {
+    if (!Files.exists(pathToClean)) {
+      return;
+    }
+    logger.info("Cleaning up directory {} with cut-off time {}.",
+        pathToClean, Instant.ofEpochMilli(cutOffMillis));
     SimpleFileVisitor<Path> sfv = new SimpleFileVisitor<Path>() {
+      @Override
+      public FileVisitResult preVisitDirectory(Path dir,
+          BasicFileAttributes attrs) {
+        if (null == pathToExclude || !pathToExclude.equals(dir)) {
+          return FileVisitResult.CONTINUE;
+        } else {
+          return FileVisitResult.SKIP_SUBTREE;
+        }
+      }
+
       @Override
       public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
           throws IOException {
         String tempName = file.toString();
         if (cutOffMillis >= 0L
             && attrs.lastModifiedTime().toMillis() < cutOffMillis) {
-          file.toFile().delete();
+          Files.delete(file);
         } else if (tempName.endsWith(TEMPFIX)) {
           Path outputPath = Paths
               .get(tempName.substring(0, tempName.length() - TEMPFIX.length()));
           Files.deleteIfExists(outputPath);
-          file.toFile().renameTo(outputPath.toFile());
+          Files.move(file, outputPath);
+        }
+        return FileVisitResult.CONTINUE;
+      }
+
+      @Override
+      public FileVisitResult postVisitDirectory(Path dir, IOException exc)
+          throws IOException {
+        if (!Files.list(dir).findFirst().isPresent()) {
+          Files.delete(dir);
         }
         return FileVisitResult.CONTINUE;
       }
     };
-    Files.walkFileTree(pathToClean, sfv);
+    try {
+      Files.walkFileTree(pathToClean, sfv);
+    } catch (IOException e) {
+      logger.warn("Caught I/O exception while cleaning up directory {} with "
+          + "cut-off time {}. Continuing.",
+          pathToClean, Instant.ofEpochMilli(cutOffMillis), e);
+    }
   }
 
   /** Return all date-time parts as array. */
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
index 8addd5e..28472f8 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
@@ -18,6 +18,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.conf.SourceType;
 import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -38,7 +39,7 @@ import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.ZoneOffset;
 import java.time.format.DateTimeFormatter;
-import java.util.Arrays;
+import java.time.temporal.ChronoUnit;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -46,7 +47,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.SortedSet;
-import java.util.Stack;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
@@ -197,7 +197,7 @@ public class ArchiveWriter extends CollecTorMain {
 
     this.checkStaledescriptors();
 
-    this.cleanUpRsyncDirectory();
+    this.cleanUpDirectories();
 
     this.saveDescriptorDigests();
 
@@ -549,51 +549,16 @@ public class ArchiveWriter extends CollecTorMain {
     }
   }
 
-  /** Delete all files from the rsync directory that have not been modified
-   * in the last three days (except for microdescriptors which are kept
-   * for up to thirty days), and remove the .tmp extension from newly
-   * written files. */
-  public void cleanUpRsyncDirectory() {
-    long cutOffMillis = System.currentTimeMillis()
-        - 3L * 24L * 60L * 60L * 1000L;
-    long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L;
-    Stack<File> allFiles = new Stack<>();
-    allFiles.add(new File(recentPathName, RELAY_DESCRIPTORS));
-    while (!allFiles.isEmpty()) {
-      File file = allFiles.pop();
-      if (file.isDirectory()) {
-        File[] containedFiles = file.listFiles();
-        if (null == containedFiles) {
-          logger.warn("Unable to list files contained in directory {}.", file);
-        } else {
-          allFiles.addAll(Arrays.asList(containedFiles));
-        }
-      } else if (file.getName().endsWith("-micro")) {
-        if (file.lastModified() < cutOffMicroMillis) {
-          if (!file.delete()) {
-            logger.warn("Unable to delete outdated descriptor file {}.", file);
-          }
-        }
-      } else if (file.lastModified() < cutOffMillis) {
-        if (!file.delete()) {
-          logger.warn("Unable to delete outdated descriptor file {}.", file);
-        }
-      } else if (file.getName().endsWith(".tmp")) {
-        File destinationFile = new File(file.getParentFile(),
-            file.getName().substring(0, file.getName().lastIndexOf(".tmp")));
-        if (destinationFile.exists()) {
-          logger.warn("Attempting to rename descriptor file {} to existing "
-              + "file {}.", file, destinationFile);
-        } else {
-          logger.info("Renaming descriptor file {} to non-existing file {}.",
-              file, destinationFile);
-        }
-        if (!file.renameTo(destinationFile)) {
-          logger.warn("Unable to rename descriptor file {} to {}.", file,
-              destinationFile);
-        }
-      }
-    }
+  /** Delete files not modified in the last three days from the rsync directory
+   * and in the last seven weeks from the out directory (skipping certs/), and
+   * remove the .tmp extension from newly written files. */
+  public void cleanUpDirectories() {
+    PersistenceUtils.cleanDirectory(
+        Paths.get(recentPathName, RELAY_DESCRIPTORS),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(Paths.get(outputDirectory),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli(),
+        Paths.get(this.outputDirectory, "certs"));
   }
 
   private void saveDescriptorDigests() {
diff --git a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
index cbca74a..93388d5 100644
--- a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
 import org.torproject.metrics.collector.downloader.Downloader;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
 import org.torproject.metrics.collector.persist.SnowflakeStatsPersistence;
 
 import org.slf4j.Logger;
@@ -25,10 +26,10 @@ import java.io.OutputStream;
 import java.net.URL;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.time.Instant;
 import java.time.LocalDateTime;
-import java.util.Arrays;
+import java.time.temporal.ChronoUnit;
 import java.util.SortedSet;
-import java.util.Stack;
 import java.util.TreeSet;
 
 public class SnowflakeStatsDownloader extends CollecTorMain {
@@ -38,6 +39,8 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
 
   private String recentPathName;
 
+  private String outputPathName;
+
   /** Instantiate the snowflake-stats module using the given configuration. */
   public SnowflakeStatsDownloader(Configuration config) {
     super(config);
@@ -81,7 +84,7 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
     DescriptorParser descriptorParser =
         DescriptorSourceFactory.createDescriptorParser();
     SortedSet<LocalDateTime> snowflakeStatsEnds = new TreeSet<>();
-    String outputPathName = config.getPath(Key.OutputPath).toString();
+    this.outputPathName = config.getPath(Key.OutputPath).toString();
     for (Descriptor descriptor : descriptorParser.parseDescriptors(
         downloadedBytes, null, null)) {
       if (descriptor instanceof SnowflakeStats) {
@@ -119,7 +122,7 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
     }
 
     this.writeProcessedFiles(parsedSnowflakeStatsFile, processedFiles);
-    this.cleanUpRsyncDirectory();
+    this.cleanUpDirectories();
   }
 
   /**
@@ -150,21 +153,13 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
     }
   }
 
-  /** Delete all files from the rsync directory that have not been modified
-   * in the last three days. */
-  public void cleanUpRsyncDirectory() {
-    long cutOffMillis = System.currentTimeMillis()
-        - 3L * 24L * 60L * 60L * 1000L;
-    Stack<File> allFiles = new Stack<>();
-    allFiles.add(new File(recentPathName));
-    while (!allFiles.isEmpty()) {
-      File file = allFiles.pop();
-      if (file.isDirectory()) {
-        allFiles.addAll(Arrays.asList(file.listFiles()));
-      } else if (file.lastModified() < cutOffMillis) {
-        file.delete();
-      }
-    }
+  /** Delete files not modified in the last three days from the rsync directory
+   * and files not modified in the last seven weeks from the out directory. */
+  private void cleanUpDirectories() {
+    PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+        Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+    PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+        Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
   }
 }
 
diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
index adffb93..af48b1f 100644
--- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
@@ -42,7 +42,6 @@ import org.torproject.metrics.collector.persist.WebServerAccessLogPersistence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
 import java.nio.file.Path;
 
 /** Provides persistence for descriptors based on the descriptor type. */
@@ -69,11 +68,7 @@ public class SyncPersistence {
    * Cleans the directory in {@code RecentPath} after storing descriptors.
    */
   public void cleanDirectory() {
-    try {
-      PersistenceUtils.cleanDirectory(recentPath);
-    } catch (IOException ioe) {
-      logger.error("Cleaning of {} failed.", recentPath.toString(), ioe);
-    }
+    PersistenceUtils.cleanDirectory(recentPath);
   }
 
   /**
diff --git a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
index 670f686..e4f427e 100644
--- a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
@@ -31,8 +31,10 @@ import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.time.Instant;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -106,10 +108,12 @@ public class SanitizeWeblogs extends CollecTorMain {
             = this.findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins),
             previouslyProcessedWebstats);
         this.writeProcessedWebstats(newlyProcessedWebstats);
-        long cutOffMillis = System.currentTimeMillis()
-            - 3L * 24L * 60L * 60L * 1000L;
-        PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath),
-            cutOffMillis);
+        PersistenceUtils.cleanDirectory(
+            Paths.get(this.recentDirectory.toString(), WEBSTATS),
+            Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+        PersistenceUtils.cleanDirectory(
+            Paths.get(this.outputDirectory.toString(), WEBSTATS),
+            Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
       }
     } catch (Exception e) {
       logger.error("Cannot sanitize web-logs: {}", e.getMessage(), e);
diff --git a/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java b/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java
index a33b94f..054d0e6 100644
--- a/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java
+++ b/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java
@@ -16,6 +16,9 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
+import java.nio.file.attribute.FileTime;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
 import java.util.List;
 
 public class PersistUtilsTest {
@@ -107,4 +110,33 @@ public class PersistUtilsTest {
     assertEquals("File contained: " + text, theText2, text.get(3));
   }
 
+  @Test()
+  public void testCleanDirectory() throws Exception {
+    /*
+     * out/
+     *   a/           # empty after deleting x
+     *     x          # too old file, delete
+     *   b/           # keep together with recent file y
+     *     y.tmp      # recent enough, rename to y
+     *   c/           # exclude (empty) subdirectory
+     */
+    Instant now = Instant.now();
+    Path out = tmpf.newFolder().toPath();
+    Path dirA = Files.createDirectory(out.resolve("a"));
+    Path fileX = Files.createFile(dirA.resolve("x"));
+    Files.setLastModifiedTime(fileX,
+        FileTime.from(now.minus(9L, ChronoUnit.DAYS)));
+    Path dirB = Files.createDirectory(out.resolve("b"));
+    Path fileYTmp = Files.createFile(dirB.resolve("y.tmp"));
+    Files.setLastModifiedTime(fileYTmp, FileTime.from(now));
+    Path dirC = Files.createDirectory(out.resolve("c"));
+    PersistenceUtils.cleanDirectory(out,
+        now.minus(3L, ChronoUnit.DAYS).toEpochMilli(), dirC);
+    assertFalse(Files.exists(dirA));
+    assertFalse(Files.exists(fileX));
+    assertTrue(Files.exists(dirB));
+    assertFalse(Files.exists(fileYTmp));
+    assertTrue(Files.exists(dirB.resolve("y")));
+    assertTrue(Files.exists(dirC));
+  }
 }



More information about the tor-commits mailing list