[tor-commits] [collector/master] Delete files in out/ that are older than 7 weeks.
karsten at torproject.org
karsten at torproject.org
Fri Nov 27 16:07:07 UTC 2020
commit 66ddc4d7d996ad2877aea44ea03982f14f069545
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Nov 25 16:09:14 2020 +0100
Delete files in out/ that are older than 7 weeks.
Fixes #21219.
---
CHANGELOG.md | 7 +++
.../bridgedb/BridgedbMetricsProcessor.java | 38 ++++----------
.../bridgedescs/SanitizedBridgesWriter.java | 37 +++++--------
.../BridgePoolAssignmentsProcessor.java | 33 ++++--------
.../collector/exitlists/ExitListDownloader.java | 27 ++++------
.../collector/onionperf/OnionPerfDownloader.java | 37 ++++++-------
.../collector/persist/PersistenceUtils.java | 50 +++++++++++++++---
.../collector/relaydescs/ArchiveWriter.java | 61 +++++-----------------
.../snowflake/SnowflakeStatsDownloader.java | 33 +++++-------
.../metrics/collector/sync/SyncPersistence.java | 7 +--
.../collector/webstats/SanitizeWeblogs.java | 12 +++--
.../collector/persist/PersistUtilsTest.java | 32 ++++++++++++
12 files changed, 182 insertions(+), 192 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff2e9e7..e292f9a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# Changes in version 1.??.? - 2020-1?-??
+
+ * Medium changes
+ - Clean up descriptors written to the `out/` directory by deleting
+ files that are older than seven weeks.
+
+
# Changes in version 1.16.1 - 2020-08-16
* Medium changes
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
index 0073ee3..d05aa9c 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
@@ -12,6 +12,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -23,9 +24,7 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
-import java.util.Arrays;
import java.util.SortedSet;
-import java.util.Stack;
import java.util.TreeSet;
public class BridgedbMetricsProcessor extends CollecTorMain {
@@ -127,10 +126,10 @@ public class BridgedbMetricsProcessor extends CollecTorMain {
descriptor.getClass(), descriptor.getDescriptorFile());
}
}
- logger.info("Cleaning up directory {} containing recent files.",
- this.recentPathName);
+ logger.info("Cleaning up directories {} and {}.",
+ this.recentPathName, this.outputPathName);
this.writeProcessedFiles(this.parsedBridgedbMetricsFile, processedFiles);
- this.cleanUpRsyncDirectory();
+ this.cleanUpDirectories();
logger.info("Finished processing BridgeDB statistics file(s).");
}
@@ -175,28 +174,13 @@ public class BridgedbMetricsProcessor extends CollecTorMain {
}
/**
- * Delete all files from the rsync directory that have not been modified in
- * the last three days.
+ * Delete all files from the rsync (out) directory that have not been modified
+ * in the last three days (seven weeks).
*/
- public void cleanUpRsyncDirectory() {
- Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
- Stack<File> allFiles = new Stack<>();
- allFiles.add(new File(this.recentPathName));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- File[] filesInDirectory = file.listFiles();
- if (null != filesInDirectory) {
- allFiles.addAll(Arrays.asList(filesInDirectory));
- }
- } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
- try {
- Files.deleteIfExists(file.toPath());
- } catch (IOException e) {
- logger.warn("Unable to delete file {} that is apparently older than "
- + "three days.", file, e);
- }
- }
- }
+ private void cleanUpDirectories() {
+ PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
}
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index b8e7f2d..62288ad 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.Configuration;
import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Base64;
@@ -34,15 +35,15 @@ import java.security.GeneralSecurityException;
import java.security.SecureRandom;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.Instant;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
-import java.util.Stack;
import java.util.TreeMap;
/**
@@ -228,7 +229,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
this.checkStaleDescriptors();
- this.cleanUpRsyncDirectory();
+ this.cleanUpDirectories();
}
private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
@@ -1388,27 +1389,15 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
}
- /** Delete all files from the rsync directory that have not been modified
- * in the last three days, and remove the .tmp extension from newly
- * written files. */
- public void cleanUpRsyncDirectory() throws ConfigurationException {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<>();
- allFiles.add(new File(config.getPath(Key.RecentPath).toFile(),
- BRIDGE_DESCRIPTORS));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- } else if (file.getName().endsWith(".tmp")) {
- file.renameTo(new File(file.getParentFile(),
- file.getName().substring(0,
- file.getName().lastIndexOf(".tmp"))));
- }
- }
+ /**
+ * Delete all files from the rsync (out) directory that have not been modified
+ * in the last three days (seven weeks), and remove the .tmp extension from
+ * newly written files. */
+ private void cleanUpDirectories() {
+ PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
}
diff --git a/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java
index ffae262..9961d4c 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -8,6 +8,7 @@ import org.torproject.metrics.collector.conf.Configuration;
import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
@@ -24,7 +25,6 @@ import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
-import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.DateTimeException;
@@ -178,7 +178,7 @@ public class BridgePoolAssignmentsProcessor extends CollecTorMain {
}
this.writeProcessedFiles(this.parsedBridgePoolAssignmentsFile,
processedFiles);
- this.cleanUpRsyncDirectory();
+ this.cleanUpDirectories();
logger.info("Finished processing bridge pool assignment file(s).");
}
@@ -363,29 +363,14 @@ public class BridgePoolAssignmentsProcessor extends CollecTorMain {
}
/**
- * Delete all files from the rsync directory that have not been modified in
- * the last three days.
+ * Delete all files from the rsync (out) directory that have not been modified
+ * in the last three days (seven weeks).
*/
- public void cleanUpRsyncDirectory() {
- Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
- Stack<File> allFiles = new Stack<>();
- allFiles.add(new File(this.recentPathName));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- File[] filesInDirectory = file.listFiles();
- if (null != filesInDirectory) {
- allFiles.addAll(Arrays.asList(filesInDirectory));
- }
- } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
- try {
- Files.deleteIfExists(file.toPath());
- } catch (IOException e) {
- logger.warn("Unable to delete file {} that is apparently older than "
- + "three days.", file, e);
- }
- }
- }
+ public void cleanUpDirectories() {
+ PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
}
diff --git a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
index c6b45da..6b9b791 100644
--- a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.downloader.Downloader;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -24,6 +25,8 @@ import java.io.IOException;
import java.net.URL;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Date;
import java.util.SortedSet;
@@ -168,24 +171,16 @@ public class ExitListDownloader extends CollecTorMain {
}
logger.info(dumpStats.toString());
- this.cleanUpRsyncDirectory();
+ this.cleanUpDirectories();
}
- /** Delete all files from the rsync directory that have not been modified
- * in the last three days. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<>();
- allFiles.add(new File(recentPathName));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- }
- }
+ /** Delete all files from the rsync (out) directory that have not been
+ * modified in the last three days (seven weeks). */
+ private void cleanUpDirectories() {
+ PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
}
diff --git a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
index d22ac0b..f90bdfe 100644
--- a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
@@ -12,6 +12,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.downloader.Downloader;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger;
@@ -31,13 +32,13 @@ import java.nio.file.StandardCopyOption;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
-import java.util.Stack;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -103,7 +104,7 @@ public class OnionPerfDownloader extends CollecTorMain {
this.downloadFromOnionPerfHost(baseUrl);
}
this.writeDownloadedOnionPerfFiles();
- this.cleanUpRsyncDirectory();
+ this.cleanUpDirectories();
}
private void readDownloadedOnionPerfFiles() {
@@ -441,21 +442,21 @@ public class OnionPerfDownloader extends CollecTorMain {
}
}
- /** Delete all files from the rsync directory that have not been modified
- * in the last three days. */
- public void cleanUpRsyncDirectory() throws ConfigurationException {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<>();
- allFiles.add(new File(config.getPath(Key.RecentPath).toFile(), TORPERF));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- }
- }
+ /** Delete all files from the rsync (out) directories that have not been
+ * modified in the last three days (seven weeks). */
+ private void cleanUpDirectories() {
+ PersistenceUtils.cleanDirectory(
+ new File(this.recentDirectory, TORPERF).toPath(),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(
+ new File(this.recentDirectory, ONIONPERF).toPath(),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(
+ new File(this.archiveDirectory, TORPERF).toPath(),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(
+ new File(this.archiveDirectory, ONIONPERF).toPath(),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
}
diff --git a/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java b/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java
index c958aec..e787c39 100644
--- a/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java
+++ b/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java
@@ -18,6 +18,7 @@ import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.text.SimpleDateFormat;
+import java.time.Instant;
import java.util.Date;
public class PersistenceUtils {
@@ -80,33 +81,70 @@ public class PersistenceUtils {
}
/** Move temporary files to their final location. */
- public static void cleanDirectory(Path pathToClean) throws IOException {
+ public static void cleanDirectory(Path pathToClean) {
PersistenceUtils.cleanDirectory(pathToClean, -1L);
}
/** Clean up the given directory by deleting files that are older than the
* given cut-off timestamp, and by moving temporary files to their final
* location. */
- public static void cleanDirectory(Path pathToClean, long cutOffMillis)
- throws IOException {
+ public static void cleanDirectory(Path pathToClean, long cutOffMillis) {
+ PersistenceUtils.cleanDirectory(pathToClean, cutOffMillis, null);
+ }
+
+ /** Clean up the given directory, excluding the given subdirectory, by
+ * deleting files that are older than the given cut-off timestamp, and by
+ * moving temporary files to their final location. */
+ public static void cleanDirectory(Path pathToClean, long cutOffMillis,
+ Path pathToExclude) {
+ if (!Files.exists(pathToClean)) {
+ return;
+ }
+ logger.info("Cleaning up directory {} with cut-off time {}.",
+ pathToClean, Instant.ofEpochMilli(cutOffMillis));
SimpleFileVisitor<Path> sfv = new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult preVisitDirectory(Path dir,
+ BasicFileAttributes attrs) {
+ if (null == pathToExclude || !pathToExclude.equals(dir)) {
+ return FileVisitResult.CONTINUE;
+ } else {
+ return FileVisitResult.SKIP_SUBTREE;
+ }
+ }
+
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
throws IOException {
String tempName = file.toString();
if (cutOffMillis >= 0L
&& attrs.lastModifiedTime().toMillis() < cutOffMillis) {
- file.toFile().delete();
+ Files.delete(file);
} else if (tempName.endsWith(TEMPFIX)) {
Path outputPath = Paths
.get(tempName.substring(0, tempName.length() - TEMPFIX.length()));
Files.deleteIfExists(outputPath);
- file.toFile().renameTo(outputPath.toFile());
+ Files.move(file, outputPath);
+ }
+ return FileVisitResult.CONTINUE;
+ }
+
+ @Override
+ public FileVisitResult postVisitDirectory(Path dir, IOException exc)
+ throws IOException {
+ if (!Files.list(dir).findFirst().isPresent()) {
+ Files.delete(dir);
}
return FileVisitResult.CONTINUE;
}
};
- Files.walkFileTree(pathToClean, sfv);
+ try {
+ Files.walkFileTree(pathToClean, sfv);
+ } catch (IOException e) {
+ logger.warn("Caught I/O exception while cleaning up directory {} with "
+ + "cut-off time {}. Continuing.",
+ pathToClean, Instant.ofEpochMilli(cutOffMillis), e);
+ }
}
/** Return all date-time parts as array. */
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
index 8addd5e..28472f8 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
@@ -18,6 +18,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.conf.SourceType;
import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -38,7 +39,7 @@ import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
-import java.util.Arrays;
+import java.time.temporal.ChronoUnit;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -46,7 +47,6 @@ import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
-import java.util.Stack;
import java.util.TreeMap;
import java.util.TreeSet;
@@ -197,7 +197,7 @@ public class ArchiveWriter extends CollecTorMain {
this.checkStaledescriptors();
- this.cleanUpRsyncDirectory();
+ this.cleanUpDirectories();
this.saveDescriptorDigests();
@@ -549,51 +549,16 @@ public class ArchiveWriter extends CollecTorMain {
}
}
- /** Delete all files from the rsync directory that have not been modified
- * in the last three days (except for microdescriptors which are kept
- * for up to thirty days), and remove the .tmp extension from newly
- * written files. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<>();
- allFiles.add(new File(recentPathName, RELAY_DESCRIPTORS));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- File[] containedFiles = file.listFiles();
- if (null == containedFiles) {
- logger.warn("Unable to list files contained in directory {}.", file);
- } else {
- allFiles.addAll(Arrays.asList(containedFiles));
- }
- } else if (file.getName().endsWith("-micro")) {
- if (file.lastModified() < cutOffMicroMillis) {
- if (!file.delete()) {
- logger.warn("Unable to delete outdated descriptor file {}.", file);
- }
- }
- } else if (file.lastModified() < cutOffMillis) {
- if (!file.delete()) {
- logger.warn("Unable to delete outdated descriptor file {}.", file);
- }
- } else if (file.getName().endsWith(".tmp")) {
- File destinationFile = new File(file.getParentFile(),
- file.getName().substring(0, file.getName().lastIndexOf(".tmp")));
- if (destinationFile.exists()) {
- logger.warn("Attempting to rename descriptor file {} to existing "
- + "file {}.", file, destinationFile);
- } else {
- logger.info("Renaming descriptor file {} to non-existing file {}.",
- file, destinationFile);
- }
- if (!file.renameTo(destinationFile)) {
- logger.warn("Unable to rename descriptor file {} to {}.", file,
- destinationFile);
- }
- }
- }
+ /** Delete all files from the rsync (out) directory that have not been
+ * modified in the last three days (seven weeks), and remove the .tmp
+ * extension from newly written files. */
+ public void cleanUpDirectories() {
+ PersistenceUtils.cleanDirectory(
+ Paths.get(recentPathName, RELAY_DESCRIPTORS),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(Paths.get(outputDirectory),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli(),
+ Paths.get(this.outputDirectory, "certs"));
}
private void saveDescriptorDigests() {
diff --git a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
index cbca74a..93388d5 100644
--- a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.downloader.Downloader;
+import org.torproject.metrics.collector.persist.PersistenceUtils;
import org.torproject.metrics.collector.persist.SnowflakeStatsPersistence;
import org.slf4j.Logger;
@@ -25,10 +26,10 @@ import java.io.OutputStream;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.time.Instant;
import java.time.LocalDateTime;
-import java.util.Arrays;
+import java.time.temporal.ChronoUnit;
import java.util.SortedSet;
-import java.util.Stack;
import java.util.TreeSet;
public class SnowflakeStatsDownloader extends CollecTorMain {
@@ -38,6 +39,8 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
private String recentPathName;
+ private String outputPathName;
+
/** Instantiate the snowflake-stats module using the given configuration. */
public SnowflakeStatsDownloader(Configuration config) {
super(config);
@@ -81,7 +84,7 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
DescriptorParser descriptorParser =
DescriptorSourceFactory.createDescriptorParser();
SortedSet<LocalDateTime> snowflakeStatsEnds = new TreeSet<>();
- String outputPathName = config.getPath(Key.OutputPath).toString();
+ this.outputPathName = config.getPath(Key.OutputPath).toString();
for (Descriptor descriptor : descriptorParser.parseDescriptors(
downloadedBytes, null, null)) {
if (descriptor instanceof SnowflakeStats) {
@@ -119,7 +122,7 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
}
this.writeProcessedFiles(parsedSnowflakeStatsFile, processedFiles);
- this.cleanUpRsyncDirectory();
+ this.cleanUpDirectories();
}
/**
@@ -150,21 +153,13 @@ public class SnowflakeStatsDownloader extends CollecTorMain {
}
}
- /** Delete all files from the rsync directory that have not been modified
- * in the last three days. */
- public void cleanUpRsyncDirectory() {
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- Stack<File> allFiles = new Stack<>();
- allFiles.add(new File(recentPathName));
- while (!allFiles.isEmpty()) {
- File file = allFiles.pop();
- if (file.isDirectory()) {
- allFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.lastModified() < cutOffMillis) {
- file.delete();
- }
- }
+ /** Delete all files from the rsync (out) directory that have not been
+ * modified in the last three days (seven weeks). */
+ private void cleanUpDirectories() {
+ PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
}
diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
index adffb93..af48b1f 100644
--- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
@@ -42,7 +42,6 @@ import org.torproject.metrics.collector.persist.WebServerAccessLogPersistence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
import java.nio.file.Path;
/** Provides persistence for descriptors based on the descriptor type. */
@@ -69,11 +68,7 @@ public class SyncPersistence {
* Cleans the directory in {@code RecentPath} after storing descriptors.
*/
public void cleanDirectory() {
- try {
- PersistenceUtils.cleanDirectory(recentPath);
- } catch (IOException ioe) {
- logger.error("Cleaning of {} failed.", recentPath.toString(), ioe);
- }
+ PersistenceUtils.cleanDirectory(recentPath);
}
/**
diff --git a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
index 670f686..e4f427e 100644
--- a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java
@@ -31,8 +31,10 @@ import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.time.Instant;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -106,10 +108,12 @@ public class SanitizeWeblogs extends CollecTorMain {
= this.findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins),
previouslyProcessedWebstats);
this.writeProcessedWebstats(newlyProcessedWebstats);
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
- PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath),
- cutOffMillis);
+ PersistenceUtils.cleanDirectory(
+ Paths.get(this.recentDirectory.toString(), WEBSTATS),
+ Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
+ PersistenceUtils.cleanDirectory(
+ Paths.get(this.outputDirectory.toString(), WEBSTATS),
+ Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
} catch (Exception e) {
logger.error("Cannot sanitize web-logs: {}", e.getMessage(), e);
diff --git a/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java b/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java
index a33b94f..054d0e6 100644
--- a/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java
+++ b/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java
@@ -16,6 +16,9 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
+import java.nio.file.attribute.FileTime;
+import java.time.Instant;
+import java.time.temporal.ChronoUnit;
import java.util.List;
public class PersistUtilsTest {
@@ -107,4 +110,33 @@ public class PersistUtilsTest {
assertEquals("File contained: " + text, theText2, text.get(3));
}
+ @Test()
+ public void testCleanDirectory() throws Exception {
+ /*
+ * out/
+ * a/ # empty after deleting x
+ * x # too old file, delete
+ * b/ # keep together with recent file y
+ * y.tmp # recent enough, rename to y
+ * c/ # exclude (empty) subdirectory
+ */
+ Instant now = Instant.now();
+ Path out = tmpf.newFolder().toPath();
+ Path dirA = Files.createDirectory(out.resolve("a"));
+ Path fileX = Files.createFile(dirA.resolve("x"));
+ Files.setLastModifiedTime(fileX,
+ FileTime.from(now.minus(9L, ChronoUnit.DAYS)));
+ Path dirB = Files.createDirectory(out.resolve("b"));
+ Path fileYTmp = Files.createFile(dirB.resolve("y.tmp"));
+ Files.setLastModifiedTime(fileYTmp, FileTime.from(now));
+ Path dirC = Files.createDirectory(out.resolve("c"));
+ PersistenceUtils.cleanDirectory(out,
+ now.minus(3L, ChronoUnit.DAYS).toEpochMilli(), dirC);
+ assertFalse(Files.exists(dirA));
+ assertFalse(Files.exists(fileX));
+ assertTrue(Files.exists(dirB));
+ assertFalse(Files.exists(fileYTmp));
+ assertTrue(Files.exists(dirB.resolve("y")));
+ assertTrue(Files.exists(dirC));
+ }
}
More information about the tor-commits mailing list