[tor-commits] [collector/master] Add new BridgedbMetrics module.
karsten at torproject.org
karsten at torproject.org
Fri Oct 18 09:10:12 UTC 2019
commit f2abf679c890f3f050c839cf32910900ae73ee76
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Sep 18 15:57:52 2019 +0200
Add new BridgedbMetrics module.
Implements part of #19332.
---
CHANGELOG.md | 2 +
build.xml | 2 +-
.../org/torproject/metrics/collector/Main.java | 3 +
.../bridgedb/BridgedbMetricsProcessor.java | 190 +++++++++++++++++++++
.../metrics/collector/conf/Annotation.java | 1 +
.../metrics/collector/conf/Configuration.java | 1 +
.../org/torproject/metrics/collector/conf/Key.java | 8 +-
.../persist/BridgedbMetricsPersistence.java | 37 ++++
.../collector/persist/DescriptorPersistence.java | 1 +
.../metrics/collector/sync/SyncPersistence.java | 5 +
src/main/resources/collector.properties | 17 ++
.../metrics/collector/conf/ConfigurationTest.java | 2 +-
.../metrics/collector/cron/CollecTorMainTest.java | 1 +
13 files changed, 267 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e72f02..ca4d7b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
* Medium changes
- Require Mockito 1.10.19 as dependency for running tests.
+ - Archive BridgeDB statistics.
+ - Update to metrics-lib 2.8.0.
# Changes in version 1.11.1 - 2019-09-19
diff --git a/build.xml b/build.xml
index 6b097a3..892566b 100644
--- a/build.xml
+++ b/build.xml
@@ -12,7 +12,7 @@
<property name="release.version" value="1.11.1-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
- <property name="metricslibversion" value="2.7.0" />
+ <property name="metricslibversion" value="2.8.0" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >
diff --git a/src/main/java/org/torproject/metrics/collector/Main.java b/src/main/java/org/torproject/metrics/collector/Main.java
index 3150ffc..3438bda 100644
--- a/src/main/java/org/torproject/metrics/collector/Main.java
+++ b/src/main/java/org/torproject/metrics/collector/Main.java
@@ -3,6 +3,7 @@
package org.torproject.metrics.collector;
+import org.torproject.metrics.collector.bridgedb.BridgedbMetricsProcessor;
import org.torproject.metrics.collector.bridgedescs.SanitizedBridgesWriter;
import org.torproject.metrics.collector.bridgepools.BridgePoolAssignmentsProcessor;
import org.torproject.metrics.collector.conf.Configuration;
@@ -59,6 +60,8 @@ public class Main {
collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class);
collecTorMains.put(Key.SnowflakeStatsActivated,
SnowflakeStatsDownloader.class);
+ collecTorMains.put(Key.BridgedbMetricsActivated,
+ BridgedbMetricsProcessor.class);
}
private static Configuration conf = new Configuration();
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
new file mode 100644
index 0000000..7ae4502
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
@@ -0,0 +1,190 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedb;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.UnparseableDescriptor;
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.ConfigurationException;
+import org.torproject.metrics.collector.conf.Key;
+import org.torproject.metrics.collector.cron.CollecTorMain;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
+import java.util.Arrays;
+import java.util.Stack;
+
+public class BridgedbMetricsProcessor extends CollecTorMain {
+
+ /**
+ * Class logger.
+ */
+ private static final Logger logger = LoggerFactory.getLogger(
+ BridgedbMetricsProcessor.class);
+
+ /**
+ * Directory for reading BridgeDB statistics files.
+ */
+ private File inputDirectory;
+
+ /**
+ * Directory for writing BridgeDB statistics files to be archived in tarballs.
+ */
+ private String outputPathName;
+
+ /**
+ * Directory for writing recently processed BridgeDB statistics files.
+ */
+ private String recentPathName;
+
+ /**
+ * File name format.
+ */
+ private DateTimeFormatter filenameFormat = DateTimeFormatter.ofPattern(
+ "uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss");
+
+ /**
+ * Initialize this class with the given configuration.
+ */
+ public BridgedbMetricsProcessor(Configuration config) {
+ super(config);
+ }
+
+ /**
+ * Return the module identifier.
+ *
+ * @return Module identifier.
+ */
+ @Override
+ public String module() {
+ return "BridgedbMetrics";
+ }
+
+ /**
+ * Return the synchronization marker.
+ *
+ * @return Synchronization marker.
+ */
+ @Override
+ protected String syncMarker() {
+ return "BridgedbMetrics";
+ }
+
+ /**
+ * Start processing files, which includes reading BridgeDB statistics files
+ * from disk, possibly decompressing them and splitting them by date, and
+ * writing them back to disk.
+ *
+ * @throws ConfigurationException Thrown if configuration values cannot be
+ * obtained.
+ */
+ @Override
+ protected void startProcessing() throws ConfigurationException {
+ logger.info("Starting BridgeDB statistics module of CollecTor.");
+ this.initializeConfiguration();
+ logger.info("Reading BridgeDB statistics files in {}.",
+ this.inputDirectory);
+ for (Descriptor descriptor
+ : DescriptorSourceFactory.createDescriptorReader()
+ .readDescriptors(this.inputDirectory)) {
+ if (descriptor instanceof BridgedbMetrics) {
+ BridgedbMetrics bridgedbMetrics = (BridgedbMetrics) descriptor;
+ Path tarballPath = Paths.get(this.outputPathName,
+ bridgedbMetrics.bridgedbMetricsEnd().format(this.filenameFormat));
+ Path rsyncPath = Paths.get(this.recentPathName,
+ bridgedbMetrics.bridgedbMetricsEnd().format(this.filenameFormat));
+ this.writeDescriptor(bridgedbMetrics.getRawDescriptorBytes(),
+ tarballPath, rsyncPath);
+ } else if (descriptor instanceof UnparseableDescriptor) {
+ logger.warn("Skipping unparseable descriptor in file {}.",
+ descriptor.getDescriptorFile(),
+ ((UnparseableDescriptor) descriptor).getDescriptorParseException());
+ } else {
+ logger.warn("Skipping unexpected descriptor of type {} in file {}.",
+ descriptor.getClass(), descriptor.getDescriptorFile());
+ }
+ }
+ logger.info("Cleaning up directory {} containing recent files.",
+ this.recentPathName);
+ this.cleanUpRsyncDirectory();
+ logger.info("Finished processing BridgeDB statistics file(s).");
+ }
+
+ /**
+ * Initialize configuration by obtaining current configuration values and
+ * storing them in instance attributes.
+ */
+ private void initializeConfiguration() throws ConfigurationException {
+ this.outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(),
+ "bridgedb-metrics").toString();
+ this.recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(),
+ "bridgedb-metrics").toString();
+ this.inputDirectory =
+ config.getPath(Key.BridgedbMetricsLocalOrigins).toFile();
+ }
+
+ /**
+ * Write the given raw descriptor bytes to the given files, and stop at the
+ * first file that already exists.
+ *
+ * @param rawDescriptorBytes Raw descriptor bytes to write.
+ * @param outputPaths One or more paths to write to.
+ */
+ private void writeDescriptor(byte[] rawDescriptorBytes,
+ Path ... outputPaths) {
+ for (Path outputPath : outputPaths) {
+ try {
+ File outputFile = outputPath.toFile();
+ if (outputFile.exists()) {
+ continue;
+ }
+ if (!outputFile.getParentFile().exists()
+ && !outputFile.getParentFile().mkdirs()) {
+ logger.warn("Could not create parent directories of {}.", outputFile);
+ return;
+ }
+ Files.write(outputPath, rawDescriptorBytes);
+ } catch (IOException e) {
+ logger.warn("Unable to write descriptor to file {}.", outputPath, e);
+ }
+ }
+ }
+
+ /**
+ * Delete all files from the rsync directory that have not been modified in
+ * the last three days.
+ */
+ public void cleanUpRsyncDirectory() {
+ Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
+ Stack<File> allFiles = new Stack<>();
+ allFiles.add(new File(this.recentPathName));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ File[] filesInDirectory = file.listFiles();
+ if (null != filesInDirectory) {
+ allFiles.addAll(Arrays.asList(filesInDirectory));
+ }
+ } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
+ try {
+ Files.deleteIfExists(file.toPath());
+ } catch (IOException e) {
+ logger.warn("Unable to delete file {} that is apparently older than "
+ + "three days.", file, e);
+ }
+ }
+ }
+ }
+}
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
index 7d2bbe9..ff5119e 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
@@ -7,6 +7,7 @@ package org.torproject.metrics.collector.conf;
public enum Annotation {
BandwidthFile("@type bandwidth-file 1.0\n"),
+ BridgedbMetrics("@type bridgedb-metrics 1.0\n"),
BridgeExtraInfo("@type bridge-extra-info 1.3\n"),
BridgePoolAssignment("@type bridge-pool-assignment 1.0\n"),
BridgeServer("@type bridge-server-descriptor 1.2\n"),
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
index 59229e3..56be34c 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
@@ -89,6 +89,7 @@ public class Configuration extends Observable implements Cloneable {
if (!(this.getBool(Key.RelaydescsActivated)
|| this.getBool(Key.BridgedescsActivated)
|| this.getBool(Key.BridgePoolAssignmentsActivated)
+ || this.getBool(Key.BridgedbMetricsActivated)
|| this.getBool(Key.ExitlistsActivated)
|| this.getBool(Key.UpdateindexActivated)
|| this.getBool(Key.OnionPerfActivated)
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java
index dfef673..390feed 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Key.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java
@@ -78,7 +78,13 @@ public enum Key {
SnowflakeStatsPeriodMinutes(Integer.class),
SnowflakeStatsUrl(URL.class),
SnowflakeStatsSources(SourceType[].class),
- SnowflakeStatsSyncOrigins(URL[].class);
+ SnowflakeStatsSyncOrigins(URL[].class),
+ BridgedbMetricsActivated(Boolean.class),
+ BridgedbMetricsOffsetMinutes(Integer.class),
+ BridgedbMetricsPeriodMinutes(Integer.class),
+ BridgedbMetricsSources(SourceType[].class),
+ BridgedbMetricsLocalOrigins(Path.class),
+ BridgedbMetricsSyncOrigins(URL[].class);
private Class clazz;
private static Set<String> keys;
diff --git a/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java
new file mode 100644
index 0000000..a72ffe2
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java
@@ -0,0 +1,37 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.persist;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.metrics.collector.conf.Annotation;
+
+import java.nio.file.Paths;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+/**
+ * Persistence wrapper for BridgeDB metrics descriptors that derives the
+ * relative recent and storage paths from the value returned by the
+ * descriptor's {@code bridgedbMetricsEnd()} method.
+ */
+public class BridgedbMetricsPersistence
+    extends DescriptorPersistence<BridgedbMetrics> {
+
+  /** Top-level directory name shared by the recent and storage paths. */
+  private static final String BRIDGEDB_METRICS = "bridgedb-metrics";
+
+  /** Formats the date that is split into nested storage directories. */
+  private static final DateTimeFormatter DIRECTORIES_FORMATTER
+      = DateTimeFormatter.ofPattern("uuuu/MM/dd").withZone(ZoneOffset.UTC);
+
+  /** Formats the timestamp that the file name starts with. */
+  private static final DateTimeFormatter FILE_FORMATTER
+      = DateTimeFormatter.ofPattern("uuuu-MM-dd-HH-mm-ss")
+      .withZone(ZoneOffset.UTC);
+
+  /**
+   * Wrap the given descriptor together with the BridgeDB metrics type
+   * annotation and compute its paths.
+   *
+   * @param desc Descriptor to be persisted.
+   */
+  public BridgedbMetricsPersistence(BridgedbMetrics desc) {
+    super(desc, Annotation.BridgedbMetrics.bytes());
+    calculatePaths();
+  }
+
+  /**
+   * Compute the storage path, which nests the file below year, month, and
+   * day directories, and the recent path, which places the file directly
+   * below the top-level directory.
+   */
+  private void calculatePaths() {
+    String[] yearMonthDay = DIRECTORIES_FORMATTER
+        .format(this.desc.bridgedbMetricsEnd()).split("/");
+    String fileName = FILE_FORMATTER.format(this.desc.bridgedbMetricsEnd())
+        + "-bridgedb-metrics";
+    this.storagePath = Paths.get(BRIDGEDB_METRICS, yearMonthDay[0],
+        yearMonthDay[1], yearMonthDay[2], fileName).toString();
+    this.recentPath = Paths.get(BRIDGEDB_METRICS, fileName).toString();
+  }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
index 3e7a06b..20cd570 100644
--- a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
@@ -20,6 +20,7 @@ public abstract class DescriptorPersistence<T extends Descriptor> {
protected static final String BRIDGEDESCS = "bridge-descriptors";
protected static final String BRIDGEPOOLASSIGNMENTS
= "bridge-pool-assignments";
+ protected static final String BRIDGEDBSTATS = "bridgedb-stats";
protected static final String DASH = "-";
protected static final String DOT = ".";
protected static final String MICRODESC = "microdesc";
diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
index cfc3dbe..f6678fe 100644
--- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
@@ -8,6 +8,7 @@ import org.torproject.descriptor.BridgeExtraInfoDescriptor;
import org.torproject.descriptor.BridgeNetworkStatus;
import org.torproject.descriptor.BridgePoolAssignment;
import org.torproject.descriptor.BridgeServerDescriptor;
+import org.torproject.descriptor.BridgedbMetrics;
import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.ExitList;
import org.torproject.descriptor.RelayExtraInfoDescriptor;
@@ -24,6 +25,7 @@ import org.torproject.metrics.collector.persist.BandwidthFilePersistence;
import org.torproject.metrics.collector.persist.BridgeExtraInfoPersistence;
import org.torproject.metrics.collector.persist.BridgePoolAssignmentPersistence;
import org.torproject.metrics.collector.persist.BridgeServerDescriptorPersistence;
+import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence;
import org.torproject.metrics.collector.persist.ConsensusPersistence;
import org.torproject.metrics.collector.persist.DescriptorPersistence;
import org.torproject.metrics.collector.persist.ExitlistPersistence;
@@ -154,6 +156,9 @@ public class SyncPersistence {
case "SnowflakeStats":
descPersist = new SnowflakeStatsPersistence((SnowflakeStats) desc);
break;
+ case "BridgedbMetrics":
+ descPersist = new BridgedbMetricsPersistence((BridgedbMetrics) desc);
+ break;
default:
log.trace("Invalid descriptor type {} for sync-merge.",
clazz.getName());
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index b180a3e..e7cadf7 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -59,6 +59,12 @@ SnowflakeStatsActivated = false
SnowflakeStatsPeriodMinutes = 480
# offset in minutes since the epoch and
SnowflakeStatsOffsetMinutes = 100
+# the following defines, if this module is activated
+BridgedbMetricsActivated = false
+# period in minutes
+BridgedbMetricsPeriodMinutes = 480
+# offset in minutes since the epoch and
+BridgedbMetricsOffsetMinutes = 340
##########################################
## All below can be changed at runtime.
@@ -216,3 +222,14 @@ SnowflakeStatsSyncOrigins = https://collector.torproject.org
## Where to download snowflake statistics from.
SnowflakeStatsUrl = https://snowflake-broker.torproject.net/metrics
#
+######## BridgeDB statistics ########
+#
+## Define descriptor sources
+# possible values: Local, Sync
+BridgedbMetricsSources = Local
+## Relative path to directory to import BridgeDB metrics from.
+BridgedbMetricsLocalOrigins = in/bridgedb-stats
+## Retrieve files from the following instances.
+## List of URLs separated by comma.
+BridgedbMetricsSyncOrigins = https://collector.torproject.org
+#
\ No newline at end of file
diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
index 7845909..7e9ea28 100644
--- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
@@ -39,7 +39,7 @@ public class ConfigurationTest {
public void testKeyCount() {
assertEquals("The number of properties keys in enum Key changed."
+ "\n This test class should be adapted.",
- 65, Key.values().length);
+ 71, Key.values().length);
}
@Test()
diff --git a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
index 99f1f48..cc124a4 100644
--- a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
+++ b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
@@ -70,6 +70,7 @@ public class CollecTorMainTest {
case "Relay":
case "Bridge":
case "BridgePoolAssignments":
+ case "BridgedbMetrics":
case "Exitlist":
case "OnionPerf":
case "Webstats":
More information about the tor-commits
mailing list