[tor-commits] [collector/master] Archive bandwidth files in relaydescs module.
karsten at torproject.org
karsten at torproject.org
Mon May 13 13:41:54 UTC 2019
commit dcbac68b48ae31b1bfbabab7a9c32f5577b78571
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu May 2 22:02:23 2019 +0200
Archive bandwidth files in relaydescs module.
Also update to metrics-lib 2.6.1.
Implements #30218.
---
CHANGELOG.md | 7 ++++
build.xml | 4 +--
.../metrics/collector/conf/Annotation.java | 1 +
.../collector/relaydescs/ArchiveWriter.java | 42 +++++++++++++++++++++-
.../relaydescs/RelayDescriptorDownloader.java | 24 ++++++++++---
.../relaydescs/RelayDescriptorParser.java | 42 ++++++++++++++++++++++
src/main/resources/create-tarballs.sh | 7 ++++
7 files changed, 120 insertions(+), 7 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0307748..0e592ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# Changes in version 1.9.0 - 2019-05-??
+
+ * Medium changes
+ - Archive bandwidth files in relaydescs module.
+ - Update to metrics-lib 2.6.1.
+
+
# Changes in version 1.8.0 - 2018-10-11
* Medium changes
diff --git a/build.xml b/build.xml
index 5cea51a..5874013 100644
--- a/build.xml
+++ b/build.xml
@@ -11,7 +11,7 @@
<property name="release.version" value="1.8.0-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
- <property name="metricslibversion" value="2.4.0" />
+ <property name="metricslibversion" value="2.6.1" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >
@@ -21,7 +21,7 @@
<include name="jackson-core-2.8.6.jar"/>
<include name="jackson-databind-2.8.6.jar"/>
<include name="xz-1.6.jar"/>
- <include name="metrics-lib-${metricslibversion}.jar"/>
+ <include name="metrics-lib-${metricslibversion}-thin.jar"/>
<include name="logback-core-1.1.9.jar" />
<include name="logback-classic-1.1.9.jar" />
<include name="slf4j-api-1.7.22.jar" />
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
index f90516b..2e47df0 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
@@ -6,6 +6,7 @@ package org.torproject.metrics.collector.conf;
/** This enum contains all currently valid descriptor annotations. */
public enum Annotation {
+ BandwidthFile("@type bandwidth-file 1.0\n"),
BridgeExtraInfo("@type bridge-extra-info 1.3\n"),
BridgeServer("@type bridge-server-descriptor 1.2\n"),
Cert("@type dir-key-certificate-3 1.0\n"),
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
index 966b649..e1279ee 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
@@ -3,6 +3,7 @@
package org.torproject.metrics.collector.relaydescs;
+import org.torproject.descriptor.BandwidthFile;
import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.DescriptorParser;
import org.torproject.descriptor.DescriptorSourceFactory;
@@ -33,6 +34,10 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
@@ -44,6 +49,7 @@ import java.util.SortedSet;
import java.util.Stack;
import java.util.TimeZone;
import java.util.TreeMap;
+import java.util.TreeSet;
public class ArchiveWriter extends CollecTorMain {
@@ -51,12 +57,15 @@ public class ArchiveWriter extends CollecTorMain {
ArchiveWriter.class);
private long now = System.currentTimeMillis();
+ private LocalDateTime nowLocalDateTime
+ = LocalDateTime.ofInstant(Instant.ofEpochMilli(this.now), ZoneOffset.UTC);
private String outputDirectory;
private String rsyncCatString;
private DescriptorParser descriptorParser;
private int storedConsensusesCounter = 0;
private int storedMicrodescConsensusesCounter = 0;
private int storedVotesCounter = 0;
+ private int storedBandwidthsCounter = 0;
private int storedCertsCounter = 0;
private int storedServerDescriptorsCounter = 0;
private int storedExtraInfoDescriptorsCounter = 0;
@@ -74,6 +83,8 @@ public class ArchiveWriter extends CollecTorMain {
private SortedMap<Long, Set<String>> storedExtraInfoDescriptors =
new TreeMap<>();
private SortedMap<Long, Set<String>> storedMicrodescriptors = new TreeMap<>();
+ private SortedMap<LocalDateTime, Set<String>> storedBandwidths
+ = new TreeMap<>();
private File storedServerDescriptorsFile;
private File storedExtraInfoDescriptorsFile;
@@ -103,6 +114,8 @@ public class ArchiveWriter extends CollecTorMain {
RelayServerDescriptor.class);
this.mapPathDescriptors.put("recent/relay-descriptors/extra-infos",
RelayExtraInfoDescriptor.class);
+ this.mapPathDescriptors.put("recent/relay-descriptors/bandwidths",
+ BandwidthFile.class);
}
@Override
@@ -203,6 +216,7 @@ public class ArchiveWriter extends CollecTorMain {
this.storedConsensuses.clear();
this.storedMicrodescConsensuses.clear();
this.storedVotes.clear();
+ this.storedBandwidths.clear();
this.storedServerDescriptors.clear();
this.storedExtraInfoDescriptors.clear();
this.storedMicrodescriptors.clear();
@@ -299,7 +313,8 @@ public class ArchiveWriter extends CollecTorMain {
.append(this.storedConsensusesCounter).append(" consensus(es), ")
.append(this.storedMicrodescConsensusesCounter).append(" microdesc ")
.append("consensus(es), ").append(this.storedVotesCounter)
- .append(" vote(s), ").append(this.storedCertsCounter)
+ .append(" vote(s), ").append(this.storedBandwidthsCounter)
+ .append(" bandwidth file(s), ").append(this.storedCertsCounter)
.append(" certificate(s), ").append(this.storedServerDescriptorsCounter)
.append(" server descriptor(s), ")
.append(this.storedExtraInfoDescriptorsCounter).append(" extra-info ")
@@ -309,6 +324,7 @@ public class ArchiveWriter extends CollecTorMain {
this.storedConsensusesCounter = 0;
this.storedMicrodescConsensusesCounter = 0;
this.storedVotesCounter = 0;
+ this.storedBandwidthsCounter = 0;
this.storedCertsCounter = 0;
this.storedServerDescriptorsCounter = 0;
this.storedExtraInfoDescriptorsCounter = 0;
@@ -727,6 +743,30 @@ public class ArchiveWriter extends CollecTorMain {
}
}
+ /** Stores a bandwidth file in the output and recent directories. */
+ void storeBandwidthFile(byte[] data, LocalDateTime fileCreatedOrTimestamp,
+ String bandwidthFileDigest) {
+ DateTimeFormatter printFormat = DateTimeFormatter
+ .ofPattern("uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss").withZone(ZoneOffset.UTC);
+ File tarballFile = Paths.get(this.outputDirectory, "bandwidth",
+ fileCreatedOrTimestamp.format(printFormat) + "-bandwidth-"
+ + bandwidthFileDigest).toFile();
+ boolean tarballFileExistedBefore = tarballFile.exists();
+ File rsyncFile = Paths.get(recentPathName, RELAY_DESCRIPTORS, "bandwidths",
+ tarballFile.getName()).toFile();
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(Annotation.BandwidthFile.bytes(), data, outputFiles, null)) {
+ this.storedBandwidthsCounter++;
+ }
+ if (!tarballFileExistedBefore
+ && this.nowLocalDateTime.isAfter(fileCreatedOrTimestamp.plusDays(3L))) {
+ this.storedBandwidths.putIfAbsent(fileCreatedOrTimestamp,
+ new TreeSet<>());
+ this.storedBandwidths.get(fileCreatedOrTimestamp)
+ .add(bandwidthFileDigest);
+ }
+ }
+
/** Stores a key certificate to disk. */
public void storeCertificate(byte[] data, String fingerprint,
long published) {
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
index 4764a4b..5a241f4 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
@@ -258,6 +258,8 @@ public class RelayDescriptorDownloader {
private int requestedVotes = 0;
+ private int requestedBandwidthFiles = 0;
+
private int requestedMissingServerDescriptors = 0;
private int requestedAllServerDescriptors = 0;
@@ -274,6 +276,8 @@ public class RelayDescriptorDownloader {
private int downloadedVotes = 0;
+ private int downloadedBandwidthFiles = 0;
+
private int downloadedMissingServerDescriptors = 0;
private int downloadedAllServerDescriptors = 0;
@@ -729,6 +733,14 @@ public class RelayDescriptorDownloader {
}
}
+ /* Now try to download the bandwidth file, regardless of whether this
+ * authority might provide one or when we last downloaded a bandwidth
+ * file from it. */
+ this.requestedBandwidthFiles++;
+ this.downloadedBandwidthFiles +=
+ this.downloadResourceFromAuthority(authority,
+ "/tor/status-vote/next/bandwidth");
+
/* Download either all server and extra-info descriptors or only
* those that we're missing. Start with server descriptors, then
* request extra-info descriptors. Finally, request missing
@@ -886,7 +898,7 @@ public class RelayDescriptorDownloader {
allData == null ? 0 : allData.length);
int receivedDescriptors = 0;
if (allData != null) {
- if (resource.startsWith("/tor/status-vote/current/")) {
+ if (resource.startsWith("/tor/status-vote/")) {
this.rdp.parse(allData);
receivedDescriptors = 1;
} else if (resource.startsWith("/tor/server/")
@@ -1067,11 +1079,13 @@ public class RelayDescriptorDownloader {
this.newMissingServerDescriptors, this.newMissingExtraInfoDescriptors,
this.newMissingMicrodescriptors);
logger.info("We requested {} consensus(es), {} microdesc consensus(es), "
- + "{} vote(s), {} missing server descriptor(s), {} times all server "
+ + "{} vote(s), {} bandwidth file(s), {} missing server descriptor(s), "
+ + "{} times all server "
+ "descriptors, {} missing extra-info descriptor(s), {} times all "
+ "extra-info descriptors, and {} missing microdescriptor(s) from the "
+ "directory authorities.", this.requestedConsensuses,
this.requestedMicrodescConsensuses, this.requestedVotes,
+ this.requestedBandwidthFiles,
this.requestedMissingServerDescriptors,
this.requestedAllServerDescriptors,
this.requestedMissingExtraInfoDescriptors,
@@ -1085,12 +1099,14 @@ public class RelayDescriptorDownloader {
logger.info("We sent these numbers of requests to the directory "
+ "authorities:{}", sb.toString());
logger.info("We successfully downloaded {} consensus(es), {} microdesc "
- + "consensus(es), {} vote(s), {} missing server descriptor(s), {} "
+ + "consensus(es), {} vote(s), {} bandwidth file(s), "
+ + "{} missing server descriptor(s), {} "
+ "server descriptor(s) when downloading all descriptors, {} missing "
+ "extra-info descriptor(s), {} extra-info descriptor(s) when "
+ "downloading all descriptors, and {} missing microdescriptor(s).",
this.downloadedConsensuses, this.downloadedMicrodescConsensuses,
- this.downloadedVotes, this.downloadedMissingServerDescriptors,
+ this.downloadedVotes, this.downloadedBandwidthFiles,
+ this.downloadedMissingServerDescriptors,
this.downloadedAllServerDescriptors,
this.downloadedMissingExtraInfoDescriptors,
this.downloadedAllExtraInfoDescriptors,
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java
index 5224a61..113ac77 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java
@@ -14,6 +14,10 @@ import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeParseException;
import java.util.SortedSet;
import java.util.TimeZone;
import java.util.TreeSet;
@@ -318,6 +322,44 @@ public class RelayDescriptorParser {
* time(s) of microdesc consensuses containing them, because we
* don't know which month directories to put them in. Have to use
* storeMicrodescriptor below. */
+ } else if (line.matches("[0-9]{10}")) {
+ /* The following code is a much more lenient version of the parser in
+ * metrics-lib that we need for storing a bandwidth file even if
+ * metrics-lib has trouble verifying its format. As in metrics-lib,
+ * identifying bandwidth files by a 10-digit timestamp in the first line
+ * breaks with files generated before 2002 or after 2286 and when the
+ * next descriptor identifier starts with just a timestamp in the first
+ * line rather than a document type identifier. */
+ String timestampLine = line;
+ LocalDateTime fileCreatedOrTimestamp = null;
+ try {
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("file_created=")) {
+ fileCreatedOrTimestamp = LocalDateTime.parse(
+ line.substring("file_created=".length()));
+ break;
+ } else if (line.startsWith("bw=") || line.contains(" bw=")
+ || "====".equals(line) || "=====".equals(line)) {
+ break;
+ }
+ }
+ } catch (IOException | DateTimeParseException e) {
+ /* Fall back to using timestamp in first line. */
+ }
+ if (null == fileCreatedOrTimestamp) {
+ try {
+ fileCreatedOrTimestamp = LocalDateTime.ofInstant(
+ Instant.ofEpochSecond(Long.parseLong(timestampLine)),
+ ZoneOffset.UTC);
+ } catch (NumberFormatException | DateTimeParseException e) {
+ logger.warn("Could not parse timestamp or file_created time from "
+ + "bandwidth file. Storing with timestamp 2000-01-01 00:00:00");
+ fileCreatedOrTimestamp = LocalDateTime.of(2000, 1, 1, 0, 0, 0);
+ }
+ }
+ this.aw.storeBandwidthFile(data, fileCreatedOrTimestamp,
+ DigestUtils.sha256Hex(data).toUpperCase());
+ stored = true;
}
br.close();
} catch (IOException | ParseException e) {
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index d247c52..7e4668a 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -47,6 +47,8 @@ TARBALLS=(
consensuses-$YEARTWO-$MONTHTWO
votes-$YEARONE-$MONTHONE
votes-$YEARTWO-$MONTHTWO
+ bandwidths-$YEARONE-$MONTHONE
+ bandwidths-$YEARTWO-$MONTHTWO
server-descriptors-$YEARONE-$MONTHONE
server-descriptors-$YEARTWO-$MONTHTWO
extra-infos-$YEARONE-$MONTHONE
@@ -72,6 +74,8 @@ DIRECTORIES=(
$OUTDIR/relay-descriptors/consensus/$YEARTWO/$MONTHTWO
$OUTDIR/relay-descriptors/vote/$YEARONE/$MONTHONE/
$OUTDIR/relay-descriptors/vote/$YEARTWO/$MONTHTWO/
+ $OUTDIR/relay-descriptors/bandwidth/$YEARONE/$MONTHONE/
+ $OUTDIR/relay-descriptors/bandwidth/$YEARTWO/$MONTHTWO/
$OUTDIR/relay-descriptors/server-descriptor/$YEARONE/$MONTHONE/
$OUTDIR/relay-descriptors/server-descriptor/$YEARTWO/$MONTHTWO/
$OUTDIR/relay-descriptors/extra-info/$YEARONE/$MONTHONE/
@@ -156,6 +160,9 @@ ln -f -s -t $ARCHIVEDIR/relay-descriptors/tor/ $TARBALLTARGETDIR/tor-20??-??.tar
mkdir -p $ARCHIVEDIR/relay-descriptors/votes/
ln -f -s -t $ARCHIVEDIR/relay-descriptors/votes/ $TARBALLTARGETDIR/votes-20??-??.tar.xz
+mkdir -p $ARCHIVEDIR/relay-descriptors/bandwidths/
+ln -f -s -t $ARCHIVEDIR/relay-descriptors/bandwidths/ $TARBALLTARGETDIR/bandwidths-20??-??.tar.xz
+
mkdir -p $ARCHIVEDIR/torperf/
ln -f -s -t $ARCHIVEDIR/torperf/ $TARBALLTARGETDIR/torperf-20??-??.tar.xz
More information about the tor-commits
mailing list