[tor-commits] [collector/master] Extend index.json by additional file meta data.
karsten at torproject.org
karsten at torproject.org
Sat Nov 9 10:01:47 UTC 2019
commit 500b7c5ad3d94a0f0f8b8c7fdb110813895c25f0
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed Oct 30 21:56:41 2019 +0100
Extend index.json by additional file meta data.
Implements #31204.
---
CHANGELOG.md | 13 +
build.xml | 2 +-
src/build | 2 +-
.../org/torproject/metrics/collector/conf/Key.java | 5 +-
.../metrics/collector/indexer/CreateIndexJson.java | 678 +++++++++++++++++----
.../metrics/collector/indexer/DirectoryNode.java | 36 ++
.../metrics/collector/indexer/FileNode.java | 125 ++++
.../metrics/collector/indexer/IndexNode.java | 30 +
.../metrics/collector/indexer/IndexerTask.java | 225 +++++++
src/main/resources/collector.properties | 14 +-
src/main/resources/create-tarballs.sh | 2 +-
.../metrics/collector/conf/ConfigurationTest.java | 2 +-
.../collector/indexer/CreateIndexJsonTest.java | 522 ++++++++++++++++
.../metrics/collector/indexer/IndexerTaskTest.java | 226 +++++++
14 files changed, 1756 insertions(+), 126 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb328ad..38754c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# Changes in version 1.??.? - 2019-??-??
+ * Medium changes
+ - Extend index.json by including descriptor types, first and last
+ publication timestamp, and SHA-256 file digest. Requires making
+ configuration changes in collector.properties:
+ 1) IndexedPath is a new directory with subdirectories for
+ archived and recent descriptors,
+ 2) ArchivePath and IndexPath are hard-wired to be subdirectories
+ of IndexedPath,
+ 3) RecentPath must be set to be a subdirectory of IndexedPath,
+ 4) ContribPath has disappeared, and
+ 5) HtdocsPath is a new directory with files served by the web
+ server.
+
# Changes in version 1.12.0 - 2019-10-18
diff --git a/build.xml b/build.xml
index 8276e63..019461d 100644
--- a/build.xml
+++ b/build.xml
@@ -12,7 +12,7 @@
<property name="release.version" value="1.12.0-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
- <property name="metricslibversion" value="2.8.0" />
+ <property name="metricslibversion" value="2.9.0" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >
diff --git a/src/build b/src/build
index d82fff9..eb16cb3 160000
--- a/src/build
+++ b/src/build
@@ -1 +1 @@
-Subproject commit d82fff984634fe006ac7b0b102e7f48a52ca20d9
+Subproject commit eb16cb359db41722e6089bafb1e26808df4338df
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java
index 390feed..d59438b 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Key.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java
@@ -18,13 +18,12 @@ public enum Key {
RunOnce(Boolean.class),
ExitlistUrl(URL.class),
InstanceBaseUrl(String.class),
- ArchivePath(Path.class),
- ContribPath(Path.class),
+ IndexedPath(Path.class),
RecentPath(Path.class),
OutputPath(Path.class),
- IndexPath(Path.class),
StatsPath(Path.class),
SyncPath(Path.class),
+ HtdocsPath(Path.class),
RelaySources(SourceType[].class),
BridgeSources(SourceType[].class),
BridgePoolAssignmentsSources(SourceType[].class),
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java b/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
index a40798e..15aa31d 100644
--- a/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
+++ b/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
@@ -1,73 +1,190 @@
-/* Copyright 2015--2018 The Tor Project
+/* Copyright 2015--2019 The Tor Project
* See LICENSE for licensing information */
package org.torproject.metrics.collector.indexer;
-import org.torproject.descriptor.index.DirectoryNode;
-import org.torproject.descriptor.index.FileNode;
-import org.torproject.descriptor.index.IndexNode;
-import org.torproject.descriptor.internal.FileType;
import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.PropertyAccessor;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.PropertyNamingStrategy;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
+import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileOutputStream;
+import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStreamWriter;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Locale;
+import java.io.OutputStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.TemporalAmount;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
import java.util.Properties;
+import java.util.Set;
+import java.util.SortedMap;
import java.util.SortedSet;
-import java.util.TimeZone;
+import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
-/* Create a fresh index.json containing all directories and files in the
- * archive/ and recent/ directories.
+/**
+ * Create an index file called {@code index.json} containing metadata of all
+ * files in the {@code indexed/} directory and update the {@code htdocs/}
+ * directory to contain all files to be served via the web server.
*
- * Note that if this ever takes longer than a few seconds, we'll have to
- * cache index parts of directories or files that haven't changed.
- * Example: if we parse include cryptographic hashes or @type information,
- * we'll likely have to do that. */
+ * <p>File metadata includes:</p>
+ * <ul>
+ * <li>Path for downloading this file from the web server.</li>
+ * <li>Size of the file in bytes.</li>
+ * <li>Timestamp when the file was last modified.</li>
+ * <li>Descriptor types as found in {@code @type} annotations of contained
+ * descriptors.</li>
+ * <li>Earliest and latest publication timestamp of contained
+ * descriptors.</li>
+ * <li>SHA-256 digest of the file.</li>
+ * </ul>
+ *
+ * <p>This class maintains its own working directory {@code htdocs/} with
+ * subdirectories like {@code htdocs/archive/} or {@code htdocs/recent/} and
+ * another subdirectory {@code htdocs/index/}. The first two subdirectories
+ * contain (hard) links created and deleted by this class, the third
+ * subdirectory contains the {@code index.json} file in uncompressed and
+ * compressed forms.</p>
+ *
+ * <p>The main reason for having the {@code htdocs/} directory is that indexing
+ * a large descriptor file can be time consuming. New or updated files in
+ * {@code indexed/} first need to be indexed before their metadata can be
+ * included in {@code index.json}. Another reason is that files removed from
+ * {@code indexed/} shall still be available for download for a limited period
+ * of time after disappearing from {@code index.json}.</p>
+ *
+ * <p>The reason for creating (hard) links in {@code htdocs/}, rather than
+ * copies, is that links do not consume additional disk space. All directories
+ * must be located on the same file system. Storing symbolic links in
+ * {@code htdocs/} would not have worked with replaced or deleted files in the
+ * original directories. Symbolic links in original directories are allowed as
+ * long as they point to the same file system.</p>
+ *
+ * <p>This class does not write, modify, or delete any files in the
+ * {@code indexed/} directory. At the same time it does not expect any other
+ * classes to write, modify, or delete contents in the {@code htdocs/}
+ * directory.</p>
+ */
public class CreateIndexJson extends CollecTorMain {
+ /**
+ * Class logger.
+ */
private static final Logger logger =
LoggerFactory.getLogger(CreateIndexJson.class);
- private static File indexJsonFile;
+ /**
+ * Delay between finding out that a file has been deleted and deleting its
+ * link.
+ */
+ private static final TemporalAmount deletionDelay = Duration.ofHours(2L);
- private static String basePath;
+ /**
+ * Index tarballs with no more than this many threads at a time.
+ */
+ private static final int tarballIndexerThreads = 3;
- private static File[] indexedDirectories;
+ /**
+ * Index flat files with no more than this many threads at a time.
+ */
+ private static final int flatFileIndexerThreads = 3;
- private static final String dateTimePattern = "yyyy-MM-dd HH:mm";
+ /**
+ * Parser and formatter for all timestamps found in {@code index.json}.
+ */
+ private static DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+ .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
- private static final Locale dateTimeLocale = Locale.US;
+ /**
+ * Object mapper for parsing and formatting {@code index.json} files.
+ */
+ private static ObjectMapper objectMapper = new ObjectMapper()
+ .setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE)
+ .setSerializationInclusion(JsonInclude.Include.NON_EMPTY)
+ .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
+ .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
- private static final TimeZone dateTimezone = TimeZone.getTimeZone("UTC");
+ /**
+ * Path to the {@code indexed/} directory.
+ */
+ private Path indexedPath;
- private static String buildRevision = null;
+ /**
+ * Path to the {@code htdocs/} directory.
+ */
+ private Path htdocsPath;
- /** Creates indexes of directories containing archived and recent
- * descriptors and write index files to disk. */
- public CreateIndexJson(Configuration conf) {
- super(conf);
- Properties buildProperties = new Properties();
- try (InputStream is = getClass().getClassLoader()
- .getResourceAsStream("collector.buildrevision.properties")) {
- buildProperties.load(is);
- buildRevision = buildProperties.getProperty("collector.build.revision",
- null);
- } catch (Exception ex) {
- // This doesn't hamper the index creation: only log a warning.
- logger.warn("No build revision available.", ex);
- buildRevision = null;
- }
+ /**
+ * Path to the uncompressed {@code index.json} file.
+ */
+ private Path indexJsonPath;
+
+ /**
+ * Base URL of all resources included in {@code index.json}.
+ */
+ private String basePathString;
+
+ /**
+ * Git revision of this software to be included in {@code index.json} or
+ * omitted if unknown.
+ */
+ private String buildRevisionString;
+
+ /**
+ * Index containing metadata of files in {@code indexed/}, including new or
+ * updated files that still need to be indexed and deleted files that are
+ * still linked in {@code htdocs/}.
+ *
+ * <p>This map is initialized by reading the last known {@code index.json}
+ * file and remains available in memory between executions until shutdown.</p>
+ */
+ private SortedMap<Path, FileNode> index;
+
+ /**
+ * Executor for indexing tarballs.
+ */
+ private ExecutorService tarballsExecutor
+ = Executors.newFixedThreadPool(tarballIndexerThreads);
+
+ /**
+ * Executor for indexing flat files (non-tarballs).
+ */
+ private ExecutorService flatFilesExecutor
+ = Executors.newFixedThreadPool(flatFileIndexerThreads);
+
+ /**
+ * Initialize this class with the given {@code configuration}.
+ *
+ * @param configuration Configuration values.
+ */
+ public CreateIndexJson(Configuration configuration) {
+ super(configuration);
}
@Override
@@ -80,96 +197,433 @@ public class CreateIndexJson extends CollecTorMain {
return "IndexJson";
}
+
+ /**
+ * Run the indexer by (1) adding new files from {@code indexed/} to the index,
+ * (2) adding old files from {@code htdocs/} for which only links exist to the
+ * index, (3) scheduling new tasks and updating links in {@code htdocs/} to
+ * reflect what's contained in the in-memory index, and (4) writing new
+ * uncompressed and compressed {@code index.json} files to disk.
+ */
@Override
- protected void startProcessing() {
+ public void startProcessing() {
+ this.startProcessing(Instant.now());
+ }
+
+ /**
+ * Helper method to {@link #startProcessing()} that accepts the current
+ * execution time and which is used by tests.
+ *
+ * @param now Current execution time.
+ */
+ protected void startProcessing(Instant now) {
+ try {
+ this.basePathString = this.config.getProperty(Key.InstanceBaseUrl.name());
+ this.indexedPath = config.getPath(Key.IndexedPath);
+ this.htdocsPath = config.getPath(Key.HtdocsPath);
+ } catch (ConfigurationException e) {
+ logger.error("Unable to read one or more configuration values. Not "
+ + "indexing in this execution.", e);
+ }
+ this.buildRevisionString = this.obtainBuildRevision();
+ this.indexJsonPath = this.htdocsPath
+ .resolve(Paths.get("index", "index.json"));
try {
- indexJsonFile = new File(config.getPath(Key.IndexPath).toFile(),
- "index.json");
- basePath = config.getProperty(Key.InstanceBaseUrl.name());
- indexedDirectories = new File[] {
- config.getPath(Key.ArchivePath).toFile(),
- config.getPath(Key.ContribPath).toFile(),
- config.getPath(Key.RecentPath).toFile() };
- writeIndex(indexDirectories());
- } catch (Exception e) {
- logger.error("Cannot run index creation: {}", e.getMessage(), e);
- throw new RuntimeException(e);
+ this.prepareHtdocsDirectory();
+ if (null == this.index) {
+ logger.info("Reading index.json file from last execution.");
+ this.index = this.readIndex();
+ }
+ logger.info("Going through indexed/ and adding new files to the index.");
+ this.addNewFilesToIndex(this.indexedPath);
+ logger.info("Going through htdocs/ and adding links to deleted files to "
+ + "the index.");
+ this.addOldLinksToIndex();
+ logger.info("Going through the index, scheduling tasks, and updating "
+ + "links.");
+ this.scheduleTasksAndUpdateLinks(now);
+ logger.info("Writing uncompressed and compressed index.json files to "
+ + "disk.");
+ this.writeIndex(this.index, now);
+ logger.info("Pausing until next index update run.");
+ } catch (IOException e) {
+ logger.error("I/O error while updating index.json files. Trying again in "
+ + "the next execution.", e);
}
}
- private static DateFormat dateTimeFormat;
-
- static {
- dateTimeFormat = new SimpleDateFormat(dateTimePattern,
- dateTimeLocale);
- dateTimeFormat.setLenient(false);
- dateTimeFormat.setTimeZone(dateTimezone);
+ /**
+ * Prepare the {@code htdocs/} directory by checking whether all required
+ * subdirectories exist and by creating them if not.
+ *
+ * @throws IOException Thrown if one or more directories could not be created.
+ */
+ private void prepareHtdocsDirectory() throws IOException {
+ for (Path requiredPath : new Path[] {
+ this.htdocsPath,
+ this.indexJsonPath.getParent() }) {
+ if (!Files.exists(requiredPath)) {
+ Files.createDirectories(requiredPath);
+ }
+ }
}
- private IndexNode indexDirectories() {
- SortedSet<DirectoryNode> directoryNodes = new TreeSet<>();
- logger.trace("indexing: {} {}", indexedDirectories[0],
- indexedDirectories[1]);
- for (File directory : indexedDirectories) {
- if (directory.exists() && directory.isDirectory()) {
- DirectoryNode dn = indexDirectory(directory);
- if (null != dn) {
- directoryNodes.add(dn);
+ /**
+ * Read the {@code index.json} file written by the previous execution and
+ * populate our index with its contents, or leave the index empty if this is
+ * the first execution and that file does not yet exist.
+ *
+ * @return Index read from disk, or empty map if {@code index.json} does not
+ * exist.
+ */
+ private SortedMap<Path, FileNode> readIndex() throws IOException {
+ SortedMap<Path, FileNode> index = new TreeMap<>();
+ if (Files.exists(this.indexJsonPath)) {
+ IndexNode indexNode = objectMapper.readValue(
+ Files.newInputStream(this.indexJsonPath), IndexNode.class);
+ SortedMap<Path, DirectoryNode> directoryNodes = new TreeMap<>();
+ directoryNodes.put(Paths.get(""), indexNode);
+ while (!directoryNodes.isEmpty()) {
+ Path directoryPath = directoryNodes.firstKey();
+ DirectoryNode directoryNode = directoryNodes.remove(directoryPath);
+ if (null != directoryNode.files) {
+ for (FileNode fileNode : directoryNode.files) {
+ Path filePath = this.indexedPath.resolve(directoryPath)
+ .resolve(Paths.get(fileNode.path));
+ index.put(filePath, fileNode);
+ }
}
+ if (null != directoryNode.directories) {
+ boolean isRootDirectory = directoryNode == indexNode;
+ for (DirectoryNode subdirectoryNode : directoryNode.directories) {
+ Path subdirectoryPath = isRootDirectory
+ ? Paths.get(subdirectoryNode.path)
+ : directoryPath.resolve(Paths.get(subdirectoryNode.path));
+ directoryNodes.put(subdirectoryPath, subdirectoryNode);
+ }
+ }
+ }
+ }
+ return index;
+ }
+
+ /**
+ * Obtain and return the build revision string that was generated during the
+ * build process with {@code git rev-parse --short HEAD} and written to
+ * {@code collector.buildrevision.properties}, or return {@code null} if the
+ * build revision string cannot be obtained.
+ *
+ * @return Build revision string.
+ */
+ protected String obtainBuildRevision() {
+ String buildRevision = null;
+ Properties buildProperties = new Properties();
+ String propertiesFile = "collector.buildrevision.properties";
+ try (InputStream is = getClass().getClassLoader()
+ .getResourceAsStream(propertiesFile)) {
+ if (null == is) {
+ logger.warn("File {}, which is supposed to contain the build revision "
+ + "string, does not exist in our class path. Writing index.json "
+ + "without the \"build_revision\" field.", propertiesFile);
+ return null;
}
+ buildProperties.load(is);
+ buildRevision = buildProperties.getProperty(
+ "collector.build.revision", null);
+ } catch (IOException e) {
+ logger.warn("I/O error while trying to obtain build revision string. "
+ + "Writing index.json without the \"build_revision\" field.");
}
- return new IndexNode(dateTimeFormat.format(
- System.currentTimeMillis()), buildRevision, basePath, null,
- directoryNodes);
- }
-
- private DirectoryNode indexDirectory(File directory) {
- SortedSet<FileNode> fileNodes = new TreeSet<>();
- SortedSet<DirectoryNode> directoryNodes = new TreeSet<>();
- logger.trace("indexing: {}", directory);
- File[] fileList = directory.listFiles();
- if (null == fileList) {
- logger.warn("Indexing dubious directory: {}", directory);
- return null;
+ return buildRevision;
+ }
+
+ /**
+ * Walk the given file tree and add all previously unknown files to the
+ * in-memory index (except for files starting with "." or ending with ".tmp").
+ *
+ * @param path File tree to walk.
+ */
+ private void addNewFilesToIndex(Path path) throws IOException {
+ if (!Files.exists(path)) {
+ return;
}
- for (File fileOrDirectory : fileList) {
- if (fileOrDirectory.getName().startsWith(".")
- || fileOrDirectory.getName().endsWith(".tmp")) {
- continue;
+ Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path filePath,
+ BasicFileAttributes basicFileAttributes) {
+ if (!filePath.toString().startsWith(".")
+ && !filePath.toString().endsWith(".tmp")) {
+ index.putIfAbsent(filePath, new FileNode());
+ }
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ }
+
+ /**
+ * Walk the file tree of the {@code htdocs/} directory and add all previously
+ * unknown links to the in-memory index to ensure their deletion when they're
+ * known to be deleted from their original directories.
+ */
+ private void addOldLinksToIndex() throws IOException {
+ Path htdocsIndexPath = this.indexJsonPath.getParent();
+ Files.walkFileTree(this.htdocsPath, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path linkPath,
+ BasicFileAttributes basicFileAttributes) {
+ if (!linkPath.startsWith(htdocsIndexPath)) {
+ Path filePath = indexedPath.resolve(htdocsPath.relativize(linkPath));
+ index.putIfAbsent(filePath, new FileNode());
+ }
+ return FileVisitResult.CONTINUE;
}
- if (fileOrDirectory.isFile()) {
- fileNodes.add(indexFile(fileOrDirectory));
+ });
+ }
+
+ /**
+ * Go through the index, schedule tasks to index files, and update links.
+ *
+ * @throws IOException Thrown if an I/O exception occurs while creating or
+ * deleting links.
+ */
+ private void scheduleTasksAndUpdateLinks(Instant now) throws IOException {
+ int queuedIndexerTasks = 0;
+ Map<Path, FileNode> indexingResults = new HashMap<>();
+ SortedSet<Path> filesToIndex = new TreeSet<>();
+ Map<Path, Path> linksToCreate = new HashMap<>();
+ Set<FileNode> linksToMarkForDeletion = new HashSet<>();
+ Map<Path, Path> linksToDelete = new HashMap<>();
+ for (Map.Entry<Path, FileNode> e : this.index.entrySet()) {
+ Path filePath = e.getKey();
+ Path linkPath = this.htdocsPath
+ .resolve(this.indexedPath.relativize(filePath));
+ FileNode fileNode = e.getValue();
+ if (Files.exists(filePath)) {
+ if (null != fileNode.indexerResult) {
+ if (!fileNode.indexerResult.isDone()) {
+ /* This file is currently being indexed, so we should just skip it
+ * and wait until the indexer is done. */
+ queuedIndexerTasks++;
+ continue;
+ }
+ try {
+ /* Indexing is done, obtain index results. */
+ fileNode = fileNode.indexerResult.get();
+ indexingResults.put(filePath, fileNode);
+ } catch (InterruptedException | ExecutionException ex) {
+ /* Clear index result, so that we can give this file another try
+ * next time. */
+ fileNode.indexerResult = null;
+ }
+ }
+ String originalLastModified = dateTimeFormatter
+ .format(Files.getLastModifiedTime(filePath).toInstant());
+ if (!originalLastModified.equals(fileNode.lastModified)) {
+ /* We either don't have any index results for this file, or we only
+ * have index results for an older version of this file. */
+ filesToIndex.add(filePath);
+ } else if (!Files.exists(linkPath)) {
+ /* We do have index results, but we don't have a link yet, so we're
+ * going to create a link. */
+ linksToCreate.put(linkPath, filePath);
+ } else {
+ String linkLastModified = dateTimeFormatter
+ .format(Files.getLastModifiedTime(linkPath).toInstant());
+ if (!linkLastModified.equals(fileNode.lastModified)) {
+ /* We do have index results plus a link to an older version of this
+ * file, so we'll have to update the link. */
+ linksToCreate.put(linkPath, filePath);
+ }
+ }
} else {
- DirectoryNode dn = indexDirectory(fileOrDirectory);
- if (null != dn) {
- directoryNodes.add(dn);
+ if (null == fileNode.markedForDeletion) {
+ /* We're noticing just now that the file doesn't exist anymore, so
+ * we're going to mark it for deletion but not delete the link right
+ * away. */
+ linksToMarkForDeletion.add(fileNode);
+ } else if (fileNode.markedForDeletion
+ .isBefore(now.minus(deletionDelay))) {
+ /* The file doesn't exist anymore, and we found out long enough ago,
+ * so we can now go ahead and delete the link. */
+ linksToDelete.put(linkPath, filePath);
+ }
+ }
+ }
+ if (queuedIndexerTasks > 0) {
+ logger.info("Counting {} file(s) being currently indexed or in the "
+ + "queue.", queuedIndexerTasks);
+ }
+ this.updateIndex(indexingResults);
+ this.scheduleTasks(filesToIndex);
+ this.createLinks(linksToCreate);
+ this.markForDeletion(linksToMarkForDeletion, now);
+ this.deleteLinks(linksToDelete);
+ }
+
+ /**
+ * Update index with index results.
+ */
+ private void updateIndex(Map<Path, FileNode> indexResults) {
+ if (!indexResults.isEmpty()) {
+ logger.info("Updating {} index entries with index results.",
+ indexResults.size());
+ this.index.putAll(indexResults);
+ }
+ }
+
+ /**
+ * Schedule indexing the given set of descriptor files, using different queues
+ * for tarballs and flat files.
+ *
+ * @param filesToIndex Paths to descriptor files to index.
+ */
+ private void scheduleTasks(SortedSet<Path> filesToIndex) {
+ if (!filesToIndex.isEmpty()) {
+ logger.info("Scheduling {} indexer task(s).", filesToIndex.size());
+ for (Path fileToIndex : filesToIndex) {
+ IndexerTask indexerTask = this.createIndexerTask(fileToIndex);
+ if (fileToIndex.getFileName().toString().endsWith(".tar.xz")) {
+ this.index.get(fileToIndex).indexerResult
+ = this.tarballsExecutor.submit(indexerTask);
+ } else {
+ this.index.get(fileToIndex).indexerResult
+ = this.flatFilesExecutor.submit(indexerTask);
}
}
}
- return new DirectoryNode(
- directory.getName(), fileNodes.isEmpty() ? null : fileNodes,
- directoryNodes.isEmpty() ? null : directoryNodes);
- }
-
- private FileNode indexFile(File file) {
- return new FileNode(file.getName(), file.length(),
- dateTimeFormat.format(file.lastModified()));
- }
-
- private void writeIndex(IndexNode indexNode) throws Exception {
- indexJsonFile.getParentFile().mkdirs();
- String indexNodeString = IndexNode.makeJsonString(indexNode);
- for (String filename : new String[] {indexJsonFile.toString(),
- indexJsonFile + ".gz", indexJsonFile + ".xz", indexJsonFile + ".bz2"}) {
- FileType type = FileType.valueOf(
- filename.substring(filename.lastIndexOf(".") + 1).toUpperCase());
- try (BufferedWriter bufferedWriter
- = new BufferedWriter(new OutputStreamWriter(type.outputStream(
- new FileOutputStream(filename))))) {
- bufferedWriter.write(indexNodeString);
+ }
+
+ /**
+ * Create an indexer task for indexing the given file.
+ *
+ * <p>The reason why this is a separate method is that it can be overridden by
+ * tests that don't actually want to index files but instead provide their own
+ * index results.</p>
+ *
+ * @param fileToIndex File to index.
+ * @return Indexer task.
+ */
+ protected IndexerTask createIndexerTask(Path fileToIndex) {
+ return new IndexerTask(fileToIndex);
+ }
+
+ /**
+ * Create links in {@code htdocs/}, including all necessary parent
+ * directories.
+ *
+ * @param linksToCreate Map of links to be created with keys being link paths
+ * and values being original file paths.
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void createLinks(Map<Path, Path> linksToCreate) throws IOException {
+ if (!linksToCreate.isEmpty()) {
+ logger.info("Creating {} new link(s).", linksToCreate.size());
+ for (Map.Entry<Path, Path> e : linksToCreate.entrySet()) {
+ Path linkPath = e.getKey();
+ Path originalPath = e.getValue();
+ Files.createDirectories(linkPath.getParent());
+ Files.deleteIfExists(linkPath);
+ Files.createLink(linkPath, originalPath);
+ }
+ }
+ }
+
+ /**
+ * Mark the given links for deletion in the in-memory index.
+ *
+ * @param linksToMarkForDeletion Files to be marked for deletion.
+ */
+ private void markForDeletion(Set<FileNode> linksToMarkForDeletion,
+ Instant now) {
+ if (!linksToMarkForDeletion.isEmpty()) {
+ logger.info("Marking {} old link(s) for deletion.",
+ linksToMarkForDeletion.size());
+ for (FileNode fileNode : linksToMarkForDeletion) {
+ fileNode.markedForDeletion = now;
+ }
+ }
+ }
+
+ /**
+ * Delete the given links from {@code htdocs/}.
+ *
+ * @param linksToDelete Map of links to be deleted with keys being link paths
+ * and values being original file paths.
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void deleteLinks(Map<Path, Path> linksToDelete) throws IOException {
+ if (!linksToDelete.isEmpty()) {
+ logger.info("Deleting {} old link(s).", linksToDelete.size());
+ for (Map.Entry<Path, Path> e : linksToDelete.entrySet()) {
+ Path linkPath = e.getKey();
+ Path originalPath = e.getValue();
+ Files.deleteIfExists(linkPath);
+ index.remove(originalPath);
}
}
}
+ /**
+ * Write the in-memory index to {@code index.json} and its compressed
+ * variants, but exclude files that have not yet been indexed or that are
+ * marked for deletion.
+ *
+ * @throws IOException Thrown if an I/O error occurs while writing files.
+ */
+ private void writeIndex(SortedMap<Path, FileNode> index,
+ Instant now) throws IOException {
+ IndexNode indexNode = new IndexNode();
+ indexNode.indexCreated = dateTimeFormatter.format(now);
+ indexNode.buildRevision = this.buildRevisionString;
+ indexNode.path = this.basePathString;
+ SortedMap<Path, DirectoryNode> directoryNodes = new TreeMap<>();
+ for (Map.Entry<Path, FileNode> indexEntry : index.entrySet()) {
+ Path filePath = this.indexedPath.relativize(indexEntry.getKey());
+ FileNode fileNode = indexEntry.getValue();
+ if (null == fileNode.lastModified || null != fileNode.markedForDeletion) {
+ /* Skip unindexed or deleted files. */
+ continue;
+ }
+ Path directoryPath = null;
+ DirectoryNode parentDirectoryNode = indexNode;
+ if (null != filePath.getParent()) {
+ for (Path pathPart : filePath.getParent()) {
+ directoryPath = null == directoryPath ? pathPart
+ : directoryPath.resolve(pathPart);
+ DirectoryNode directoryNode = directoryNodes.get(directoryPath);
+ if (null == directoryNode) {
+ directoryNode = new DirectoryNode();
+ directoryNode.path = pathPart.toString();
+ if (null == parentDirectoryNode.directories) {
+ parentDirectoryNode.directories = new ArrayList<>();
+ }
+ parentDirectoryNode.directories.add(directoryNode);
+ directoryNodes.put(directoryPath, directoryNode);
+ }
+ parentDirectoryNode = directoryNode;
+ }
+ }
+ if (null == parentDirectoryNode.files) {
+ parentDirectoryNode.files = new ArrayList<>();
+ }
+ parentDirectoryNode.files.add(fileNode);
+ }
+ Path htdocsIndexPath = this.indexJsonPath.getParent();
+ try (OutputStream uncompressed
+ = Files.newOutputStream(htdocsIndexPath.resolve(".index.json.tmp"));
+ OutputStream bz2Compressed = new BZip2CompressorOutputStream(
+ Files.newOutputStream(htdocsIndexPath.resolve("index.json.bz2")));
+ OutputStream gzCompressed = new GzipCompressorOutputStream(
+ Files.newOutputStream(htdocsIndexPath.resolve("index.json.gz")));
+ OutputStream xzCompressed = new XZCompressorOutputStream(
+ Files.newOutputStream(htdocsIndexPath.resolve("index.json.xz")))) {
+ objectMapper.writeValue(uncompressed, indexNode);
+ objectMapper.writeValue(bz2Compressed, indexNode);
+ objectMapper.writeValue(gzCompressed, indexNode);
+ objectMapper.writeValue(xzCompressed, indexNode);
+ }
+ Files.move(htdocsIndexPath.resolve(".index.json.tmp"), this.indexJsonPath,
+ StandardCopyOption.REPLACE_EXISTING);
+ }
}
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java b/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java
new file mode 100644
index 0000000..a369d08
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java
@@ -0,0 +1,36 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+import java.util.List;
+
+/**
+ * Directory node in {@code index.json} which is discarded after reading and
+ * re-created before writing that file.
+ */
+@JsonPropertyOrder({ "path", "files", "directories" })
+class DirectoryNode {
+
+ /**
+ * Relative path of the directory.
+ */
+ @JsonProperty("path")
+ String path;
+
+ /**
+ * List of file objects of files available from this directory.
+ */
+ @JsonProperty("files")
+ List<FileNode> files;
+
+ /**
+ * List of directory objects of directories available from this directory.
+ */
+ @JsonProperty("directories")
+ List<DirectoryNode> directories;
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java b/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java
new file mode 100644
index 0000000..c007196
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java
@@ -0,0 +1,125 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+import java.time.Instant;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Future;
+
+/**
+ * File node in {@code index.json}, also used for storing volatile metadata
+ * like whether a descriptor file is currently being indexed or whether its
+ * link in {@code htdocs/} is marked for deletion.
+ */
+@JsonPropertyOrder({ "path", "size", "last_modified", "types",
+ "first_published", "last_published", "sha256" })
+class FileNode {
+
+ /**
+ * Relative path of the file.
+ */
+ @JsonProperty("path")
+ String path;
+
+ /**
+ * Size of the file in bytes.
+ */
+ @JsonProperty("size")
+ Long size;
+
+ /**
+ * Timestamp when the file was last modified using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("last_modified")
+ String lastModified;
+
+ /**
+ * Descriptor types as found in {@code @type} annotations of contained
+ * descriptors.
+ */
+ @JsonProperty("types")
+ SortedSet<String> types;
+
+ /**
+ * Earliest publication timestamp of contained descriptors using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("first_published")
+ String firstPublished;
+
+ /**
+ * Latest publication timestamp of contained descriptors using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("last_published")
+ String lastPublished;
+
+ /**
+ * SHA-256 digest of this file.
+ */
+ @JsonProperty("sha256")
+ String sha256;
+
+ /**
+ * Indexer result that will be available as soon as the indexer has completed
+ * its task.
+ */
+ @JsonIgnore
+ Future<FileNode> indexerResult;
+
+ /**
+ * Timestamp when this file was first not found anymore in {@code indexed/},
+ * used to keep the link in {@code htdocs/} around for another 2 hours before
+ * deleting it, too.
+ *
+ * <p>This field is ignored when writing {@code index.json}, because it's an
+ * internal detail that nobody else cares about. The effect is that links
+ * might be around for longer than 2 hours in case of a restart, which seems
+ * acceptable.</p>
+ */
+ @JsonIgnore
+ Instant markedForDeletion;
+
+ /**
+ * Create and return a {@link FileNode} instance with the given values.
+ *
+ * @param path Relative path of the file.
+ * @param size Size of the file in bytes.
+ * @param lastModified Timestamp when the file was last modified using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ * @param types Descriptor types as found in {@code @type} annotations of
+ * contained descriptors.
+ * @param firstPublished Earliest publication timestamp of contained
+ * descriptors using pattern {@code "YYYY-MM-DD HH:MM"} in the UTC
+ * timezone.
+ * @param lastPublished Latest publication timestamp of contained descriptors
+ * using pattern {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ * @param sha256 SHA-256 digest of this file.
+ *
+ * @return {@link FileNode} instance with the given values.
+ */
+ static FileNode of(String path, Long size, String lastModified,
+ Iterable<String> types, String firstPublished, String lastPublished,
+ String sha256) {
+ FileNode fileNode = new FileNode();
+ fileNode.path = path;
+ fileNode.size = size;
+ fileNode.lastModified = lastModified;
+ fileNode.types = new TreeSet<>();
+ for (String type : types) {
+ fileNode.types.add(type);
+ }
+ fileNode.firstPublished = firstPublished;
+ fileNode.lastPublished = lastPublished;
+ fileNode.sha256 = sha256;
+ return fileNode;
+ }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java b/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java
new file mode 100644
index 0000000..8b7a46b
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java
@@ -0,0 +1,30 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+/**
+ * Root node in {@code index.json} containing additional information about index
+ * creation time or Git revision used for creating it.
+ */
+@JsonPropertyOrder({ "index_created", "build_revision", "path", "files",
+ "directories" })
+class IndexNode extends DirectoryNode {
+
+ /**
+ * Timestamp when this index was created using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("index_created")
+ String indexCreated;
+
+ /**
+ * Git revision of this software.
+ */
+ @JsonProperty("build_revision")
+ String buildRevision;
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java b/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java
new file mode 100644
index 0000000..03c750b
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java
@@ -0,0 +1,225 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import org.torproject.descriptor.BandwidthFile;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.BridgePoolAssignment;
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.DirectoryKeyCertificate;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.Microdescriptor;
+import org.torproject.descriptor.RelayDirectory;
+import org.torproject.descriptor.RelayNetworkStatus;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.RelayNetworkStatusVote;
+import org.torproject.descriptor.ServerDescriptor;
+import org.torproject.descriptor.SnowflakeStats;
+import org.torproject.descriptor.TorperfResult;
+import org.torproject.descriptor.UnparseableDescriptor;
+import org.torproject.descriptor.WebServerAccessLog;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Callable;
+
+/**
+ * Callable task that indexes a given descriptor file.
+ */
+class IndexerTask implements Callable<FileNode> {
+
+ /**
+ * Class logger.
+ */
+ private static final Logger logger
+ = LoggerFactory.getLogger(IndexerTask.class);
+
+ /**
+ * Formatter for all timestamps found in {@code index.json}.
+ */
+ private static DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+ .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
+
+ /**
+ * Path to the descriptor file to index.
+ */
+ private Path path;
+
+ /**
+ * Index results object, which starts out empty and gets populated as indexing
+ * proceeds.
+ */
+ private FileNode indexResult;
+
+ /**
+ * Create a new instance to parse the given descriptor file, but don't start
+ * parsing just yet.
+ *
+ * @param path Descriptor file to index.
+ */
+ IndexerTask(Path path) {
+ this.path = path;
+ }
+
+ /**
+ * Index the given file and return index results when done.
+ *
+ * @return Index results.
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ @Override
+ public FileNode call() throws IOException {
+ this.indexResult = new FileNode();
+ this.requestBasicFileAttributes();
+ this.computeFileDigest();
+ this.parseDescriptorFile();
+ return this.indexResult;
+ }
+
+ /**
+ * Request and store basic file attributes like file name, last-modified time,
+ * and size.
+ *
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void requestBasicFileAttributes() throws IOException {
+ this.indexResult.path = this.path.getFileName().toString();
+ this.indexResult.lastModified = dateTimeFormatter
+ .format(Files.getLastModifiedTime(this.path).toInstant());
+ this.indexResult.size = Files.size(this.path);
+ }
+
+ /**
+ * Compute and store the file's SHA-256 digest.
+ *
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void computeFileDigest() throws IOException {
+ try (InputStream stream = Files.newInputStream(this.path)) {
+ this.indexResult.sha256
+ = Base64.encodeBase64String(DigestUtils.sha256(stream));
+ }
+ }
+
+ /**
+ * Parse the descriptor file to extract contained descriptor types and first
+ * and last published time.
+ */
+ private void parseDescriptorFile() {
+ Long firstPublishedMillis = null;
+ Long lastPublishedMillis = null;
+ this.indexResult.types = new TreeSet<>();
+ SortedSet<String> unknownDescriptorSubclasses = new TreeSet<>();
+ for (Descriptor descriptor : DescriptorSourceFactory
+ .createDescriptorReader().readDescriptors(this.path.toFile())) {
+ if (descriptor instanceof UnparseableDescriptor) {
+ /* Skip unparseable descriptor. */
+ continue;
+ }
+ for (String annotation : descriptor.getAnnotations()) {
+ if (annotation.startsWith("@type ")) {
+ this.indexResult.types.add(annotation.substring(6));
+ }
+ }
+ Long publishedMillis;
+ if (descriptor instanceof BandwidthFile) {
+ BandwidthFile bandwidthFile = (BandwidthFile) descriptor;
+ LocalDateTime fileCreatedOrTimestamp
+ = bandwidthFile.fileCreated().isPresent()
+ ? bandwidthFile.fileCreated().get()
+ : bandwidthFile.timestamp();
+ publishedMillis = fileCreatedOrTimestamp
+ .toInstant(ZoneOffset.UTC).toEpochMilli();
+ } else if (descriptor instanceof BridgeNetworkStatus) {
+ publishedMillis = ((BridgeNetworkStatus) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof BridgePoolAssignment) {
+ publishedMillis = ((BridgePoolAssignment) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof BridgedbMetrics) {
+ publishedMillis = ((BridgedbMetrics) descriptor)
+ .bridgedbMetricsEnd().toInstant(ZoneOffset.UTC).toEpochMilli();
+ } else if (descriptor instanceof DirectoryKeyCertificate) {
+ publishedMillis = ((DirectoryKeyCertificate) descriptor)
+ .getDirKeyPublishedMillis();
+ } else if (descriptor instanceof ExitList) {
+ publishedMillis = ((ExitList) descriptor)
+ .getDownloadedMillis();
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ publishedMillis = ((ExtraInfoDescriptor) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof Microdescriptor) {
+ /* Microdescriptors don't contain useful timestamps for this purpose,
+ * but we already knew that, so there's no need to log a warning
+ * further down below. */
+ continue;
+ } else if (descriptor instanceof RelayDirectory) {
+ publishedMillis = ((RelayDirectory) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof RelayNetworkStatus) {
+ publishedMillis = ((RelayNetworkStatus) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+ publishedMillis = ((RelayNetworkStatusConsensus) descriptor)
+ .getValidAfterMillis();
+ } else if (descriptor instanceof RelayNetworkStatusVote) {
+ publishedMillis = ((RelayNetworkStatusVote) descriptor)
+ .getValidAfterMillis();
+ } else if (descriptor instanceof ServerDescriptor) {
+ publishedMillis = ((ServerDescriptor) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof SnowflakeStats) {
+ publishedMillis = ((SnowflakeStats) descriptor)
+ .snowflakeStatsEnd().toInstant(ZoneOffset.UTC).toEpochMilli();
+ } else if (descriptor instanceof TorperfResult) {
+ publishedMillis = ((TorperfResult) descriptor)
+ .getStartMillis();
+ } else if (descriptor instanceof WebServerAccessLog) {
+ publishedMillis = ((WebServerAccessLog) descriptor)
+ .getLogDate().atStartOfDay(ZoneOffset.UTC)
+ .toInstant().toEpochMilli();
+ } else {
+ /* Skip published timestamp if descriptor type is unknown or doesn't
+ * contain such a timestamp. */
+ unknownDescriptorSubclasses.add(
+ descriptor.getClass().getSimpleName());
+ continue;
+ }
+ if (null == firstPublishedMillis
+ || publishedMillis < firstPublishedMillis) {
+ firstPublishedMillis = publishedMillis;
+ }
+ if (null == lastPublishedMillis
+ || publishedMillis > lastPublishedMillis) {
+ lastPublishedMillis = publishedMillis;
+ }
+ }
+ if (!unknownDescriptorSubclasses.isEmpty()) {
+ logger.warn("Ran into unknown/unexpected Descriptor subclass(es) in "
+ + "{}: {}. Ignoring for index.json, but maybe worth looking into.",
+ this.path, unknownDescriptorSubclasses);
+ }
+ this.indexResult.firstPublished = null == firstPublishedMillis ? null
+ : dateTimeFormatter.format(Instant.ofEpochMilli(firstPublishedMillis));
+ this.indexResult.lastPublished = null == lastPublishedMillis ? null
+ : dateTimeFormatter.format(Instant.ofEpochMilli(lastPublishedMillis));
+ }
+}
+
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index e7cadf7..65e0f99 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -76,16 +76,11 @@ BridgedbMetricsOffsetMinutes = 340
# The URL of this instance. This will be the base URL
# written to index.json, i.e. please change this to the mirrors url!
InstanceBaseUrl = https://collector.torproject.org
-# The target location for index.json and its compressed
-# versions index.json.gz, index.json.bz2, and index.json.xz
-IndexPath = index
# The top-level directory for archived descriptors.
-ArchivePath = archive
-# The top-level directory for third party data.
-ContribPath = contrib
+IndexedPath = indexed
# The top-level directory for the recent descriptors that were
# published in the last 72 hours.
-RecentPath = recent
+RecentPath = indexed/recent
# The top-level directory for the retrieved descriptors that will
# be archived.
OutputPath = out
@@ -93,6 +88,11 @@ OutputPath = out
StatsPath = stats
# Path for descriptors downloaded from other instances
SyncPath = sync
+# Directory served via an external web server and managed by us which contains
+# (hard) links to files in ArchivePath and RecentPath and which therefore must
+# be located on the same file system. Also contains index.json and its
+# compressed versions index.json.gz, index.json.bz2, and index.json.xz.
+HtdocsPath = htdocs
######## Relay descriptors ########
#
## Define descriptor sources
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index 5802020..695bb24 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -10,7 +10,7 @@
# Configuration section:
# The following path should be adjusted, if the CollecTor server layout differs.
# All paths should be given absolute.
-ARCHIVEDIR="/srv/collector.torproject.org/collector/archive"
+ARCHIVEDIR="/srv/collector.torproject.org/collector/indexed/archive"
WORKDIR="/srv/collector.torproject.org/collector/tarballs"
OUTDIR="/srv/collector.torproject.org/collector/out"
TARBALLTARGETDIR="/srv/collector.torproject.org/collector/data"
diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
index 7e9ea28..887f3ae 100644
--- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
@@ -39,7 +39,7 @@ public class ConfigurationTest {
public void testKeyCount() {
assertEquals("The number of properties keys in enum Key changed."
+ "\n This test class should be adapted.",
- 71, Key.values().length);
+ 70, Key.values().length);
}
@Test()
diff --git a/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java b/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java
new file mode 100644
index 0000000..db00032
--- /dev/null
+++ b/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java
@@ -0,0 +1,522 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.Key;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.FileTime;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Test class for {@link CreateIndexJson}.
+ */
+public class CreateIndexJsonTest {
+
+ /**
+ * Mocked indexer task that does not actually index a file but instead waits
+ * for the test class to set index results.
+ */
+ static class MockedIndexerTask extends IndexerTask {
+
+ /**
+ * Index result, to be set by the test.
+ */
+ private FileNode result;
+
+ /**
+ * Create a new mocked indexer task for the given path.
+ *
+ * @param path Path to index.
+ */
+ MockedIndexerTask(Path path) {
+ super(path);
+ }
+
+ /**
+ * Set index results.
+ *
+ * @param result Index results.
+ */
+ synchronized void setResult(FileNode result) {
+ this.result = result;
+ this.notifyAll();
+ }
+
+ /**
+ * Execute the task by waiting for the test to set index results.
+ *
+ * @return Index results provided by the test.
+ */
+ @Override
+ public FileNode call() {
+ synchronized (this) {
+ while (null == result) {
+ try {
+ wait();
+ } catch (InterruptedException e) {
+ /* Don't care about being interrupted, just keep waiting. */
+ }
+ }
+ return this.result;
+ }
+ }
+ }
+
+ /**
+ * List of mocked indexer tasks in the order of creation.
+ */
+ private List<MockedIndexerTask> indexerTasks = new ArrayList<>();
+
+ /**
+ * Testable version of the class under test.
+ */
+ class TestableCreateIndexJson extends CreateIndexJson {
+
+ /**
+ * Create a new instance with the given configuration.
+ *
+ * @param configuration Configuration for this test.
+ */
+ TestableCreateIndexJson(Configuration configuration) {
+ super(configuration);
+ }
+
+ /**
+ * Create an indexer task that doesn't actually index a file but that can
+ * be controlled by the test, and add that task to the list of tasks.
+ *
+ * @param fileToIndex File to index.
+ * @return Created (mocked) indexer task.
+ */
+ @Override
+ protected IndexerTask createIndexerTask(Path fileToIndex) {
+ MockedIndexerTask indexerTask = new MockedIndexerTask(fileToIndex);
+ indexerTasks.add(indexerTask);
+ return indexerTask;
+ }
+
+ /**
+ * Return {@code null} as build revision string to make it easier to compare
+ * written {@code index.json} files in tests.
+ *
+ * @return Always {@code null}.
+ */
+ @Override
+ protected String obtainBuildRevision() {
+ return null;
+ }
+ }
+
+ /**
+ * Temporary folder containing all files for this test.
+ */
+ @Rule
+ public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+ /**
+ * Path to recent exit list file in {@code indexed/recent/}.
+ */
+ private Path recentExitListFilePath;
+
+ /**
+ * Path to archived exit list file in {@code indexed/archive/}.
+ */
+ private Path archiveExitListFilePath;
+
+ /**
+ * Path to exit list link in {@code htdocs/recent/}.
+ */
+ private Path recentExitListLinkPath;
+
+ /**
+ * Path to {@code index.json} file in {@code htdocs/index/}.
+ */
+ private Path indexJsonPath;
+
+ /**
+ * Class under test.
+ */
+ private CreateIndexJson cij;
+
+ /**
+ * Prepares the temporary folder and configuration for this test.
+ *
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ @Before
+ public void prepareDirectoriesAndConfiguration() throws IOException {
+ Path indexedPath = this.temporaryFolder.newFolder("indexed").toPath();
+ this.recentExitListFilePath = indexedPath.resolve(
+ Paths.get("recent", "exit-lists", "2016-09-20-13-02-00"));
+ this.archiveExitListFilePath = indexedPath.resolve(
+ Paths.get("archive", "exit-lists", "exit-list-2016-09.tar.xz"));
+ Path htdocsPath = this.temporaryFolder.newFolder("htdocs").toPath();
+ this.recentExitListLinkPath = htdocsPath.resolve(
+ Paths.get("recent", "exit-lists", "2016-09-20-13-02-00"));
+ this.indexJsonPath = htdocsPath.resolve(
+ Paths.get("index", "index.json"));
+ Configuration configuration = new Configuration();
+ configuration.setProperty(Key.IndexedPath.name(),
+ indexedPath.toAbsolutePath().toString());
+ configuration.setProperty(Key.HtdocsPath.name(),
+ htdocsPath.toAbsolutePath().toString());
+ configuration.setProperty(Key.InstanceBaseUrl.name(),
+ "https://collector.torproject.org");
+ this.cij = new TestableCreateIndexJson(configuration);
+ }
+
+ /**
+ * First execution time.
+ */
+ private static final Instant firstExecution
+ = Instant.parse("2016-09-20T13:04:00Z");
+
+ /**
+ * Second execution time, two minutes after the first execution time, which is
+ * the default rate for executing this module.
+ */
+ private static final Instant secondExecution
+ = Instant.parse("2016-09-20T13:06:00Z");
+
+ /**
+ * Third execution, three hours later than the second execution time, to see
+ * if links to files that have been marked for deletion are actually deleted.
+ */
+ private static final Instant thirdExecution
+ = Instant.parse("2016-09-20T16:06:00Z");
+
+ /**
+ * Index result from indexing recent exit list.
+ */
+ private FileNode recentExitListFileNode = FileNode.of(
+ "2016-09-20-13-02-00", 177_090L, "2016-09-20 13:02",
+ Collections.singletonList("tordnsel 1.0"), "2016-09-20 13:02",
+ "2016-09-20 13:02", "4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw=");
+
+ /**
+ * Index result from indexing archived exit list.
+ */
+ private FileNode archiveExitListFileNode = FileNode.of(
+ "exit-list-2016-09.tar.xz", 1_008_748L, "2016-10-04 03:31",
+ Collections.singletonList("tordnsel 1.0"), "2016-09-01 00:02",
+ "2016-09-30 23:02", "P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=");
+
+ /**
+ * Index result from indexing <i>updated</i> archived exit list.
+ */
+ private FileNode updatedArchiveExitListFileNode = FileNode.of(
+ "exit-list-2016-09.tar.xz", 1_008_748L, "2016-10-07 03:31",
+ Collections.singletonList("tordnsel 1.0"), "2016-09-01 00:02",
+ "2016-09-30 23:02", "P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=");
+
+ /**
+ * Finish the oldest indexer task by providing the given file node as index
+ * result.
+ *
+ * @param fileNode Index result.
+ */
+ private void finishIndexing(FileNode fileNode) {
+ assertFalse(this.indexerTasks.isEmpty());
+ this.indexerTasks.remove(0).setResult(fileNode);
+ }
+
+ /**
+ * (Almost) empty {@code index.json} file.
+ */
+ private static final String emptyIndexJsonString
+ = "{\"index_created\":\"2016-09-20 13:06\","
+ + "\"path\":\"https://collector.torproject.org\"}";
+
+ /**
+ * {@code index.json} file containing a single recent exit list.
+ */
+ private static final String recentExitListIndexJsonString
+ = "{\"index_created\":\"2016-09-20 13:06\","
+ + "\"path\":\"https://collector.torproject.org\",\"directories\":[{"
+ + "\"path\":\"recent\",\"directories\":[{"
+ + "\"path\":\"exit-lists\",\"files\":[{"
+ + "\"path\":\"2016-09-20-13-02-00\",\"size\":177090,"
+ + "\"last_modified\":\"2016-09-20 13:02\","
+ + "\"types\":[\"tordnsel 1.0\"],"
+ + "\"first_published\":\"2016-09-20 13:02\","
+ + "\"last_published\":\"2016-09-20 13:02\","
+ + "\"sha256\":\"4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw=\"}]}]}]}";
+
+ /**
+ * {@code index.json} file containing a single archived exit list with a
+ * placeholder for the last-modified time.
+ */
+ private static final String archiveExitListIndexJsonString
+ = "{\"index_created\":\"2016-09-20 13:06\","
+ + "\"path\":\"https://collector.torproject.org\",\"directories\":[{"
+ + "\"path\":\"archive\",\"directories\":[{"
+ + "\"path\":\"exit-lists\",\"files\":[{"
+ + "\"path\":\"exit-list-2016-09.tar.xz\",\"size\":1008748,"
+ + "\"last_modified\":\"%s\","
+ + "\"types\":[\"tordnsel 1.0\"],"
+ + "\"first_published\":\"2016-09-01 00:02\","
+ + "\"last_published\":\"2016-09-30 23:02\","
+ + "\"sha256\":\"P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=\"}]}]}]}";
+
+ /**
+ * Delete the given file.
+ *
+ * @param fileToDelete Path to file to delete.
+ */
+ private static void deleteFile(Path fileToDelete) {
+ try {
+ Files.delete(fileToDelete);
+ } catch (IOException e) {
+ fail(String.format("I/O error while deleting %s.", fileToDelete));
+ }
+ }
+
+ /**
+ * Create the given file.
+ *
+ * @param fileToCreate Path to file to create.
+ * @param lastModified Last-modified time of file to create.
+ */
+ private static void createFile(Path fileToCreate, Instant lastModified) {
+ try {
+ Files.createDirectories(fileToCreate.getParent());
+ Files.createFile(fileToCreate);
+ Files.setLastModifiedTime(fileToCreate, FileTime.from(lastModified));
+ } catch (IOException e) {
+ fail(String.format("I/O error while creating %s.", fileToCreate));
+ }
+ }
+
+ /**
+ * Return whether the given file exists.
+ *
+ * @param fileToCheck Path to file to check.
+ * @return Whether the file exists.
+ */
+ private boolean fileExists(Path fileToCheck) {
+ return Files.exists(fileToCheck);
+ }
+
+ /**
+ * Change last-modified time of the given file.
+ *
+ * @param fileToChange File to change.
+ * @param lastModified New last-modified time.
+ */
+ private void changeLastModified(Path fileToChange, Instant lastModified) {
+ try {
+ Files.setLastModifiedTime(fileToChange, FileTime.from(lastModified));
+ } catch (IOException e) {
+ fail(String.format("I/O error while changing last-modified time of %s.",
+ fileToChange));
+ }
+ }
+
+ /**
+ * Write the given string to the {@code index.json} file.
+ *
+ * @param indexJsonString String to write.
+ * @param formatArguments Optional format arguments.
+ */
+ private void writeIndexJson(String indexJsonString,
+ Object ... formatArguments) {
+ try {
+ Files.createDirectories(indexJsonPath.getParent());
+ Files.write(indexJsonPath,
+ String.format(indexJsonString, formatArguments).getBytes());
+ } catch (IOException e) {
+ fail("I/O error while writing index.json file.");
+ }
+ }
+
+ /**
+ * Read and return the first line from the {@code index.json} file.
+ *
+ * @return First line from the {@code index.json} file.
+ */
+ private String readIndexJson() {
+ try {
+ return Files.readAllLines(indexJsonPath).get(0);
+ } catch (IOException e) {
+ fail("I/O error while reading index.json file.");
+ return null;
+ }
+ }
+
+ /**
+ * Run the module with the given system time.
+ *
+ * @param now Time when running the module.
+ */
+ private void startProcessing(Instant now) {
+ this.cij.startProcessing(now);
+ }
+
+ /**
+ * Test whether two executions on an empty {@code indexed/} directory produce
+ * an {@code index.json} file without any files or directories.
+ */
+ @Test
+ public void testEmptyDirs() {
+ startProcessing(firstExecution);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether a new exit list in {@code indexed/recent/} gets indexed and
+ * then included in {@code index.json}.
+ */
+ @Test
+ public void testNewRecentExitList() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ startProcessing(firstExecution);
+ finishIndexing(this.recentExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(recentExitListIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether an existing exit list in {@code indexed/recent/} that is
+ * already contained in {@code index.json} gets ignored by the indexers.
+ */
+ @Test
+ public void testExistingRecentExitList() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ writeIndexJson(recentExitListIndexJsonString);
+ startProcessing(firstExecution);
+ startProcessing(secondExecution);
+ assertEquals(recentExitListIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether a deleted exit list in {@code indexed/recent/} is first
+ * removed from {@code index.json} and later deleted from
+ * {@code htdocs/recent/}.
+ */
+ @Test
+ public void testDeletedRecentExitList() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ writeIndexJson(recentExitListIndexJsonString);
+ startProcessing(firstExecution);
+ assertTrue(fileExists(recentExitListLinkPath));
+ deleteFile(recentExitListFilePath);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ fileExists(recentExitListLinkPath);
+ assertTrue(fileExists(recentExitListLinkPath));
+ startProcessing(thirdExecution);
+ assertFalse(fileExists(recentExitListLinkPath));
+ }
+
+ /**
+ * Test whether a link in {@code htdocs/recent/} for which no corresponding
+ * file in {@code indexed/recent/} exists is eventually deleted.
+ */
+ @Test
+ public void testDeletedLink() {
+ createFile(recentExitListLinkPath, Instant.parse("2016-09-20T13:02:00Z"));
+ startProcessing(firstExecution);
+ assertTrue(Files.exists(recentExitListLinkPath));
+ startProcessing(secondExecution);
+ assertTrue(Files.exists(recentExitListLinkPath));
+ startProcessing(thirdExecution);
+ assertFalse(Files.exists(recentExitListLinkPath));
+ }
+
+ /**
+ * Test whether a tarball that gets deleted while being indexed is not
+ * included in {@code index.json} even after indexing is completed.
+ */
+ @Test
+ public void testIndexingDisappearingTarball() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ startProcessing(firstExecution);
+ deleteFile(recentExitListFilePath);
+ finishIndexing(recentExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether a tarball that gets updated in {@code indexed/archive/} gets
+ * re-indexed and updated in {@code index.json}.
+ */
+ @Test
+ public void testUpdatedFile() {
+ writeIndexJson(archiveExitListIndexJsonString, "2016-10-04 03:31");
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-07T03:31:00Z"));
+ startProcessing(firstExecution);
+ finishIndexing(updatedArchiveExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(String.format(archiveExitListIndexJsonString,
+ "2016-10-07 03:31"), readIndexJson());
+ }
+
+ /**
+ * Test whether a tarball that gets updated while being indexed is included
+ * in {@code index.json} with its index results once indexing is completed.
+ */
+ @Test
+ public void testUpdateFileWhileIndexing() {
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-07T03:31:00Z"));
+ startProcessing(firstExecution);
+ changeLastModified(archiveExitListFilePath,
+ Instant.parse("2016-10-07T03:31:00Z"));
+ finishIndexing(archiveExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(String.format(archiveExitListIndexJsonString,
+ "2016-10-04 03:31"), readIndexJson());
+ }
+
+ /**
+ * Test whether a tarball that gets updated after being indexed but before
+ * being included in {@code index.json} is not being updated in
+ * {@code index.json} until the updated file is being indexed. */
+ @Test
+ public void testUpdateFileAfterIndexing() {
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-04T03:31:00Z"));
+ startProcessing(firstExecution);
+ finishIndexing(archiveExitListFileNode);
+ changeLastModified(archiveExitListFilePath,
+ Instant.parse("2016-10-07T03:31:00Z"));
+ startProcessing(secondExecution);
+ assertEquals(String.format(archiveExitListIndexJsonString,
+ "2016-10-04 03:31"), readIndexJson());
+ }
+
+ /**
+ * Test whether a long-running indexer task is being given the time to finish,
+ * rather than starting another task for the same file.
+ */
+ @Test
+ public void testLongRunningIndexerTask() {
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-04T03:31:00Z"));
+ startProcessing(firstExecution);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ finishIndexing(archiveExitListFileNode);
+ startProcessing(thirdExecution);
+ assertTrue(this.indexerTasks.isEmpty());
+ }
+}
+
diff --git a/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java b/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java
new file mode 100644
index 0000000..8e5e6f4
--- /dev/null
+++ b/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java
@@ -0,0 +1,226 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.FileTime;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Parameterized test class for {@link IndexerTask}: each parameter set names
+ * a test resource together with the index values expected for that file.
+ */
+@RunWith(Parameterized.class)
+public class IndexerTaskTest {
+
+  /** Name of the test resource, also used as file name in the test folder. */
+  @Parameterized.Parameter
+  public String path;
+
+  /** Expected file size in bytes. */
+  @Parameterized.Parameter(1)
+  public Long size;
+
+  /** Last-modified time to set on the file and to expect in the result. */
+  @Parameterized.Parameter(2)
+  public String lastModified;
+
+  /** Expected descriptor types; empty if none are expected. */
+  @Parameterized.Parameter(3)
+  public String[] types;
+
+  /** Expected first publication time, or {@code null} if none is expected. */
+  @Parameterized.Parameter(4)
+  public String firstPublished;
+
+  /** Expected last publication time, or {@code null} if none is expected. */
+  @Parameterized.Parameter(5)
+  public String lastPublished;
+
+  /** Expected SHA-256 value of the file as reported in the index result. */
+  @Parameterized.Parameter(6)
+  public String sha256;
+
+  /**
+   * Initialize test parameters.
+   *
+   * @return Test parameters, one array per test resource, in the order path,
+   *     size, last-modified time, types, first published, last published,
+   *     SHA-256.
+   */
+  @Parameterized.Parameters
+  public static Collection<Object[]> pathFilename() {
+    return Arrays.asList(new Object[][]{
+
+        {"2016-09-20-13-00-00-consensus", /* Path in src/test/resources/ */
+            1_618_103L, /* Size in bytes */
+            "2017-09-07 12:13", /* Last-modified time */
+            new String[] { "network-status-consensus-3 1.17" }, /* Types */
+            "2016-09-20 13:00", /* First published */
+            "2016-09-20 13:00", /* Last published */
+            "3mLpDZmP/NSgOgmuPDyljxh0Lup1L6FtD16266ZCGAw="}, /* SHA-256 */
+
+        {"2016-09-20-13-00-00-vote-49015F787433103580E3B66A1707A00E60F2D15B-"
+            + "60ADC6BEC262AE921A1037D54C8A3976367DBE87",
+            3_882_514L,
+            "2017-09-07 12:13",
+            new String[] { "network-status-vote-3 1.17" },
+            "2016-09-20 13:00",
+            "2016-09-20 13:00",
+            "UCnSSrvdm26dJOriFgEQNQVrBLpVKbH/fF0VPRX3TGc="},
+
+        {"2016-09-20-13-02-00",
+            177_090L,
+            "2017-01-13 16:55",
+            new String[] { "tordnsel 1.0" },
+            "2016-09-20 13:02",
+            "2016-09-20 13:02",
+            "4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw="},
+
+        {"2016-10-01-16-00-00-vote-0232AF901C31A04EE9848595AF9BB7620D4C5B2E-"
+            + "FEE63B4AB7CE5A6BDD09E9A5C4F01BD61EB7E4F1",
+            3_226_152L,
+            "2017-01-13 16:55",
+            new String[] { "network-status-vote-3 1.0" },
+            "2016-10-01 16:00",
+            "2016-10-01 16:00",
+            "bilv6zEXr0Y9f5o24RMN0lUujsJJiSQAn9LkG0XJrZE="},
+
+        {"2016-10-02-17-00-00-consensus-microdesc",
+            1_431_627L,
+            "2017-09-07 12:13",
+            new String[] { "network-status-microdesc-consensus-3 1.17" },
+            "2016-10-02 17:00",
+            "2016-10-02 17:00",
+            "rrkxuLahYENLExX99Jio587/kUz9NtOoaYyKXxvX5EA="},
+
+        {"20160920-063816-1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1",
+            339_256L,
+            "2017-09-07 12:13",
+            new String[] { "bridge-network-status 1.17" },
+            "2016-09-20 06:38",
+            "2016-09-20 06:38",
+            "sMAcyFrZ2rxj50b6iGe3icCNMC4gBSA1y9ZH4EWTa8s="},
+
+        {"bridge-2016-10-02-08-09-00-extra-infos",
+            11_561L,
+            "2017-09-07 12:13",
+            new String[] { "bridge-extra-info 1.3" },
+            "2016-10-02 06:09",
+            "2016-10-02 06:09",
+            "hat+vbyE04eH9JBQa0s6ezB6sLaStUUhvUj8CZ1aoEY="},
+
+        {"bridge-2016-10-02-16-09-00-server-descriptors",
+            5_336L,
+            "2017-01-13 16:55",
+            new String[] { "bridge-server-descriptor 1.2" },
+            "2016-10-02 14:09",
+            "2016-10-02 14:09",
+            "6CtHdo+eRFOi5xBjJcOVszC1hibC5gTB+YWvn1VmIIc="},
+
+        {"moria-1048576-2016-10-05.tpf",
+            20_405L,
+            "2017-09-07 12:13",
+            new String[0],
+            null,
+            null,
+            "DZyk6c0lQQ7OVZo1cmA+SuxPA+1thmuiooVifQPPOiA="},
+
+        {"op-nl-1048576-2017-04-11.tpf",
+            4_220L,
+            "2017-09-20 12:14",
+            new String[] { "torperf 1.1" },
+            "2017-04-11 06:24",
+            "2017-04-11 15:54",
+            "Gwex5yN3+s2PrhekjA68XmPg+UorOfx7mUa4prd7Dt8="},
+
+        {"relay-2016-10-02-08-05-00-extra-infos",
+            20_541L,
+            "2017-01-13 16:55",
+            new String[] { "extra-info 1.0" },
+            "2016-10-02 07:01",
+            "2016-10-02 07:01",
+            "3ZSO3+9ed9OwMVPx2LcVIiJfC+O30eEXEdbz64Hrp0w="},
+
+        {"relay-2016-10-02-16-05-00-server-descriptors",
+            17_404L,
+            "2017-01-13 16:55",
+            new String[] { "server-descriptor 1.0" },
+            "2016-10-02 14:58",
+            "2016-10-02 15:01",
+            "uWKHHzq4+oVNdOGh0mfkLUSjwGrBlLtEtN2DtF5qcLU="},
+
+        {"siv-1048576-2016-10-03.tpf",
+            39_193L,
+            "2017-01-13 16:55",
+            new String[] { "torperf 1.0" },
+            "2016-10-03 00:02",
+            "2016-10-03 23:32",
+            "paaFPI6BVuIDQ32aIuHYNCuKmBvFxsDvVCCwp+oM0GE="},
+
+        {"torperf-51200-2016-10-02.tpf",
+            233_763L,
+            "2017-01-13 16:55",
+            new String[] { "torperf 1.0" },
+            "2016-10-02 00:00",
+            "2016-10-02 23:55",
+            "fqeVAXamvB4yQ/8UlZAxhJx0+1Y7IfipqIpOUqQ57rE="}
+    });
+  }
+
+  /**
+   * Formatter for all timestamps found in {@code index.json}. Declared final
+   * because it is a shared constant that is never reassigned.
+   */
+  private static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+      .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
+
+  /**
+   * Temporary folder containing all files for this test.
+   */
+  @Rule
+  public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+  /**
+   * Test indexing a file: copy the test resource into a fresh temporary
+   * folder, set its last-modified time, run an {@link IndexerTask} on it and
+   * compare every field of the result with the expected parameter values.
+   *
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  @Test
+  public void testIndexFile() throws IOException {
+    Path indexedDirectory = this.temporaryFolder.newFolder().toPath();
+    Path temporaryFile = indexedDirectory.resolve(this.path);
+    try (InputStream is = getClass()
+        .getClassLoader().getResourceAsStream(this.path)) {
+      if (null == is) {
+        fail(String.format("Unable to read test resource %s.", this.path));
+        return;
+      }
+      Files.copy(is, temporaryFile);
+    }
+    /* Copying sets a fresh modification time, so overwrite it with the
+     * expected last-modified time, interpreted as UTC. */
+    Files.setLastModifiedTime(temporaryFile,
+        FileTime.from(LocalDateTime.parse(this.lastModified, dateTimeFormatter)
+        .toInstant(ZoneOffset.UTC)));
+    assertTrue(Files.exists(temporaryFile));
+    IndexerTask indexerTask = new IndexerTask(temporaryFile);
+    FileNode indexResult = indexerTask.call();
+    assertEquals(this.path, indexResult.path);
+    assertEquals(this.size, indexResult.size);
+    assertEquals(this.lastModified, indexResult.lastModified);
+    SortedSet<String> expectedTypes = new TreeSet<>(Arrays.asList(this.types));
+    assertEquals(expectedTypes, indexResult.types);
+    assertEquals(this.firstPublished, indexResult.firstPublished);
+    assertEquals(this.lastPublished, indexResult.lastPublished);
+    assertEquals(this.sha256, indexResult.sha256);
+  }
+}
+
More information about the tor-commits
mailing list