[tor-commits] [exonerator/master] Use metrics-lib for parsing descriptors.
karsten at torproject.org
Wed Nov 2 12:53:14 UTC 2016
commit 7b2c08bb7724614964ad5f318cc7016f558e3849
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Tue Nov 1 16:09:22 2016 +0100
Use metrics-lib for parsing descriptors.
We're using metrics-lib for downloading descriptors from CollecTor,
but we're still using our own parsing code. Let's avoid duplicating
code by using what metrics-lib provides.
---
build.xml | 2 +
.../exonerator/ExoneraTorDatabaseImporter.java | 291 +++++----------------
2 files changed, 66 insertions(+), 227 deletions(-)
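In essence, the patch swaps the hand-rolled file splitting and line-by-line parsing below for metrics-lib's DescriptorReader. A minimal sketch of that pattern (the class name and the "in" directory are made up for illustration; the metrics-lib calls are the ones the patch itself uses):

import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.DescriptorFile;
import org.torproject.descriptor.DescriptorReader;
import org.torproject.descriptor.DescriptorSourceFactory;
import org.torproject.descriptor.ExitList;
import org.torproject.descriptor.RelayNetworkStatusConsensus;

import java.io.File;
import java.util.Iterator;

public class DescriptorReaderSketch {
  public static void main(String[] args) {
    /* Let metrics-lib walk the directory and parse whatever it finds;
     * the caller only dispatches on the concrete descriptor type. */
    DescriptorReader reader =
        DescriptorSourceFactory.createDescriptorReader();
    reader.addDirectory(new File("in"));
    Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
    while (descriptorFiles.hasNext()) {
      for (Descriptor descriptor : descriptorFiles.next().getDescriptors()) {
        if (descriptor instanceof RelayNetworkStatusConsensus) {
          System.out.println("consensus, valid-after (millis): "
              + ((RelayNetworkStatusConsensus) descriptor)
              .getValidAfterMillis());
        } else if (descriptor instanceof ExitList) {
          System.out.println("exit list with "
              + ((ExitList) descriptor).getEntries().size() + " entries");
        }
      }
    }
  }
}

Keeping already-imported files out of a rerun is then a matter of passing the previous import history to setExcludedFiles() and persisting getParsedFiles()/getExcludedFiles() afterwards, which is exactly what the patch does.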
diff --git a/build.xml b/build.xml
index 42c0ee0..1b8fecd 100644
--- a/build.xml
+++ b/build.xml
@@ -24,6 +24,8 @@
<include name="logback-core-1.1.2.jar" />
<include name="logback-classic-1.1.2.jar" />
<include name="slf4j-api-1.7.7.jar"/>
+ <include name="commons-compress-1.9.jar"/>
+ <include name="xz-1.5.jar"/>
</fileset>
</path>
<path id="checkstyle.classpath" >
diff --git a/src/main/java/org/torproject/exonerator/ExoneraTorDatabaseImporter.java b/src/main/java/org/torproject/exonerator/ExoneraTorDatabaseImporter.java
index 3777908..68bc8cc 100644
--- a/src/main/java/org/torproject/exonerator/ExoneraTorDatabaseImporter.java
+++ b/src/main/java/org/torproject/exonerator/ExoneraTorDatabaseImporter.java
@@ -3,37 +3,39 @@
package org.torproject.exonerator;
+import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.DescriptorCollector;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitList.Entry;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
-import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
-import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
import java.io.File;
-import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
-import java.io.StringReader;
import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.sql.Types;
-import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
-import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import java.util.Stack;
+import java.util.SortedMap;
import java.util.TimeZone;
+import java.util.TreeMap;
/* Import Tor descriptors into the ExoneraTor database. */
public class ExoneraTorDatabaseImporter {
@@ -159,11 +161,11 @@ public class ExoneraTorDatabaseImporter {
/* Last and next parse histories containing paths of parsed files and
* last modified times. */
- private static Map<String, Long>
- lastImportHistory = new HashMap<String, Long>();
+ private static SortedMap<String, Long>
+ lastImportHistory = new TreeMap<String, Long>();
- private static Map<String, Long>
- nextImportHistory = new HashMap<String, Long>();
+ private static SortedMap<String, Long>
+ nextImportHistory = new TreeMap<String, Long>();
/* Read stats/exonerator-import-history file from disk and remember
* locally when files were last parsed. */
@@ -201,103 +203,26 @@ public class ExoneraTorDatabaseImporter {
/* Parse descriptors in the import directory and its subdirectories. */
private static void parseDescriptors() {
- File file = new File(importDirString);
- if (!file.exists()) {
- System.out.println("File or directory " + importDirString + " does "
- + "not exist. Exiting.");
- return;
- }
- Stack<File> files = new Stack<File>();
- files.add(file);
- while (!files.isEmpty()) {
- file = files.pop();
- if (file.isDirectory()) {
- for (File f : file.listFiles()) {
- files.add(f);
- }
- } else {
- parseFile(file);
- }
- }
- }
-
- /* Import a file if it wasn't imported before, and add it to the import
- * history for the next execution. */
- private static void parseFile(File file) {
- long lastModified = file.lastModified();
- String filename = file.getName();
- nextImportHistory.put(filename, lastModified);
- if (!lastImportHistory.containsKey(filename)
- || lastImportHistory.get(filename) < lastModified) {
- try {
- FileInputStream fis = new FileInputStream(file);
- BufferedInputStream bis = new BufferedInputStream(fis);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] bytes = new byte[1024];
- while ((len = bis.read(bytes, 0, 1024)) >= 0) {
- baos.write(bytes, 0, len);
- }
- bis.close();
- byte[] allBytes = baos.toByteArray();
- splitFile(file, allBytes);
- } catch (IOException e) {
- System.out.println("Could not read '" + file + "' to memory. "
- + "Skipping.");
- nextImportHistory.remove(filename);
- }
- }
- }
-
- /* Detect what descriptor type is contained in a file and split it to
- * parse the single descriptors. */
- private static void splitFile(File file, byte[] bytes) {
- try {
- String asciiString = new String(bytes, "US-ASCII");
- BufferedReader br = new BufferedReader(new StringReader(
- asciiString));
- String line = br.readLine();
- while (line != null && line.startsWith("@")) {
- line = br.readLine();
- }
- if (line == null) {
- return;
- }
- br.close();
- String startToken = null;
- if (line.equals("network-status-version 3")) {
- startToken = "network-status-version 3";
- } else if (line.startsWith("Downloaded ")
- || line.startsWith("ExitNode ")) {
- startToken = "ExitNode ";
- } else {
- System.out.println("Unknown descriptor type in file '" + file
- + "'. Ignoring.");
- return;
- }
- String splitToken = "\n" + startToken;
- int length = bytes.length;
- int start = asciiString.indexOf(startToken);
- while (start < length) {
- int end = asciiString.indexOf(splitToken, start);
- if (end < 0) {
- end = length;
- } else {
- end += 1;
- }
- byte[] descBytes = new byte[end - start];
- System.arraycopy(bytes, start, descBytes, 0, end - start);
- if (startToken.equals("network-status-version 3")) {
- parseConsensus(file, descBytes);
- } else if (startToken.equals("ExitNode ")) {
- parseExitList(file, descBytes);
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(new File(importDirString));
+ descriptorReader.setMaxDescriptorFilesInQueue(20);
+ descriptorReader.setExcludedFiles(lastImportHistory);
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof RelayNetworkStatusConsensus) {
+ parseConsensus((RelayNetworkStatusConsensus) descriptor);
+ } else if (descriptor instanceof ExitList) {
+ parseExitList((ExitList) descriptor);
}
- start = end;
}
- } catch (IOException e) {
- System.out.println("Could not parse descriptor '" + file + "'. "
- + "Skipping.");
}
+ nextImportHistory.putAll(
+ descriptorReader.getExcludedFiles());
+ nextImportHistory.putAll(descriptorReader.getParsedFiles());
}
/* Date format to parse UTC timestamps. */
@@ -309,72 +234,20 @@ public class ExoneraTorDatabaseImporter {
}
/* Parse a consensus. */
- private static void parseConsensus(File file, byte[] bytes) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(new String(
- bytes, "US-ASCII")));
- String line;
- String fingerprint = null;
- String descriptor = null;
- Set<String> orAddresses = new HashSet<String>();
- long validAfterMillis = -1L;
- StringBuilder rawStatusentryBuilder = null;
- boolean isRunning = false;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("vote-status ")
- && !line.equals("vote-status consensus")) {
- System.out.println("File '" + file + "' contains network "
- + "status *votes*, not network status *consensuses*. "
- + "Skipping.");
- return;
- } else if (line.startsWith("valid-after ")) {
- String validAfterTime = line.substring("valid-after ".length());
- try {
- validAfterMillis = parseFormat.parse(validAfterTime)
- .getTime();
- } catch (ParseException e) {
- System.out.println("Could not parse valid-after timestamp in "
- + "'" + file + "'. Skipping.");
- return;
- }
- } else if (line.startsWith("r ")
- || line.equals("directory-footer")) {
- if (isRunning) {
- byte[] rawStatusentry = rawStatusentryBuilder.toString()
- .getBytes();
- importStatusentry(validAfterMillis, fingerprint, descriptor,
- orAddresses, rawStatusentry);
- orAddresses = new HashSet<String>();
- }
- if (line.equals("directory-footer")) {
- return;
- }
- rawStatusentryBuilder = new StringBuilder(line + "\n");
- String[] parts = line.split(" ");
- if (parts.length < 9) {
- System.out.println("Could not parse r line '" + line
- + "'. Skipping.");
- return;
- }
- fingerprint = Hex.encodeHexString(Base64.decodeBase64(parts[2]
- + "=")).toLowerCase();
- descriptor = Hex.encodeHexString(Base64.decodeBase64(parts[3]
- + "=")).toLowerCase();
- orAddresses.add(parts[6]);
- } else if (line.startsWith("a ")) {
- rawStatusentryBuilder.append(line + "\n");
- orAddresses.add(line.substring("a ".length(),
- line.lastIndexOf(":")));
- } else if (line.startsWith("s ") || line.equals("s")) {
- rawStatusentryBuilder.append(line + "\n");
- isRunning = line.contains(" Running");
- } else if (rawStatusentryBuilder != null) {
- rawStatusentryBuilder.append(line + "\n");
+ private static void parseConsensus(RelayNetworkStatusConsensus consensus) {
+ for (NetworkStatusEntry entry : consensus.getStatusEntries().values()) {
+ if (entry.getFlags().contains("Running")) {
+ Set<String> orAddresses = new HashSet<String>();
+ orAddresses.add(entry.getAddress());
+ for (String orAddressAndPort : entry.getOrAddresses()) {
+ orAddresses.add(orAddressAndPort.substring(0,
+ orAddressAndPort.lastIndexOf(":")));
}
+ importStatusentry(consensus.getValidAfterMillis(),
+ entry.getFingerprint().toLowerCase(),
+ entry.getDescriptor().toLowerCase(),
+ orAddresses, entry.getStatusEntryBytes());
}
- } catch (IOException e) {
- System.out.println("Could not parse consensus. Skipping.");
- return;
}
}
@@ -453,65 +326,29 @@ public class ExoneraTorDatabaseImporter {
}
}
+ private static final byte[] IGNORED_RAW_EXITLIST_ENTRY = new byte[0];
+
/* Parse an exit list. */
- private static void parseExitList(File file, byte[] bytes) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(new String(
- bytes, "US-ASCII")));
- String fingerprint = null;
- Set<String> exitAddressLines = new HashSet<String>();
- StringBuilder rawExitlistentryBuilder = new StringBuilder();
- while (true) {
- String line = br.readLine();
- if ((line == null || line.startsWith("ExitNode "))
- && fingerprint != null) {
- for (String exitAddressLine : exitAddressLines) {
- String[] parts = exitAddressLine.split(" ");
- String exitAddress = parts[1];
- /* TODO Extend the following code for IPv6 once the exit list
- * format supports it. */
- String[] exitAddressParts = exitAddress.split("\\.");
- byte[] exitAddress24Bytes = new byte[3];
- exitAddress24Bytes[0] = (byte) Integer.parseInt(
- exitAddressParts[0]);
- exitAddress24Bytes[1] = (byte) Integer.parseInt(
- exitAddressParts[1]);
- exitAddress24Bytes[2] = (byte) Integer.parseInt(
- exitAddressParts[2]);
- String exitAddress24 = Hex.encodeHexString(
- exitAddress24Bytes);
- String scannedTime = parts[2] + " " + parts[3];
- long scannedMillis = -1L;
- try {
- scannedMillis = parseFormat.parse(scannedTime).getTime();
- } catch (ParseException e) {
- System.out.println("Could not parse timestamp in "
- + "'" + file + "'. Skipping.");
- return;
- }
- byte[] rawExitlistentry = rawExitlistentryBuilder.toString()
- .getBytes();
- importExitlistentry(fingerprint, exitAddress24, exitAddress,
- scannedMillis, rawExitlistentry);
- }
- exitAddressLines.clear();
- rawExitlistentryBuilder = new StringBuilder();
- }
- if (line == null) {
- break;
- }
- rawExitlistentryBuilder.append(line + "\n");
- if (line.startsWith("ExitNode ")) {
- fingerprint = line.substring("ExitNode ".length())
- .toLowerCase();
- } else if (line.startsWith("ExitAddress ")) {
- exitAddressLines.add(line);
- }
+ private static void parseExitList(ExitList exitList) {
+ for (Entry entry : exitList.getEntries()) {
+ for (Map.Entry<String, Long> e : entry.getExitAddresses().entrySet()) {
+ String exitAddress = e.getKey();
+ /* TODO Extend the following code for IPv6 once the exit list
+ * format supports it. */
+ String[] exitAddressParts = exitAddress.split("\\.");
+ byte[] exitAddress24Bytes = new byte[3];
+ exitAddress24Bytes[0] = (byte) Integer.parseInt(
+ exitAddressParts[0]);
+ exitAddress24Bytes[1] = (byte) Integer.parseInt(
+ exitAddressParts[1]);
+ exitAddress24Bytes[2] = (byte) Integer.parseInt(
+ exitAddressParts[2]);
+ String exitAddress24 = Hex.encodeHexString(
+ exitAddress24Bytes);
+ long scannedMillis = e.getValue();
+ importExitlistentry(entry.getFingerprint().toLowerCase(), exitAddress24,
+ exitAddress, scannedMillis, IGNORED_RAW_EXITLIST_ENTRY);
}
- br.close();
- } catch (IOException e) {
- System.out.println("Could not parse exit list. Skipping.");
- return;
}
}
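One detail the rewritten parseExitList() keeps from the old code is storing the hex-encoded /24 prefix of each IPv4 exit address alongside the full address. Reduced to a standalone sketch (the helper name and sample address are made up for illustration):

import org.apache.commons.codec.binary.Hex;

public class ExitAddressPrefixSketch {

  /* Hex-encode the first three octets of an IPv4 address; this is the
   * exitAddress24 value handed to importExitlistentry(). */
  static String exitAddress24(String exitAddress) {
    String[] octets = exitAddress.split("\\.");
    byte[] prefixBytes = new byte[3];
    for (int i = 0; i < 3; i++) {
      prefixBytes[i] = (byte) Integer.parseInt(octets[i]);
    }
    return Hex.encodeHexString(prefixBytes);
  }

  public static void main(String[] args) {
    System.out.println(exitAddress24("195.154.90.122"));  /* prints c39a5a */
  }
}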