[tor-commits] [metrics-db/master] Sanitize bridge pool assignments.
karsten at torproject.org
karsten at torproject.org
Mon Mar 14 09:55:36 UTC 2011
commit 01d8d919512d8dfe3df3440e5dec0bccb2942baa
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Mar 14 10:54:59 2011 +0100
Sanitize bridge pool assignments.
---
config.template | 11 ++
.../ernie/db/BridgePoolAssignmentsProcessor.java | 128 ++++++++++++++++++++
src/org/torproject/ernie/db/Configuration.java | 23 +++-
src/org/torproject/ernie/db/Main.java | 7 +
4 files changed, 167 insertions(+), 2 deletions(-)
diff --git a/config.template b/config.template
index 0d6743e..47a8c8d 100644
--- a/config.template
+++ b/config.template
@@ -44,6 +44,13 @@
## Download exit list and store it to disk
#DownloadExitList 0
#
+## Process bridge pool assignment files by sanitizing bridge fingerprints
+## and sorting sanitized files into subdirectories
+#ProcessBridgePoolAssignments 0
+#
+## Relative path to directory to read bridge pool assignment files from
+#AssignmentsDirectory assignments/
+#
#### Data sinks ####
#
## Write directory archives to disk
@@ -66,4 +73,8 @@
#
## Relative path to directory to write sanitized bridges to
#SanitizedBridgesWriteDirectory sanitized-bridges/
+#
+## Relative path to directory to write sanitized bridge pool assignment
+## files to
+#SanitizedAssignmentsDirectory sanitized-assignments/
diff --git a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
new file mode 100644
index 0000000..583d36e
--- /dev/null
+++ b/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
@@ -0,0 +1,128 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db;
+
+import java.io.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+import org.apache.commons.codec.*;
+import org.apache.commons.codec.binary.*;
+import org.apache.commons.codec.digest.*;
+
+/**
+ * Reads bridge pool assignment files, replaces each bridge fingerprint
+ * with the hex-encoded SHA-1 digest of its decoded bytes, and writes one
+ * sanitized file per "bridge-pool-assignment" header into a
+ * yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss path below the output directory.
+ *
+ * All work happens in the constructor; the instance holds no state that
+ * is useful to callers afterwards.
+ */
+public class BridgePoolAssignmentsProcessor {
+
+  /** Prefix of the line that starts a new bridge pool assignment. */
+  private static final String HEADER_PREFIX = "bridge-pool-assignment ";
+
+  private final Logger logger;
+
+  /** Parses the timestamp that follows HEADER_PREFIX (UTC). */
+  private final SimpleDateFormat assignmentFormat;
+
+  /** Formats the relative output path of a sanitized file (UTC). */
+  private final SimpleDateFormat filenameFormat;
+
+  private final File sanitizedAssignmentsDirectory;
+
+  /**
+   * Sanitizes all files found (recursively) below assignmentsDirectory
+   * and writes the results below sanitizedAssignmentsDirectory.  Files
+   * that cannot be read, parsed, or written are logged and skipped.
+   *
+   * @param assignmentsDirectory file or directory to read from
+   * @param sanitizedAssignmentsDirectory directory to write to
+   * @throws IllegalArgumentException if either argument is null
+   */
+  public BridgePoolAssignmentsProcessor(File assignmentsDirectory,
+      File sanitizedAssignmentsDirectory) {
+
+    this.logger =
+        Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
+    if (assignmentsDirectory == null ||
+        sanitizedAssignmentsDirectory == null) {
+      throw new IllegalArgumentException("Neither "
+          + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
+          + "be null!");
+    }
+    this.sanitizedAssignmentsDirectory = sanitizedAssignmentsDirectory;
+
+    this.assignmentFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    this.assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    this.filenameFormat =
+        new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+    this.filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    for (File assignmentFile : findAssignmentFiles(assignmentsDirectory)) {
+      try {
+        sanitizeFile(assignmentFile);
+      } catch (IOException e) {
+        logger.log(Level.WARNING, "Could not read bridge pool assignment "
+            + "file '" + assignmentFile.getAbsolutePath()
+            + "'. Skipping.", e);
+      }
+    }
+
+    logger.info("Finished processing bridge pool assignment file(s).");
+  }
+
+  /** Collects all non-directory files at or below the given root. */
+  private List<File> findAssignmentFiles(File root) {
+    List<File> assignmentFiles = new ArrayList<File>();
+    Stack<File> files = new Stack<File>();
+    files.add(root);
+    while (!files.isEmpty()) {
+      File file = files.pop();
+      if (!file.isDirectory()) {
+        assignmentFiles.add(file);
+        continue;
+      }
+      /* listFiles() returns null if the directory cannot be listed;
+       * passing that to Arrays.asList() would throw an NPE. */
+      File[] children = file.listFiles();
+      if (children == null) {
+        logger.warning("Could not list directory '"
+            + file.getAbsolutePath() + "'. Skipping.");
+        continue;
+      }
+      files.addAll(Arrays.asList(children));
+    }
+    return assignmentFiles;
+  }
+
+  /**
+   * Reads one assignment file and writes a sanitized file for every
+   * header it contains.  Stops at the first header that cannot be
+   * parsed or written.  The reader is closed on every exit path.
+   */
+  private void sanitizeFile(File assignmentFile) throws IOException {
+    BufferedReader br = new BufferedReader(new FileReader(
+        assignmentFile));
+    try {
+      String line, bridgePoolAssignmentLine = null;
+      SortedSet<String> sanitizedAssignments = new TreeSet<String>();
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith(HEADER_PREFIX)) {
+          /* A new header terminates the previous assignment. */
+          if (bridgePoolAssignmentLine != null) {
+            if (!writeAssignment(assignmentFile,
+                bridgePoolAssignmentLine, sanitizedAssignments)) {
+              return;
+            }
+            sanitizedAssignments.clear();
+          }
+          bridgePoolAssignmentLine = line;
+        } else {
+          String sanitizedLine = sanitizeLine(line);
+          if (sanitizedLine != null) {
+            sanitizedAssignments.add(sanitizedLine);
+          }
+        }
+      }
+      /* End of file terminates the last assignment. */
+      if (bridgePoolAssignmentLine != null) {
+        writeAssignment(assignmentFile, bridgePoolAssignmentLine,
+            sanitizedAssignments);
+      }
+    } finally {
+      br.close();
+    }
+  }
+
+  /**
+   * Replaces the leading hex fingerprint of an assignment line with its
+   * lower-case SHA-1 hex digest and keeps the rest of the line.
+   *
+   * @return the sanitized line, or null if the line was not recognized
+   */
+  private String sanitizeLine(String line) {
+    String[] parts = line.split(" ");
+    if (parts.length < 2 || parts[0].length() < 40) {
+      logger.warning("Unrecognized line '" + line
+          + "'. Skipping.");
+      return null;
+    }
+    try {
+      String hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
+          parts[0].toCharArray())).toLowerCase();
+      /* Cut after the whole first token, not after a fixed 40
+       * characters, so no part of the original fingerprint token can
+       * end up in the sanitized output. */
+      return hashedFingerprint + line.substring(parts[0].length());
+    } catch (DecoderException e) {
+      logger.warning("Unable to decode hex fingerprint in line '"
+          + line + "'. Skipping.");
+      return null;
+    }
+  }
+
+  /**
+   * Writes the header line followed by the sorted sanitized assignment
+   * lines, unless the target file already exists.
+   *
+   * @return false if the header timestamp could not be parsed or the
+   *     file could not be written, in which case the remainder of the
+   *     input file should be skipped; true otherwise
+   */
+  private boolean writeAssignment(File assignmentFile,
+      String bridgePoolAssignmentLine,
+      SortedSet<String> sanitizedAssignments) {
+    long bridgePoolAssignmentTime;
+    try {
+      bridgePoolAssignmentTime = assignmentFormat.parse(
+          bridgePoolAssignmentLine.substring(
+          HEADER_PREFIX.length())).getTime();
+    } catch (ParseException e) {
+      logger.log(Level.WARNING, "Could not parse timestamp in line '"
+          + bridgePoolAssignmentLine + "'. Skipping "
+          + "bridge pool assignment file '"
+          + assignmentFile.getAbsolutePath() + "'.", e);
+      return false;
+    }
+    File sanitizedAssignmentsFile = new File(
+        sanitizedAssignmentsDirectory, filenameFormat.format(
+        new Date(bridgePoolAssignmentTime)));
+    if (sanitizedAssignmentsFile.exists()) {
+      return true;
+    }
+    try {
+      sanitizedAssignmentsFile.getParentFile().mkdirs();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          sanitizedAssignmentsFile));
+      try {
+        bw.write(bridgePoolAssignmentLine + "\n");
+        for (String assignmentLine : sanitizedAssignments) {
+          bw.write(assignmentLine + "\n");
+        }
+      } finally {
+        bw.close();
+      }
+    } catch (IOException e) {
+      logger.log(Level.WARNING, "Could not write sanitized "
+          + "bridge pool assignment file for line '"
+          + bridgePoolAssignmentLine + "' to disk. Skipping "
+          + "bridge pool assignment file '"
+          + assignmentFile.getAbsolutePath() + "'.", e);
+      return false;
+    }
+    return true;
+  }
+}
+
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index c1cdbea..ae24175 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -37,6 +37,9 @@ public class Configuration {
+ "~gettor/gettor_stats.txt";
private String getTorDirectory = "gettor/";
private boolean downloadExitList = false;
+ private boolean processBridgePoolAssignments = false;
+ private String assignmentsDirectory = "assignments/";
+ private String sanitizedAssignmentsDirectory = "sanitized-assignments/";
public Configuration() {
/* Initialize logger. */
@@ -127,6 +130,13 @@ public class Configuration {
} else if (line.startsWith("DownloadExitList")) {
this.downloadExitList = Integer.parseInt(
line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ProcessBridgePoolAssignments")) {
+ this.processBridgePoolAssignments = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("AssignmentsDirectory")) {
+ this.assignmentsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
+ this.sanitizedAssignmentsDirectory = line.split(" ")[1];
} else {
logger.severe("Configuration file contains unrecognized "
+ "configuration key in line '" + line + "'! Exiting!");
@@ -156,8 +166,8 @@ public class Configuration {
if (!this.importCachedRelayDescriptors &&
!this.importDirectoryArchives && !this.downloadRelayDescriptors &&
!this.importBridgeSnapshots && !this.downloadGetTorStats &&
- !this.downloadExitList && !this.writeDirectoryArchives &&
- !this.writeSanitizedBridges) {
+ !this.downloadExitList && !this.processBridgePoolAssignments &&
+ !this.writeDirectoryArchives && !this.writeSanitizedBridges) {
logger.warning("We have not been configured to read data from any "
+ "data source or write data to any data sink. You need to "
+ "edit your config file (" + configFile.getAbsolutePath()
@@ -246,5 +256,14 @@ public class Configuration {
public boolean getDownloadExitList() {
return this.downloadExitList;
}
+  /** Returns the value of the ProcessBridgePoolAssignments option. */
+  public boolean getProcessBridgePoolAssignments() {
+    return this.processBridgePoolAssignments;
+  }
+  /** Returns the value of the AssignmentsDirectory option. */
+  public String getAssignmentsDirectory() {
+    return this.assignmentsDirectory;
+  }
+  /** Returns the value of the SanitizedAssignmentsDirectory option. */
+  public String getSanitizedAssignmentsDirectory() {
+    return this.sanitizedAssignmentsDirectory;
+  }
}
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index 657cdfc..50c06bb 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -128,6 +128,13 @@ public class Main {
new ExitListDownloader();
}
+ // Process bridge pool assignments
+ if (config.getProcessBridgePoolAssignments()) {
+ new BridgePoolAssignmentsProcessor(
+ new File(config.getAssignmentsDirectory()),
+ new File(config.getSanitizedAssignmentsDirectory()));
+ }
+
// Remove lock file
lf.releaseLock();
More information about the tor-commits
mailing list