[or-cvs] [metrics-utils/master 2/2] Check in ExoneraTor and bridge descriptor sanitizer from SVN.
karsten at torproject.org
karsten at torproject.org
Mon Sep 20 08:40:04 UTC 2010
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon, 20 Sep 2010 10:31:57 +0200
Subject: Check in ExoneraTor and bridge descriptor sanitizer from SVN.
Commit: d2c24d70f7bd92eeeeac71c48b80fe6e3b9b2fca
---
bridge-desc-sanitizer/ConvertBridgeDescs.java | 452 +++++++++++++++++++++++++
bridge-desc-sanitizer/HOWTO | 113 ++++++
bridge-desc-sanitizer/extract-bridges.sh | 8 +
exonerator/ExoneraTor.java | 404 ++++++++++++++++++++++
exonerator/HOWTO | 159 +++++++++
exonerator/LICENSE | 30 ++
exonerator/exonerator.py | 371 ++++++++++++++++++++
7 files changed, 1537 insertions(+), 0 deletions(-)
create mode 100644 bridge-desc-sanitizer/ConvertBridgeDescs.java
create mode 100644 bridge-desc-sanitizer/HOWTO
create mode 100755 bridge-desc-sanitizer/extract-bridges.sh
create mode 100644 exonerator/ExoneraTor.java
create mode 100644 exonerator/HOWTO
create mode 100644 exonerator/LICENSE
create mode 100755 exonerator/exonerator.py
diff --git a/bridge-desc-sanitizer/ConvertBridgeDescs.java b/bridge-desc-sanitizer/ConvertBridgeDescs.java
new file mode 100644
index 0000000..6a6c5bf
--- /dev/null
+++ b/bridge-desc-sanitizer/ConvertBridgeDescs.java
@@ -0,0 +1,452 @@
+import java.io.*;
+import java.util.*;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.codec.binary.*;
+
+public class ConvertBridgeDescs {
+
+ public static void main(String[] args) throws Exception {
+
+ long started = System.currentTimeMillis();
+
+ if (args.length < 5) {
+ System.err.println("Usage: java "
+ + ConvertBridgeDescs.class.getSimpleName()
+ + " <input directory> <geoip.txt file> <YYYY> <MM> "
+ + "<output directory>");
+ System.exit(1);
+ }
+ File inDir = new File(args[0]);
+ File geoipFile = new File(args[1]);
+ String year = args[2];
+ String month = args[3];
+ int yearInt = Integer.parseInt(year);
+ int monthInt = Integer.parseInt(month);
+ File outDir = new File(args[4]);
+ if (!outDir.exists()) {
+ outDir.mkdir();
+ }
+
+ SortedSet<File> statuses = new TreeSet<File>();
+ Set<File> descriptors = new HashSet<File>();
+ Set<File> extrainfos = new HashSet<File>();
+
+ System.out.print("Parsing geoip.txt file... ");
+ BufferedReader r = new BufferedReader(new FileReader(geoipFile));
+ String line0 = null;
+ SortedMap<Long, String> geoipDatabase = new TreeMap<Long, String>();
+ while ((line0 = r.readLine()) != null) {
+ if (!line0.startsWith("#"))
+ geoipDatabase.put(Long.parseLong(line0.split(",")[0]),
+ line0.substring(line0.indexOf(',') + 1));
+ }
+ System.out.println("Found " + geoipDatabase.size()
+ + " entries (expected 100,000 +- 10,000).");
+
+ System.out.println("Checking files in " + inDir.getAbsolutePath()
+ + "...");
+ Stack<File> directoriesLeftToParse = new Stack<File>();
+ directoriesLeftToParse.push(inDir);
+ String currentYearAndMonth = "from-tonga-" + year + "-" + month;
+ String previousYearAndMonth = "from-tonga-" + (monthInt == 1 ?
+ "" + (yearInt - 1) + "-12" :
+ year + "-" + (monthInt < 11 ? "0" : "") + (monthInt - 1));
+ String nextYearAndMonth = "from-tonga-" + (monthInt == 12 ?
+ "" + (yearInt + 1) + "-01" :
+ year + "-" + (monthInt < 9 ? "0" : "") + (monthInt + 1));
+ while (!directoriesLeftToParse.isEmpty()) {
+ File directoryOrFile = directoriesLeftToParse.pop();
+ String filename = directoryOrFile.getName();
+ boolean addDirectory = false;
+ if (directoryOrFile.isDirectory()) {
+ if (/* base directory */
+ filename.equals("in") ||
+ /* current month */
+ filename.startsWith(currentYearAndMonth) ||
+ /* last days of previous month */
+ (filename.startsWith(previousYearAndMonth)
+ && Integer.parseInt(filename.substring(19, 21)) > 24) ||
+ /* first days of next month */
+ (filename.startsWith(nextYearAndMonth)
+ && Integer.parseInt(filename.substring(19, 21)) < 6)) {
+ for (File fileInDir: directoryOrFile.listFiles()) {
+ directoriesLeftToParse.push(fileInDir);
+ }
+ }
+ continue;
+ }
+ if (filename.startsWith("cached-extrainfo")) {
+ extrainfos.add(directoryOrFile);
+ } else if (filename.equals("bridge-descriptors")) {
+ descriptors.add(directoryOrFile);
+ } else if (filename.equals("networkstatus-bridges")) {
+ statuses.add(directoryOrFile);
+ }
+ }
+
+ int days = ((extrainfos.size() / 2 + descriptors.size()
+ + statuses.size()) + 3 * 24) / (3 * 48);
+ System.out.println("Found " + extrainfos.size()
+ + " cached-extrainfo[.new] files, " + descriptors.size()
+ + " bridge-descriptors files, and " + statuses.size()
+ + " networkstatus-bridges files, covering approximately " + days
+ + " days.");
+
+ System.out.print("Parsing extra-info descriptors");
+ String[] hex = new String[] { "0", "1", "2", "3", "4", "5", "6", "7",
+ "8", "9", "a", "b", "c", "d", "e", "f" };
+ for (String x : hex)
+ for (String y : hex)
+ new File(outDir + File.separator + "extra-infos" + File.separator
+ + x + File.separator + y).mkdirs();
+ Set<File> writtenExtrainfos = new HashSet<File>();
+ Map<String, String> extrainfoMapping = new HashMap<String, String>();
+ int parsed = 0;
+ for (File file : extrainfos) {
+ if (parsed++ > extrainfos.size() / days) {
+ System.out.print(".");
+ parsed = 0;
+ }
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ String line = null;
+ StringBuilder original = null, scrubbed = null;
+ boolean skipSignature = false;
+ while ((line = br.readLine()) != null) {
+ if (skipSignature && !line.equals("-----END SIGNATURE-----")) {
+ continue;
+ } else if (line.startsWith("extra-info ")) {
+ original = new StringBuilder(line + "\n");
+ scrubbed = new StringBuilder("extra-info Unnamed "
+ + DigestUtils.shaHex(Hex.decodeHex(
+ line.split(" ")[2].toCharArray())).toUpperCase() + "\n");
+ } else if (line.startsWith("published ")
+ || line.startsWith("write-history ")
+ || line.startsWith("read-history ")
+ || line.startsWith("geoip-start-time ")
+ || line.startsWith("geoip-client-origins ")) {
+ original.append(line + "\n");
+ scrubbed.append(line + "\n");
+ } else if (line.startsWith("router-signature")) {
+ String originalDesc = original.toString() + line + "\n";
+ String originalHash = DigestUtils.shaHex(originalDesc);
+ String scrubbedDesc = scrubbed.toString();
+ String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
+ if (extrainfoMapping.containsKey(originalHash) &&
+ !extrainfoMapping.get(originalHash).equals(scrubbedHash)) {
+ System.out.println("We already have an extra-info mapping "
+ + "from " + originalHash + " to "
+ + extrainfoMapping.get(originalHash) + ", but we now want "
+ + "to add a mapping to " + scrubbedHash + ". Exiting");
+ System.exit(1);
+ }
+ extrainfoMapping.put(originalHash, scrubbedHash);
+ File out = new File(outDir + File.separator + "extra-infos"
+ + File.separator + scrubbedHash.charAt(0) + File.separator
+ + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
+ if (!out.exists()) {
+ BufferedWriter bw = new BufferedWriter(new FileWriter(out));
+ bw.write(scrubbedDesc);
+ bw.close();
+ writtenExtrainfos.add(out);
+ }
+ } else if (line.equals("-----BEGIN SIGNATURE-----")) {
+ skipSignature = true;
+ } else if (line.equals("-----END SIGNATURE-----")) {
+ skipSignature = false;
+ } else {
+ System.out.println("Unrecognized line '" + line + "'. Exiting");
+ System.exit(1);
+ }
+ }
+ br.close();
+ }
+ System.out.println("\nWrote " + writtenExtrainfos.size()
+ + " extra-info descriptors.");
+
+ System.out.print("Parsing server descriptors");
+ for (String x : hex)
+ for (String y : hex)
+ new File(outDir + File.separator + "descriptors" + File.separator
+ + x + File.separator + y).mkdirs();
+ Set<File> writtenDescriptors = new HashSet<File>();
+ Map<File, File> referencedExtraInfos = new HashMap<File, File>();
+ Map<String, String> descriptorMapping = new HashMap<String, String>();
+ int found = 0, notfound = 0;
+ parsed = 0;
+ String haveExtraInfo = null;
+ for (File file : descriptors) {
+ if (parsed++ > descriptors.size() / days) {
+ System.out.print(".");
+ parsed = 0;
+ }
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ String line = null, country = null;
+ StringBuilder original = null, scrubbed = null;
+ boolean skipCrypto = false, contactWritten = false;
+ while ((line = br.readLine()) != null) {
+ if (skipCrypto && !line.startsWith("-----END ")) {
+ original.append(line + "\n");
+ continue;
+ } else if (line.startsWith("router ")) {
+ original = new StringBuilder(line + "\n");
+ country = "zz";
+ String[] ipParts = line.split(" ")[2].replace('.', ' ').split(" ");
+ long ipNum = Long.parseLong(ipParts[0]) * 256L * 256L * 256L
+ + Long.parseLong(ipParts[1]) * 256L * 256L
+ + Long.parseLong(ipParts[2]) * 256L
+ + Long.parseLong(ipParts[3]);
+ long intervalStart = -1;
+ if (ipNum >= geoipDatabase.firstKey()) {
+ intervalStart = geoipDatabase.subMap(0L, ipNum).lastKey();
+ String dbContent = geoipDatabase.get(intervalStart);
+ long intervalEnd = Long.parseLong(dbContent.split(",")[0]);
+ if (ipNum <= intervalEnd)
+ country = dbContent.split(",")[1].toLowerCase();
+ }
+ scrubbed = new StringBuilder("router Unnamed 127.0.0.1 "
+ + line.split(" ")[3] + " " + line.split(" ")[4] + " "
+ + line.split(" ")[5] + "\n");
+ contactWritten = false;
+ haveExtraInfo = null;
+ } else if (line.startsWith("opt fingerprint ")) {
+ original.append(line + "\n");
+ scrubbed.append("opt fingerprint");
+ String fingerprint = DigestUtils.shaHex(Hex.decodeHex(
+ line.substring(16).replaceAll(" ", "").toCharArray())).
+ toUpperCase();
+ for (int i = 0; i < fingerprint.length() / 4; i++)
+ scrubbed.append(" " + fingerprint.substring(4 * i, 4 * (i + 1)));
+ scrubbed.append("\n");
+ } else if (line.startsWith("contact ")) {
+ original.append(line + "\n");
+ scrubbed.append("contact somebody at example dot " + country
+ + "\n");
+ contactWritten = true;
+ } else if (line.startsWith("router-signature")) {
+ String originalDesc = original.toString() + line + "\n";
+ String originalHash = DigestUtils.shaHex(originalDesc);
+ String scrubbedDesc = scrubbed.toString();
+ String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
+ if (descriptorMapping.containsKey(originalHash) &&
+ !descriptorMapping.get(originalHash).equals(scrubbedHash)) {
+ System.out.println("We already have a descriptor mapping "
+ + "from " + originalHash + " to "
+ + descriptorMapping.get(originalHash) + ", but we now "
+ + "want to add a mapping to " + scrubbedHash
+ + ". Exiting");
+ System.exit(1);
+ }
+ descriptorMapping.put(originalHash, scrubbedHash);
+ if (haveExtraInfo != null) {
+ File out = new File(outDir + File.separator + "descriptors"
+ + File.separator + scrubbedHash.charAt(0) + File.separator
+ + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
+ if (!out.exists()) {
+ BufferedWriter bw2 = new BufferedWriter(new FileWriter(out));
+ bw2.write(scrubbedDesc);
+ bw2.close();
+ writtenDescriptors.add(out);
+ String extraInfoHash = haveExtraInfo.toLowerCase();
+ File extrainfoFile = new File(outDir + File.separator
+ + "extra-infos" + File.separator
+ + extraInfoHash.charAt(0) + File.separator
+ + extraInfoHash.charAt(1) + File.separator
+ + extraInfoHash);
+ if (!extrainfoFile.exists()) {
+ System.out.println("Extra-info descriptor '"
+ + extrainfoFile + "' does not exist.");
+ System.exit(1);
+ }
+ referencedExtraInfos.put(out, extrainfoFile);
+ }
+ }
+ } else if (line.startsWith("opt extra-info-digest ")) {
+ String originalExtraInfo = line.split(" ")[2].toLowerCase();
+ if (!extrainfoMapping.containsKey(originalExtraInfo)) {
+ notfound++;
+ } else {
+ found++;
+ original.append(line + "\n");
+ haveExtraInfo = extrainfoMapping.get(originalExtraInfo).
+ toUpperCase();
+ scrubbed.append("opt extra-info-digest " + haveExtraInfo
+ + "\n");
+ }
+ } else if (line.startsWith("reject ")
+ || line.startsWith("accept ")) {
+ if (!contactWritten) {
+ scrubbed.append("contact nobody at example dot " + country
+ + "\n");
+ contactWritten = true;
+ }
+ original.append(line + "\n");
+ scrubbed.append(line + "\n");
+ } else if (line.startsWith("platform ")
+ || line.startsWith("opt protocols ")
+ || line.startsWith("published ")
+ || line.startsWith("uptime ")
+ || line.startsWith("bandwidth ")
+ || line.startsWith("uptime ")
+ || line.startsWith("opt hibernating ")
+ || line.equals("opt hidden-service-dir")
+ || line.equals("opt caches-extra-info")) {
+ original.append(line + "\n");
+ scrubbed.append(line + "\n");
+ } else if (line.startsWith("family ")) {
+ StringBuilder familyLine = new StringBuilder("family");
+ for (String s : line.substring(7).split(" ")) {
+ if (s.startsWith("$"))
+ familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
+ s.substring(1).toCharArray())).toUpperCase());
+ else
+ familyLine.append(" " + s);
+ }
+ original.append(line + "\n");
+ scrubbed.append(familyLine.toString() + "\n");
+ } else if (line.startsWith("@purpose ")) {
+ continue;
+ } else if (line.startsWith("-----BEGIN ")
+ || line.equals("onion-key") || line.equals("signing-key")) {
+ skipCrypto = true;
+ original.append(line + "\n");
+ } else if (line.startsWith("-----END ")) {
+ skipCrypto = false;
+ original.append(line + "\n");
+ } else {
+ System.out.println("Unrecognized line '" + line + "'. Exiting");
+ System.exit(1);
+ }
+ }
+ br.close();
+ }
+ System.out.println("\nWrote " + writtenDescriptors.size()
+ + " bridge descriptors. While parsing, we found that we parsed "
+ + found + " extra-info identifiers before, but are missing "
+ + notfound + ". (The number of missing identifiers should be "
+ + "significantly smaller.)");
+
+ System.out.print("Parsing network statuses");
+ Set<File> referencedDescriptors = new HashSet<File>();
+ parsed = notfound = found = 0;
+ for (File file : statuses) {
+ if (parsed++ > statuses.size() / days) {
+ System.out.print(".");
+ parsed = 0;
+ }
+ if (!file.getParent().substring(file.getParent().
+ indexOf("from-tonga-")).startsWith(currentYearAndMonth)) {
+ continue;
+ }
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ String line = null;
+ StringBuilder scrubbed = new StringBuilder();
+ boolean addSLine = false;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("r ")) {
+ String[] parts = line.split(" ");
+ String bridgeIdentity = parts[2] + "==";
+ String hexBridgeIdentity = Hex.encodeHexString(
+ Base64.decodeBase64(bridgeIdentity));
+ String hashedBridgeIdentity2 = Base64.encodeBase64String(
+ DigestUtils.sha(Base64.decodeBase64(bridgeIdentity))).
+ replace("=", "");
+ String hashedBridgeIdentity = Base64.encodeBase64String(
+ DigestUtils.sha(Base64.decodeBase64(bridgeIdentity))).
+ substring(0, 27);
+ String descIdentifier = parts[3] + "==";
+ String hexDescIdentifier = Hex.encodeHexString(
+ Base64.decodeBase64(descIdentifier));
+ if (!descriptorMapping.containsKey(hexDescIdentifier)) {
+ notfound++;
+ addSLine = false;
+ } else {
+ found++;
+ String refDesc = descriptorMapping.get(hexDescIdentifier).
+ toLowerCase();
+ File descriptorFile = new File(outDir + File.separator
+ + "descriptors" + File.separator + refDesc.charAt(0)
+ + File.separator + refDesc.charAt(1) + File.separator
+ + refDesc);
+ if (!descriptorFile.exists()) {
+ System.out.println("Descriptor file '"
+ + descriptorFile.getAbsolutePath() + "' does not exist.");
+ }
+ String replacementDescIdentifier = Base64.encodeBase64String(
+ Hex.decodeHex(descriptorMapping.get(hexDescIdentifier).
+ toCharArray())).substring(0, 27);
+ scrubbed.append("r Unnamed " + hashedBridgeIdentity
+ + " " + replacementDescIdentifier + " " + parts[4] + " "
+ + parts[5] + " 127.0.0.1 " + parts[7] + " " + parts[8]
+ + "\n");
+ addSLine = true;
+ referencedDescriptors.add(descriptorFile);
+ }
+ } else if (line.startsWith("s ")) {
+ if (addSLine) {
+ scrubbed.append(line + "\n");
+ }
+ } else {
+ System.out.println("Unknown line: " + line);
+ System.exit(1);
+ }
+ }
+ String timeString = file.getParent().substring(file.getParent().
+ indexOf("from-tonga-") + 11);
+ String[] date = timeString.substring(0, 10).split("-");
+ String time = timeString.substring(11, 17);
+ File dir = new File(outDir + File.separator + "statuses"
+ + File.separator + date[0] + File.separator + date[1]
+ + File.separator + date[2] + File.separator);
+ dir.mkdirs();
+ File out = new File(dir.getAbsolutePath() + File.separator + date[0]
+ + date[1] + date[2] + "-" + time + "-"
+ + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
+ if (!out.exists()) {
+ BufferedWriter bw3 = new BufferedWriter(new FileWriter(out));
+ bw3.write(scrubbed.toString());
+ bw3.close();
+ }
+ }
+ System.out.println("\nWhile parsing, we found that we parsed "
+ + found + " bridge descriptors before, but are missing "
+ + notfound + ". (The number of missing identifiers should be "
+ + "significantly smaller.)");
+
+ Set<File> deleteFromReferencedExtraInfos = new HashSet<File>();
+ for (File e : referencedExtraInfos.keySet()) {
+ if (!referencedDescriptors.contains(e)) {
+ deleteFromReferencedExtraInfos.add(e);
+ }
+ }
+ for (File e : deleteFromReferencedExtraInfos) {
+ referencedExtraInfos.remove(e);
+ }
+ SortedSet<File> deleteDescriptors = new TreeSet<File>();
+ for (File e : writtenDescriptors) {
+ if (!referencedDescriptors.contains(e)) {
+ deleteDescriptors.add(e);
+ }
+ }
+ SortedSet<File> deleteExtraInfos = new TreeSet<File>();
+ for (File e : writtenExtrainfos) {
+ if (!referencedExtraInfos.values().contains(e)) {
+ deleteExtraInfos.add(e);
+ }
+ }
+ System.out.println("Deleting " + deleteDescriptors.size()
+ + " unreferenced bridge descriptors and "
+ + deleteExtraInfos.size() + " extra-info descriptors (keeping "
+ + (writtenDescriptors.size() - deleteDescriptors.size())
+ + " bridge descriptors and " + (writtenExtrainfos.size()
+ - deleteExtraInfos.size()) + " extra-info descriptors).");
+ for (File e : deleteDescriptors)
+ e.delete();
+ for (File e : deleteExtraInfos)
+ e.delete();
+
+ long finished = System.currentTimeMillis();
+ System.out.println("Processing took " + ((finished - started) / 1000)
+ + " seconds.");
+ }
+}
+
diff --git a/bridge-desc-sanitizer/HOWTO b/bridge-desc-sanitizer/HOWTO
new file mode 100644
index 0000000..b84d5ce
--- /dev/null
+++ b/bridge-desc-sanitizer/HOWTO
@@ -0,0 +1,113 @@
+Bridge descriptor sanitizer
+
+---------------------------------------------------------------------------
+
+Introduction:
+
+The bridge authority Tonga keeps a list of bridges in order to serve bridge
+addresses and descriptors to its clients. Every half hour, Tonga copies a
+snapshot of the known bridge descriptors to moria where these descriptors
+are archived for later statistical analysis. As a guiding principle, the
+Tor project makes all data that it uses for statistical analysis available
+to the interested public, in order to maximize transparency towards the
+community. However, the bridge descriptors contain the IP addresses and
+other contact information of bridges that must not be made public, or the
+purpose of bridges as non-public entry points into the Tor network would be
+defeated. This script takes the half-hourly snapshots as input, removes all
+possibly sensitive information from the descriptors, and puts out the
+sanitized bridge descriptors that are safe to be published.
+
+---------------------------------------------------------------------------
+
+Processing steps:
+
+The following steps are taken to remove all potentially sensitive
+information from the bridge descriptors while keeping them useful for
+statistical analysis.
+
+1. Replace the bridge identity with its SHA1 value
+
+ Clients can request a bridge's current descriptor by sending its
+ identity string to the bridge authority. This is a feature to make
+ bridges on dynamic IP addresses useful. Therefore, the original
+ identities (and anything that could be used to derive them) need to be
+ removed from the descriptors. The bridge identity is replaced with its
+ SHA1 hash value. The idea is to have a consistent replacement that
+ remains stable over months or even years (without keeping a secret for a
+ keyed hash function).
+
+2. Remove all cryptographic keys and signatures
+
+ It would be straightforward to learn about the bridge identity from the
+ bridge's public key. Replacing keys by newly generated ones seemed to be
+ unnecessary (and would involve keeping a state over months/years), so
+ that all cryptographic objects have simply been removed.
+
+3. Replace IP address with 127.0.0.1
+
+ Of course, the IP address needs to be removed, too. However, the IP
+ address is resolved to a country code first and the result written to
+ the contact line as "somebody at example dot de" for Germany, etc. The
+ ports are kept unchanged though.
+
+4. Replace contact information
+
+ If there is contact information in a descriptor, the contact line is
+ changed to "somebody at ...". If there is none, a contact line is added
+ saying "nobody at ..." in order to put in the country code.
+
+5. Replace nickname with Unnamed
+
+ The bridge nicknames might give hints on the location of the bridge if
+ chosen without care; e.g. a bridge nickname might be very similar to the
+ nicknames of the operator's relays, which might be located on adjacent IP
+ addresses. All bridge nicknames are therefore replaced with the string
+ Unnamed.
+
+Note that these processing steps only prevent people from learning about
+new bridge locations. People who already know a bridge identity or location
+can easily learn more about this bridge from the sanitized descriptors.
+This is useful for statistical analysis, e.g. to filter out bridges that
+have been running as relays before.
+
+---------------------------------------------------------------------------
+
+Quick Start:
+
+The following steps are necessary to process the half-hourly snapshots as
+collected by moria:
+
+- Install Java 5 or higher.
+
+- Download Apache Commons Codec 1.4 or higher for Base 64 and hex encoding
+ from http://commons.apache.org/codec/ and place the .jar (in the
+ following assumed to be commons-codec-1.4.jar) in the same directory as
+ this HOWTO file.
+
+- Copy the half-hourly snapshots named from-tonga-YYYY-MM-DDThhmmssZ.tar.gz
+ into a directory called data/ in the same directory as this HOWTO file.
+
+- Run ./extract-bridges.sh to extract the half-hourly snapshots in data/
+ to separate directories in the newly created subdirectory in/ .
+
+- Copy the geoip.txt from the Tor sources (from /src/config/) to the same
+ directory as this HOWTO file.
+
+- Compile the Java class using
+
+ $ javac -cp commons-codec-1.4.jar ConvertBridgeDescs.java
+
+- Run the script, providing it with the parameters it needs:
+
+ java -cp .:commons-codec-1.4.jar ConvertBridgeDescs
+ <input directory> <geoip.txt file>
+ <YYYY> <MM> <output directory>
+
+ Note that YYYY and MM specify the month that shall be processed. The other
+ descriptors in the input directory are ignored.
+
+ A sample invocation might be:
+
+ $ java -cp .:commons-codec-1.4.jar ConvertBridgeDescs in/ geoip.txt
+ 2008 10 out/
+
diff --git a/bridge-desc-sanitizer/extract-bridges.sh b/bridge-desc-sanitizer/extract-bridges.sh
new file mode 100755
index 0000000..5f412c3
--- /dev/null
+++ b/bridge-desc-sanitizer/extract-bridges.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Extract every snapshot data/<name>.tar.gz into its own directory
+# in/<name>/, creating in/ on the first run and reusing it afterwards.
+mkdir -p "in"
+for archive in data/*.tar.gz
+do
+  # Without any matching file the pattern is passed through unexpanded.
+  [ -e "$archive" ] || continue
+  name=$(basename "$archive" .tar.gz)
+  mkdir -p "in/$name"
+  tar -C "in/$name" -xf "$archive"
+done
+
diff --git a/exonerator/ExoneraTor.java b/exonerator/ExoneraTor.java
new file mode 100644
index 0000000..eba3cca
--- /dev/null
+++ b/exonerator/ExoneraTor.java
@@ -0,0 +1,404 @@
+/* Copyright 2009 The Tor Project
+ * See LICENSE for licensing information */
+
+import java.io.*;
+import java.math.*;
+import java.text.*;
+import java.util.*;
+import org.bouncycastle.util.encoders.Base64;
+
+public final class ExoneraTor {
+
+ public static void main(final String[] args) throws Exception {
+
+ // check parameters
+ if (args.length < 4 || args.length > 5) {
+ System.err.println("\nUsage: java "
+ + ExoneraTor.class.getSimpleName()
+ + " <descriptor archive directory> <IP address in question> "
+ + "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> "
+ + "[<target address>[:<target port>]]\n");
+ return;
+ }
+ File archiveDirectory = new File(args[0]);
+ if (!archiveDirectory.exists() || !archiveDirectory.isDirectory()) {
+ System.err.println("\nDescriptor archive directory + "
+ + archiveDirectory.getAbsolutePath()
+ + " does not exist or is not a directory.\n");
+ return;
+ }
+ String relayIP = args[1];
+ String timestampStr = args[2] + " " + args[3];
+ SimpleDateFormat timeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long timestamp = timeFormat.parse(timestampStr).getTime();
+ String target = null, targetIP = null, targetPort = null;
+ String[] targetIPParts = null;
+ if (args.length > 4) {
+ target = args[4];
+ if (target.contains(":")) {
+ targetIP = target.split(":")[0];
+ targetPort = target.split(":")[1];
+ } else {
+ targetIP = target;
+ }
+ targetIPParts = targetIP.replace(".", " ").split(" ");
+ }
+ String DELIMITER = "--------------------------------------------------"
+ + "-------------------------";
+ System.out.println("\nTrying to find out whether " + relayIP + " was "
+ + "running as a Tor relay at " + timestampStr
+ + (target != null ? " permitting exiting to " + target : "")
+ + "...\n\n" + DELIMITER);
+
+ // check that we have the required archives
+ long timestampTooOld = timestamp - 300 * 60 * 1000;
+ long timestampFrom = timestamp - 180 * 60 * 1000;
+ long timestampTooNew = timestamp + 120 * 60 * 1000;
+ Calendar calTooOld = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+ Calendar calFrom = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+ Calendar calTooNew = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+ calTooOld.setTimeInMillis(timestampTooOld);
+ calFrom.setTimeInMillis(timestampFrom);
+ calTooNew.setTimeInMillis(timestampTooNew);
+ System.out.printf("%nChecking that relevant archives between "
+ + "%tF %<tT and %tF %<tT are available...%n", calTooOld,
+ calTooNew);
+ SortedSet<String> requiredDirs = new TreeSet<String>();
+ requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooOld));
+ requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooNew));
+ if (target != null) {
+ requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
+ calTooOld));
+ requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
+ calTooNew));
+ }
+ SortedSet<File> consensusDirs = new TreeSet<File>();
+ SortedSet<File> descriptorsDirs = new TreeSet<File>();
+ Stack<File> directoriesLeftToParse = new Stack<File>();
+ directoriesLeftToParse.push(archiveDirectory);
+ while (!directoriesLeftToParse.isEmpty()) {
+ File directoryOrFile = directoriesLeftToParse.pop();
+ if (directoryOrFile.getName().startsWith("consensuses-")) {
+ if (requiredDirs.contains(directoryOrFile.getName())) {
+ requiredDirs.remove(directoryOrFile.getName());
+ consensusDirs.add(directoryOrFile);
+ }
+ } else if (directoryOrFile.getName().startsWith(
+ "server-descriptors-")) {
+ if (requiredDirs.contains(directoryOrFile.getName())) {
+ requiredDirs.remove(directoryOrFile.getName());
+ descriptorsDirs.add(directoryOrFile);
+ }
+ } else {
+ for (File fileInDir : directoryOrFile.listFiles())
+ if (fileInDir.isDirectory())
+ directoriesLeftToParse.push(fileInDir);
+ }
+ }
+ for (File dir : consensusDirs)
+ System.out.println(" " + dir.getAbsolutePath());
+ for (File dir : descriptorsDirs)
+ System.out.println(" " + dir.getAbsolutePath());
+ if (!requiredDirs.isEmpty()) {
+ System.out.println("\nWe are missing consensuses and/or server "
+ + "descriptors. Please download these archives and extract them "
+ + "to your data directory. Be sure NOT to rename the extracted "
+ + "directories or the contained files.");
+ for (String dir : requiredDirs)
+ System.out.println(" " + dir + ".tar.bz2");
+ return;
+ }
+
+ // look for consensus files
+ System.out.printf("%nLooking for relevant consensuses between "
+ + "%tF %<tT and %s...%n", calFrom, timestampStr);
+ SortedSet<File> tooOldConsensuses = new TreeSet<File>();
+ SortedSet<File> relevantConsensuses = new TreeSet<File>();
+ SortedSet<File> tooNewConsensuses = new TreeSet<File>();
+ directoriesLeftToParse.clear();
+ for (File consensusDir : consensusDirs)
+ directoriesLeftToParse.push(consensusDir);
+ SimpleDateFormat consensusTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ consensusTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ while (!directoriesLeftToParse.isEmpty()) {
+ File directoryOrFile = directoriesLeftToParse.pop();
+ if (directoryOrFile.isDirectory()) {
+ for (File fileInDir : directoryOrFile.listFiles()) {
+ directoriesLeftToParse.push(fileInDir);
+ }
+ continue;
+ } else {
+ String filename = directoryOrFile.getName();
+ if (filename.endsWith("consensus")) {
+ long consensusTime = consensusTimeFormat.parse(
+ filename.substring(0, 19)).getTime();
+ if (consensusTime >= timestampTooOld &&
+ consensusTime < timestampFrom)
+ tooOldConsensuses.add(directoryOrFile);
+ else if (consensusTime >= timestampFrom &&
+ consensusTime <= timestamp)
+ relevantConsensuses.add(directoryOrFile);
+ else if (consensusTime > timestamp &&
+ consensusTime <= timestampTooNew)
+ tooNewConsensuses.add(directoryOrFile);
+ }
+ }
+ }
+ SortedSet<File> allConsensuses = new TreeSet<File>();
+ allConsensuses.addAll(tooOldConsensuses);
+ allConsensuses.addAll(relevantConsensuses);
+ allConsensuses.addAll(tooNewConsensuses);
+ if (allConsensuses.isEmpty()) {
+ System.out.println(" None found!\n\n" + DELIMITER + "\n\nResult is "
+ + "INDECISIVE!\n\nWe cannot make any statement about IP address "
+ + relayIP + " being a relay at " + timestampStr + " or not! We "
+ + "did not find any relevant consensuses preceding the given "
+ + "time. This either means that you did not download and "
+ + "extract the consensus archives preceding the hours before "
+ + "the given time, or (in rare cases) that the directory "
+ + "archives are missing the hours before the timestamp. Please "
+ + "check that your directory archives contain consensus files "
+ + "of the interval 5:00 hours before and 2:00 hours after the "
+ + "time you are looking for.\n");
+ return;
+ }
+ for (File f : relevantConsensuses)
+ System.out.println(" " + f.getAbsolutePath());
+
+ // parse consensuses to find descriptors belonging to the IP address
+ System.out.println("\nLooking for descriptor identifiers referenced "
+ + "in \"r \" lines in these consensuses containing IP address "
+ + relayIP + "...");
+ SortedSet<File> positiveConsensusesNoTarget = new TreeSet<File>();
+ Set<String> addressesInSameNetwork = new HashSet<String>();
+ SortedMap<String, Set<File>> relevantDescriptors =
+ new TreeMap<String, Set<File>>();
+ for (File consensus : allConsensuses) {
+ if (relevantConsensuses.contains(consensus))
+ System.out.println(" " + consensus.getAbsolutePath());
+ BufferedReader br = new BufferedReader(new FileReader(consensus));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (!line.startsWith("r "))
+ continue;
+ String[] parts = line.split(" ");
+ String address = parts[6];
+ if (address.equals(relayIP)) {
+ byte[] result = Base64.decode(parts[3] + "==");
+ String hex = new BigInteger(1, Base64.decode(parts[3] +
+ "==")).toString(16).substring(0, 40);
+ if (!relevantDescriptors.containsKey(hex))
+ relevantDescriptors.put(hex, new HashSet<File>());
+ relevantDescriptors.get(hex).add(consensus);
+ positiveConsensusesNoTarget.add(consensus);
+ if (relevantConsensuses.contains(consensus))
+ System.out.println(" \"" + line + "\" references "
+ + "descriptor " + hex);
+ } else {
+ if (relayIP.startsWith(address.substring(0,
+ address.lastIndexOf(".")))) {
+ addressesInSameNetwork.add(address);
+ }
+ }
+ }
+ br.close();
+ }
+ if (relevantDescriptors.isEmpty()) {
+ System.out.printf(" None found!\n\n" + DELIMITER + "\n\nResult is "
+ + "NEGATIVE with moderate certainty!\n\nWe did not find IP "
+ + "address " + relayIP + " in any of the consensuses that were "
+ + "published between %tF %<tT and %tF %<tT.\n\nA possible "
+ + "reason for false negatives is that the relay is using a "
+ + "different IP address when generating a descriptor than for "
+ + "exiting to the Internet. We hope to provide better checks "
+ + "for this case in the future.", calTooOld, calTooNew);
+ if (!addressesInSameNetwork.isEmpty()) {
+ System.out.println("\n\nThe following other IP addresses of Tor "
+ + "relays were found in the mentioned consensus files that "
+ + "are in the same /24 network and that could be related to "
+ + "IP address " + relayIP + ":");
+ for (String s : addressesInSameNetwork) {
+ System.out.println(" " + s);
+ }
+ }
+ System.out.println();
+ return;
+ }
+
+ // parse router descriptors to check exit policies
+ SortedSet<File> positiveConsensuses = new TreeSet<File>();
+ Set<String> missingDescriptors = new HashSet<String>();
+ if (target != null) {
+ System.out.println("\nChecking if referenced descriptors permit "
+ + "exiting to " + target + "...");
+ Set<String> descriptors = relevantDescriptors.keySet();
+ missingDescriptors.addAll(relevantDescriptors.keySet());
+ directoriesLeftToParse.clear();
+ for (File descriptorsDir : descriptorsDirs)
+ directoriesLeftToParse.push(descriptorsDir);
+ while (!directoriesLeftToParse.isEmpty()) {
+ File directoryOrFile = directoriesLeftToParse.pop();
+ if (directoryOrFile.isDirectory()) {
+ for (File fileInDir : directoryOrFile.listFiles()) {
+ directoriesLeftToParse.push(fileInDir);
+ }
+ continue;
+ } else {
+ String filename = directoryOrFile.getName();
+ for (String descriptor : descriptors) {
+ if (filename.equals(descriptor)) {
+ missingDescriptors.remove(descriptor);
+ BufferedReader br = new BufferedReader(
+ new FileReader(directoryOrFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("reject ") ||
+ line.startsWith("accept ")) {
+ boolean ruleAccept = line.split(" ")[0].equals("accept");
+ String ruleAddress = line.split(" ")[1].split(":")[0];
+ if (!ruleAddress.equals("*")) {
+ if (!ruleAddress.contains("/") &&
+ !ruleAddress.equals(targetIP))
+ continue; // IP address does not match
+ String[] ruleIPParts = ruleAddress.split("/")[0].
+ replace(".", " ").split(" ");
+ int ruleNetwork = Integer.parseInt(
+ ruleAddress.split("/")[1]);
+ for (int i = 0; i < 4; i++) {
+ if (ruleNetwork == 0) {
+ break;
+ } else if (ruleNetwork >= 8) {
+ if (ruleIPParts[i].equals(targetIPParts[i]))
+ ruleNetwork -= 8;
+ else
+ break;
+ } else {
+ int mask = 255 ^ 255 >>> ruleNetwork;
+ if ((Integer.parseInt(ruleIPParts[i]) & mask) ==
+ (Integer.parseInt(targetIPParts[i]) & mask))
+ ruleNetwork = 0;
+ break;
+ }
+ }
+ if (ruleNetwork > 0)
+ continue; // IP address does not match
+ }
+ String rulePort = line.split(" ")[1].split(":")[1];
+ if (targetPort == null && !ruleAccept &&
+ !rulePort.equals("*"))
+ continue; // with no port given, we only consider
+ // reject :* rules as matching
+ if (targetPort != null) {
+ if (!rulePort.equals("*") &&
+ !targetPort.equals(rulePort))
+ continue; // ports do not match
+ }
+ boolean relevantMatch = false;
+ for (File f : relevantDescriptors.get(descriptor))
+ if (relevantConsensuses.contains(f))
+ relevantMatch = true;
+ if (relevantMatch)
+ System.out.println(" "
+ + directoryOrFile.getAbsolutePath() + " "
+ + (ruleAccept ? "permits" : "does not permit")
+ + " exiting to " + target + " according to rule \""
+ + line + "\"");
+ if (ruleAccept)
+ positiveConsensuses.addAll(
+ relevantDescriptors.get(descriptor));
+ break;
+ }
+ }
+ br.close();
+ }
+ }
+ }
+ }
+ }
+
+ // print out result
+ Set<File> matches = (target != null) ? positiveConsensuses
+ : positiveConsensusesNoTarget;
+ if (matches.contains(relevantConsensuses.last())) {
+ System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE with "
+ + "high certainty!\n\nWe found one or more relays on IP address "
+ + relayIP
+ + (target != null ? " permitting exit to " + target : "")
+ + " in the most recent consensus preceding " + timestampStr
+ + " that clients were likely to know.\n");
+ return;
+ }
+ boolean resultIndecisive = target != null
+ && !missingDescriptors.isEmpty();
+ if (resultIndecisive) {
+ System.out.println("\n" + DELIMITER + "\n\nResult is INDECISIVE!\n\n"
+ + "At least one referenced descriptor could not be found. This "
+ + "is a rare case, but one that (apparently) happens. We cannot "
+ + "make any good statement about exit relays without these "
+ + "descriptors. The following descriptors are missing:");
+ for (String desc : missingDescriptors)
+ System.out.println(" " + desc);
+ }
+ boolean inOtherRelevantConsensus = false, inTooOldConsensuses = false,
+ inTooNewConsensuses = false;
+ for (File f : matches)
+ if (relevantConsensuses.contains(f))
+ inOtherRelevantConsensus = true;
+ else if (tooOldConsensuses.contains(f))
+ inTooOldConsensuses = true;
+ else if (tooNewConsensuses.contains(f))
+ inTooNewConsensuses = true;
+ if (inOtherRelevantConsensus) {
+ if (!resultIndecisive)
+ System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE "
+ + "with moderate certainty!");
+ System.out.println("\nWe found one or more relays on IP address "
+ + relayIP
+ + (target != null ? " permitting exit to " + target : "")
+ + ", but not in the consensus immediately preceding "
+ + timestampStr + ". A possible reason for the relay being "
+ + "missing in the last consensus preceding the given time might "
+ + "be that some of the directory authorities had difficulties "
+ + "connecting to the relay. However, clients might still have "
+ + "used the relay.");
+ } else {
+ if (!resultIndecisive)
+ System.out.println("\n" + DELIMITER + "\n\nResult is NEGATIVE "
+ + "with high certainty!");
+ System.out.println("\nWe did not find any relay on IP address "
+ + relayIP
+ + (target != null ? " permitting exit to " + target : "")
+ + " in the consensuses 3:00 hours preceding " + timestampStr
+ + ".");
+ if (inTooOldConsensuses || inTooNewConsensuses) {
+ if (inTooOldConsensuses && !inTooNewConsensuses)
+ System.out.println("\nNote that we found a matching relay in "
+ + "consensuses that were published between 5:00 and 3:00 "
+ + "hours before " + timestampStr + ".");
+ else if (!inTooOldConsensuses && inTooNewConsensuses)
+ System.out.println("\nNote that we found a matching relay in "
+ + "consensuses that were published up to 2:00 hours after "
+ + timestampStr + ".");
+ else
+ System.out.println("\nNote that we found a matching relay in "
+ + "consensuses that were published between 5:00 and 3:00 "
+ + "hours before and in consensuses that were published up "
+ + "to 2:00 hours after " + timestampStr + ".");
+ System.out.println("Make sure that the timestamp you provided is "
+ + "in the correct timezone: UTC (or GMT).");
+ }
+ }
+ if (target != null) {
+ if (positiveConsensuses.isEmpty() &&
+ !positiveConsensusesNoTarget.isEmpty())
+ System.out.println("\nNote that although the found relay(s) did "
+ + "not permit exiting to " + target + ", there have been one "
+ + "or more relays running at the given time.");
+ }
+ System.out.println();
+ }
+}
+
diff --git a/exonerator/HOWTO b/exonerator/HOWTO
new file mode 100644
index 0000000..907a8f5
--- /dev/null
+++ b/exonerator/HOWTO
@@ -0,0 +1,159 @@
+ExoneraTor
+ or: a script that tells you whether some IP address was a Tor relay
+
+---------------------------------------------------------------------------
+
+Introduction:
+
+Some people have expressed the desire to learn whether a given IP address
+has been a Tor relay at a certain time. In addition to that, these people
+might want to know whether the IP address permitted exit to a given address
+and port.
+
+Answering these questions can be important for Tor relay operators to show
+to the authorities that an anonymous user might have done bad things
+using their IP address. Likewise, police investigators might be interested
+in the answer to these questions, too, in order to decide whether to
+proceed with their investigations or not.
+
+We can answer the above questions by looking at the descriptor archives
+that are available since late 2007 (or even earlier, but this script only
+works with the data format that was produced starting in October 2007).
+This script parses the directory archives to print out the answer whether
+a certain IP address was a Tor relay at a given time. The script further
+prints out all intermediate steps in answering this, so that users can
+confirm the correctness of the result themselves.
+
+This script is available in two versions written in Python and in Java with
+equivalent functionality.
+
+---------------------------------------------------------------------------
+
+Python Quick Start:
+
+In order to run the Python version of this script, you need to install and
+download the following software and data (please note that all instructions
+are written for Linux; commands for Windows or Mac OS X may vary):
+
+- Install Python 2.6.2 or higher. (Previous Python versions might work,
+ too, but have not been tested.)
+
+- Install the Python module IPy 0.62 or higher either from
+ http://pypi.python.org/pypi/IPy/ or using "apt-get install python-ipy" on
+ Debian-based systems.
+
+- Download the v3 consensuses and server descriptors of the relevant time
+ from http://metrics.torproject.org/data.html and extract them to a
+ directory in your working directory, e.g. /home/you/exonerator/data/ .
+ Don't rename the extracted directories or any of the contained files, or
+ the script won't find the contained descriptors.
+
+ Note that you only need the server descriptors if you want to learn
+ whether a given IP address permits exiting to a given target. If you
+ only want to learn whether that IP address was a Tor relay, you don't
+ need them.
+
+- Run the script, providing it with the parameters it needs:
+
+ python exonerator.py [--archive=<descriptor archive directory>]
+ <IP address in question>
+ <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
+ [<target address>[:<target port>]]
+
+ The --archive option defaults to data/ . In the following examples, it is
+ assumed that this default applies.
+
+ Make sure that the timestamp is provided in UTC, which is equivalent to
+ GMT, and not in your local timezone! Otherwise, results will very likely
+ be wrong.
+
+ A sample invocation might be:
+
+ $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00
+ 209.85.129.104:80
+
+---------------------------------------------------------------------------
+
+Java Quick Start:
+
+In order to run the Java version of this script, you need to install and
+download the following software and data (please note that all instructions
+are written for Linux; commands for Windows or Mac OS X may vary):
+
+- Install Java 6 or higher.
+
+- Download the BouncyCastle provider that includes Base 64 decoding from
+ http://www.bouncycastle.org/download/bcprov-jdk16-143.jar and put it in
+ your working directory, e.g. /home/you/exonerator/ .
+
+- Download the v3 consensuses and server descriptors of the relevant time
+ from http://metrics.torproject.org/data.html and extract them to a
+ directory in your working directory, e.g. /home/you/exonerator/data/ .
+ Don't rename the extracted directories or any of the contained files, or
+ the script won't find the contained descriptors.
+
+ Note that you only need the server descriptors if you want to learn
+ whether a given IP address permits exiting to a given target. If you
+ only want to learn whether that IP address was a Tor relay, you don't
+ need them.
+
+- Compile the (single) Java class using this command:
+
+ $ javac -cp bcprov-jdk16-143.jar ExoneraTor.java
+
+- Run the script, providing it with the parameters it needs:
+
+ java -cp .:bcprov-jdk16-143.jar ExoneraTor
+ <descriptor archive directory>
+ <IP address in question>
+ <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
+ [<target address>[:<target port>]]
+
+ Make sure that the timestamp is provided in UTC, which is equivalent to
+ GMT, and not in your local timezone! Otherwise, results will very likely
+ be wrong.
+
+ A sample invocation might be:
+
+ $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+ 2009-08-15 16:05:00 209.85.129.104:80
+
+---------------------------------------------------------------------------
+
+Test cases:
+
+The following test cases work with the August 2009 archives and can be used
+to check whether this script works correctly:
+
+- Positive result of echelon1+2 being a relay:
+
+ $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00
+ $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+ 2009-08-15 16:05:00
+
+- Positive result of echelon1+2 exiting to google.com on any port
+
+ $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 209.85.129.104
+ $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+ 2009-08-15 16:05:00 209.85.129.104
+
+- Positive result of echelon1+2 exiting to google.com on port 80
+
+ $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
+ 209.85.129.104:80
+ $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+ 2009-08-15 16:05:00 209.85.129.104:80
+
+- Negative result of echelon1+2 exiting to google.com on port 25 (the relay
+  permits exiting to google.com, but not on port 25)
+
+ $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
+ 209.85.129.104:25
+ $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+ 2009-08-15 16:05:00 209.85.129.104:25
+
+- Negative result with IP address of echelon1+2 changed in the last octet
+
+ $ python exonerator.py 209.17.171.50 2009-08-15 16:05:00
+ $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.50 \
+ 2009-08-15 16:05:00
+
diff --git a/exonerator/LICENSE b/exonerator/LICENSE
new file mode 100644
index 0000000..4bdb99d
--- /dev/null
+++ b/exonerator/LICENSE
@@ -0,0 +1,30 @@
+Copyright 2009 The Tor Project
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ * Neither the names of the copyright owners nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/exonerator/exonerator.py b/exonerator/exonerator.py
new file mode 100755
index 0000000..641d65e
--- /dev/null
+++ b/exonerator/exonerator.py
@@ -0,0 +1,371 @@
+#!/usr/bin/env python
+# Copyright 2009 The Tor Project -- see LICENSE for licensing information
+
+import binascii
+import os
+import sys
+import time
+from optparse import OptionParser
+from IPy import IP
+
+# Usage string handed to OptionParser below; optparse replaces %prog with
+# the name of the running script.
+USAGE = "usage: %prog [options] <IP address in question> " \
+ "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> " \
+ "[<target address>[:<target port>]]"
+# Horizontal rule of 75 dashes used in the printed output to set off the
+# result from the preceding progress messages.
+DELIMITER = "-" * 75
+
+if __name__ == '__main__':
+ # check parameters
+ parser = OptionParser(usage=USAGE)
+ parser.add_option("-a", "--archive", dest="archive", default="data/",
+ help="descriptor archive directory")
+ (options, args) = parser.parse_args()
+ if len(args) not in (3, 4):
+ parser.error("incorrect number of arguments")
+ if not os.path.isdir(options.archive):
+ parser.error("descriptor archive directory %s does not exist or " \
+ "is not a directory." % \
+ os.path.abspath(options.archive))
+ archiveDirectory = os.path.dirname(options.archive)
+ try:
+ relayIP = IP(args[0])
+ except ValueError:
+ parser.error("invalid IP address in question: '%s'" % args[0])
+ timestampStr = "%s %s" % (args[1], args[2])
+ os.environ['TZ'] = 'UTC'
+ time.tzset()
+ try:
+ timestamp = time.strptime(timestampStr, "%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ parser.error("incorrect time format: '%s'" % timestampStr)
+ # if a target is given, parse address and possibly port part of it
+ target = None
+ targetIP = None
+ targetPort = None
+ if len(args) == 4:
+ target = args[3]
+ targetParts = target.split(":")
+ try:
+ targetIP = IP(targetParts[0])
+ except ValueError:
+ parser.error("invalid target IP address in: '%s'" % args[3])
+ if len(targetParts) > 2:
+ parser.error("invalid target format: '%s'" % args[3])
+ if len(targetParts) > 1:
+ try:
+ targetPortTest = int(targetParts[1])
+ except ValueError:
+ parser.error("invalid target port number in: '%s'" % \
+ args[3])
+ if targetPortTest not in range(1, 65535):
+ parser.error("invalid target port number in: '%s'" % \
+ args[3])
+ targetPort = targetParts[1]
+
+ targetHelpStr = ""
+ if target:
+ targetHelpStr = " permitting exiting to %s" % target
+ print "\nTrying to find out whether %s was running a Tor relay at " \
+ "%s%s...\n\n%s\n" % (relayIP, timestampStr, targetHelpStr,
+ DELIMITER)
+
+ # check that we have the required archives
+ #
+ # Three instants are derived from the user's timestamp: 300 minutes
+ # before it ("too old"), 180 minutes before it ("from"), and 120 minutes
+ # after it ("too new").
+ timestampTooOld = time.gmtime(time.mktime(timestamp) - 300 * 60)
+ timestampFrom = time.gmtime(time.mktime(timestamp) - 180 * 60)
+ timestampTooNew = time.gmtime(time.mktime(timestamp) + 120 * 60)
+ timestampTooOldStr = time.strftime("%Y-%m-%d %H:%M:%S",
+ timestampTooOld)
+ timestampFromStr = time.strftime("%Y-%m-%d %H:%M:%S", timestampFrom)
+ timestampTooNewStr = time.strftime("%Y-%m-%d %H:%M:%S",
+ timestampTooNew)
+ print "\nChecking that relevant archives between %s and %s are " \
+ "available..." % (timestampTooOldStr, timestampTooNewStr)
+
+ # Names of the monthly archive directories that must exist.  A set is
+ # used because both ends of the interval usually fall in the same month.
+ # Server descriptors are only required when a target was given.
+ requiredDirs = set()
+ requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooOld))
+ requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooNew))
+ if target:
+ requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
+ timestampTooOld))
+ requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
+ timestampTooNew))
+
+ # Walk the archive directory with an explicit stack, collecting the
+ # consensus and server-descriptor directories and ticking them off in
+ # requiredDirs.  Directories with either name prefix are not descended
+ # into here.
+ # NOTE(review): indentation was lost in this copy of the patch, so it
+ # is unclear whether the two append() calls are nested under the
+ # "in requiredDirs" checks -- confirm against the repository.
+ consensusDirs = list()
+ descriptorsDirs = list()
+ directoriesLeftToParse = list()
+ directoriesLeftToParse.append(archiveDirectory)
+
+ while directoriesLeftToParse:
+ directoryOrFile = directoriesLeftToParse.pop()
+ basename = os.path.basename(directoryOrFile)
+ if basename.startswith("consensuses-"):
+ if basename in requiredDirs:
+ requiredDirs.remove(basename)
+ consensusDirs.append(directoryOrFile)
+ elif basename.startswith("server-descriptors-"):
+ if basename in requiredDirs:
+ requiredDirs.remove(basename)
+ descriptorsDirs.append(directoryOrFile)
+ else:
+ for filename in os.listdir(directoryOrFile):
+ entry = "%s/%s" % (directoryOrFile, filename)
+ if os.path.isdir(entry):
+ directoriesLeftToParse.append(entry)
+
+ # list the directories that were found, in sorted order
+ consensusDirs.sort()
+ for consensusDir in consensusDirs:
+ print " %s" % consensusDir
+ descriptorsDirs.sort()
+ for descriptorsDir in descriptorsDirs:
+ print " %s" % descriptorsDir
+
+ # anything still left in requiredDirs was not found on disk; name the
+ # missing archives and stop
+ if requiredDirs:
+ print "\nWe are missing consensuses and/or server descriptors. " \
+ "Please download these archives and extract them to your " \
+ "data directory. Be sure NOT to rename the extracted " \
+ "directories or the contained files."
+ for requiredDir in sorted(requiredDirs):
+ print " %s.tar.bz2" % requiredDir
+ sys.exit()
+
+ # look for consensus files
+ print "\nLooking for relevant consensuses between %s and %s..." % \
+ (timestampFromStr, timestampStr)
+ tooOldConsensuses = set()
+ relevantConsensuses = set()
+ tooNewConsensuses = set()
+ directoriesLeftToParse = list(consensusDirs)
+ while directoriesLeftToParse:
+ directoryOrFile = directoriesLeftToParse.pop()
+ if os.path.isdir(directoryOrFile):
+ for filename in os.listdir(directoryOrFile):
+ entry = "%s/%s" % (directoryOrFile, filename)
+ directoriesLeftToParse.append(entry)
+ else:
+ basename = os.path.basename(directoryOrFile)
+ if (basename.endswith("consensus")):
+ consensusTime = time.strptime(basename[0:19],
+ "%Y-%m-%d-%H-%M-%S")
+ if consensusTime >= timestampTooOld and \
+ consensusTime < timestampFrom:
+ tooOldConsensuses.add(directoryOrFile)
+ elif consensusTime >= timestampFrom and \
+ consensusTime <= timestamp:
+ relevantConsensuses.add(directoryOrFile)
+ elif consensusTime > timestamp and \
+ consensusTime <= timestampTooNew:
+ tooNewConsensuses.add(directoryOrFile)
+ allConsensuses = set()
+ allConsensuses.update(tooOldConsensuses)
+ allConsensuses.update(relevantConsensuses)
+ allConsensuses.update(tooNewConsensuses)
+ if not allConsensuses:
+ print " None found!\n\n%s\n\nResult is INDECISIVE!\n\nWe " \
+ "cannot make any statement about IP address %s being a " \
+ "relay at %s or not! We did not find any relevant " \
+ "consensuses preceding the given time. This either means " \
+ "that you did not download and extract the consensus " \
+ "archives preceding the hours before the given time, or " \
+ "(in rare cases) that the directory archives are missing " \
+ "the hours before the timestamp. Please check that your " \
+ "directory archives contain consensus files of the " \
+ "interval 5:00 hours before and 2:00 hours after the time " \
+ "you are looking for.\n" % (DELIMITER, relayIP, timestampStr)
+ sys.exit()
+ for consensus in sorted(relevantConsensuses):
+ print " %s" % consensus
+
+ # parse consensuses to find descriptors belonging to the IP address
+ print "\nLooking for descriptor identifiers referenced in \"r \" " \
+ "lines in these consensuses containing IP address %s..." % \
+ relayIP
+ positiveConsensusesNoTarget = set()
+ addressesInSameNetwork = set()
+ relevantDescriptors = dict()
+ for consensus in allConsensuses:
+ if consensus in relevantConsensuses:
+ print " %s" % consensus
+ consensusFile = open(consensus, "r")
+ line = consensusFile.readline()
+ while line:
+ if line.startswith("r "):
+ address = IP(line.split(" ")[6])
+ if address == relayIP:
+ hexDesc = binascii.b2a_hex(binascii.a2b_base64(
+ line.split(" ")[3] + "=="))
+ if hexDesc not in relevantDescriptors.keys():
+ relevantDescriptors[hexDesc] = set()
+ relevantDescriptors[hexDesc].add(consensus)
+ positiveConsensusesNoTarget.add(consensus)
+ if consensus in relevantConsensuses:
+ print " \"%s\" references descriptor %s" % \
+ (line.rstrip(), hexDesc)
+ elif relayIP.overlaps(IP("%s/24" % address,
+ make_net=True)):
+ addressesInSameNetwork.add(address)
+ line = consensusFile.readline()
+ consensusFile.close()
+ if not relevantDescriptors:
+ print " None found!\n\n%s\n\nResult is NEGATIVE with moderate " \
+ "certainty!\n\nWe did not find IP address %s in any of " \
+ "the consensuses that were published between %s and " \
+ "%s.\n\nA possible reason for false negatives is that the " \
+ "relay is using a different IP address when generating a " \
+ "descriptor than for exiting to the Internet. We hope to " \
+ "provide better checks for this case in the future." % \
+ (DELIMITER, relayIP, timestampTooOldStr, timestampTooNewStr)
+ if addressesInSameNetwork:
+ print "\nThe following other IP addresses of Tor relays " \
+ "were found in the mentioned consensus files that are " \
+ "in the same /24 network and that could be related to " \
+ "IP address %s:" % relayIP
+ for addr in addressesInSameNetwork:
+ print " %s" % addr
+ print ""
+ sys.exit()
+
+ # parse router descriptors to check exit policies
+ positiveConsensuses = set()
+ missingDescriptors = set()
+ if target:
+ print "\nChecking if referenced descriptors permit exiting to " \
+ "%s..." % target
+ descriptors = relevantDescriptors.keys()
+ for desc in descriptors:
+ missingDescriptors.add(desc)
+ directoriesLeftToParse = list(descriptorsDirs)
+ while directoriesLeftToParse:
+ directoryOrFile = directoriesLeftToParse.pop()
+ if os.path.isdir(directoryOrFile):
+ for filename in os.listdir(directoryOrFile):
+ entry = "%s/%s" % (directoryOrFile, filename)
+ directoriesLeftToParse.append(entry)
+ else:
+ basename = os.path.basename(directoryOrFile)
+ for descriptor in descriptors:
+ if basename == descriptor:
+ missingDescriptors.remove(descriptor)
+ descriptorFile = open(directoryOrFile, "r")
+ line = descriptorFile.readline()
+ while line:
+ if line.startswith("reject ") or \
+ line.startswith("accept "):
+ ruleAccept = line.split()[0] == "accept"
+ ruleAddress = line.split()[1].split(":")[0]
+ if ruleAddress != "*" and not \
+ IP(ruleAddress).overlaps(targetIP):
+ # IP address does not match
+ line = descriptorFile.readline()
+ continue
+ rulePort = line.split()[1].split(":")[1]
+ if not targetPort and not ruleAccept and \
+ rulePort != "*":
+ # with no port given, we only consider
+ # reject :* rules as matching
+ line = descriptorFile.readline()
+ continue
+ if targetPort and rulePort != "*" and \
+ targetPort != rulePort:
+ # ports do not match
+ line = descriptorFile.readline()
+ continue
+ relevantMatch = False
+ for f in relevantDescriptors.get(
+ descriptor):
+ if f in relevantConsensuses:
+ relevantMatch = True
+ if relevantMatch:
+ if ruleAccept:
+ print " %s permits exiting to " \
+ "%s according to rule " \
+ "\"%s\"" % (directoryOrFile,
+ target, line.rstrip())
+ else:
+ print " %s does not permit " \
+ "exiting to %s according " \
+ "to rule \"%s\"" % \
+ (directoryOrFile,
+ target, line.rstrip())
+ if ruleAccept:
+ for consensus in \
+ relevantDescriptors.get(
+ descriptor):
+ positiveConsensuses.add(consensus)
+ break
+ line = descriptorFile.readline()
+ descriptorFile.close()
+
+ # print out result
+ matches = None
+ if target:
+ matches = positiveConsensuses
+ else:
+ matches = positiveConsensusesNoTarget
+ lastConsensus = sorted(relevantConsensuses)[len(relevantConsensuses)-1]
+ if lastConsensus in matches:
+ print "\n%s\n\nResult is POSITIVE with high certainty!\n\nWe " \
+ "found one or more relays on IP address %s%s in the most " \
+ "recent consensus preceding %s that clients were likely " \
+ "to know.\n" % (DELIMITER, relayIP, targetHelpStr,
+ timestampStr)
+ sys.exit()
+ resultIndecisive = target and len(missingDescriptors) > 0
+ if resultIndecisive:
+ print "\n%s\n\nResult is INDECISIVE!\n\nAt least one " \
+ "referenced descriptor could not be found. This is a rare " \
+ "case, but one that (apparently) happens. We cannot make " \
+ "any good statement about exit relays without these " \
+ "descriptors. The following descriptors are missing:" % \
+ DELIMITER
+ for desc in missingDescriptors:
+ print " %s" % desc
+ inOtherRelevantConsensus = False
+ inTooOldConsensuses = False
+ inTooNewConsensuses = False
+ for f in matches:
+ if f in relevantConsensuses:
+ inOtherRelevantConsensus = True
+ elif f in tooOldConsensuses:
+ inTooOldConsensuses = True
+ elif f in tooNewConsensuses:
+ inTooNewConsensuses = True
+ if inOtherRelevantConsensus:
+ if not resultIndecisive:
+ print "\n%s\n\nResult is POSITIVE with moderate certainty!" % \
+ DELIMITER
+ print "\nWe found one or more relays on IP address %s%s, but " \
+ "not in the consensus immediately preceding %s. A " \
+ "possible reason for the relay being missing in the last " \
+ "consensus preceding the given time might be that some of " \
+ "the directory authorities had difficulties connecting to " \
+ "the relay. However, clients might still have used the " \
+ "relay." % (relayIP, targetHelpStr, timestampStr)
+ else:
+ if not resultIndecisive:
+ print "\n%s\n\nResult is NEGATIVE with high certainty!" % \
+ DELIMITER
+ print "\nWe did not find any relay on IP address %s%s in the " \
+ "consensuses 3:00 hours preceding %s." % (relayIP,
+ targetHelpStr, timestampStr)
+ if inTooOldConsensuses or inTooNewConsensuses:
+ if inTooOldConsensuses and not inTooNewConsensuses:
+ print "\nNote that we found a matching relay in " \
+ "consensuses that were published between 5:00 and " \
+ "3:00 hours before %s." % timestampStr
+ elif not inTooOldConsensuses and inTooNewConsensuses:
+ print "\nNote that we found a matching relay in " \
+ "consensuses that were published up to 2:00 hours " \
+ "after %s." % timestampStr
+ else:
+ print "\nNote that we found a matching relay in " \
+ "consensuses that were published between 5:00 and " \
+ "3:00 hours before and in consensuses that were " \
+ "published up to 2:00 hours after %s." % timestampStr
+ print "Make sure that the timestamp you provided is in the " \
+ "correct timezone: UTC (or GMT)."
+ if target:
+ if not positiveConsensuses and positiveConsensusesNoTarget:
+ print "\nNote that although the found relay(s) did not " \
+ "permit exiting to %s there have been one or more " \
+ "relays running at the given time." % target
+ print ""
+
--
1.7.1
More information about the tor-commits
mailing list