[tor-commits] [collector/master] Sanitize TCP ports in bridge descriptors.
karsten at torproject.org
karsten at torproject.org
Mon Sep 19 12:25:25 UTC 2016
commit ecb053899eb965c2778cf05479c26549d67f7956
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri Jun 10 13:28:42 2016 +0200
Sanitize TCP ports in bridge descriptors.
Implements #19317.
---
CHANGELOG.md | 2 +
.../bridgedescs/SanitizedBridgesWriter.java | 81 +++++++++++++++++-----
src/main/webapp/index.html | 46 ++++++++++--
3 files changed, 107 insertions(+), 22 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 24206ee..e17abad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
- Add support for Bifroest's bridge descriptor tarballs.
- Use a shutdown hook that gives currently running modules up to 10
minutes to finish properly, rather than killing them immediately.
+ - Replace TCP ports with hashes in @type bridge-network-status 1.1
+ and @type bridge-server-descriptor 1.2.
* Minor changes
- Remove quotes around base URL in index.json.
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index b787f78..b61cd30 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -138,7 +138,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
while ((line = br.readLine()) != null) {
String[] parts = line.split(",");
if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
- && line.length() != ("yyyy-MM,".length() + 50 * 2))
+ && line.length() != ("yyyy-MM,".length() + 50 * 2)
+ && line.length() != ("yyyy-MM,".length() + 83 * 2))
|| parts.length != 2) {
logger.warn("Invalid line in bridge-ip-secrets file "
+ "starting with '" + line.substring(0, 7) + "'! "
@@ -218,8 +219,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
scrubbedAddressPart = this.scrubIpv4Address(addressPart,
fingerprintBytes, published);
}
+ String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes,
+ published);
return (scrubbedAddressPart == null ? null :
- scrubbedAddressPart + ":" + portPart);
+ scrubbedAddressPart + ":" + scrubbedPort);
}
private String scrubIpv4Address(String address, byte[] fingerprintBytes,
@@ -334,14 +337,42 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return sb.toString();
}
+ private String scrubTcpPort(String portString, byte[] fingerprintBytes,
+ String published) throws IOException {
+ if (portString.equals("0")) {
+ return "0";
+ } else if (this.replaceIpAddressesWithHashes) {
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more TCP
+ * ports in this execution. */
+ return null;
+ }
+ byte[] hashInput = new byte[2 + 20 + 33];
+ int portNumber = Integer.parseInt(portString);
+ hashInput[0] = (byte) (portNumber >> 8);
+ hashInput[1] = (byte) portNumber;
+ System.arraycopy(fingerprintBytes, 0, hashInput, 2, 20);
+ String month = published.substring(0, "yyyy-MM".length());
+ byte[] secret = this.getSecretForMonth(month);
+ System.arraycopy(secret, 50, hashInput, 22, 33);
+ byte[] hashOutput = DigestUtils.sha256(hashInput);
+ int hashedPort = ((((hashOutput[0] & 0xFF) << 8)
+ | (hashOutput[1] & 0xFF)) >> 2) | 0xC000;
+ return String.valueOf(hashedPort);
+ } else {
+ return "1";
+ }
+ }
+
private byte[] getSecretForMonth(String month) throws IOException {
if (!this.secretsForHashingIpAddresses.containsKey(month)
- || this.secretsForHashingIpAddresses.get(month).length == 31) {
- byte[] secret = new byte[50];
+ || this.secretsForHashingIpAddresses.get(month).length < 83) {
+ byte[] secret = new byte[83];
this.secureRandom.nextBytes(secret);
if (this.secretsForHashingIpAddresses.containsKey(month)) {
System.arraycopy(this.secretsForHashingIpAddresses.get(month), 0,
- secret, 0, 31);
+ secret, 0,
+ this.secretsForHashingIpAddresses.get(month).length);
}
if (month.compareTo(
this.bridgeSanitizingCutOffTimestamp) < 0) {
@@ -362,8 +393,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
bw.close();
} catch (IOException e) {
logger.warn("Could not store new secret "
- + "to disk! Not calculating any IP address hashes in "
- + "this execution!", e);
+ + "to disk! Not calculating any IP address or TCP port "
+ + "hashes in this execution!", e);
this.persistenceProblemWithSecrets = true;
throw new IOException(e);
}
@@ -471,11 +502,15 @@ public class SanitizedBridgesWriter extends CollecTorMain {
fingerprintBytes,
descPublicationTime);
String nickname = parts[1];
+ String scrubbedOrPort = this.scrubTcpPort(orPort,
+ fingerprintBytes, descPublicationTime);
+ String scrubbedDirPort = this.scrubTcpPort(dirPort,
+ fingerprintBytes, descPublicationTime);
scrubbed.append("r " + nickname + " "
+ hashedBridgeIdentityBase64 + " "
+ hashedDescriptorIdentifier + " " + descPublicationTime
- + " " + scrubbedAddress + " " + orPort + " " + dirPort
- + "\n");
+ + " " + scrubbedAddress + " " + scrubbedOrPort + " "
+ + scrubbedDirPort + "\n");
/* Sanitize any addresses in a lines using the fingerprint and
* descriptor publication time from the previous r line. */
@@ -555,7 +590,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
outputFile.getParentFile().mkdirs();
BufferedWriter bw = new BufferedWriter(new FileWriter(
outputFile));
- bw.write("@type bridge-network-status 1.0\n");
+ bw.write("@type bridge-network-status 1.1\n");
bw.write("published " + publicationTime + "\n");
bw.write(header.toString());
for (String scrubbed : scrubbedLines.values()) {
@@ -595,6 +630,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
String hashedBridgeIdentity = null;
String address = null;
String routerLine = null;
+ String scrubbedRouterLine = null;
String scrubbedAddress = null;
String masterKeyEd25519 = null;
List<String> orAddresses = null;
@@ -611,7 +647,12 @@ public class SanitizedBridgesWriter extends CollecTorMain {
* the bridge identity fingerprint for replacing the IP address in
* the scrubbed version. */
} else if (line.startsWith("router ")) {
- address = line.split(" ")[2];
+ String[] parts = line.split(" ");
+ if (parts.length != 6) {
+ logger.warn("Invalid router line: '" + line + "'. Skipping.");
+ return;
+ }
+ address = parts[2];
routerLine = line;
/* Store or-address parts in a list and sanitize them when we have
@@ -671,6 +712,17 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
}
}
+ String[] routerLineParts = routerLine.split(" ");
+ String nickname = routerLineParts[1];
+ String scrubbedOrPort = this.scrubTcpPort(routerLineParts[3],
+ fingerprintBytes, published);
+ String scrubbedDirPort = this.scrubTcpPort(routerLineParts[4],
+ fingerprintBytes, published);
+ String scrubbedSocksPort = this.scrubTcpPort(
+ routerLineParts[5], fingerprintBytes, published);
+ scrubbedRouterLine = String.format("router %s %s %s %s %s%n",
+ nickname, scrubbedAddress, scrubbedOrPort,
+ scrubbedDirPort, scrubbedSocksPort);
} catch (IOException e) {
/* There's a persistence problem, so we shouldn't scrub more
* IP addresses in this execution. */
@@ -692,10 +744,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* When we reach the signature, we're done. Write the sanitized
* descriptor to disk below. */
} else if (line.startsWith("router-signature")) {
- String[] routerLineParts = routerLine.split(" ");
- scrubbedDesc = "router " + routerLineParts[1] + " "
- + scrubbedAddress + " " + routerLineParts[3] + " "
- + routerLineParts[4] + " " + routerLineParts[5] + "\n";
+ scrubbedDesc = scrubbedRouterLine;
if (scrubbedOrAddresses != null) {
for (String scrubbedOrAddress : scrubbedOrAddresses) {
scrubbedDesc = scrubbedDesc += "or-address "
@@ -915,7 +964,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
outputFile.getParentFile().mkdirs();
BufferedWriter bw = new BufferedWriter(new FileWriter(
outputFile, appendToFile));
- bw.write("@type bridge-server-descriptor 1.1\n");
+ bw.write("@type bridge-server-descriptor 1.2\n");
bw.write(scrubbedDesc);
if (descriptorDigestSha256Base64 != null) {
bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
diff --git a/src/main/webapp/index.html b/src/main/webapp/index.html
index ccdeea2..bbc70f2 100644
--- a/src/main/webapp/index.html
+++ b/src/main/webapp/index.html
@@ -166,14 +166,14 @@
</tr>
<tr>
<td>Bridge Network Statuses</td>
- <td><tt>@type bridge-network-status 1.0</tt></td>
+ <td><tt>@type bridge-network-status 1.1</tt></td>
<td><a href="recent/bridge-descriptors/statuses/" class="btn btn-primary btn-xs">recent</a></td>
<td><a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a></td>
<td><a href="#type-bridge-network-status" class="btn btn-default btn-xs">format</a></td>
</tr>
<tr>
<td>Bridge Server Descriptors</td>
- <td><tt>@type bridge-server-descriptor 1.1</tt></td>
+ <td><tt>@type bridge-server-descriptor 1.2</tt></td>
<td><a href="recent/bridge-descriptors/server-descriptors/" class="btn btn-primary btn-xs">recent</a></td>
<td><a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a></td>
<td><a href="#type-bridge-server-descriptor" class="btn btn-default btn-xs">format</a></td>
@@ -447,6 +447,28 @@ with <tt>xx:xxxx</tt> being the hex-formatted 3 byte output of a similar
hash function as described for IPv4 addresses.
The only differences are that the input <tt>IP address</tt> is 16 bytes
long and the <tt>secret</tt> is only 19 bytes long.</li></ul>
+<li><b>Replace TCP port with TCP port hash:</b> It may be less obvious
+that TCP ports need to be sanitized, but an unusual TCP port used by a
+high-value bridge might still stand out and provide yet another way to
+locate and block the bridge.
+Therefore, each non-zero TCP port is replaced with a number in the range
+from 49152 to 65535 that is the result of
+<tt>H(port | bridge identity | secret)[:2] / 2^2 + 2^15 + 2^14</tt>
+written as decimal number.
+The input <tt>port</tt> is the 2-byte long binary representation of the
+TCP port.
+The <tt>bridge identity</tt> is the 20-byte long binary representation of
+the bridge's long-term identity fingerprint.
+The <tt>secret</tt> is a 33-byte long secure random string that changes
+once per month for all descriptors and statuses published in that month.
+<tt>H()</tt> is SHA-256.
+The <tt>[:2]</tt> operator means that we pick the 2 most significant bytes
+of the result.
+The subsequent integer division and additions make sure that sanitized
+ports are in the range from 49152 to 65535 which is reserved for private
+services.
+All operations assume inputs to be in network byte order.
+TCP ports that are 0 in the original descriptor are left unchanged.</li>
<li><b>Replace contact information:</b> If there is contact information in
a descriptor, the contact line is changed to
<tt>somebody</tt>.</li>
@@ -468,7 +490,7 @@ descriptor including all signatures.</li>
</ol>
<h3 id="type-bridge-network-status" class="hover">Bridge Network Statuses
-<small><tt>@type bridge-network-status 1.0</tt></small>
+<small><tt>@type bridge-network-status 1.1</tt></small>
<a href="recent/bridge-descriptors/statuses/" class="btn btn-primary btn-xs">recent</a>
<a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a>
<a href="#type-bridge-network-status" class="hover">#</a>
@@ -480,10 +502,19 @@ statuses, but with only a <tt>published</tt> line in the header and
without any lines in the footer.
The bridge descriptor archive tarballs contain all bridge
descriptors of a given month, not just network statuses.
+The format has changed over time to accommodate changes to the sanitizing
+process, with earlier versions being:
</p>
+<ul>
+<li><font color="#666"><tt>@type bridge-network-status 1.0</tt> was the
+first version.</font></li>
+<li><tt>@type bridge-network-status 1.1</tt> introduced sanitized TCP
+ports.</li>
+</ul>
+
<h3 id="type-bridge-server-descriptor" class="hover">Bridge Server descriptors
-<small><tt>@type bridge-server-descriptor 1.1</tt></small>
+<small><tt>@type bridge-server-descriptor 1.2</tt></small>
<a href="recent/bridge-descriptors/server-descriptors/" class="btn btn-primary btn-xs">recent</a>
<a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a>
<a href="#type-bridge-server-descriptor" class="hover">#</a>
@@ -511,9 +542,12 @@ number of sanitized bridge extra-info descriptors was raised.
As a result, there may be sanitized bridge server descriptors with version
<tt>@type bridge-server-descriptor 1.0</tt> with and without those
lines.</font></li>
-<li><tt>@type bridge-server-descriptor 1.1</tt> added
+<li><font color="#666"><tt>@type bridge-server-descriptor 1.1</tt> added
<tt>master-key-ed25519</tt> lines and <tt>router-digest-sha256</tt> to
-server descriptors published by bridges using an Ed25519 master key.</li>
+server descriptors published by bridges using an Ed25519 master
+key.</font></li>
+<li><tt>@type bridge-server-descriptor 1.2</tt> introduced sanitized TCP
+ports.</li>
</ul>
<h3 id="type-bridge-extra-info" class="hover">Bridge Extra-info Descriptors
More information about the tor-commits
mailing list