[or-cvs] [ernie/master] Handle non-ASCII char craziness in descriptors.
karsten at torproject.org
karsten at torproject.org
Thu Feb 25 13:53:19 UTC 2010
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu, 25 Feb 2010 14:52:38 +0100
Subject: Handle non-ASCII char craziness in descriptors.
Commit: 498e096add421fe5139388c71a41bf786db86062
---
src/ArchiveWriter.java | 80 ++++++++++++++++------------------
src/CachedRelayDescriptorReader.java | 79 ++++++++++++++++++++-------------
src/RelayDescriptorDownloader.java | 80 ++++++++++++++++++++++++++--------
3 files changed, 146 insertions(+), 93 deletions(-)
diff --git a/src/ArchiveWriter.java b/src/ArchiveWriter.java
index 996db2e..7fad326 100644
--- a/src/ArchiveWriter.java
+++ b/src/ArchiveWriter.java
@@ -153,8 +153,9 @@ public class ArchiveWriter {
}
}
}
- public void store(BufferedReader br) throws IOException,
- ParseException {
+ public void store(byte[] data) throws IOException, ParseException {
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
String line = br.readLine();
if (line == null) {
this.logger.warning("Someone gave us an empty file for storing!");
@@ -220,23 +221,24 @@ public class ArchiveWriter {
String publishedTime = line.split(" ")[4] + " "
+ line.split(" ")[5];
long published = parseFormat.parse(publishedTime).getTime();
- String digest = Hex.encodeHexString(Base64.decodeBase64(
+ String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
line.split(" ")[3] + "=")).toLowerCase();
// TODO are 24 hours okay?
if (published + 24L * 60L * 60L * 1000L > now &&
!new File("directory-archive/server-descriptor/"
+ descriptorFormat.format(new Date(published))
- + digest.substring(0, 1) + "/" + digest.substring(1, 2)
- + "/" + digest).exists()) {
- if (!this.missingDescriptors.contains("server," + digest + ","
- + publishedTime)) {
+ + serverDesc.substring(0, 1) + "/"
+ + serverDesc.substring(1, 2)
+ + "/" + serverDesc).exists()) {
+ if (!this.missingDescriptors.contains("server," + serverDesc
+ + "," + publishedTime)) {
this.logger.fine("Adding server descriptor to missing list: "
- + "digest=" + digest
+ + "digest=" + serverDesc
+ ", filename=directory-archive/server-descriptor/"
+ descriptorFormat.format(new Date(published))
- + digest.substring(0, 1) + "/" + digest.substring(1, 2)
- + "/" + digest);
- this.missingDescriptors.add("server," + digest + ","
+ + serverDesc.substring(0, 1) + "/"
+ + serverDesc.substring(1, 2) + "/" + serverDesc);
+ this.missingDescriptors.add("server," + serverDesc + ","
+ publishedTime);
this.archiveWriterParseHistoryModified = true;
}
@@ -254,15 +256,10 @@ public class ArchiveWriter {
+ validAfterTime + ", filename=directory-archive/consensus/"
+ printFormat.format(new Date(validAfter)) + "-consensus");
consensusFile.getParentFile().mkdirs();
- BufferedReader br2 = new BufferedReader(new StringReader(
- sb.toString()));
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- consensusFile));
- while ((line = br2.readLine()) != null) {
- bw.write(line + "\n");
- }
- bw.close();
- br2.close();
+ BufferedOutputStream bos = new BufferedOutputStream(
+ new FileOutputStream(consensusFile));
+ bos.write(data, 0, data.length);
+ bos.close();
this.logger.fine("Removing consensus from missing list: "
+ "valid-after=" + validAfterTime
+ ", filename=directory-archive/consensus/"
@@ -288,15 +285,10 @@ public class ArchiveWriter {
+ printFormat.format(new Date(validAfter)) + "-vote-"
+ fingerprint);
voteFile.getParentFile().mkdirs();
- BufferedReader br2 = new BufferedReader(new StringReader(
- sb.toString()));
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- voteFile));
- while ((line = br2.readLine()) != null) {
- bw.write(line + "\n");
- }
- bw.close();
- br2.close();
+ BufferedOutputStream bos = new BufferedOutputStream(
+ new FileOutputStream(voteFile));
+ bos.write(data, 0, data.length);
+ bos.close();
this.logger.fine("Removing vote from missing list: "
+ "fingerprint=" + fingerprint + ", valid-after="
+ printFormat.format(new Date(validAfter))
@@ -320,9 +312,7 @@ public class ArchiveWriter {
boolean isServerDescriptor = line.startsWith("router ");
String publishedTime = null;
long published = -1L;
- String digest = null;
while ((line = br.readLine()) != null) {
- sb.append(line + "\n");
if (line.startsWith("published ")) {
publishedTime = line.substring("published ".length());
published = parseFormat.parse(publishedTime).getTime();
@@ -353,10 +343,21 @@ public class ArchiveWriter {
this.archiveWriterParseHistoryModified = true;
}
}
- } else if (line.equals("router-signature")) {
- digest = DigestUtils.shaHex(sb.toString()).toLowerCase();
}
}
+ String ascii = new String(data, "US-ASCII");
+ String startToken = isServerDescriptor ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start < 0 || sig < 0 || sig < start) {
+ this.logger.info("Cannot determine descriptor digest! Skipping.");
+ return;
+ }
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ String digest = DigestUtils.shaHex(forDigest);
SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
File descriptorFile = new File("directory-archive/"
@@ -373,15 +374,10 @@ public class ArchiveWriter {
+ digest.substring(0, 1) + "/" + digest.substring(1, 2)
+ "/" + digest);
descriptorFile.getParentFile().mkdirs();
- BufferedReader br2 = new BufferedReader(new StringReader(
- sb.toString()));
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- descriptorFile));
- while ((line = br2.readLine()) != null) {
- bw.write(line + "\n");
- }
- bw.close();
- br2.close();
+ BufferedOutputStream bos = new BufferedOutputStream(
+ new FileOutputStream(descriptorFile));
+ bos.write(data, 0, data.length);
+ bos.close();
this.logger.fine("Removing " + (isServerDescriptor ?
"server descriptor" : "extra-info descriptor")
+ " from missing list: digest=" + digest
diff --git a/src/CachedRelayDescriptorReader.java b/src/CachedRelayDescriptorReader.java
index d865b5c..113f738 100644
--- a/src/CachedRelayDescriptorReader.java
+++ b/src/CachedRelayDescriptorReader.java
@@ -22,10 +22,22 @@ public class CachedRelayDescriptorReader {
}
for (File f : cachedDescDir.listFiles()) {
try {
+ // descriptors may contain non-ASCII chars; read as bytes to
+ // determine digests
+ BufferedInputStream bis =
+ new BufferedInputStream(new FileInputStream(f));
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
if (f.getName().equals("cached-consensus")) {
BufferedReader br = new BufferedReader(new FileReader(f));
if (aw != null) {
- aw.store(br);
+ aw.store(allData);
}
br.close();
br = new BufferedReader(new FileReader(f));
@@ -35,41 +47,44 @@ public class CachedRelayDescriptorReader {
br.close();
} else if (f.getName().startsWith("cached-descriptors") ||
f.getName().startsWith("cached-extrainfo")) {
- BufferedReader br = new BufferedReader(new FileReader(f));
- String line = null;
- StringBuilder sb = new StringBuilder();
- while ((line = br.readLine()) != null || sb != null) {
- if (line == null && sb.length() < 1) {
- continue; // empty file?
+ String ascii = new String(allData, "US-ASCII");
+ int start = -1, sig = -1, end = -1;
+ String startToken =
+ f.getName().startsWith("cached-descriptors") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start)
+ + sigToken.length();
+ if (sig < 0) {
+ break;
}
- if (line == null || line.startsWith("router ") ||
- line.startsWith("extra-info ")) {
- if (sb.length() > 0) {
- BufferedReader storeBr = new BufferedReader(
- new StringReader(sb.toString()));
- if (aw != null) {
- aw.store(storeBr);
- }
- storeBr.close();
- storeBr = new BufferedReader(
- new StringReader(sb.toString()));
- if (rdp != null) {
- rdp.parse(storeBr);
- }
- storeBr.close();
- }
- if (line == null) {
- sb = null;
- break;
- } else {
- sb = new StringBuilder();
- }
+ end = ascii.indexOf(endToken, sig)
+ + endToken.length();
+ if (end < 0) {
+ break;
}
- if (!line.startsWith("@")) {
- sb.append(line + "\n");
+ String desc = ascii.substring(start, end);
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(allData, start, forDigest, 0, sig - start);
+ String digest = DigestUtils.shaHex(forDigest);
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ if (aw != null) {
+ aw.store(descBytes);
+ }
+ if (rdp != null) {
+ BufferedReader storeBr = new BufferedReader(
+ new StringReader(desc));
+ rdp.parse(storeBr);
+ storeBr.close();
}
}
- br.close();
logger.info("Finished reading cacheddesc/ directory.");
}
} catch (IOException e) {
diff --git a/src/RelayDescriptorDownloader.java b/src/RelayDescriptorDownloader.java
index b0f270d..c9e5ceb 100644
--- a/src/RelayDescriptorDownloader.java
+++ b/src/RelayDescriptorDownloader.java
@@ -3,6 +3,7 @@ import java.net.*;
import java.text.*;
import java.util.*;
import java.util.logging.*;
+import org.apache.commons.codec.digest.*;
/**
* Download the current consensus and relevant extra-info descriptors and
@@ -33,9 +34,12 @@ public class RelayDescriptorDownloader {
urls.addAll(aw.getMissingDescriptorUrls());
}
urls.removeAll(downloaded);
+ SortedSet<String> sortedAuthorities =
+ new TreeSet<String>(remainingAuthorities);
SortedSet<String> sortedUrls = new TreeSet<String>(urls);
- while (!remainingAuthorities.isEmpty() && !sortedUrls.isEmpty()) {
- String authority = remainingAuthorities.get(0);
+ SortedSet<String> retryUrls = new TreeSet<String>();
+ while (!sortedAuthorities.isEmpty() && !sortedUrls.isEmpty()) {
+ String authority = sortedAuthorities.first();
String url = sortedUrls.first();
try {
URL u = new URL("http://" + authority + url);
@@ -49,35 +53,73 @@ public class RelayDescriptorDownloader {
if (response == 200) {
BufferedInputStream in = new BufferedInputStream(
huc.getInputStream());
- StringBuilder sb = new StringBuilder();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
int len;
byte[] data = new byte[1024];
while ((len = in.read(data, 0, 1024)) >= 0) {
- sb.append(new String(data, 0, len));
+ // we need to write the result to a byte array in order
+ // to get a sane digest; otherwise, descriptors with
+ // non-ASCII chars lead to different digests.
+ baos.write(data, 0, len);
}
in.close();
- String result = sb.toString();
- if (rdp != null) {
- BufferedReader br = new BufferedReader(new StringReader(
- result));
- rdp.parse(br);
- br.close();
+ String digest = null;
+ byte[] allData = baos.toByteArray();
+ int beforeSig = new String(allData).indexOf(
+ "\nrouter-signature\n")
+ + "\nrouter-signature\n".length();
+ byte[] noSig = new byte[beforeSig];
+ System.arraycopy(allData, 0, noSig, 0, beforeSig);
+ digest = DigestUtils.shaHex(noSig);
+ // TODO UTF-8 may be wrong, but we don't care about the fields
+ // containing non-ASCII
+ String result = new String(allData, "UTF-8");
+ boolean verified = false;
+ if (url.contains("/tor/server/d/") ||
+ url.contains("/tor/extra/d/")) {
+ if (url.endsWith(digest)) {
+ verified = true;
+ } else {
+ logger.info("Downloaded descriptor digest (" + digest
+ + " doesn't match what we asked for (" + url + ")! "
+ + "Retrying.");
+ retryUrls.add(url);
+ }
+ } else {
+ verified = true;
+ // TODO verify downloaded consensuses and votes, too
}
- if (aw != null) {
- BufferedReader br = new BufferedReader(new StringReader(
- result));
- try {
- aw.store(br);
- } catch (Exception e) {
- e.printStackTrace();
- //TODO find better way to handle this
+ if (verified) {
+ if (rdp != null) {
+ BufferedReader br = new BufferedReader(new StringReader(
+ result));
+ rdp.parse(br);
+ br.close();
+ }
+ if (aw != null) {
+ BufferedReader br = new BufferedReader(new StringReader(
+ result));
+ try {
+ aw.store(allData);
+ } catch (Exception e) {
+ e.printStackTrace();
+ //TODO find better way to handle this
+ }
+ br.close();
}
- br.close();
}
+ } else {
+ retryUrls.add(url);
}
sortedUrls.remove(url);
+ if (sortedUrls.isEmpty()) {
+ sortedAuthorities.remove(authority);
+ sortedUrls.addAll(retryUrls);
+ retryUrls.clear();
+ }
} catch (IOException e) {
remainingAuthorities.remove(authority);
+ sortedAuthorities.remove(authority);
if (!remainingAuthorities.isEmpty()) {
logger.log(Level.INFO, "Failed downloading from "
+ authority + "!", e);
--
1.6.5
More information about the tor-commits
mailing list