[tor-commits] [metrics-tasks/master] Add option to write .sql files by desc publication hour (#8462).
karsten at torproject.org
karsten at torproject.org
Mon May 6 14:59:54 UTC 2013
commit eb7493e40144b11149410e83fe721487e6a9ac97
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon May 6 12:14:32 2013 +0200
Add option to write .sql files by desc publication hour (#8462).
---
task-8462/README | 7 ++-
task-8462/run-userstats.sh | 2 +
task-8462/src/Parse.java | 100 +++++++++++++++++++++++++------------------
3 files changed, 64 insertions(+), 45 deletions(-)
diff --git a/task-8462/README b/task-8462/README
index 9fd3976..0f34661 100644
--- a/task-8462/README
+++ b/task-8462/README
@@ -50,9 +50,10 @@ decompressing (but not extracting them) using bunzip2:
- in/relay-descriptors/ (consensuses-*.tar and extra-infos-*.tar)
- in/bridge-descriptors/ (bridge-descriptors-*.tar)
-Also comment out the rsync command in run-userstats.sh. Then run
-run-userstats.sh. After initializing the database, clean up the in/ and
-out/ directory and don't forget to put back the rsync command in
+Also comment out the rsync command in run-userstats.sh and add a
+--stats-date parameter to the java line (see commented out line). Then
+run run-userstats.sh. After initializing the database, clean up the in/
+and out/ directories and don't forget to put back the rsync command in
run-userstats.sh. It may be easier to set up separate instances of this
tool for initializing the database and for running it on a regular basis.
diff --git a/task-8462/run-userstats.sh b/task-8462/run-userstats.sh
index 9a759ee..73f9e3b 100644
--- a/task-8462/run-userstats.sh
+++ b/task-8462/run-userstats.sh
@@ -6,6 +6,8 @@ rsync -arz --delete --exclude 'relay-descriptors/votes' metrics.torproject.org::
echo `date` "Parsing descriptors."
javac -d bin/ -cp lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar src/Parse.java
java -cp bin/:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar Parse
+#java -cp bin/:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar Parse --stats-date
+#java -cp bin/:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar Parse --desc-hour
for i in $(ls out/*.sql)
do
echo `date` "Importing $i."
diff --git a/task-8462/src/Parse.java b/task-8462/src/Parse.java
index fdf9bf2..1b81d96 100644
--- a/task-8462/src/Parse.java
+++ b/task-8462/src/Parse.java
@@ -6,12 +6,10 @@ import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
-import java.util.Stack;
import java.util.TimeZone;
import java.util.TreeMap;
@@ -28,28 +26,28 @@ import org.torproject.descriptor.RelayNetworkStatusConsensus;
public class Parse {
public static void main(String[] args) throws Exception {
- detectBulkOrRegular();
+ parseArgs(args);
parseRelayDescriptors();
parseBridgeDescriptors();
closeOutputFiles();
}
- private static boolean isBulkImport = false;
- private static void detectBulkOrRegular() {
- Stack<File> inFiles = new Stack<File>();
- inFiles.add(new File("in"));
- while (!inFiles.isEmpty()) {
- File file = inFiles.pop();
- if (file.isDirectory()) {
- inFiles.addAll(Arrays.asList(file.listFiles()));
- } else if (file.getName().endsWith(".tar") ||
- file.getName().endsWith(".tar.bz2")) {
- isBulkImport = true;
- break;
- } else {
- isBulkImport = false;
- break;
- }
+ private static boolean writeToSingleFile = true;
+ private static boolean byStatsDateNotByDescHour = false;
+
+ private static void parseArgs(String[] args) {
+ if (args.length == 0) {
+ writeToSingleFile = true;
+ } else if (args.length == 1 && args[0].equals("--stats-date")) {
+ writeToSingleFile = false;
+ byStatsDateNotByDescHour = true;
+ } else if (args.length == 1 && args[0].equals("--desc-hour")) {
+ writeToSingleFile = false;
+ byStatsDateNotByDescHour = false;
+ } else {
+ System.err.println("Usage: java " + Parse.class.getName()
+ + " [ --stats-date | --desc-hour ]");
+ System.exit(1);
}
}
@@ -126,10 +124,12 @@ public class Parse {
double reqs = ((double) e.getValue()) - 4.0;
sum += reqs;
writeOutputLine(fingerprint, "relay", "responses", country,
- "", "", fromMillis, toMillis, reqs * intervalFraction);
+ "", "", fromMillis, toMillis, reqs * intervalFraction,
+ publishedMillis);
}
writeOutputLine(fingerprint, "relay", "responses", "", "",
- "", fromMillis, toMillis, sum * intervalFraction);
+ "", fromMillis, toMillis, sum * intervalFraction,
+ publishedMillis);
}
}
@@ -171,7 +171,7 @@ public class Parse {
break;
}
writeOutputLine(fingerprint, "relay", "bytes", "", "", "",
- fromMillis, toMillis, writtenBytes);
+ fromMillis, toMillis, writtenBytes, publishedMillis);
}
}
}
@@ -186,7 +186,7 @@ public class Parse {
toUpperCase();
if (statusEntry.getFlags().contains("Running")) {
writeOutputLine(fingerprint, "relay", "status", "", "", "",
- fromMillis, toMillis, 0.0);
+ fromMillis, toMillis, 0.0, fromMillis);
}
}
}
@@ -262,14 +262,17 @@ public class Parse {
double intervalFraction = ((double) (toMillis - fromMillis))
/ ((double) dirreqStatsIntervalLengthMillis);
writeOutputLine(fingerprint, "bridge", "responses", "", "",
- "", fromMillis, toMillis, resp * intervalFraction);
+ "", fromMillis, toMillis, resp * intervalFraction,
+ publishedMillis);
parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
- dirreqStatsIntervalLengthMillis, "country", bridgeIps);
+ dirreqStatsIntervalLengthMillis, "country", bridgeIps,
+ publishedMillis);
parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
dirreqStatsIntervalLengthMillis, "transport",
- bridgeIpTransports);
+ bridgeIpTransports, publishedMillis);
parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
- dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions);
+ dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions,
+ publishedMillis);
}
}
}
@@ -277,7 +280,8 @@ public class Parse {
private static void parseBridgeRespByCategory(String fingerprint,
long fromMillis, long toMillis, double resp,
long dirreqStatsIntervalLengthMillis, String category,
- SortedMap<String, Integer> frequencies) throws IOException {
+ SortedMap<String, Integer> frequencies, long publishedMillis)
+ throws IOException {
double total = 0.0;
SortedMap<String, Double> frequenciesCopy =
new TreeMap<String, Double>();
@@ -310,13 +314,13 @@ public class Parse {
double val = resp * intervalFraction * e.getValue() / total;
if (category.equals("country")) {
writeOutputLine(fingerprint, "bridge", "responses", e.getKey(),
- "", "", fromMillis, toMillis, val);
+ "", "", fromMillis, toMillis, val, publishedMillis);
} else if (category.equals("transport")) {
writeOutputLine(fingerprint, "bridge", "responses", "",
- e.getKey(), "", fromMillis, toMillis, val);
+ e.getKey(), "", fromMillis, toMillis, val, publishedMillis);
} else if (category.equals("version")) {
writeOutputLine(fingerprint, "bridge", "responses", "", "",
- e.getKey(), fromMillis, toMillis, val);
+ e.getKey(), fromMillis, toMillis, val, publishedMillis);
}
}
}
@@ -359,7 +363,7 @@ public class Parse {
break;
}
writeOutputLine(fingerprint, "bridge", "bytes", "",
- "", "", fromMillis, toMillis, writtenBytes);
+ "", "", fromMillis, toMillis, writtenBytes, publishedMillis);
}
}
}
@@ -370,8 +374,9 @@ public class Parse {
> ONE_HOUR_MILLIS / 2) {
return;
}
- long fromMillis = (status.getPublishedMillis()
- / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS;
+ long publishedMillis = status.getPublishedMillis();
+ long fromMillis = (publishedMillis / ONE_HOUR_MILLIS)
+ * ONE_HOUR_MILLIS;
long toMillis = fromMillis + ONE_HOUR_MILLIS;
for (NetworkStatusEntry statusEntry :
status.getStatusEntries().values()) {
@@ -379,7 +384,7 @@ public class Parse {
toUpperCase();
if (statusEntry.getFlags().contains("Running")) {
writeOutputLine(fingerprint, "bridge", "status", "", "", "",
- fromMillis, toMillis, 0.0);
+ fromMillis, toMillis, 0.0, publishedMillis);
}
}
}
@@ -388,13 +393,14 @@ public class Parse {
new HashMap<String, BufferedWriter>();
private static void writeOutputLine(String fingerprint, String node,
String metric, String country, String transport, String version,
- long fromMillis, long toMillis, double val) throws IOException {
+ long fromMillis, long toMillis, double val, long publishedMillis)
+ throws IOException {
if (fromMillis > toMillis) {
return;
}
String fromDateTime = formatDateTimeMillis(fromMillis);
String toDateTime = formatDateTimeMillis(toMillis);
- BufferedWriter bw = getOutputFile(fromDateTime);
+ BufferedWriter bw = getOutputFile(fromDateTime, publishedMillis);
bw.write(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.1f\n",
fingerprint, node, metric, country, transport, version,
fromDateTime, toDateTime, val));
@@ -410,11 +416,21 @@ public class Parse {
return dateTimeFormat.format(millis);
}
- private static BufferedWriter getOutputFile(String fromDateTime)
- throws IOException {
- String outputFileName = isBulkImport
- ? "out/userstats-" + fromDateTime.substring(0, 10) + ".sql"
- : "out/userstats.sql";
+ private static BufferedWriter getOutputFile(String fromDateTime,
+ long publishedMillis) throws IOException {
+ String outputFileName;
+ if (writeToSingleFile) {
+ outputFileName = "out/userstats.sql";
+ } else if (byStatsDateNotByDescHour) {
+ outputFileName = "out/userstats-" + fromDateTime.substring(0, 10)
+ + ".sql";
+ } else {
+ String publishedHourDateTime = formatDateTimeMillis(
+ (publishedMillis / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS);
+ outputFileName = "out/userstats-"
+ + publishedHourDateTime.substring(0, 10) + "-"
+ + publishedHourDateTime.substring(11, 13) + ".sql";
+ }
BufferedWriter bw = openOutputFiles.get(outputFileName);
if (bw == null) {
bw = openOutputFile(outputFileName);
More information about the tor-commits
mailing list