[tor-commits] [metrics-tasks/master] Add option to write .sql files by desc publication hour (#8462).

karsten at torproject.org
Mon May 6 14:59:54 UTC 2013


commit eb7493e40144b11149410e83fe721487e6a9ac97
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon May 6 12:14:32 2013 +0200

    Add option to write .sql files by desc publication hour (#8462).
---
 task-8462/README           |    7 ++-
 task-8462/run-userstats.sh |    2 +
 task-8462/src/Parse.java   |  100 +++++++++++++++++++++++++------------------
 3 files changed, 64 insertions(+), 45 deletions(-)

diff --git a/task-8462/README b/task-8462/README
index 9fd3976..0f34661 100644
--- a/task-8462/README
+++ b/task-8462/README
@@ -50,9 +50,10 @@ decompressing (but not extracting them) using bunzip2:
   - in/relay-descriptors/    (consensuses-*.tar and extra-infos-*.tar)
   - in/bridge-descriptors/   (bridge-descriptors-*.tar)
 
-Also comment out the rsync command in run-userstats.sh.  Then run
-run-userstats.sh.  After initializing the database, clean up the in/ and
-out/ directory and don't forget to put back the rsync command in
+Also comment out the rsync command in run-userstats.sh and add a
+--stats-date parameter to the java line (see commented out line).  Then
+run run-userstats.sh.  After initializing the database, clean up the in/
+and out/ directory and don't forget to put back the rsync command in
 run-userstats.sh.  It may be easier to set up separate instances of this
 tool for initializing the database and for running it on a regular basis.
 
diff --git a/task-8462/run-userstats.sh b/task-8462/run-userstats.sh
index 9a759ee..73f9e3b 100644
--- a/task-8462/run-userstats.sh
+++ b/task-8462/run-userstats.sh
@@ -6,6 +6,8 @@ rsync -arz --delete --exclude 'relay-descriptors/votes' metrics.torproject.org::
 echo `date` "Parsing descriptors."
 javac -d bin/ -cp lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar src/Parse.java
 java -cp bin/:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar Parse
+#java -cp bin/:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar Parse --stats-date
+#java -cp bin/:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar:lib/descriptor.jar Parse --desc-hour
 for i in $(ls out/*.sql)
 do
   echo `date` "Importing $i."
diff --git a/task-8462/src/Parse.java b/task-8462/src/Parse.java
index fdf9bf2..1b81d96 100644
--- a/task-8462/src/Parse.java
+++ b/task-8462/src/Parse.java
@@ -6,12 +6,10 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.text.SimpleDateFormat;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.SortedMap;
-import java.util.Stack;
 import java.util.TimeZone;
 import java.util.TreeMap;
 
@@ -28,28 +26,28 @@ import org.torproject.descriptor.RelayNetworkStatusConsensus;
 public class Parse {
 
   public static void main(String[] args) throws Exception {
-    detectBulkOrRegular();
+    parseArgs(args);
     parseRelayDescriptors();
     parseBridgeDescriptors();
     closeOutputFiles();
   }
 
-  private static boolean isBulkImport = false;
-  private static void detectBulkOrRegular() {
-    Stack<File> inFiles = new Stack<File>();
-    inFiles.add(new File("in"));
-    while (!inFiles.isEmpty()) {
-      File file = inFiles.pop();
-      if (file.isDirectory()) {
-        inFiles.addAll(Arrays.asList(file.listFiles()));
-      } else if (file.getName().endsWith(".tar") ||
-          file.getName().endsWith(".tar.bz2")) {
-        isBulkImport = true;
-        break;
-      } else {
-        isBulkImport = false;
-        break;
-      }
+  private static boolean writeToSingleFile = true;
+  private static boolean byStatsDateNotByDescHour = false;
+
+  private static void parseArgs(String[] args) {
+    if (args.length == 0) {
+      writeToSingleFile = true;
+    } else if (args.length == 1 && args[0].equals("--stats-date")) {
+      writeToSingleFile = false;
+      byStatsDateNotByDescHour = true;
+    } else if (args.length == 1 && args[0].equals("--desc-hour")) {
+      writeToSingleFile = false;
+      byStatsDateNotByDescHour = false;
+    } else {
+      System.err.println("Usage: java " + Parse.class.getName()
+          + " [ --stats-date | --desc-hour ]");
+      System.exit(1);
     }
   }
 
@@ -126,10 +124,12 @@ public class Parse {
         double reqs = ((double) e.getValue()) - 4.0;
         sum += reqs;
         writeOutputLine(fingerprint, "relay", "responses", country,
-            "", "", fromMillis, toMillis, reqs * intervalFraction);
+            "", "", fromMillis, toMillis, reqs * intervalFraction,
+            publishedMillis);
       }
       writeOutputLine(fingerprint, "relay", "responses", "", "",
-          "", fromMillis, toMillis, sum * intervalFraction);
+          "", fromMillis, toMillis, sum * intervalFraction,
+          publishedMillis);
     }
   }
 
@@ -171,7 +171,7 @@ public class Parse {
           break;
         }
         writeOutputLine(fingerprint, "relay", "bytes", "", "", "",
-            fromMillis, toMillis, writtenBytes);
+            fromMillis, toMillis, writtenBytes, publishedMillis);
       }
     }
   }
@@ -186,7 +186,7 @@ public class Parse {
           toUpperCase();
       if (statusEntry.getFlags().contains("Running")) {
         writeOutputLine(fingerprint, "relay", "status", "", "", "",
-            fromMillis, toMillis, 0.0);
+            fromMillis, toMillis, 0.0, fromMillis);
       }
     }
   }
@@ -262,14 +262,17 @@ public class Parse {
         double intervalFraction = ((double) (toMillis - fromMillis))
             / ((double) dirreqStatsIntervalLengthMillis);
         writeOutputLine(fingerprint, "bridge", "responses", "", "",
-            "", fromMillis, toMillis, resp * intervalFraction);
+            "", fromMillis, toMillis, resp * intervalFraction,
+            publishedMillis);
         parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
-            dirreqStatsIntervalLengthMillis, "country", bridgeIps);
+            dirreqStatsIntervalLengthMillis, "country", bridgeIps,
+            publishedMillis);
         parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
             dirreqStatsIntervalLengthMillis, "transport",
-            bridgeIpTransports);
+            bridgeIpTransports, publishedMillis);
         parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp,
-            dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions);
+            dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions,
+            publishedMillis);
       }
     }
   }
@@ -277,7 +280,8 @@ public class Parse {
   private static void parseBridgeRespByCategory(String fingerprint,
       long fromMillis, long toMillis, double resp,
       long dirreqStatsIntervalLengthMillis, String category,
-      SortedMap<String, Integer> frequencies) throws IOException {
+      SortedMap<String, Integer> frequencies, long publishedMillis)
+      throws IOException {
     double total = 0.0;
     SortedMap<String, Double> frequenciesCopy =
         new TreeMap<String, Double>();
@@ -310,13 +314,13 @@ public class Parse {
       double val = resp * intervalFraction * e.getValue() / total;
       if (category.equals("country")) {
         writeOutputLine(fingerprint, "bridge", "responses", e.getKey(),
-            "", "", fromMillis, toMillis, val);
+            "", "", fromMillis, toMillis, val, publishedMillis);
       } else if (category.equals("transport")) {
         writeOutputLine(fingerprint, "bridge", "responses", "",
-            e.getKey(), "", fromMillis, toMillis, val);
+            e.getKey(), "", fromMillis, toMillis, val, publishedMillis);
       } else if (category.equals("version")) {
         writeOutputLine(fingerprint, "bridge", "responses", "", "",
-            e.getKey(), fromMillis, toMillis, val);
+            e.getKey(), fromMillis, toMillis, val, publishedMillis);
       }
     }
   }
@@ -359,7 +363,7 @@ public class Parse {
           break;
         }
         writeOutputLine(fingerprint, "bridge", "bytes", "",
-            "", "", fromMillis, toMillis, writtenBytes);
+            "", "", fromMillis, toMillis, writtenBytes, publishedMillis);
       }
     }
   }
@@ -370,8 +374,9 @@ public class Parse {
         > ONE_HOUR_MILLIS / 2) {
       return;
     }
-    long fromMillis = (status.getPublishedMillis()
-        / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS;
+    long publishedMillis = status.getPublishedMillis();
+    long fromMillis = (publishedMillis / ONE_HOUR_MILLIS)
+        * ONE_HOUR_MILLIS;
     long toMillis = fromMillis + ONE_HOUR_MILLIS;
     for (NetworkStatusEntry statusEntry :
         status.getStatusEntries().values()) {
@@ -379,7 +384,7 @@ public class Parse {
           toUpperCase();
       if (statusEntry.getFlags().contains("Running")) {
         writeOutputLine(fingerprint, "bridge", "status", "", "", "",
-            fromMillis, toMillis, 0.0);
+            fromMillis, toMillis, 0.0, publishedMillis);
       }
     }
   }
@@ -388,13 +393,14 @@ public class Parse {
       new HashMap<String, BufferedWriter>();
   private static void writeOutputLine(String fingerprint, String node,
       String metric, String country, String transport, String version,
-      long fromMillis, long toMillis, double val) throws IOException {
+      long fromMillis, long toMillis, double val, long publishedMillis)
+      throws IOException {
     if (fromMillis > toMillis) {
       return;
     }
     String fromDateTime = formatDateTimeMillis(fromMillis);
     String toDateTime = formatDateTimeMillis(toMillis);
-    BufferedWriter bw = getOutputFile(fromDateTime);
+    BufferedWriter bw = getOutputFile(fromDateTime, publishedMillis);
     bw.write(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.1f\n",
         fingerprint, node, metric, country, transport, version,
         fromDateTime, toDateTime, val));
@@ -410,11 +416,21 @@ public class Parse {
     return dateTimeFormat.format(millis);
   }
 
-  private static BufferedWriter getOutputFile(String fromDateTime)
-      throws IOException {
-    String outputFileName = isBulkImport
-        ? "out/userstats-" + fromDateTime.substring(0, 10) + ".sql"
-        : "out/userstats.sql";
+  private static BufferedWriter getOutputFile(String fromDateTime,
+      long publishedMillis) throws IOException {
+    String outputFileName;
+    if (writeToSingleFile) {
+      outputFileName = "out/userstats.sql";
+    } else if (byStatsDateNotByDescHour) {
+      outputFileName = "out/userstats-" + fromDateTime.substring(0, 10)
+          + ".sql";
+    } else {
+      String publishedHourDateTime = formatDateTimeMillis(
+          (publishedMillis / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS);
+      outputFileName = "out/userstats-"
+          + publishedHourDateTime.substring(0, 10) + "-"
+          + publishedHourDateTime.substring(11, 13) + ".sql";
+    }
     BufferedWriter bw = openOutputFiles.get(outputFileName);
     if (bw == null) {
       bw = openOutputFile(outputFileName);


