[tor-commits] [metrics-tasks/master] Take database dates from directory names (#6471).

karsten at torproject.org karsten at torproject.org
Tue Nov 6 15:33:22 UTC 2012


commit d200e5911e850748c87c5f160519b9c61b95adbd
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Nov 5 23:10:53 2012 -0500

    Take database dates from directory names (#6471).
    
    Last modified times of .csv files do not always match publication dates.
---
 .../org/torproject/task6471/ConvertExample.java    |    4 +-
 .../org/torproject/task6471/DatabaseImporter.java  |    8 +++---
 .../torproject/task6471/DatabaseImporterImpl.java  |   21 ++++++++++++++-----
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/task-6471/java/src/org/torproject/task6471/ConvertExample.java b/task-6471/java/src/org/torproject/task6471/ConvertExample.java
index c96047c..c4ef4c6 100644
--- a/task-6471/java/src/org/torproject/task6471/ConvertExample.java
+++ b/task-6471/java/src/org/torproject/task6471/ConvertExample.java
@@ -11,7 +11,7 @@ public class ConvertExample {
 
     System.out.print("Saving combined ASN database to disk... ");
     startMillis = endMillis;
-    combinedDatabase.saveCombinedDatabases("asn-2012-07-2012-10.csv");
+    combinedDatabase.saveCombinedDatabases("asn-2005-09-2012-11.csv");
     endMillis = System.currentTimeMillis();
     System.out.println((endMillis - startMillis) + " millis.");
     startMillis = endMillis;
@@ -25,7 +25,7 @@ public class ConvertExample {
 
     System.out.print("Saving combined city database to disk... ");
     startMillis = endMillis;
-    combinedDatabase.saveCombinedDatabases("city-2012-07-2012-10.csv");
+    combinedDatabase.saveCombinedDatabases("city-2009-06-2012-10.csv");
     endMillis = System.currentTimeMillis();
     System.out.println((endMillis - startMillis) + " millis.");
     startMillis = endMillis;
diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java
index 330ecea..0d4ac92 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java
@@ -32,8 +32,8 @@ public interface DatabaseImporter extends Database {
    * address ranges and block numbers, and GeoLiteCity-Location.csv
    * contains country codes for block numbers, among other things.  Only
    * the range start and end addresses and the country code are imported.
-   * The database date is taken from the file modification time of the
-   * GeoLiteCity-Blocks.csv file.
+   * The database date is taken from the directory name containing blocks
+   * and location file.
    *
    * A typical entry from the GeoLiteCity-Blocks.csv file is:
    *   ""3758093312","3758094335","108612""
@@ -46,8 +46,8 @@ public interface DatabaseImporter extends Database {
   /**
    * Import the contents of one or more Maxmind GeoIPASNum2.csv databases.
    * Only the range start and end addresses and the AS number are
-   * imported.  The database date is taken from the file modification
-   * time.
+   * imported.  The database date is taken from the directory name which
+   * is expected to be yyyy-mm/, e.g., 2012-11/GeoIPASNum2.csv.
    *
    * A typical entry from such a database file is:
    *   "3758063616,3758079999,"AS9381 Wharf T&T Ltd.""
diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
index 407d8ca..4d15827 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
@@ -170,10 +170,12 @@ public class DatabaseImporterImpl extends DatabaseImpl
 
   boolean importGeoLiteCityBlocksAndLocationFiles(File blocksFile,
       File locationFile) {
-    long lastModifiedMillis = blocksFile.lastModified();
     String databaseFileName = blocksFile.getName() + "+"
         + locationFile.getName();
-    int databaseDate = (int) (lastModifiedMillis / 86400000);
+    String databaseDateString =
+        blocksFile.getParentFile().getName().substring(
+        "GeoLiteCity_".length());
+    int databaseDate = convertDateStringToNumber(databaseDateString);
     this.addDatabase(databaseFileName, databaseDate);
     try {
       /* Parse location file first and remember country codes for given
@@ -262,17 +264,24 @@ public class DatabaseImporterImpl extends DatabaseImpl
   }
 
   private boolean importGeoIPASNum2File(File file) {
-    long lastModifiedMillis = file.lastModified();
     String databaseFileName = file.getName();
-    int databaseDate = (int) (lastModifiedMillis / 86400000);
+    String databaseDateString =
+        file.getParentFile().getName().replaceAll("-", "") + "01";
+    int databaseDate = convertDateStringToNumber(databaseDateString);
     this.addDatabase(databaseFileName, databaseDate);
     try {
       BufferedReader br = new BufferedReader(new FileReader(file));
       String line;
       while ((line = br.readLine()) != null) {
         String[] parts = line.split(",");
-        long startAddress = Long.parseLong(parts[0]),
-            endAddress = Long.parseLong(parts[1]);
+        try {
+          Long.parseLong(parts[0].trim());
+          Long.parseLong(parts[1].trim());
+        } catch (NumberFormatException e) {
+          System.err.println(file.getAbsolutePath() + " '" + line + "'");
+        }
+        long startAddress = Long.parseLong(parts[0].trim()),
+            endAddress = Long.parseLong(parts[1].trim());
         String code = parts[2].split(" ")[0].replaceAll("\"", "");
         if (!code.startsWith("AS")) {
           /* Don't import illegal range. */





More information about the tor-commits mailing list