[tor-commits] [metrics-tasks/master] Take database dates from directory names (#6471).
karsten at torproject.org
karsten at torproject.org
Tue Nov 6 15:33:22 UTC 2012
commit d200e5911e850748c87c5f160519b9c61b95adbd
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon Nov 5 23:10:53 2012 -0500
Take database dates from directory names (#6471).
Last modified times of .csv files do not always match publication dates.
---
.../org/torproject/task6471/ConvertExample.java | 4 +-
.../org/torproject/task6471/DatabaseImporter.java | 8 +++---
.../torproject/task6471/DatabaseImporterImpl.java | 21 ++++++++++++++-----
3 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/task-6471/java/src/org/torproject/task6471/ConvertExample.java b/task-6471/java/src/org/torproject/task6471/ConvertExample.java
index c96047c..c4ef4c6 100644
--- a/task-6471/java/src/org/torproject/task6471/ConvertExample.java
+++ b/task-6471/java/src/org/torproject/task6471/ConvertExample.java
@@ -11,7 +11,7 @@ public class ConvertExample {
System.out.print("Saving combined ASN database to disk... ");
startMillis = endMillis;
- combinedDatabase.saveCombinedDatabases("asn-2012-07-2012-10.csv");
+ combinedDatabase.saveCombinedDatabases("asn-2005-09-2012-11.csv");
endMillis = System.currentTimeMillis();
System.out.println((endMillis - startMillis) + " millis.");
startMillis = endMillis;
@@ -25,7 +25,7 @@ public class ConvertExample {
System.out.print("Saving combined city database to disk... ");
startMillis = endMillis;
- combinedDatabase.saveCombinedDatabases("city-2012-07-2012-10.csv");
+ combinedDatabase.saveCombinedDatabases("city-2009-06-2012-10.csv");
endMillis = System.currentTimeMillis();
System.out.println((endMillis - startMillis) + " millis.");
startMillis = endMillis;
diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java
index 330ecea..0d4ac92 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java
@@ -32,8 +32,8 @@ public interface DatabaseImporter extends Database {
* address ranges and block numbers, and GeoLiteCity-Location.csv
* contains country codes for block numbers, among other things. Only
* the range start and end addresses and the country code are imported.
- * The database date is taken from the file modification time of the
- * GeoLiteCity-Blocks.csv file.
+ * The database date is taken from the directory name containing blocks
+ * and location file.
*
* A typical entry from the GeoLiteCity-Blocks.csv file is:
* ""3758093312","3758094335","108612""
@@ -46,8 +46,8 @@ public interface DatabaseImporter extends Database {
/**
* Import the contents of one or more Maxmind GeoIPASNum2.csv databases.
* Only the range start and end addresses and the AS number are
- * imported. The database date is taken from the file modification
- * time.
+ * imported. The database date is taken from the directory name which
+ * is expected to be yyyy-mm/, e.g., 2012-11/GeoIPASNum2.csv.
*
* A typical entry from such a database file is:
* "3758063616,3758079999,"AS9381 Wharf T&T Ltd.""
diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
index 407d8ca..4d15827 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
@@ -170,10 +170,12 @@ public class DatabaseImporterImpl extends DatabaseImpl
boolean importGeoLiteCityBlocksAndLocationFiles(File blocksFile,
File locationFile) {
- long lastModifiedMillis = blocksFile.lastModified();
String databaseFileName = blocksFile.getName() + "+"
+ locationFile.getName();
- int databaseDate = (int) (lastModifiedMillis / 86400000);
+ String databaseDateString =
+ blocksFile.getParentFile().getName().substring(
+ "GeoLiteCity_".length());
+ int databaseDate = convertDateStringToNumber(databaseDateString);
this.addDatabase(databaseFileName, databaseDate);
try {
/* Parse location file first and remember country codes for given
@@ -262,17 +264,24 @@ public class DatabaseImporterImpl extends DatabaseImpl
}
private boolean importGeoIPASNum2File(File file) {
- long lastModifiedMillis = file.lastModified();
String databaseFileName = file.getName();
- int databaseDate = (int) (lastModifiedMillis / 86400000);
+ String databaseDateString =
+ file.getParentFile().getName().replaceAll("-", "") + "01";
+ int databaseDate = convertDateStringToNumber(databaseDateString);
this.addDatabase(databaseFileName, databaseDate);
try {
BufferedReader br = new BufferedReader(new FileReader(file));
String line;
while ((line = br.readLine()) != null) {
String[] parts = line.split(",");
- long startAddress = Long.parseLong(parts[0]),
- endAddress = Long.parseLong(parts[1]);
+ try {
+ Long.parseLong(parts[0].trim());
+ Long.parseLong(parts[1].trim());
+ } catch (NumberFormatException e) {
+ System.err.println(file.getAbsolutePath() + " '" + line + "'");
+ }
+ long startAddress = Long.parseLong(parts[0].trim()),
+ endAddress = Long.parseLong(parts[1].trim());
String code = parts[2].split(" ")[0].replaceAll("\"", "");
if (!code.startsWith("AS")) {
/* Don't import illegal range. */
More information about the tor-commits mailing list