[or-cvs] [metrics-utils/master 2/4] Support parsing .gz-compressed web server logs.
karsten at torproject.org
karsten at torproject.org
Fri Sep 24 16:03:47 UTC 2010
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri, 24 Sep 2010 13:07:59 +0200
Subject: Support parsing .gz-compressed web server logs.
Commit: fb018b0b006a72d455c9d379ae4710984c96c56d
---
visitor/ChangeLog | 3 +++
visitor/HOWTO | 8 +++++---
visitor/VisiTor.java | 10 ++++++++--
3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/visitor/ChangeLog b/visitor/ChangeLog
index 8b00c97..01c2294 100644
--- a/visitor/ChangeLog
+++ b/visitor/ChangeLog
@@ -1,5 +1,8 @@
VisiTor change log:
+Changes in version 0.0.3 - 2010-09-2?
+ - Support parsing .gz-compressed web server logs. Suggested by murb.
+
Changes in version 0.0.2 - 2010-09-22
- Don't break if we're given zero exit lists.
- If we saw zero requests on a day, write "0", not "NA". Only write "NA"
diff --git a/visitor/HOWTO b/visitor/HOWTO
index 85361ac..57d7a8f 100644
--- a/visitor/HOWTO
+++ b/visitor/HOWTO
@@ -50,8 +50,8 @@ for Linux and Mac OS X; commands for Windows may vary):
Note that as of August 2010, one month of exit lists is 20M compressed
and 168M uncompressed.
-- Put your web server log in your working directory, too, e.g.
- /home/you/visitor/access_log .
+- Put your .gz-compressed or decompressed web server log in your working
+ directory, too, e.g. /home/you/visitor/access_log.gz .
- Compile the (single) Java class using this command:
@@ -64,10 +64,12 @@ for Linux and Mac OS X; commands for Windows may vary):
java VisiTor <web server log> <exit list directory> <output file>
[<server log part with Tor user requests>]
- A sample invocation might be:
+ Sample invocations might be:
$ java VisiTor access_log exitlists/ out.csv tor_access_log
+ $ java VisiTor access_log.gz exitlists/ out.csv tor_access_log
+
- Find the results in /home/you/visitor/out.csv in a format that can be
imported by any spreadsheet application like OpenOffice.org Calc or
processed by R.
diff --git a/visitor/VisiTor.java b/visitor/VisiTor.java
index 0af583d..624fd3a 100644
--- a/visitor/VisiTor.java
+++ b/visitor/VisiTor.java
@@ -5,6 +5,7 @@ import java.io.*;
import java.text.*;
import java.util.*;
import java.util.regex.*;
+import java.util.zip.*;
public final class VisiTor {
@@ -61,8 +62,13 @@ public final class VisiTor {
return;
}
try {
- webServerLogReader = new BufferedReader(new FileReader(
- webServerLog));
+ if (webServerLog.endsWith(".gz")) {
+ webServerLogReader = new BufferedReader(new InputStreamReader(
+ new GZIPInputStream(new FileInputStream(webServerLog))));
+ } else {
+ webServerLogReader = new BufferedReader(new FileReader(
+ webServerLog));
+ }
logLine = webServerLogReader.readLine();
} catch (IOException e) {
System.out.println("FAILED\nCould not read file! Exiting!");
--
1.7.1
More information about the tor-commits mailing list