[or-cvs] [metrics-utils/master 3/4] Support reading decompressed web server log from stdin.
karsten at torproject.org
karsten at torproject.org
Fri Sep 24 16:03:47 UTC 2010
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri, 24 Sep 2010 13:28:32 +0200
Subject: Support reading decompressed web server log from stdin.
Commit: 51c9df86f55bbcebe0bbda79a6ef4269b2a7691a
---
visitor/ChangeLog | 2 ++
visitor/HOWTO | 8 ++++++--
visitor/VisiTor.java | 49 +++++++++++++++++++++++++++++++------------------
3 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/visitor/ChangeLog b/visitor/ChangeLog
index 01c2294..e187b30 100644
--- a/visitor/ChangeLog
+++ b/visitor/ChangeLog
@@ -2,6 +2,8 @@ VisiTor change log:
Changes in version 0.0.3 - 2010-09-2?
- Support parsing .gz-compressed web server logs. Suggested by murb.
+ - Support reading decompressed web server log from stdin. Suggested by
+ murb.
Changes in version 0.0.2 - 2010-09-22
- Don't break if we're given zero exit lists.
diff --git a/visitor/HOWTO b/visitor/HOWTO
index 57d7a8f..2e1267b 100644
--- a/visitor/HOWTO
+++ b/visitor/HOWTO
@@ -58,8 +58,9 @@ for Linux and Mac OS X; commands for Windows may vary):
$ javac VisiTor.java
- Run the Java application, providing it with the parameters it needs.
- Note that the fourth parameter that writes out the server log part with
- Tor user requests is optional:
+ Passing '-' (without quotes) as web server log file name means that the
+ web server log will be read from stdin. Note that the fourth parameter
+ that writes out the server log part with Tor user requests is optional:
java VisiTor <web server log> <exit list directory> <output file>
[<server log part with Tor user requests>]
@@ -70,6 +71,9 @@ for Linux and Mac OS X; commands for Windows may vary):
$ java VisiTor access_log.gz exitlists/ out.csv tor_access_log
+ $ gunzip -c access_log.gz | java VisiTor - exitlists/ out.csv \
+ tor_access_log
+
- Find the results in /home/you/visitor/out.csv in a format that can be
imported by any spreadsheet application like OpenOffice.org Calc or
processed by R.
diff --git a/visitor/VisiTor.java b/visitor/VisiTor.java
index 624fd3a..2a9fb1e 100644
--- a/visitor/VisiTor.java
+++ b/visitor/VisiTor.java
@@ -48,32 +48,45 @@ public final class VisiTor {
/* Read the first line of the web server log to let the user know
* early if we think we can't parse it. */
- System.out.print("Reading the first line of your web server log '"
- + webServerLog + "' to see if we can parse it... ");
+ System.out.print("Reading the first line of your web server log "
+ + (webServerLog.equals("-") ? "from stdin" : "'" + webServerLog
+ + "'") + " to see if we can parse it... ");
SimpleDateFormat logFormat = new SimpleDateFormat(
"[dd/MMM/yyyy:HH:mm:ss Z]");
logFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
Pattern ipAddressPattern = Pattern.compile("(\\d+\\.){3}\\d+");
BufferedReader webServerLogReader = null;
String logLine = null;
- File logFile = new File(webServerLog);
- if (!logFile.exists()) {
- System.out.println("FAILED\nFile does not exist! Exiting!");
- return;
- }
- try {
- if (webServerLog.endsWith(".gz")) {
+ if (webServerLog.equals("-")) {
+ try {
webServerLogReader = new BufferedReader(new InputStreamReader(
- new GZIPInputStream(new FileInputStream(webServerLog))));
- } else {
- webServerLogReader = new BufferedReader(new FileReader(
- webServerLog));
+ System.in));
+ logLine = webServerLogReader.readLine();
+ } catch (IOException e) {
+ System.out.println("FAILED\nCould not read from stdin! Exiting!");
+ e.printStackTrace();
+ return;
+ }
+ } else {
+ File logFile = new File(webServerLog);
+ if (!logFile.exists()) {
+ System.out.println("FAILED\nFile does not exist! Exiting!");
+ return;
+ }
+ try {
+ if (webServerLog.endsWith(".gz")) {
+ webServerLogReader = new BufferedReader(new InputStreamReader(
+ new GZIPInputStream(new FileInputStream(webServerLog))));
+ } else {
+ webServerLogReader = new BufferedReader(new FileReader(
+ webServerLog));
+ }
+ logLine = webServerLogReader.readLine();
+ } catch (IOException e) {
+ System.out.println("FAILED\nCould not read file! Exiting!");
+ e.printStackTrace();
+ return;
}
- logLine = webServerLogReader.readLine();
- } catch (IOException e) {
- System.out.println("FAILED\nCould not read file! Exiting!");
- e.printStackTrace();
- return;
}
if (logLine == null) {
System.out.println("FAILED\nLog file is empty! Exiting!");
--
1.7.1
More information about the tor-commits mailing list