[tor-commits] [metrics-lib/master] Add log line interfaces and access methods.
karsten at torproject.org
karsten at torproject.org
Fri Feb 16 09:06:04 UTC 2018
commit 59689a9fa4c162378f347902eb68e4c21ccf0043
Author: iwakeh <iwakeh at torproject.org>
Date: Tue Feb 6 14:59:05 2018 +0000
Add log line interfaces and access methods.
For both the general LogDescriptor and extension WebServerAccessLog.
Include some new tests.
---
.../org/torproject/descriptor/LogDescriptor.java | 12 ++++++++++
.../torproject/descriptor/{log => }/Method.java | 5 ++--
.../torproject/descriptor/WebServerAccessLog.java | 28 ++++++++++++++++++++++
.../descriptor/log/WebServerAccessLogImpl.java | 20 ++++++++++++++++
.../descriptor/log/WebServerAccessLogLine.java | 15 ++++++++++--
.../descriptor/log/LogDescriptorTest.java | 15 +++++++-----
6 files changed, 85 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index ff02cae..6a6bf84 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -43,5 +43,17 @@ public interface LogDescriptor extends Descriptor {
@Override
public List<String> getUnrecognizedLines();
+ /**
+ * Returns a list of all parseable log lines.
+ *
+ * <p>The complete list is returned at once, so this
+ * <p>Might require a lot of memory depending on log size.</p>
+ *
+ * @return the parseable lines of this log as {@link Line} instances
+ * @throws DescriptorParseException if the log lines cannot be retrieved
+ */
+ public List<? extends Line> logLines() throws DescriptorParseException;
+
+ public interface Line {
+
+ /**
+ * Returns a log line string.
+ *
+ * <p>The returned string is possibly empty: the
+ * {@code WebServerAccessLogLine} implementation returns an empty string
+ * for a line that is not valid.</p>
+ *
+ * @return the textual form of this log line, possibly empty
+ */
+ public String toLogString();
+
+ }
}
diff --git a/src/main/java/org/torproject/descriptor/log/Method.java b/src/main/java/org/torproject/descriptor/Method.java
similarity index 50%
rename from src/main/java/org/torproject/descriptor/log/Method.java
rename to src/main/java/org/torproject/descriptor/Method.java
index c29d495..9135fe2 100644
--- a/src/main/java/org/torproject/descriptor/log/Method.java
+++ b/src/main/java/org/torproject/descriptor/Method.java
@@ -1,8 +1,9 @@
/* Copyright 2018 The Tor Project
* See LICENSE for licensing information */
-package org.torproject.descriptor.log;
+package org.torproject.descriptor;
-public enum Method {
+/**
+ * Enum for web server access log methods, i.e., the HTTP methods returned
+ * by {@code WebServerAccessLog.Line#getMethod()}.
+ */
+public enum Method {
GET, HEAD, POST;
}
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b94bc30..b4f1940 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
import java.time.LocalDate;
import java.util.List;
+import java.util.Optional;
/**
* Contains a sanitized web server access log file from a {@code torproject.org}
@@ -61,5 +62,32 @@ public interface WebServerAccessLog extends LogDescriptor {
@Override
public List<String> getUnrecognizedLines();
+ public interface Line extends LogDescriptor.Line {
+
+ /** Returns the IP address of the requesting host. */
+ public String getIp();
+
+ /**
+ * Returns the HTTP method, e.g., GET.
+ *
+ * @return one of the {@link Method} constants
+ */
+ public Method getMethod();
+
+ /** Returns the protocol and version, e.g., HTTP/1.1. */
+ public String getProtocol();
+
+ /** Returns the requested resource. */
+ public String getRequest();
+
+ /**
+ * Returns the size of the response in bytes, if available.
+ *
+ * @return the response size in bytes, or an empty {@code Optional} if no
+ * size is available for this line
+ */
+ public Optional<Integer> getSize();
+
+ /** Returns the final status code, e.g., 200. */
+ public int getResponse();
+
+ /** Returns the date when the request was received. */
+ public LocalDate getDate();
+
+ /**
+ * True, if this is a valid web server access log line.
+ *
+ * <p>The list returned by {@code WebServerAccessLogImpl#logLines()} only
+ * contains lines for which this method returns true.</p>
+ */
+ public boolean isValid();
+ }
+
}
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index f02b1d7..7b56528 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -10,12 +10,17 @@ import org.torproject.descriptor.internal.FileType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
import java.io.File;
+import java.io.InputStreamReader;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Collection;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
/**
* Implementation of web server access log descriptors.
@@ -126,5 +131,20 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
return this.logDate;
}
+ /** Returns a list of all valid log lines. */
+ @Override
+ public List<WebServerAccessLog.Line> logLines()
+ throws DescriptorParseException {
+ try (BufferedReader br
+ = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(
+ this.getRawDescriptorBytes())))) {
+ return br.lines().map(line
+ -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line))
+ .filter(line -> line.isValid()).collect(Collectors.toList());
+ } catch (Exception ex) {
+ throw new DescriptorParseException("Cannot retrieve log lines.", ex);
+ }
+ }
+
}
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
index c9d73cc..8a17230 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
@@ -3,6 +3,9 @@
package org.torproject.descriptor.log;
+import org.torproject.descriptor.Method;
+import org.torproject.descriptor.WebServerAccessLog;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -18,7 +21,7 @@ import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-public class WebServerAccessLogLine {
+public class WebServerAccessLogLine implements WebServerAccessLog.Line {
private static final Logger log = LoggerFactory
.getLogger(WebServerAccessLogLine.class);
@@ -54,6 +57,7 @@ public class WebServerAccessLogLine {
private String protocol;
/** Returns a log line string. Possibly empty. */
+ @Override
public String toLogString() {
if (!this.valid) {
return "";
@@ -74,7 +78,7 @@ public class WebServerAccessLogLine {
return this.date.format(DateTimeFormatter.ofPattern(DATE_PATTERN));
}
- /** Returns a string containing the ip. */
+ @Override
public String getIp() {
return this.ip;
}
@@ -84,22 +88,27 @@ public class WebServerAccessLogLine {
this.ip = fromMap(ip, ipMap);
}
+ @Override
public Method getMethod() {
return this.method;
}
+ @Override
public String getProtocol() {
return this.protocol;
}
+ @Override
public String getRequest() {
return this.request;
}
+ @Override
public Optional<Integer> getSize() {
return this.size < 0 ? Optional.empty() : Optional.of(this.size);
}
+ @Override
public int getResponse() {
return this.response;
}
@@ -109,10 +118,12 @@ public class WebServerAccessLogLine {
this.request = fromMap(request, requestMap);
}
+ @Override
public LocalDate getDate() {
return this.date;
}
+ @Override
public boolean isValid() {
return this.valid;
}
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index b12cfc0..a871791 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -1,4 +1,3 @@
-
/* Copyright 2017--2018 The Tor Project
* See LICENSE for licensing information */
@@ -51,6 +50,7 @@ public class LogDescriptorTest {
protected String[] pan;
protected Class<LogDescriptor> type;
protected boolean isDecompressionTest;
+ protected int lineCount;
/** All types of data that can be encountered during sync. */
@Parameters
@@ -60,29 +60,30 @@ public class LogDescriptorTest {
"metrics.torproject.org_meronense.torproject.org_access.log"
+ "_20170530.gz",
"metrics.torproject.org", "20170530", "gz"},
- WebServerAccessLog.class},
+ WebServerAccessLog.class, 24},
{Boolean.FALSE, 1878, new String[]{"meronense.torproject.org",
"xy.host.org_meronense.torproject.org_access.log_20170530.log",
"metrics.torproject.org", "20170530", "xz"},
- WebServerAccessLog.class},
+ WebServerAccessLog.class, 24},
{Boolean.TRUE, 70730, new String[]{"archeotrichon.torproject.org",
"archive.torproject.org_archeotrichon.torproject.org_access.log_"
+ "20151007.xz",
"archive.torproject.org", "20151007", "xz"},
- WebServerAccessLog.class},
+ WebServerAccessLog.class, 655},
{Boolean.TRUE, 0, new String[]{"dummy.host.net",
"nix.server.org_dummy.host.net_access.log_20111111.bz2",
"nix.server.org", "20111111", "bz2"},
- WebServerAccessLog.class}});
+ WebServerAccessLog.class, 0}});
}
/** This constructor receives the above defined data for each run. */
public LogDescriptorTest(boolean decompression, int size, String[] pan,
- Class<LogDescriptor> type) {
+ Class<LogDescriptor> type, int lineCount) {
this.pan = pan;
this.size = size;
this.type = type;
this.isDecompressionTest = decompression;
+ this.lineCount = lineCount;
}
/** Prepares the temporary folder and writes files to it for this test. */
@@ -129,6 +130,8 @@ public class LogDescriptorTest {
InternalLogDescriptor ld = (InternalLogDescriptor) descs.get(0);
assertEquals("Wrong compression type string. " + dataUsed(),
pan[4], ld.getCompressionType());
+ List<? extends LogDescriptor.Line> lines = ld.logLines();
+ assertEquals(this.lineCount, lines.size());
}
private String dataUsed() {
More information about the tor-commits
mailing list