[tor-commits] [metrics-lib/master] Avoid parsing descriptor contents to Maps.

karsten at torproject.org karsten at torproject.org
Wed Jun 18 15:07:27 UTC 2014


commit 73e5a6989df19923775978428cd9bb21e0a96dc4
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Jun 18 11:38:07 2014 +0200

    Avoid parsing descriptor contents to Maps.
    
    Extra-info descriptors contain lots of comma-separated key=value lists
    that we store in SortedMap instances.  But those occupy a lot of memory,
    and it's not certain that we'll ever want to use the contained keys or
    values.
    
    New approach: when parsing a descriptor, use regular expressions to check
    if lines are valid, and delay parsing into maps until needed.
---
 .../descriptor/impl/ExtraInfoDescriptorImpl.java   |   90 ++++++++++----------
 .../torproject/descriptor/impl/ParseHelper.java    |   59 ++++++++-----
 2 files changed, 81 insertions(+), 68 deletions(-)

diff --git a/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java b/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java
index 13fdfa8..836551a 100644
--- a/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java
+++ b/src/org/torproject/descriptor/impl/ExtraInfoDescriptorImpl.java
@@ -712,28 +712,28 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl
     return this.dirreqStatsIntervalLength;
   }
 
-  private SortedMap<String, Integer> dirreqV2Ips;
+  private String dirreqV2Ips;
   public SortedMap<String, Integer> getDirreqV2Ips() {
-    return this.dirreqV2Ips == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV2Ips);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV2Ips);
   }
 
-  private SortedMap<String, Integer> dirreqV3Ips;
+  private String dirreqV3Ips;
   public SortedMap<String, Integer> getDirreqV3Ips() {
-    return this.dirreqV3Ips == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV3Ips);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV3Ips);
   }
 
-  private SortedMap<String, Integer> dirreqV2Reqs;
+  private String dirreqV2Reqs;
   public SortedMap<String, Integer> getDirreqV2Reqs() {
-    return this.dirreqV2Reqs == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV2Reqs);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV2Reqs);
   }
 
-  private SortedMap<String, Integer> dirreqV3Reqs;
+  private String dirreqV3Reqs;
   public SortedMap<String, Integer> getDirreqV3Reqs() {
-    return this.dirreqV3Reqs == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV3Reqs);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV3Reqs);
   }
 
   private double dirreqV2Share = -1.0;
@@ -746,40 +746,40 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl
     return this.dirreqV3Share;
   }
 
-  private SortedMap<String, Integer> dirreqV2Resp;
+  private String dirreqV2Resp;
   public SortedMap<String, Integer> getDirreqV2Resp() {
-    return this.dirreqV2Resp == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV2Resp);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV2Resp);
   }
 
-  private SortedMap<String, Integer> dirreqV3Resp;
+  private String dirreqV3Resp;
   public SortedMap<String, Integer> getDirreqV3Resp() {
-    return this.dirreqV3Resp == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV3Resp);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV3Resp);
   }
 
-  private SortedMap<String, Integer> dirreqV2DirectDl;
+  private String dirreqV2DirectDl;
   public SortedMap<String, Integer> getDirreqV2DirectDl() {
-    return this.dirreqV2DirectDl == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV2DirectDl);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV2DirectDl);
   }
 
-  private SortedMap<String, Integer> dirreqV3DirectDl;
+  private String dirreqV3DirectDl;
   public SortedMap<String, Integer> getDirreqV3DirectDl() {
-    return this.dirreqV3DirectDl == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV3DirectDl);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV3DirectDl);
   }
 
-  private SortedMap<String, Integer> dirreqV2TunneledDl;
+  private String dirreqV2TunneledDl;
   public SortedMap<String, Integer> getDirreqV2TunneledDl() {
-    return this.dirreqV2TunneledDl == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV2TunneledDl);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV2TunneledDl);
   }
 
-  private SortedMap<String, Integer> dirreqV3TunneledDl;
+  private String dirreqV3TunneledDl;
   public SortedMap<String, Integer> getDirreqV3TunneledDl() {
-    return this.dirreqV3TunneledDl == null ? null :
-        new TreeMap<String, Integer>(this.dirreqV3TunneledDl);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.dirreqV3TunneledDl);
   }
 
   private BandwidthHistory dirreqReadHistory;
@@ -802,10 +802,10 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl
     return this.entryStatsIntervalLength;
   }
 
-  private SortedMap<String, Integer> entryIps;
+  private String entryIps;
   public SortedMap<String, Integer> getEntryIps() {
-    return this.entryIps == null ? null :
-        new TreeMap<String, Integer>(this.entryIps);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.entryIps);
   }
 
   private long cellStatsEndMillis = -1L;
@@ -904,10 +904,10 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl
     return this.geoipStartTimeMillis;
   }
 
-  private SortedMap<String, Integer> geoipClientOrigins;
+  private String geoipClientOrigins;
   public SortedMap<String, Integer> getGeoipClientOrigins() {
-    return this.geoipClientOrigins == null ? null :
-        new TreeMap<String, Integer>(this.geoipClientOrigins);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.geoipClientOrigins);
   }
 
   private long bridgeStatsEndMillis = -1L;
@@ -920,22 +920,22 @@ public class ExtraInfoDescriptorImpl extends DescriptorImpl
     return this.bridgeStatsIntervalLength;
   }
 
-  private SortedMap<String, Integer> bridgeIps;
+  private String bridgeIps;
   public SortedMap<String, Integer> getBridgeIps() {
-    return this.bridgeIps == null ? null :
-        new TreeMap<String, Integer>(this.bridgeIps);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.bridgeIps);
   }
 
-  private SortedMap<String, Integer> bridgeIpVersions;
+  private String bridgeIpVersions;
   public SortedMap<String, Integer> getBridgeIpVersions() {
-    return this.bridgeIpVersions == null ? null :
-        new TreeMap<String, Integer>(this.bridgeIpVersions);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.bridgeIpVersions);
   }
 
-  private SortedMap<String, Integer> bridgeIpTransports;
+  private String bridgeIpTransports;
   public SortedMap<String, Integer> getBridgeIpTransports() {
-    return this.bridgeIpTransports == null ? null :
-      new TreeMap<String, Integer>(this.bridgeIpTransports);
+    return ParseHelper.convertCommaSeparatedKeyIntegerValueList(
+        this.bridgeIpTransports);
   }
 
   private List<String> transports = new ArrayList<String>();
diff --git a/src/org/torproject/descriptor/impl/ParseHelper.java b/src/org/torproject/descriptor/impl/ParseHelper.java
index 4fbe34a..048225c 100644
--- a/src/org/torproject/descriptor/impl/ParseHelper.java
+++ b/src/org/torproject/descriptor/impl/ParseHelper.java
@@ -286,11 +286,13 @@ public class ParseHelper {
         toUpperCase();
   }
 
-  public static SortedMap<String, Integer>
-      parseCommaSeparatedKeyIntegerValueList(String line,
-      String[] partsNoOpt, int index, int keyLength)
+  private static Map<Integer, Pattern>
+      commaSeparatedKeyValueListPatterns =
+      new HashMap<Integer, Pattern>();
+  public static String parseCommaSeparatedKeyIntegerValueList(
+      String line, String[] partsNoOpt, int index, int keyLength)
       throws DescriptorParseException {
-    SortedMap<String, Integer> result = new TreeMap<String, Integer>();
+    String result = "";
     if (partsNoOpt.length < index) {
       throw new DescriptorParseException("Line '" + line + "' does not "
           + "contain a key-value list at index " + index + ".");
@@ -299,26 +301,37 @@ public class ParseHelper {
           + "unrecognized values beyond the expected key-value list at "
           + "index " + index + ".");
     } else if (partsNoOpt.length > index) {
-      String[] listElements = partsNoOpt[index].split(",", -1);
-      for (String listElement : listElements) {
-        String[] keyAndValue = listElement.split("=");
-        String key = null;
-        int value = -1;
-        if (keyAndValue.length == 2 && (keyLength == 0 ||
-            keyAndValue[0].length() == keyLength)) {
-          try {
-            value = Integer.parseInt(keyAndValue[1]);
-            key = keyAndValue[0];
-          } catch (NumberFormatException e) {
-            /* Handle below. */
-          }
-        }
-        if (key == null) {
-          throw new DescriptorParseException("Line '" + line + "' "
-              + "contains an illegal key or value in list element '"
-              + listElement + "'.");
+      if (!commaSeparatedKeyValueListPatterns.containsKey(keyLength)) {
+        String keyPattern = "[0-9a-zA-Z?<>-]"
+            + (keyLength == 0 ? "+" : "{" + keyLength + "}");
+        String valuePattern = "\\-?[0-9]{1,9}";
+        String patternString = String.format("^%s=%s(,%s=%s)*$",
+            keyPattern, valuePattern, keyPattern, valuePattern);
+        commaSeparatedKeyValueListPatterns.put(keyLength,
+            Pattern.compile(patternString));
+      }
+      Pattern pattern = commaSeparatedKeyValueListPatterns.get(
+          keyLength);
+      if (pattern.matcher(partsNoOpt[index]).matches()) {
+        result = partsNoOpt[index];
+      } else {
+        throw new DescriptorParseException("Line '" + line + "' "
+            + "contains an illegal key or value.");
+      }
+    }
+    return result;
+  }
+
+  public static SortedMap<String, Integer>
+      convertCommaSeparatedKeyIntegerValueList(String validatedString) {
+    SortedMap<String, Integer> result = null;
+    if (validatedString != null) {
+      result = new TreeMap<String, Integer>();
+      if (validatedString.contains("=")) {
+        for (String listElement : validatedString.split(",", -1)) {
+          String[] keyAndValue = listElement.split("=");
+          result.put(keyAndValue[0], Integer.parseInt(keyAndValue[1]));
         }
-        result.put(key, value);
       }
     }
     return result;



More information about the tor-commits mailing list