[or-cvs] [tor/master] Fix bridge statistics.

Nick Mathewson nickm at seul.org
Fri Dec 18 19:17:46 UTC 2009


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Sat, 12 Dec 2009 08:32:46 +0100
Subject: Fix bridge statistics.
Commit: e1e5c1b3aba96afbd6fa3b5ece262720dd45a80e

Fix statistics on client numbers by country as seen by bridges that were
broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour intervals
instead of variable 12-to-48-hour intervals.
---
 ChangeLog                 |    3 +
 doc/spec/control-spec.txt |   10 ++--
 doc/spec/dir-spec.txt     |   31 ++++++++-
 src/or/control.c          |   22 ++-----
 src/or/geoip.c            |  167 +++++++++++++++++++++++++++++++++++++++++++++
 src/or/main.c             |   27 +++++++-
 src/or/or.h               |    8 ++-
 src/or/router.c           |   48 +++----------
 8 files changed, 251 insertions(+), 65 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 2cb2383..56c8958 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -25,6 +25,9 @@ Changes in version 0.2.2.7-alpha - 2009-??-??
     - Do not segfault when writing buffer stats when we haven't observed
       a single circuit to report about. Found by Fabian Lanze. Bugfix on
       0.2.2.1-alpha.
+    - Fix statistics on client numbers by country as seen by bridges that
+      were broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour
+      intervals instead of variable 12-to-48-hour intervals.
 
 
 Changes in version 0.2.2.6-alpha - 2009-11-19
diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt
index 33d4658..73f0b02 100644
--- a/doc/spec/control-spec.txt
+++ b/doc/spec/control-spec.txt
@@ -1637,11 +1637,11 @@
   TimeStarted is a quoted string indicating when the reported summary
   counts from (in GMT).
 
-  The CountrySummary keyword has as its argument a comma-separated
-  set of "countrycode=count" pairs. For example,
-  650-CLIENTS_SEEN TimeStarted="Thu Dec 25 23:50:43 EST 2008"
-  650 CountrySummary=us=16,de=8,uk=8
-[XXX Matt Edman informs me that the time format above is wrong. -RD]
+  The CountrySummary keyword has as its argument a comma-separated,
+  possibly empty set of "countrycode=count" pairs. For example (without
+  linebreak),
+  650-CLIENTS_SEEN TimeStarted="2008-12-25 23:50:43"
+  CountrySummary=us=16,de=8,uk=8
 
 4.1.15. New consensus networkstatus has arrived.
 
diff --git a/doc/spec/dir-spec.txt b/doc/spec/dir-spec.txt
index 5eedbd5..0bfc758 100644
--- a/doc/spec/dir-spec.txt
+++ b/doc/spec/dir-spec.txt
@@ -627,8 +627,8 @@
 
         As documented in 2.1 above.  See migration notes in section 2.2.1.
 
-    "geoip-start" YYYY-MM-DD HH:MM:SS NL
-    "geoip-client-origins" CC=N,CC=N,... NL
+    ("geoip-start" YYYY-MM-DD HH:MM:SS NL)
+    ("geoip-client-origins" CC=N,CC=N,... NL)
 
         Only generated by bridge routers (see blocking.pdf), and only
         when they have been configured with a geoip database.
@@ -641,6 +641,33 @@
         "geoip-start" is the time at which we began collecting geoip
         statistics.
 
+        "geoip-start" and "geoip-client-origins" have been replaced by
+        "bridge-stats-end" and "bridge-stats-ips" in 0.2.2.4-alpha. The
+        reason is that the measurement interval with "geoip-stats" as
+        determined by subtracting "geoip-start" from "published" could
+        have had a variable length, whereas the measurement interval in
+        0.2.2.4-alpha and later is set to be exactly 24 hours long. In
+        order to clearly distinguish the new measurement intervals from
+        the old ones, the new keywords have been introduced.
+
+    "bridge-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+        [At most once.]
+
+        YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+        interval of length NSEC seconds (86400 seconds by default).
+
+        A "bridge-stats-end" line, as well as any other "bridge-*" line,
+        is only added when the relay has been running as a bridge for at
+        least 24 hours.
+
+    "bridge-ips" CC=N,CC=N,... NL
+        [At most once.]
+
+        List of mappings from two-letter country codes to the number of
+        unique IP addresses that have connected from that country to the
+        bridge and which are no known relays, rounded up to the nearest
+        multiple of 8.
+
     "dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
         [At most once.]
 
diff --git a/src/or/control.c b/src/or/control.c
index c3567bd..d3ed858 100644
--- a/src/or/control.c
+++ b/src/or/control.c
@@ -1755,21 +1755,10 @@ getinfo_helper_events(control_connection_t *control_conn,
                  "information", question);
       }
     } else if (!strcmp(question, "status/clients-seen")) {
-      char geoip_start[ISO_TIME_LEN+1];
-      size_t answer_len;
-      char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL));
-
-      if (!geoip_summary)
+      char *bridge_stats = geoip_get_bridge_stats_controller(time(NULL));
+      if (!bridge_stats)
         return -1;
-
-      answer_len = strlen("TimeStarted=\"\" CountrySummary=") +
-                   ISO_TIME_LEN + strlen(geoip_summary) + 1;
-      *answer = tor_malloc(answer_len);
-      format_iso_time(geoip_start, geoip_get_history_start());
-      tor_snprintf(*answer, answer_len,
-                   "TimeStarted=\"%s\" CountrySummary=%s",
-                   geoip_start, geoip_summary);
-      tor_free(geoip_summary);
+      *answer = bridge_stats;
     } else {
       return 0;
     }
@@ -3846,10 +3835,9 @@ control_event_bootstrap_problem(const char *warn, int reason)
  * from recently. Send a copy to the controller in case it wants to
  * display it for the user. */
 void
-control_event_clients_seen(const char *timestarted, const char *countries)
+control_event_clients_seen(const char *controller_str)
 {
   send_control_event(EVENT_CLIENTS_SEEN, 0,
-    "650 CLIENTS_SEEN TimeStarted=\"%s\" CountrySummary=%s\r\n",
-    timestarted, countries);
+    "650 CLIENTS_SEEN %s\r\n", controller_str);
 }
 
diff --git a/src/or/geoip.c b/src/or/geoip.c
index a57e1fb..a43daaf 100644
--- a/src/or/geoip.c
+++ b/src/or/geoip.c
@@ -1081,6 +1081,173 @@ geoip_dirreq_stats_write(time_t now)
   tor_free(data_v3);
 }
 
+/** Start time of bridge stats. */
+static time_t start_of_bridge_stats_interval;
+
+/** Initialize bridge stats. */
+void
+geoip_bridge_stats_init(time_t now)
+{
+  start_of_bridge_stats_interval = now;
+}
+
+/** Parse the bridge statistics as they are written to extra-info
+ * descriptors for being returned to controller clients. Return the
+ * controller string if successful, or NULL otherwise. */
+static char *
+parse_bridge_stats_controller(const char *stats_str, time_t now)
+{
+  char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1],
+       *controller_str, *eos;
+  const char *stats_end_line = "bridge-stats-end",
+             *ips_line = "bridge-ips", *tmp;
+  time_t stats_end_time;
+  size_t controller_len;
+  int seconds;
+  tor_assert(stats_str);
+
+  /* Parse timestamp and number of seconds from
+    "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */
+  tmp = strstr(stats_str, stats_end_line);
+  if (!tmp || strlen(tmp) < strlen(stats_end_line) + 1 + ISO_TIME_LEN + 6)
+    return NULL;
+  strlcpy(stats_end_str, tmp + strlen(stats_end_line) + 1,
+          sizeof(stats_end_str));
+  if (parse_iso_time(stats_end_str, &stats_end_time) < 0)
+    return NULL;
+  if (stats_end_time < now - (25*60*60) ||
+      stats_end_time > now + (1*60*60))
+    return NULL;
+  seconds = (int)strtol(tmp + strlen(stats_end_line) + 1 +
+                        ISO_TIME_LEN + 2, &eos, 10);
+  if (!eos || seconds < 23*60*60)
+    return NULL;
+  format_iso_time(stats_start_str, stats_end_time - seconds);
+
+  /* Parse: "bridge-ips CC=N,CC=N,..." */
+  tmp = strstr(tmp, ips_line);
+  if (!tmp || strlen(tmp) < strlen(ips_line))
+    return NULL;
+  if (strlen(tmp) > strlen(ips_line) + 2)
+    tmp += strlen(ips_line) + 1;
+  else
+    tmp = "";
+  controller_len = strlen("TimeStarted=\"\" CountrySummary=") +
+                          ISO_TIME_LEN + strlen(tmp) + 1;
+  controller_str = tor_malloc(controller_len);
+  if (tor_snprintf(controller_str, controller_len,
+                   "TimeStarted=\"%s\" CountrySummary=%s",
+                   stats_start_str, tmp) < 0) {
+    tor_free(controller_str);
+    return NULL;
+  }
+  return controller_str;
+}
+
+/** Most recent bridge statistics formatted to be written to extra-info
+ * descriptors. */
+static char *bridge_stats_extrainfo = NULL;
+
+/** Most recent bridge statistics formatted to be returned to controller
+ * clients. */
+static char *bridge_stats_controller = NULL;
+
+/** Write bridge statistics to $DATADIR/stats/bridge-stats and return
+ * when we should next try to write statistics. */
+time_t
+geoip_bridge_stats_write(time_t now)
+{
+  char *statsdir = NULL, *filename = NULL, *data = NULL,
+       written[ISO_TIME_LEN+1], *out = NULL, *controller_str;
+  size_t len;
+
+  /* If we changed from relay to bridge recently, adapt starting time
+   * of current measurements. */
+  if (start_of_bridge_stats_interval < client_history_starts)
+    start_of_bridge_stats_interval = client_history_starts;
+
+  /* Check if 24 hours have passed since starting measurements. */
+  if (now < start_of_bridge_stats_interval +
+            DIR_ENTRY_RECORD_USAGE_RETAIN_IPS)
+    return start_of_bridge_stats_interval +
+           DIR_ENTRY_RECORD_USAGE_RETAIN_IPS;
+
+  /* Discard all items in the client history that are too old. */
+  geoip_remove_old_clients(start_of_bridge_stats_interval);
+
+  statsdir = get_datadir_fname("stats");
+  if (check_private_dir(statsdir, CPD_CREATE) < 0)
+    goto done;
+  filename = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats");
+  data = geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT);
+  format_iso_time(written, now);
+  len = strlen("bridge-stats-end  (999999 s)\nbridge-ips \n") +
+        ISO_TIME_LEN + (data ? strlen(data) : 0) + 1;
+  out = tor_malloc(len);
+  if (tor_snprintf(out, len, "bridge-stats-end %s (%u s)\nbridge-ips %s\n",
+              written, (unsigned) (now - start_of_bridge_stats_interval),
+              data ? data : "") < 0)
+    goto done;
+  write_str_to_file(filename, out, 0);
+  controller_str = parse_bridge_stats_controller(out, now);
+  if (!controller_str)
+    goto done;
+  start_of_bridge_stats_interval = now;
+  tor_free(bridge_stats_extrainfo);
+  tor_free(bridge_stats_controller);
+  bridge_stats_extrainfo = out;
+  out = NULL;
+  bridge_stats_controller = controller_str;
+  control_event_clients_seen(controller_str);
+ done:
+  tor_free(filename);
+  tor_free(statsdir);
+  tor_free(data);
+  tor_free(out);
+  return start_of_bridge_stats_interval +
+         DIR_ENTRY_RECORD_USAGE_RETAIN_IPS;
+}
+
+/** Try to load the most recent bridge statistics from disk, unless we
+ * have finished a measurement interval lately. */
+static void
+load_bridge_stats(time_t now)
+{
+  char *fname, *contents, *controller_str;
+  if (bridge_stats_extrainfo)
+    return;
+  fname = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats");
+  contents = read_file_to_str(fname, 0, NULL);
+  if (contents) {
+    controller_str = parse_bridge_stats_controller(contents, now);
+    if (controller_str) {
+      bridge_stats_extrainfo = contents;
+      bridge_stats_controller = controller_str;
+    } else {
+      tor_free(contents);
+    }
+  }
+  tor_free(fname);
+}
+
+/** Return most recent bridge statistics for inclusion in extra-info
+ * descriptors, or NULL if we don't have recent bridge statistics. */
+char *
+geoip_get_bridge_stats_extrainfo(time_t now)
+{
+  load_bridge_stats(now);
+  return bridge_stats_extrainfo;
+}
+
+/** Return most recent bridge statistics to be returned to controller
+ * clients, or NULL if we don't have recent bridge statistics. */
+char *
+geoip_get_bridge_stats_controller(time_t now)
+{
+  load_bridge_stats(now);
+  return bridge_stats_controller;
+}
+
 /** Start time of entry stats. */
 static time_t start_of_entry_stats_interval;
 
diff --git a/src/or/main.c b/src/or/main.c
index 4dc182e..101c084 100644
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -832,6 +832,8 @@ run_scheduled_events(time_t now)
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
   static time_t time_to_write_stats_files = 0;
+  static time_t time_to_write_bridge_stats = 0;
+  static int should_init_bridge_stats = 1;
   static time_t time_to_retry_dns_init = 0;
   or_options_t *options = get_options();
   int i;
@@ -966,7 +968,8 @@ run_scheduled_events(time_t now)
     if (options->CellStatistics || options->DirReqStatistics ||
         options->EntryStatistics || options->ExitPortStatistics) {
       if (!time_to_write_stats_files) {
-        /* Initialize stats. */
+        /* Initialize stats. We're doing this here and not in options_act,
+         * so that we know exactly when the 24 hours interval ends. */
         if (options->CellStatistics)
           rep_hist_buffer_stats_init(now);
         if (options->DirReqStatistics)
@@ -998,6 +1001,28 @@ run_scheduled_events(time_t now)
     }
   }
 
+  /* 1h. Check whether we should write bridge statistics to disk.
+   */
+  if (should_record_bridge_info(options)) {
+    if (time_to_write_bridge_stats < now) {
+      if (should_init_bridge_stats) {
+        /* (Re-)initialize bridge statistics. */
+        geoip_bridge_stats_init(now);
+        time_to_write_bridge_stats = now + WRITE_STATS_INTERVAL;
+        should_init_bridge_stats = 0;
+      } else {
+        /* Possibly write bridge statistics to disk and ask when to write
+         * them next time. */
+        time_to_write_bridge_stats = geoip_bridge_stats_write(
+                                           time_to_write_bridge_stats);
+      }
+    }
+  } else if (!should_init_bridge_stats) {
+    /* Bridge mode was turned off. Ensure that stats are re-initialized
+     * next time bridge mode is turned on. */
+    should_init_bridge_stats = 1;
+  }
+
   /* Remove old information from rephist and the rend cache. */
   if (time_to_clean_caches < now) {
     rep_history_clean(now - options->RephistTrackTime);
diff --git a/src/or/or.h b/src/or/or.h
index 736e66a..85f549f 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3630,8 +3630,7 @@ typedef enum {
 void control_event_bootstrap(bootstrap_status_t status, int progress);
 void control_event_bootstrap_problem(const char *warn, int reason);
 
-void control_event_clients_seen(const char *timestarted,
-                                const char *countries);
+void control_event_clients_seen(const char *controller_str);
 
 #ifdef CONTROL_PRIVATE
 /* Used only by control.c and test.c */
@@ -4089,6 +4088,10 @@ void geoip_dirreq_stats_init(time_t now);
 void geoip_dirreq_stats_write(time_t now);
 void geoip_entry_stats_init(time_t now);
 void geoip_entry_stats_write(time_t now);
+void geoip_bridge_stats_init(time_t now);
+time_t geoip_bridge_stats_write(time_t now);
+char *geoip_get_bridge_stats_extrainfo(time_t);
+char *geoip_get_bridge_stats_controller(time_t);
 
 /********************************* hibernate.c **********************/
 
@@ -4749,7 +4752,6 @@ int router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router,
                                  crypto_pk_env_t *ident_key);
 int extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
                              crypto_pk_env_t *ident_key);
-char *extrainfo_get_client_geoip_summary(time_t);
 int is_legal_nickname(const char *s);
 int is_legal_nickname_or_hexdigest(const char *s);
 int is_legal_hexdigest(const char *s);
diff --git a/src/or/router.c b/src/or/router.c
index 8b6d1d9..7362e60 100644
--- a/src/or/router.c
+++ b/src/or/router.c
@@ -1882,6 +1882,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
   int result;
   size_t len;
   static int write_stats_to_extrainfo = 1;
+  time_t now = time(NULL);
 
   base16_encode(identity, sizeof(identity),
                 extrainfo->cache_info.identity_digest, DIGEST_LEN);
@@ -1896,7 +1897,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
 
   if (options->ExtraInfoStatistics && write_stats_to_extrainfo) {
     char *contents = NULL;
-    time_t now = time(NULL);
     log_info(LD_GENERAL, "Adding stats to extra-info descriptor.");
     if (options->DirReqStatistics &&
         load_stats_file("stats"PATH_SEPARATOR"dirreq-stats",
@@ -1953,18 +1953,16 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
     return -1;
 
   if (should_record_bridge_info(options)) {
-    char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL));
-    if (geoip_summary) {
-      char geoip_start[ISO_TIME_LEN+1];
-      format_iso_time(geoip_start, geoip_get_history_start());
-      result = tor_snprintf(s+strlen(s), maxlen-strlen(s),
-                            "geoip-start-time %s\n"
-                            "geoip-client-origins %s\n",
-                            geoip_start, geoip_summary);
-      control_event_clients_seen(geoip_start, geoip_summary);
-      tor_free(geoip_summary);
-      if (result<0)
-        return -1;
+    char *bridge_stats = geoip_get_bridge_stats_extrainfo(now);
+    if (bridge_stats) {
+      size_t pos = strlen(s);
+      if (strlcpy(s + pos, bridge_stats, maxlen - strlen(s)) !=
+          strlen(bridge_stats)) {
+        log_warn(LD_DIR, "Could not write bridge-stats to extra-info "
+                 "descriptor.");
+        s[pos] = '\0';
+      }
+      tor_free(bridge_stats);
     }
   }
 
@@ -2013,30 +2011,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
   return (int)strlen(s)+1;
 }
 
-/** Wrapper function for geoip_get_client_history(). It first discards
- * any items in the client history that are too old -- it dumps anything
- * more than 48 hours old, but it only considers whether to dump at most
- * once per 48 hours, so we aren't too precise to an observer (see also
- * r14780).
- */
-char *
-extrainfo_get_client_geoip_summary(time_t now)
-{
-  static time_t last_purged_at = 0;
-  int geoip_purge_interval =
-      (get_options()->DirReqStatistics || get_options()->EntryStatistics) ?
-      DIR_ENTRY_RECORD_USAGE_RETAIN_IPS : 48*60*60;
-  if (now > last_purged_at+geoip_purge_interval) {
-    /* (Note that this also discards items in the client history with
-     * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter
-     * because bridge and directory stats are independent. Keep in mind
-     * for future extensions, though.) */
-    geoip_remove_old_clients(now-geoip_purge_interval);
-    last_purged_at = now;
-  }
-  return geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT);
-}
-
 /** Return true iff <b>s</b> is a legally valid server nickname. */
 int
 is_legal_nickname(const char *s)
-- 
1.5.6.5




More information about the tor-commits mailing list