[or-cvs] [tor/master] Write unit tests and fix issues they uncovered.

arma at seul.org arma at seul.org
Thu Sep 17 01:46:22 UTC 2009


Author: Mike Perry <mikeperry-git at fscked.org>
Date: Thu, 27 Aug 2009 23:28:20 -0700
Subject: Write unit tests and fix issues they uncovered.
Commit: b52bce91fc25d29f1d1a7b5a32076eadd7005d2f

---
 src/or/circuitbuild.c |  158 +++++++++++++++++++++++++++++++++----------------
 src/or/config.c       |    2 +-
 src/or/or.h           |   45 ++++++++++----
 src/or/test.c         |   67 +++++++++++++++++++++
 4 files changed, 206 insertions(+), 66 deletions(-)

diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c
index 582567b..ee9dce2 100644
--- a/src/or/circuitbuild.c
+++ b/src/or/circuitbuild.c
@@ -9,9 +9,13 @@
  * \brief The actual details of building circuits.
  **/
 
+// XXX: Move this noise to common/compat.c?
 #include <math.h>
 
+// also use pow()
+
 long int lround(double x);
+double ln(double x);
 
 double
 ln(double x)
@@ -20,6 +24,8 @@ ln(double x)
 }
 #undef log
 
+#define CIRCUIT_PRIVATE
+
 #include "or.h"
 #include "crypto.h"
 
@@ -81,16 +87,19 @@ static time_t start_of_month(time_t when);
  * time units are milliseconds
  */
 int
-circuit_build_times_add_time(circuit_build_times_t *cbt, long time)
+circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time)
 {
-  if (time > UINT16_MAX) {
+  if (time > BUILD_TIME_MAX) {
     log_notice(LD_CIRC,
-       "Circuit build time of %ldms exceeds max. Capping at 65536ms", time);
-    time = UINT16_MAX;
+       "Circuit build time of %ums exceeds max. Capping at 65536ms", time);
+    time = BUILD_TIME_MAX;
+  } else if (time <= 0) {
+    log_err(LD_CIRC, "Circuit build time is %u!", time);
+    return -1;
   }
   cbt->circuit_build_times[cbt->build_times_idx] = time;
   cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE;
-  if (cbt->total_build_times + 1 < NCIRCUITS_TO_OBSERVE)
+  if (cbt->total_build_times < NCIRCUITS_TO_OBSERVE)
     cbt->total_build_times++;
 
   return 0;
@@ -101,7 +110,7 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, long time)
  */
 static void
 circuit_build_times_create_histogram(circuit_build_times_t *cbt,
-                                     uint16_t *histogram)
+                                     build_time_t *histogram)
 {
   int i, c;
   // calculate histogram
@@ -116,10 +125,11 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt,
 /**
  * Find maximum circuit build time
  */
-static uint16_t
+static build_time_t
 circuit_build_times_max(circuit_build_times_t *cbt)
 {
-  int i = 0, max_build_time = 0;
+  int i = 0;
+  build_time_t max_build_time = 0;
   for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
     if (cbt->circuit_build_times[i] > max_build_time)
       max_build_time = cbt->circuit_build_times[i];
@@ -127,36 +137,39 @@ circuit_build_times_max(circuit_build_times_t *cbt)
   return max_build_time;
 }
 
-static uint16_t
+static build_time_t
 circuit_build_times_min(circuit_build_times_t *cbt)
 {
   int i = 0;
-  uint16_t min_build_time = UINT16_MAX;
+  build_time_t min_build_time = BUILD_TIME_MAX;
   for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
     if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */
             cbt->circuit_build_times[i] < min_build_time)
       min_build_time = cbt->circuit_build_times[i];
   }
-  if (min_build_time == UINT16_MAX) {
-    log_warn(LD_CIRC, "No build times less than UIN16_MAX!");
+  if (min_build_time == BUILD_TIME_MAX) {
+    log_warn(LD_CIRC, "No build times less than BUILD_TIME_MAX!");
   }
   return min_build_time;
 }
 
 /**
- * output a histogram of current circuit build times
+ * output a histogram of current circuit build times.
+ *
+ * XXX: Is do_unit the right way to do this unit test
+ * noise?
  */
 void
 circuit_build_times_update_state(circuit_build_times_t *cbt,
-                                 or_state_t *state)
+                                 or_state_t *state, int do_unit)
 {
-  uint16_t max_build_time = 0, *histogram;
+  build_time_t max_build_time = 0, *histogram;
   int i = 0, nbins = 0;
   config_line_t **next, *line;
 
   max_build_time = circuit_build_times_max(cbt);
   nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH);
-  histogram = tor_malloc_zero(nbins * sizeof(uint16_t));
+  histogram = tor_malloc_zero(nbins * sizeof(build_time_t));
 
   circuit_build_times_create_histogram(cbt, histogram);
   // write to state
@@ -177,8 +190,11 @@ circuit_build_times_update_state(circuit_build_times_t *cbt,
                  histogram[i]);
     next = &(line->next);
   }
-  if (!get_options()->AvoidDiskWrites)
-    or_state_mark_dirty(get_or_state(), 0);
+
+  if (!do_unit) {
+    if (!get_options()->AvoidDiskWrites)
+      or_state_mark_dirty(get_or_state(), 0);
+  }
 
   if (histogram) tor_free(histogram);
 }
@@ -191,8 +207,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
   int tot_values = 0, lines = 0;
   config_line_t *line;
   msg = NULL;
-  memset(cbt->circuit_build_times, 0, NCIRCUITS_TO_OBSERVE);
-  cbt->total_build_times = state->TotalBuildTimes;
+  memset(cbt, 0, sizeof(*cbt));
 
   for (line = state->BuildtimeHistogram; line; line = line->next) {
     smartlist_t * args = smartlist_create();
@@ -206,9 +221,9 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
       const char *ms_str = smartlist_get(args,0);
       const char *count_str = smartlist_get(args,1);
       uint32_t count, i;
-      uint16_t ms;
+      build_time_t ms;
       int ok;
-      ms = tor_parse_ulong(ms_str, 0, 0, UINT16_MAX, &ok, NULL);
+      ms = tor_parse_ulong(ms_str, 0, 0, BUILD_TIME_MAX, &ok, NULL);
       if (!ok) {
         *msg = tor_strdup("Unable to parse circuit build times: "
                           "Unparsable bin number");
@@ -233,10 +248,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
   return (msg ? -1 : 0);
 }
 
-static void
+void
 circuit_build_times_update_alpha(circuit_build_times_t *cbt)
 {
-  uint16_t *x=cbt->circuit_build_times;
+  build_time_t *x=cbt->circuit_build_times;
   double a = 0;
   int n=0,i=0;
 
@@ -248,6 +263,10 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
     a += ln(x[i]);
     n++;
   }
+  if (n!=cbt->total_build_times) {
+    log_err(LD_CIRC, "Discrepency in build times count: %d vs %d", n,
+            cbt->total_build_times);
+  }
   tor_assert(n==cbt->total_build_times);
   a -= n*ln(cbt->Xm);
   a = n/a;
@@ -257,7 +276,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
 
 /**
  * This is the Pareto Quantile Function. It calculates the point x
- * in the distribution such that F(x) < quantile (ie quantile*100%
+ * in the distribution such that F(x) = quantile (ie quantile*100%
  * of the mass of the density function is below x on the curve).
  *
  * We use it to calculate the timeout and also synthetic values of
@@ -265,50 +284,92 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
  *
  * See http://en.wikipedia.org/wiki/Quantile_function,
  * http://en.wikipedia.org/wiki/Inverse_transform_sampling and
- * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation
+ * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_random_sample_from_Pareto_distribution
  * That's right. I'll cite wikipedia all day long.
  */
-static double
+double
 circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
                                       double quantile)
 {
-  return cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha);
+  double ret = cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha);
+  if (ret > INT32_MAX) {
+    ret = INT32_MAX;
+  }
+  tor_assert(ret > 0);
+  return ret;
+}
+
+/* Pareto CDF */
+double
+circuit_build_times_cdf(circuit_build_times_t *cbt,
+                                   double x)
+{
+  double ret = 1.0-pow(cbt->Xm/x,cbt->alpha);
+  tor_assert(0 <= ret && ret <= 1.0);
+  return ret;
+}
+
+build_time_t
+circuit_build_times_generate_sample(circuit_build_times_t *cbt,
+                                    double q_lo, double q_hi)
+{
+  uint32_t r = crypto_rand_int(UINT32_MAX-1);
+  double u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT32_MAX);
+  build_time_t ret;
+
+  tor_assert(0 <= u && u < 1.0);
+  ret = lround(circuit_build_times_calculate_timeout(cbt, u));
+  tor_assert(ret > 0);
+  return ret;
 }
 
 static void
 circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt)
 {
   /* Generate 0.8-1.0... */
-  uint64_t r = crypto_rand_uint64(UINT64_MAX-1);
-  double u = BUILDTIMEOUT_QUANTILE_CUTOFF +
-             ((1.0-BUILDTIMEOUT_QUANTILE_CUTOFF)*r)/(1.0*UINT64_MAX);
-
-  long gentime = lround(circuit_build_times_calculate_timeout(cbt, u));
+  build_time_t gentime = circuit_build_times_generate_sample(cbt,
+              BUILDTIMEOUT_QUANTILE_CUTOFF, 1.0);
 
-  if (gentime < get_options()->CircuitBuildTimeout*1000) {
+  if (gentime < (build_time_t)get_options()->CircuitBuildTimeout*1000) {
     log_warn(LD_CIRC,
-      "Generated a synthetic timeout LESS than the current timeout: %ld vs %d",
+      "Generated a synthetic timeout LESS than the current timeout: %u vs %d",
       gentime, get_options()->CircuitBuildTimeout*1000);
-    tor_assert(gentime >= get_options()->CircuitBuildTimeout*1000);
-  } else if (gentime > UINT16_MAX) {
-    gentime = UINT16_MAX;
+    tor_assert(gentime >=
+            (build_time_t)get_options()->CircuitBuildTimeout*1000);
+  } else if (gentime > BUILD_TIME_MAX) {
+    gentime = BUILD_TIME_MAX;
     log_info(LD_CIRC,
-      "Generated a synthetic timeout LESS than the current timeout: %ld vs %d",
-      gentime, get_options()->CircuitBuildTimeout*1000);
+      "Generated a synthetic timeout larger than the max: %u", gentime);
   } else {
-    log_info(LD_CIRC, "Generated synthetic time %ld for timeout",
-             gentime);
+    log_info(LD_CIRC, "Generated synthetic time %u for timeout", gentime);
   }
 
   circuit_build_times_add_time(cbt, gentime);
 }
 
+void
+circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
+                                  double quantile, build_time_t timeout)
+{
+  // Q(u) = Xm/((1-u)^(1/a))
+  // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
+  // CircBuildTimeout = Xm/((1-0.8))^(1/a))
+  // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
+  // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
+  // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
+  cbt->alpha = ln(1.0-quantile)/(ln(cbt->Xm)-ln(timeout));
+}
+
 /**
  * Store a timeout as a synthetic value
  */
 void
 circuit_build_times_add_timeout(circuit_build_times_t *cbt)
 {
+  /* XXX: If there are a ton of timeouts, we should reduce
+   * the circuit build timeout by like 2X or something...
+   * But then how do we differentiate between that and network
+   * failure? */
   if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) {
     /* Store a timeout before we have enough data as special */
     cbt->pre_timeouts++;
@@ -316,17 +377,12 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt)
   }
 
   /* Store a timeout as a random position on this curve */
-  if (cbt->pre_timeouts && get_options()->CircuitBuildTimeout != 60) {
+  if (cbt->pre_timeouts) {
     cbt->Xm = circuit_build_times_min(cbt);
     // Use current timeout to get an estimate on alpha
-    // Q(u) = Xm/((1-u)^(1/a))
-    // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
-    // CircBuildTimeout = Xm/((1-0.8))^(1/a))
-    // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
-    // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
-    // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
-    cbt->alpha = -ln(1-BUILDTIMEOUT_QUANTILE_CUTOFF)/
-             (ln(get_options()->CircuitBuildTimeout)-ln(cbt->Xm));
+    circuit_build_times_initial_alpha(cbt,
+                     1.0-((double)cbt->pre_timeouts)/cbt->total_build_times,
+                     get_options()->CircuitBuildTimeout*1000);
     while (cbt->pre_timeouts-- != 0) {
       circuit_build_times_add_timeout_worker(cbt);
     }
diff --git a/src/or/config.c b/src/or/config.c
index 39a4ac1..45ce5cf 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -5207,7 +5207,7 @@ or_state_save(time_t now)
    * to avoid redundant writes. */
   entry_guards_update_state(global_state);
   rep_hist_update_state(global_state);
-  circuit_build_times_update_state(&circ_times, global_state);
+  circuit_build_times_update_state(&circ_times, global_state, 0);
   if (accounting_is_enabled(get_options()))
     accounting_run_housekeeping(now);
 
diff --git a/src/or/or.h b/src/or/or.h
index 5e94a56..13626c4 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -1884,15 +1884,6 @@ typedef struct crypt_path_t {
                                  DH_KEY_LEN)
 #define ONIONSKIN_REPLY_LEN (DH_KEY_LEN+DIGEST_LEN)
 
-// XXX: Do we want to artifically tweak CircuitIdleTimeout and
-// the number of circuits we build at a time if < MIN here?
-#define MIN_CIRCUITS_TO_OBSERVE 1000
-#define NCIRCUITS_TO_OBSERVE 10000 /* approx 3 weeks worth of circuits */
-#define BUILDTIME_BIN_WIDTH 50
-
-/* TODO: This should be moved to the consensus */
-#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8
-
 /** Information used to build a circuit. */
 typedef struct {
   /** Intended length of the final circuit. */
@@ -2866,25 +2857,51 @@ void bridges_retry_all(void);
 
 void entry_guards_free_all(void);
 
-/* Circuit Build Timeout "public" functions (I love C... No wait.) */
+/* Circuit Build Timeout "public" functions and structures.
+ * (I love C... No wait.) */
+
+// XXX: Do we want to artifically tweak CircuitIdleTimeout and
+// the number of circuits we build at a time if < MIN here?
+#define MIN_CIRCUITS_TO_OBSERVE 500
+#define NCIRCUITS_TO_OBSERVE 5000 /* approx 1.5 weeks worth of circuits */
+#define BUILDTIME_BIN_WIDTH 50
+
+/* TODO: This should be moved to the consensus */
+#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8
+
+typedef uint32_t build_time_t;
+#define BUILD_TIME_MAX  ((build_time_t)(INT32_MAX))
+
 typedef struct {
   // XXX: Make this a smartlist..
-  uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE];
+  build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE];
   int build_times_idx;
   int total_build_times;
   int pre_timeouts;
-  uint16_t Xm;
+  build_time_t Xm;
   double alpha;
 } circuit_build_times_t;
 
 extern circuit_build_times_t circ_times;
 void circuit_build_times_update_state(circuit_build_times_t *cbt,
-                                      or_state_t *state);
+                                      or_state_t *state, int do_unit);
 int  circuit_build_times_parse_state(circuit_build_times_t *cbt,
                                      or_state_t *state, char **msg);
 void circuit_build_times_add_timeout(circuit_build_times_t *cbt);
 void circuit_build_times_set_timeout(circuit_build_times_t *cbt);
-int circuit_build_times_add_time(circuit_build_times_t *cbt, long time);
+int circuit_build_times_add_time(circuit_build_times_t *cbt,
+                                 build_time_t time);
+
+#ifdef CIRCUIT_PRIVATE
+double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
+                                             double quantile);
+build_time_t circuit_build_times_generate_sample(circuit_build_times_t *cbt,
+                                                 double q_lo, double q_hi);
+void circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
+                                       double quantile, build_time_t time);
+void circuit_build_times_update_alpha(circuit_build_times_t *cbt);
+double circuit_build_times_cdf(circuit_build_times_t *cbt, double x);
+#endif
 
 /********************************* circuitlist.c ***********************/
 
diff --git a/src/or/test.c b/src/or/test.c
index f2cc7cc..ea8ce86 100644
--- a/src/or/test.c
+++ b/src/or/test.c
@@ -37,6 +37,9 @@ const char tor_git_revision[] = "";
 #define GEOIP_PRIVATE
 #define MEMPOOL_PRIVATE
 #define ROUTER_PRIVATE
+#define CIRCUIT_PRIVATE
+
+#include <math.h>
 
 #include "or.h"
 #include "test.h"
@@ -3404,6 +3407,69 @@ test_dirutil_param_voting(void)
   smartlist_free(vote3.net_params);
   smartlist_free(vote4.net_params);
 
+  return;
+}
+
+static void
+test_circuit_timeout(void)
+{
+  /* Plan:
+   *  1. Generate 1000 samples
+   *  2. Estimate parameters
+   *  3. If difference, repeat
+   *  4. Save state
+   *  5. load state
+   *  6. Estimate parameters
+   *  7. compare differences
+   */
+  circuit_build_times_t initial;
+  circuit_build_times_t estimate;
+  circuit_build_times_t final;
+  or_state_t state;
+  int i;
+  char *msg;
+  double timeout1, timeout2;
+  memset(&initial, 0, sizeof(circuit_build_times_t));
+  memset(&estimate, 0, sizeof(circuit_build_times_t));
+  memset(&final, 0, sizeof(circuit_build_times_t));
+  memset(&state, 0, sizeof(or_state_t));
+
+#define timeout0 (30*1000.0)
+  initial.Xm = 750;
+  circuit_build_times_initial_alpha(&initial, BUILDTIMEOUT_QUANTILE_CUTOFF,
+                                    timeout0);
+  do {
+    int n = 0;
+    for (i=0; i < MIN_CIRCUITS_TO_OBSERVE; i++) {
+      if (circuit_build_times_add_time(&estimate,
+              circuit_build_times_generate_sample(&initial, 0, 1)) == 0) {
+        n++;
+      }
+    }
+    circuit_build_times_update_alpha(&estimate);
+    timeout1 = circuit_build_times_calculate_timeout(&estimate,
+                                  BUILDTIMEOUT_QUANTILE_CUTOFF);
+    log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout1, estimate.Xm);
+  } while (fabs(circuit_build_times_cdf(&initial, timeout0) -
+                circuit_build_times_cdf(&initial, timeout1)) > 0.05
+                /* 5% error */
+           && estimate.total_build_times < NCIRCUITS_TO_OBSERVE);
+
+  test_assert(estimate.total_build_times < NCIRCUITS_TO_OBSERVE);
+
+  circuit_build_times_update_state(&estimate, &state, 1);
+  test_assert(circuit_build_times_parse_state(&final, &state, &msg) == 0);
+
+  circuit_build_times_update_alpha(&final);
+  timeout2 = circuit_build_times_calculate_timeout(&final,
+                                 BUILDTIMEOUT_QUANTILE_CUTOFF);
+  log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout2, final.Xm);
+
+  test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) -
+                circuit_build_times_cdf(&initial, timeout2)) < 0.05);
+
+done:
+  return;
 }
 
 extern const char AUTHORITY_CERT_1[];
@@ -4931,6 +4997,7 @@ static struct {
   ENT(dirutil),
   SUBENT(dirutil, measured_bw),
   SUBENT(dirutil, param_voting),
+  ENT(circuit_timeout),
   ENT(v3_networkstatus),
   ENT(policies),
   ENT(rend_fns),
-- 
1.5.6.5




More information about the tor-commits mailing list