[tor-commits] [tor/master] More logs to try to diagnose bug 7164

nickm at torproject.org nickm at torproject.org
Thu Mar 27 18:31:49 UTC 2014


commit 0389d4aa561bec06ad2aab70ea5a989f1f2d02c6
Author: Nick Mathewson <nickm at torproject.org>
Date:   Mon Mar 17 14:15:12 2014 -0400

    More logs to try to diagnose bug 7164
    
    This time, check in microdesc_cache_clean() to see what could be
    going wrong with an attempt to clean a microdesc that's held by a node.
---
 changes/bug7164_diagnose_harder |    6 +++++
 src/or/microdesc.c              |   52 ++++++++++++++++++++++++++++++++++++++-
 src/or/nodelist.c               |   19 ++++++++++++++
 src/or/nodelist.h               |    1 +
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/changes/bug7164_diagnose_harder b/changes/bug7164_diagnose_harder
new file mode 100644
index 0000000..28c36b6
--- /dev/null
+++ b/changes/bug7164_diagnose_harder
@@ -0,0 +1,6 @@
+  o Minor features:
+    - Try harder to diagnose a possible cause of bug 7164, which causes
+      intermittent "microdesc_free() called but md was still referenced"
+      warnings. We now log more information about the likely error case,
+      to try to figure out why we might be cleaning a microdescriptor
+      as old if it's still referenced by a live node.
diff --git a/src/or/microdesc.c b/src/or/microdesc.c
index 90ac0ac..abcf5fc 100644
--- a/src/or/microdesc.c
+++ b/src/or/microdesc.c
@@ -368,7 +368,9 @@ microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force)
     cutoff = now - TOLERATE_MICRODESC_AGE;
 
   for (mdp = HT_START(microdesc_map, &cache->map); mdp != NULL; ) {
-    if ((*mdp)->last_listed < cutoff) {
+    const int is_old = (*mdp)->last_listed < cutoff;
+    const unsigned held_by_nodes = (*mdp)->held_by_nodes;
+    if (is_old && !held_by_nodes) {
       ++dropped;
       victim = *mdp;
       mdp = HT_NEXT_RMV(microdesc_map, &cache->map, mdp);
@@ -376,6 +378,54 @@ microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force)
       bytes_dropped += victim->bodylen;
       microdesc_free(victim);
     } else {
+      if (is_old) {
+        /* It's old, but it has held_by_nodes set.  That's not okay. */
+        /* Let's try to diagnose and fix #7164 . */
+        smartlist_t *nodes = nodelist_find_nodes_with_microdesc(*mdp);
+        const networkstatus_t *ns = networkstatus_get_latest_consensus();
+        int networkstatus_age = -1;
+        if (ns) {
+          networkstatus_age = now - ns->valid_after;
+        }
+        log_warn(LD_BUG, "Microdescriptor seemed very old "
+                 "(last listed %d hours ago vs %d hour cutoff), but is still "
+                 "marked as being held by %d node(s). I found %d node(s) "
+                 "holding it. Current networkstatus is %d hours old.",
+                 (int)((now - (*mdp)->last_listed) / 3600),
+                 (int)((now - cutoff) / 3600),
+                 held_by_nodes,
+                 smartlist_len(nodes),
+                 (int)(networkstatus_age / 3600));
+
+        SMARTLIST_FOREACH_BEGIN(nodes, const node_t *, node) {
+          const char *rs_match = "No RS";
+          const char *rs_present = "";
+          if (node->rs) {
+            if (tor_memeq(node->rs->descriptor_digest,
+                          (*mdp)->digest, DIGEST256_LEN)) {
+              rs_match = "Microdesc digest in RS matches";
+            } else {
+              rs_match = "Microdesc digest in RS does not match";
+            }
+            if (ns) {
+              /* This should be impossible, but let's see! */
+              rs_present = " RS not present in networkstatus.";
+              SMARTLIST_FOREACH(ns->routerstatus_list, routerstatus_t *,rs, {
+                if (rs == node->rs) {
+                  rs_present = " RS okay in networkstatus.";
+                }
+              });
+            }
+          }
+          log_warn(LD_BUG, "  [%d]: ID=%s. md=%p, rs=%p, ri=%p. %s.%s",
+                   node_sl_idx,
+                   hex_str(node->identity, DIGEST_LEN),
+                   node->md, node->rs, node->ri, rs_match, rs_present);
+        } SMARTLIST_FOREACH_END(node);
+        smartlist_free(nodes);
+        (*mdp)->last_listed = now;
+      }
+
       ++kept;
       mdp = HT_NEXT(microdesc_map, &cache->map, mdp);
     }
diff --git a/src/or/nodelist.c b/src/or/nodelist.c
index 178f084..d92ef17 100644
--- a/src/or/nodelist.c
+++ b/src/or/nodelist.c
@@ -337,6 +337,25 @@ nodelist_drop_node(node_t *node, int remove_from_ht)
   node->nodelist_idx = -1;
 }
 
+/** Build and return a newly allocated smartlist holding every node in the
+ * nodelist whose microdescriptor pointer is exactly <b>md</b>.  The list is
+ * empty if no node matches or if there is no nodelist at all. */
+smartlist_t *
+nodelist_find_nodes_with_microdesc(const microdesc_t *md)
+{
+  smartlist_t *matches = smartlist_new();
+
+  if (the_nodelist != NULL) {
+    SMARTLIST_FOREACH_BEGIN(the_nodelist->nodes, node_t *, n) {
+      if (n->md != md)
+        continue;
+      smartlist_add(matches, n);
+    } SMARTLIST_FOREACH_END(n);
+  }
+
+  return matches;
+}
+
 /** Release storage held by <b>node</b>  */
 static void
 node_free(node_t *node)
diff --git a/src/or/nodelist.h b/src/or/nodelist.h
index 8a4665a..836b2af 100644
--- a/src/or/nodelist.h
+++ b/src/or/nodelist.h
@@ -26,6 +26,7 @@ void nodelist_set_consensus(networkstatus_t *ns);
 void nodelist_remove_microdesc(const char *identity_digest, microdesc_t *md);
 void nodelist_remove_routerinfo(routerinfo_t *ri);
 void nodelist_purge(void);
+smartlist_t *nodelist_find_nodes_with_microdesc(const microdesc_t *md);
 
 void nodelist_free_all(void);
 void nodelist_assert_ok(void);





More information about the tor-commits mailing list