[or-cvs] r14802: New code to implement proposal for local geoip stats. Only e (in tor/trunk: . doc src/common src/or)
nickm at seul.org
nickm at seul.org
Thu May 29 02:29:36 UTC 2008
Author: nickm
Date: 2008-05-28 22:29:35 -0400 (Wed, 28 May 2008)
New Revision: 14802
Modified:
tor/trunk/ChangeLog
tor/trunk/configure.in
tor/trunk/doc/TODO
tor/trunk/src/common/util.c
tor/trunk/src/or/config.c
tor/trunk/src/or/connection_or.c
tor/trunk/src/or/directory.c
tor/trunk/src/or/geoip.c
tor/trunk/src/or/main.c
tor/trunk/src/or/or.h
tor/trunk/src/or/router.c
tor/trunk/src/or/test.c
Log:
New code to implement proposal for local geoip stats. Only enabled with --enable-geoip-stats passed to configure.
Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/ChangeLog 2008-05-29 02:29:35 UTC (rev 14802)
@@ -104,6 +104,9 @@
Robert Hogan. Fixes the first part of bug 681.
- Make bridge authorities never serve extrainfo docs.
- Allow comments in geoip file.
+ - New configure/torrc options (--enable-geoip-stats,
+ DirRecordUsageByCountry) to record how many IPs we've served directory
+ info to in each country code.
o Minor features (security):
- Reject requests for reverse-dns lookup of names in a private
Modified: tor/trunk/configure.in
===================================================================
--- tor/trunk/configure.in 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/configure.in 2008-05-29 02:29:35 UTC (rev 14802)
@@ -87,6 +87,13 @@
;;
esac
+AC_ARG_ENABLE(geoip-stats,
+ AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
+
+if test "$enable_geoip_stats" = "yes"; then
+ AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
+fi
+
AC_ARG_ENABLE(gcc-warnings,
AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
Modified: tor/trunk/doc/TODO
===================================================================
--- tor/trunk/doc/TODO 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/doc/TODO 2008-05-29 02:29:35 UTC (rev 14802)
@@ -289,10 +289,10 @@
too much.
o teach geoip_parse_entry() to skip over lines that start with #, so we
can put a little note at the top of the geoip file to say what it is.
-N d we should have an off-by-default way for relays to dump geoip data to
+ . we should have an off-by-default way for relays to dump geoip data to
a file in their data directory, for measurement purposes.
- - Basic implementation
- - Include probability-of-selection
+ o Basic implementation
+N - Include probability-of-selection
R d let bridges set relaybandwidthrate as low as 5kb
R - bug: if we launch using bridges, and then stop using bridges, we
still have our bridges in our entryguards section, and may use them.
Modified: tor/trunk/src/common/util.c
===================================================================
--- tor/trunk/src/common/util.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/common/util.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -1559,7 +1559,6 @@
tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
#endif
new_file->fd = -1;
- tempname_len = strlen(fname)+16;
tor_assert(tempname_len > strlen(fname)); /*check for overflow*/
new_file->filename = tor_strdup(fname);
if (open_flags & O_APPEND) {
@@ -1577,8 +1576,7 @@
new_file->rename_on_close = 1;
}
- if ((new_file->fd = open(open_name, open_flags, mode))
- < 0) {
+ if ((new_file->fd = open(open_name, open_flags, mode)) < 0) {
log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
open_name, fname, strerror(errno));
goto err;
Modified: tor/trunk/src/or/config.c
===================================================================
--- tor/trunk/src/or/config.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/config.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -179,6 +179,9 @@
V(DirPolicy, LINELIST, NULL),
V(DirPort, UINT, "0"),
OBSOLETE("DirPostPeriod"),
+#ifdef ENABLE_GEOIP_STATS
+ V(DirRecordUsageByCountry, BOOL, "0"),
+#endif
VAR("DirServer", LINELIST, DirServers, NULL),
V(DNSPort, UINT, "0"),
V(DNSListenAddress, LINELIST, NULL),
Modified: tor/trunk/src/or/connection_or.c
===================================================================
--- tor/trunk/src/or/connection_or.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/connection_or.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -901,7 +901,7 @@
} else {
/* only report it to the geoip module if it's not a known router */
if (!router_get_by_digest(conn->identity_digest))
- geoip_note_client_seen(TO_CONN(conn)->addr, now);
+ geoip_note_client_seen(GEOIP_CLIENT_CONNECT, TO_CONN(conn)->addr, now);
}
if (conn->handshake_state) {
or_handshake_state_free(conn->handshake_state);
Modified: tor/trunk/src/or/directory.c
===================================================================
--- tor/trunk/src/or/directory.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/directory.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -2484,6 +2484,26 @@
goto done;
}
+#ifdef ENABLE_GEOIP_STATS
+ {
+ geoip_client_action_t act =
+ is_v3 ? GEOIP_CLIENT_NETWORKSTATUS : GEOIP_CLIENT_NETWORKSTATUS_V2;
+ uint32_t addr = conn->_base.addr;
+
+ if (conn->_base.linked_conn) {
+ connection_t *c = conn->_base.linked_conn;
+ if (c->type == CONN_TYPE_EXIT) {
+ circuit_t *circ = TO_EDGE_CONN(c)->on_circuit;
+ if (! CIRCUIT_IS_ORIGIN(circ)) {
+ or_connection_t *orconn = TO_OR_CIRCUIT(circ)->p_conn;
+ addr = orconn->_base.addr;
+ }
+ }
+ }
+ geoip_note_client_seen(act, addr, time(NULL));
+ }
+#endif
+
// note_request(request_type,dlen);
(void) request_type;
write_http_response_header(conn, -1, compressed,
Modified: tor/trunk/src/or/geoip.c
===================================================================
--- tor/trunk/src/or/geoip.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/geoip.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -131,7 +131,7 @@
* "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
* where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
* integers, and CC is a country code.
- *
+ *
* It also recognizes, and skips over, blank lines and lines that start
* with '#' (comments).
*/
@@ -208,9 +208,12 @@
typedef struct clientmap_entry_t {
HT_ENTRY(clientmap_entry_t) node;
uint32_t ipaddr;
- time_t last_seen;
+ time_t last_seen; /* The last 2 bits of this value hold the client
+ * operation. */
} clientmap_entry_t;
+#define ACTION_MASK 3
+
/** Map from client IP address to last time seen. */
static HT_HEAD(clientmap, clientmap_entry_t) client_history =
HT_INITIALIZER();
@@ -238,12 +241,28 @@
/** Note that we've seen a client connect from the IP <b>addr</b> (host order)
* at time <b>now</b>. Ignored by all but bridges. */
void
-geoip_note_client_seen(uint32_t addr, time_t now)
+geoip_note_client_seen(geoip_client_action_t action,
+ uint32_t addr, time_t now)
{
or_options_t *options = get_options();
clientmap_entry_t lookup, *ent;
- if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+ if (action == GEOIP_CLIENT_CONNECT) {
+ if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+ return;
+ } else {
+#ifndef ENABLE_GEOIP_STATS
return;
+#else
+ if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
+ !options->DirRecordUsageByCountry)
+ return;
+#endif
+ }
+
+ /* We use the low 2 bits of the time to encode the action. Since we're
+ * potentially remembering times of clients, we don't want to make
+ * clientmap_entry_t larger than it has to be. */
+ now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
lookup.ipaddr = addr;
ent = HT_FIND(clientmap, &client_history, &lookup);
if (ent) {
@@ -328,7 +347,7 @@
* that country, and cc is a lowercased country code. Returns NULL if we don't
* want to export geoip data yet. */
char *
-geoip_get_client_history(time_t now)
+geoip_get_client_history(time_t now, geoip_client_action_t action)
{
char *result = NULL;
if (!geoip_is_loaded())
@@ -343,7 +362,10 @@
unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
unsigned total = 0;
HT_FOREACH(ent, clientmap, &client_history) {
- int country = geoip_get_country_by_ip((*ent)->ipaddr);
+ int country;
+ if (((*ent)->last_seen & ACTION_MASK) != action)
+ continue;
+ country = geoip_get_country_by_ip((*ent)->ipaddr);
if (country < 0)
continue;
tor_assert(0 <= country && country < n_countries);
@@ -404,6 +426,41 @@
return result;
}
+/** Write the per-country directory-request statistics to the "geoip-stats"
+ * file named by get_datadir_fname().  Compiled to a no-op unless
+ * ENABLE_GEOIP_STATS is defined.  Nothing is written if either the v2 or
+ * the v3 history is unavailable (geoip_get_client_history() returned NULL)
+ * or if the file cannot be opened. */
+void
+dump_geoip_stats(void)
+{
+#ifdef ENABLE_GEOIP_STATS
+  time_t now = time(NULL);
+  char *filename = get_datadir_fname("geoip-stats");
+  char *data_v2 = NULL, *data_v3 = NULL;
+  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
+  open_file_t *open_file = NULL;
+  FILE *out;
+
+  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
+  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
+  format_iso_time(since, geoip_get_history_start());
+  format_iso_time(written, now);
+  if (!data_v2 || !data_v3)
+    goto done;
+  out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
+  if (!out)
+    goto done;
+  /* One "keyword value" pair per line.  Every keyword, "ns-v2" included,
+   * is separated from its value by a single space. */
+  if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2 %s\n",
+              written, since, data_v3, data_v2) < 0)
+    goto done;
+
+  finish_writing_to_file(open_file);
+  open_file = NULL;
+ done:
+  /* open_file is still set only if we bailed out before finishing. */
+  if (open_file)
+    abort_writing_to_file(open_file);
+  tor_free(filename);
+  tor_free(data_v2);
+  tor_free(data_v3);
+#endif
+}
+
/** Helper used to implement GETINFO ip-to-country/... controller command. */
int
getinfo_helper_geoip(control_connection_t *control_conn,
Modified: tor/trunk/src/or/main.c
===================================================================
--- tor/trunk/src/or/main.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/main.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -832,6 +832,7 @@
static time_t time_to_clean_caches = 0;
static time_t time_to_recheck_bandwidth = 0;
static time_t time_to_check_for_expired_networkstatus = 0;
+ static time_t time_to_dump_geoip_stats = 0;
or_options_t *options = get_options();
int i;
int have_dir_info;
@@ -958,6 +959,12 @@
time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
}
+ if (time_to_dump_geoip_stats < now) {
+#define DUMP_GEOIP_STATS_INTERVAL (60*60);
+ time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
+ dump_geoip_stats();
+ }
+
/** 2. Periodically, we consider getting a new directory, getting a
* new running-routers list, and/or force-uploading our descriptor
* (if we've passed our internal checks). */
Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/or.h 2008-05-29 02:29:35 UTC (rev 14802)
@@ -2358,6 +2358,10 @@
* count of how many client addresses have contacted us so that we can help
* the bridge authority guess which countries have blocked access to us. */
int BridgeRecordUsageByCountry;
+#ifdef ENABLE_GEOIP_STATS
+ int DirRecordUsageByCountry;
+#endif
+
/** Optionally, a file with GeoIP data. */
char *GeoIPFile;
@@ -3294,13 +3298,27 @@
int geoip_get_n_countries(void);
const char *geoip_get_country_name(int num);
int geoip_is_loaded(void);
-void geoip_note_client_seen(uint32_t addr, time_t now);
+/** Indicates an action that we might be noting geoip statistics on.
+ * Note that if we're noticing CONNECT, we're a bridge, and if we're noticing
+ * the others, we're not.
+ *
+ * geoip_note_client_seen() stores the action in the bits of the last-seen
+ * time selected by ACTION_MASK (3) in geoip.c, so every value listed here
+ * must fit in two bits (0..3).
+ */
+typedef enum {
+ /** We've noticed a connection as a bridge relay. */
+ GEOIP_CLIENT_CONNECT = 0,
+ /** We've served a networkstatus consensus as a directory server. */
+ GEOIP_CLIENT_NETWORKSTATUS = 1,
+ /** We've served a v2 networkstatus consensus as a directory server. */
+ GEOIP_CLIENT_NETWORKSTATUS_V2 = 2,
+} geoip_client_action_t;
+void geoip_note_client_seen(geoip_client_action_t action,
+ uint32_t addr, time_t now);
void geoip_remove_old_clients(time_t cutoff);
time_t geoip_get_history_start(void);
-char *geoip_get_client_history(time_t now);
+char *geoip_get_client_history(time_t now, geoip_client_action_t action);
int getinfo_helper_geoip(control_connection_t *control_conn,
const char *question, char **answer);
void geoip_free_all(void);
+void dump_geoip_stats(void);
/********************************* hibernate.c **********************/
Modified: tor/trunk/src/or/router.c
===================================================================
--- tor/trunk/src/or/router.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/router.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -1830,7 +1830,7 @@
geoip_remove_old_clients(now-48*60*60);
last_purged_at = now;
}
- geoip_summary = geoip_get_client_history(time(NULL));
+ geoip_summary = geoip_get_client_history(time(NULL), GEOIP_CLIENT_CONNECT);
if (geoip_summary) {
char geoip_start[ISO_TIME_LEN+1];
format_iso_time(geoip_start, geoip_get_history_start());
Modified: tor/trunk/src/or/test.c
===================================================================
--- tor/trunk/src/or/test.c 2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/test.c 2008-05-29 02:29:35 UTC (rev 14802)
@@ -3908,28 +3908,28 @@
get_options()->BridgeRecordUsageByCountry = 1;
/* Put 9 observations in AB... */
for (i=32; i < 40; ++i)
- geoip_note_client_seen(i, now);
- geoip_note_client_seen(225, now);
+ geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
+ geoip_note_client_seen(GEOIP_CLIENT_CONNECT, 225, now);
/* and 3 observations in XY, several times. */
for (j=0; j < 10; ++j)
for (i=52; i < 55; ++i)
- geoip_note_client_seen(i, now-3600);
+ geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
/* and 17 observations in ZZ... */
for (i=110; i < 127; ++i)
- geoip_note_client_seen(i, now-7200);
- s = geoip_get_client_history(now+5*24*60*60);
+ geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-7200);
+ s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(s);
test_streq("zz=24,ab=16", s);
tor_free(s);
/* Now clear out all the zz observations. */
geoip_remove_old_clients(now-6000);
- s = geoip_get_client_history(now+5*24*60*60);
+ s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(! s); /* There are only 12 observations left. Not enough to
build an answer. Add 4 more in XY... */
for (i=55; i < 59; ++i)
- geoip_note_client_seen(i, now-3600);
- s = geoip_get_client_history(now+5*24*60*60);
+ geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
+ s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
test_assert(s);
test_streq("ab=16", s);
tor_free(s);
More information about the tor-commits
mailing list