Copied: torflow/trunk/CircuitAnalysis/BuildTimes/buildtimes.py (from rev 17913, torflow/trunk/CircuitAnalysis/buildtimes.py)
--- torflow/trunk/CircuitAnalysis/BuildTimes/buildtimes.py (rev 0)
+++ torflow/trunk/CircuitAnalysis/BuildTimes/buildtimes.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+# uses metatroller to collect circuit build times for 5% slices of guard nodes
+# [OUTPUT] one directory, with three files: StatsHandler aggregate stats file, file with all circuit events (for detailed reference), file with just buildtimes
+import socket,sys,time,getopt,os
+from TorCtl.TorUtil import meta_port,meta_host,control_port,control_host
+from TorCtl.StatsSupport import StatsHandler
+from TorCtl import PathSupport, TorCtl
+__selmgr = PathSupport.SelectionManager(
+ pathlen=3,
+ order_exits=True,
+ percent_fast=80,
+ percent_skip=0,
+ min_bw=1024,
+ use_all_exits=True,
+ uniform=True,
+ use_exit=None,
+ use_guards=True,
+ restrict_guards=True)
+class Connection(PathSupport.Connection):
+    """Control connection whose reader thread quits once enough circuits finish.
+
+    Otherwise identical to PathSupport.Connection: _loop() returns as soon as
+    the handler's circ_failed + circ_built count reaches the handler's nstats.
+    """
+
+    def __init__(self,s):
+        PathSupport.Connection.__init__(self,s)
+
+    def _loop(self):
+        """Read replies until a read error occurs or the circuit quota is met."""
+        while True:
+            try:
+                isEvent, reply = self._read_reply()
+            except:
+                self._err(sys.exc_info())
+                return
+
+            if not isEvent:
+                # Synchronous reply: pass it to the next queued callback.
+                callback = self._queue.get() # atomic..
+                callback(reply)
+            elif self._handler is not None:
+                # Asynchronous event: timestamp it and queue it for the handler.
+                self._eventQueue.put((time.time(), reply))
+
+            if self._handler is None:
+                continue
+            # Stop reading once the handler has seen the requested number of
+            # finished (built or failed) circuits.
+            if self._handler.circ_failed + self._handler.circ_built >= self._handler.nstats:
+                print 'Finished gathering',self._handler.circ_failed + self._handler.circ_built,'circuits'
+                print self._handler.circ_failed,'failed',self._handler.circ_built,'built'
+                return
+class StatsGatherer(StatsHandler):
+    """StatsHandler that also records every circuit event and each build time.
+
+    Two files are opened for writing: basefile_name + '.detail' receives one
+    line per circuit event, and basefile_name + '.buildtimes' receives
+    "<circ_id><TAB><build time>" for every tracked circuit that reaches BUILT.
+    """
+
+    def __init__(self,c, selmgr,basefile_name,nstats):
+        StatsHandler.__init__(self,c, selmgr)
+        self.detailfile = open(basefile_name + '.detail','w')
+        self.buildtimesfile = open(basefile_name + '.buildtimes','w')
+        self.circ_built = 0
+        self.nstats = nstats
+        # sometimes relevant CircEvents occur before the circ_id is
+        # added to self.circuits, which means they get discarded
+        # we track them in self.othercircs: a dictionary of list of events
+        self.othercircs = {}
+
+    def circ_event_str(self,now,circ_event):
+        """Return a one-line, newline-terminated summary of the circuit event.
+
+        now is the already formatted timestamp string that starts the line.
+        """
+        output = [now, circ_event.event_name, str(circ_event.circ_id),
+                  circ_event.status]
+        if circ_event.path:
+            output.append(",".join(circ_event.path))
+        if circ_event.reason:
+            output.append("REASON=" + circ_event.reason)
+        if circ_event.remote_reason:
+            output.append("REMOTE_REASON=" + circ_event.remote_reason)
+        return ' '.join(output) + '\n'
+
+    def add_missed_events(self,circ_id):
+        """Write out events that arrived before circ_id was being tracked."""
+        # pop() fetches and removes in one step: the circuit is now in
+        # self.circuits, so its backlog is no longer needed.
+        missed = self.othercircs.pop(circ_id, None)
+        if missed is None:
+            return
+        for e_str in missed:
+            self.detailfile.write(e_str)
+        self.detailfile.flush()
+
+    def circ_status_event(self, circ_event):
+        """Log the circuit event, record build times, then defer to StatsHandler."""
+        now = '%3.10f' % time.time()
+        circ_id = circ_event.circ_id
+
+        # Test membership on the mappings directly; .keys() would build and
+        # scan a list on every single event.
+        if circ_id in self.circuits:
+            self.add_missed_events(circ_id)
+            circ = self.circuits[circ_id]
+            if circ_event.status == 'EXTENDED':
+                extend_time = circ_event.arrived_at - circ.last_extended_at
+                circ.extend_times.append(extend_time)
+                circ.last_extended_at = circ_event.arrived_at
+
+            if circ_event.status == 'BUILT':
+                # Build time is the sum of the per-hop extend times.
+                buildtime = sum(circ.extend_times, 0.0)
+                self.buildtimesfile.write(str(circ.circ_id) + '\t' + str(buildtime) + '\n')
+                self.buildtimesfile.flush()
+
+            self.detailfile.write(self.circ_event_str(now,circ_event))
+            self.detailfile.flush()
+
+            # counted so callers can tell when enough data has been gathered
+            if circ_event.status == 'BUILT': self.circ_built += 1
+        else:
+            # Not tracked yet: keep the formatted line until the circuit
+            # shows up in self.circuits.
+            self.othercircs.setdefault(circ_id, []).append(
+                self.circ_event_str(now,circ_event))
+        StatsHandler.circ_status_event(self,circ_event)
+def getdata(filename,ncircuits):
+    """Connect to Tor's control port and start the stat gathering thread.
+
+    Returns the Connection; its event handler is a StatsGatherer that writes
+    to files named after filename and stops after ncircuits circuits.
+    """
+    s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
+    s.connect((control_host,control_port))
+    c = Connection(s)
+    c.authenticate()  # also launches thread...
+    h = StatsGatherer(c,__selmgr,filename,ncircuits)
+    c.set_event_handler(h)
+    # NOTE(review): the event list was cut off after STREAM in this copy of
+    # the patch, leaving the call unterminated. The remaining entries are
+    # reconstructed; CIRC is certainly required because StatsGatherer does
+    # its work in circ_status_event. Confirm the rest against rev 17914.
+    c.set_events([TorCtl.EVENT_TYPE.STREAM,
+                  TorCtl.EVENT_TYPE.BW,
+                  TorCtl.EVENT_TYPE.NS,
+                  TorCtl.EVENT_TYPE.CIRC,
+                  TorCtl.EVENT_TYPE.STREAM_BW,
+                  TorCtl.EVENT_TYPE.NEWDESC], True)
+    return c
+def setargs():
+    """Parse the command line and return (ncircuits, percentile, dirname).
+
+    -n <# circuits> and -d <output dir> are required; -p <percentile> is
+    optional and is returned as None when absent. Any malformed or missing
+    argument prints the usage text and exits (usage() does not return).
+    """
+    try:
+        opts,args = getopt.getopt(sys.argv[1:],"p:n:d:")
+    except getopt.GetoptError,err:
+        print str(err)
+        usage()
+    ncircuits = None
+    percentile = None
+    dirname = ""
+    for o,a in opts:
+        if o == '-n':
+            if a.isdigit(): ncircuits = int(a)
+            else: usage()
+        elif o == '-d':
+            dirname = a  # directory where output files go
+        elif o == '-p':
+            if a.isdigit(): percentile = int(a)
+            else: usage()
+    # Check the required options themselves rather than the raw argument
+    # count, so e.g. "-p 5 -d out" without -n is rejected here instead of
+    # failing later with ncircuits set to None.
+    if ncircuits is None or not dirname:
+        usage()
+    return ncircuits,percentile,dirname
+def usage():
+    """Print the command-line synopsis and exit with status 1."""
+    # Report the name the script was actually started as; the old text
+    # hard-coded 'statscontroller.py', which is not this file.
+    prog = os.path.basename(sys.argv[0])
+    print 'usage: ' + prog + ' [-p <#percentile>] -n <# circuits> -d <output dir name>'
+    sys.exit(1)
+def guardslice(p,ncircuits,dirname):
+    """Build ncircuits circuits through the p..p+5 guard percentile slice.
+
+    Creates dirname if needed, launches the circuits, waits until every one
+    of them is accounted for and then writes the aggregate stats file
+    <dirname>/<p>-<p+5>.<ncircuits>.agg.
+    """
+    # plog is not among this script's module-level imports; import it here
+    # so the error path below cannot die with a NameError.
+    from TorCtl.TorUtil import plog
+
+    if not os.path.isdir(dirname):
+        print 'Making new directory:',dirname
+        os.mkdir(dirname)
+    else:
+        print 'Directory',dirname,'exists, not making a new one.'
+    print 'Guard percentiles:',p,'to',p+5
+    print '#Circuits',ncircuits
+    basefile_name = os.path.join(dirname, str(p) + '-' + str(p+5) + '.' + str(ncircuits))
+    aggfile_name = basefile_name + '.agg'
+
+    __selmgr.percent_fast = p+5
+    __selmgr.percent_skip = p
+
+    c = getdata(basefile_name,ncircuits)
+
+    # Circuits whose launch was refused outright. Presumably these never
+    # produce a circuit event, so the handler cannot count them; without
+    # this the wait loop below would never finish -- TODO confirm.
+    launch_failures = 0
+    for i in xrange(0,ncircuits):
+        print 'Building circuit',i
+        try:
+            # XXX: hrmm.. race conditions on the path_selectior members
+            # for the event handler thread?
+            # Probably only if streams end up coming in during this test..
+            circ = c.build_circuit(__selmgr.pathlen,__selmgr.path_selector)
+            c._handler.circuits[circ.circ_id] = circ
+        except TorCtl.ErrorReply,e:
+            launch_failures += 1
+            plog("NOTICE","Error building circuit: " + str(e.args))
+
+    while True:
+        time.sleep(1)
+        done = c._handler.circ_built + c._handler.circ_failed + launch_failures
+        if done >= ncircuits:
+            print 'Done gathering stats for slice',p,'to',p+5,'on',ncircuits
+            print c._handler.circ_built,'built',c._handler.circ_failed,'failed'
+            break
+    c._handler.write_stats(aggfile_name)
+def main():
+    """Measure the requested guard slice, or every 5% slice when -p is absent."""
+    ncircuits,p,dirname = setargs()
+    if p is not None:
+        slices = [p]
+    else:
+        # do all
+        slices = xrange(0,100,5)
+    for lower in slices:
+        guardslice(lower,ncircuits,dirname)
+if __name__ == '__main__':
+ main()
Copied: torflow/trunk/CircuitAnalysis/BuildTimes/numpy_pareto.py (from rev 17913, torflow/trunk/CircuitAnalysis/numpy_pareto.py)
--- torflow/trunk/CircuitAnalysis/BuildTimes/numpy_pareto.py (rev 0)
+++ torflow/trunk/CircuitAnalysis/BuildTimes/numpy_pareto.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,59 @@
+import numpy
+import pylab
+import matplotlib
+def loadbuildtimes(filename='40k_r1/45-50.40000.buildtimes'):
+    """Return the build times from a .buildtimes file in ms, largest first.
+
+    Each line must hold at least two tab-separated fields; the second is the
+    build time in seconds. filename defaults to the data set this script
+    originally hard-coded.
+    """
+    f = open(filename)
+    try:
+        vals = [float(line.split('\t')[1].strip())*1000 for line in f]
+    finally:
+        # close the handle even when a malformed line raises
+        f.close()
+    vals.sort()
+    vals.reverse()
+    return vals
+def pareto(x,k,Xm):
+    """Evaluate k * Xm**k / x**(k+1), the Pareto density, at x."""
+    numerator = k*(Xm**k)
+    denominator = x**(k+1)
+    return numerator/denominator
+#get buildtime data (in ms)
+Z = loadbuildtimes()
+# plot histogram.
+# args: values, number of bins, normalize y/n, width of bars
+pylab.hist(Z,len(Z) / 100.0, normed=True, width=5)
+#pareto parameters (taken from output of ./shufflebt.py buildtimes)
+#Resolution of histogram: 100 ms
+#Mean: 5746.8020777, mode: 1600
+#ParK: 0.918058347945
+#ModeN: 32775 vs integrated: 32394.9483089
+#successful runs: 41712
+k = 0.687880881456
+Xm = 1800
+n = 28921
+# args to a range: x start, x end
+X = pylab.arange(Xm, max(Z), 1) # max(Z), 0.1) # x values from 1 to max(Z) in increments of 0.1 (can adjust this to look at different parts of the graph)
+Y = map(lambda x: pareto(x,k,Xm), X) #pareto(x) (units: #measurements with value x)
+# verify sanity by integrating scaled distribution:
+modeNint = numpy.trapz(map(lambda x: n*pareto(x, k, Xm),
+ xrange(Xm,200000)))
+print modeNint
+print n*pareto(Xm, k, Xm)
+#draw pareto curve
+# X values plotted against Y values, will appear as blue circles b:blue o:circle
+#save figure
Copied: torflow/trunk/CircuitAnalysis/BuildTimes/shufflebt.py (from rev 17913, torflow/trunk/CircuitAnalysis/shufflebt.py)
--- torflow/trunk/CircuitAnalysis/BuildTimes/shufflebt.py (rev 0)
+++ torflow/trunk/CircuitAnalysis/BuildTimes/shufflebt.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,337 @@
+#!/usr/bin/env python
+# shufflebt.py
+# (c) Fallon Chen 2008
+# Shuffles a list of build times and produces a pdf of n of those buildtimes,
+# which are put into res (defaults to 100)ms blocks.
+# Requires gnuplot 4.2 and a version of coreutils that provides sort -R
+# "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] <list of filenames>"
+# if outdir is not specified, the script will write files to the current directory
+# if a directory is given instead of a list of filenames, all files postfixed with '.buildtimes' will be processed
+import getopt,sys,os
+import popen2
+import math,copy
+from scipy.integrate import *
+from numpy import trapz
+import numpy
+import pylab
+import matplotlib
+class Stats:
+    """Summary statistics and a histogram for one .buildtimes file.
+
+    self.values holds the build times in milliseconds, in file order.
+    self.buckets is filled by makehistogram(): it maps the upper edge of
+    each non-empty res-wide bin to the number of values in that bin.
+    """
+
+    def __init__(self,file):
+        # Each line holds tab-separated fields; the second is the build time
+        # in seconds, stored here in ms.
+        self.values = []
+        self.f = open(file)
+        try:
+            for line in self.f:
+                fields = line.split('\t')
+                self.values.append(float(fields[1]) * 1000)
+        finally:
+            # close the handle even when a malformed line raises
+            self.f.close()
+        self.buckets = {}
+
+    def mean(self):
+        """Return the arithmetic mean of the values (0.0 when empty)."""
+        # Borrowed from TorUtil
+        if len(self.values) > 0:
+            return sum(self.values, 0.0)/len(self.values)
+        else:
+            return 0.0
+
+    def stddev(self):
+        """Return the sample standard deviation (0.0 for fewer than 2 values)."""
+        # Borrowed from TorUtil
+        if len(self.values) > 1:
+            mean = self.mean()
+            sq_sum = sum([(y-mean)**2.0 for y in self.values], 0.0)
+            return math.sqrt(sq_sum/(len(self.values)-1))
+        else:
+            return 0.0
+
+    def median(self):
+        """Return the lower median of the values (0.0 when empty)."""
+        if len(self.values) > 0:
+            values = sorted(self.values)
+            return values[(len(values) - 1)//2]
+        else:
+            return 0.0
+
+    def mode(self): # Requires makehistogram runs first
+        """Return the key of the fullest bucket (0 when there are no buckets).
+
+        Ties go to the smallest key, so the result does not depend on
+        dictionary ordering.
+        """
+        greatest_val = 0
+        greatest_idx = 0
+        for v in sorted(self.buckets):
+            if self.buckets[v] > greatest_val:
+                greatest_idx = v
+                greatest_val = self.buckets[v]
+        return greatest_idx
+
+    def pyhist(self,res,histname):
+        """Plot a fixed Pareto curve and save it as histname + '.png'."""
+        bins = len(self.values) / res
+        print 'bins:',bins
+        x = numpy.arange(1,7000, 0.01)
+        # pypareto() takes (x, k, Xm) and compares a single scalar x against
+        # Xm, so evaluate it point by point and apply the count scale here.
+        S = [32775.0 * pypareto(xi, 0.918058347945, 1600.0) for xi in x]
+        pylab.plot(x,S, 'bo')
+        pylab.savefig(histname + '.png')
+
+    def makehistogram(self,res,histname):
+        """Bucket the values into res-wide bins and write them to histname.
+
+        A value v is counted under key int(res * (v // res + 1)), the upper
+        edge of its bin, matching the '#build time <' column header. Only
+        non-empty bins are stored and written.
+        """
+        self.buckets = {}
+        for v in self.values:
+            # Compute each value's bin directly: advancing one bin per
+            # boundary crossing misfiles values whenever a bin is empty and
+            # loses the values after the last crossing.
+            upper = int(res * (int(v // res) + 1))
+            self.buckets[upper] = self.buckets.get(upper, 0) + 1
+        f = open(histname,'w')
+        try:
+            f.write('#build time <\t#circuits\n')
+            for b in sorted(self.buckets):
+                f.write(str(b) + '\t' + str(self.buckets[b]) + '\n')
+        finally:
+            f.close()
+
+    def paretoK(self, Xm):
+        """Estimate the Pareto shape k from the values >= Xm.
+
+        Computes n / (sum(log x) - n*log(Xm)); returns 0.0 when no value
+        qualifies or the denominator is zero.
+        """
+        n = 0
+        log_sum = 0
+        for x in self.values:
+            if x < Xm: continue
+            n += 1
+            log_sum += math.log(x)
+        if n == 0:
+            return 0.0
+        denom = log_sum - n*math.log(Xm)
+        if denom == 0:
+            # every qualifying value equals Xm
+            return 0.0
+        return n/denom
+
+    # Calculate the mean beyond a mode value
+    def modeMean(self, Xm):
+        """Return the mean of the values >= Xm (0.0 when there are none)."""
+        n = 0
+        tot = 0
+        for x in self.values:
+            if x < Xm: continue
+            n += 1
+            tot += x
+        if n == 0:
+            return 0.0
+        return tot/n
+
+    def modeN(self, Xm):
+        """Return how many values are >= Xm."""
+        n = 0
+        for x in self.values:
+            if x < Xm: continue
+            n += 1
+        return n
+
+    def maxlikelihood(self,k):
+        """Return 10*sum(values)/(k*N), a theta estimate for a gamma PDF."""
+        # theta estimator for gamma PDF
+        # maxlikelihood estimator
+        # theta = sum(values) / N*k
+        return 10*sum(self.values)/(k * len(self.values))
+
+    def bayesian(self,k):
+        """Return (mean + sdev, mean - sdev) of the bayesian theta estimate."""
+        # bayesian estimator for gamma PDF
+        # y = sum(values)
+        # theta = y/(Nk - 1) +/- y^2/((Nk-1)^2(Nk -2))
+        y = sum(self.values) * 10
+        N = len(self.values)
+        mean = y/(N*k - 1)
+        sdev = (y*y)/((N*k - 1)* (N*k - 1) * (N*k - 2))
+        plus = mean + sdev
+        minus = mean - sdev
+        return plus,minus
+## Functions that return a gnuplot function string for a given distribution
+def gamma(k,theta, N,fname):
+    """Return a gnuplot definition of fname(x) as an N-scaled gamma PDF.
+
+    g(x,k,B) = (x**(k - 1) * B**k * exp(-B*x))/gamma(k), with B = 1/theta.
+    """
+    rate = 1.0/theta
+    pieces = [fname, '(x) = ', str(N),
+              '*((x**', str(k-1),
+              ')*(', str(rate**k),
+              ')*(exp(-', str(rate), '*x)))',
+              '/gamma(', str(k), ')\n']
+    return ''.join(pieces)
+def pareto(k,Xm,N,fname):
+    """Return a gnuplot definition of fname(x) as an N-scaled Pareto PDF.
+
+    g(x) = N * k*(Xm**k)/x**(k+1) for x > Xm, and 0 for x <= Xm.
+    """
+    scale = (N*k)*(Xm**k)
+    exponent = k+1
+    pieces = [fname, '(x)=(x<=', str(Xm), ') ? 0 : ((',
+              str(scale), ')/((x)**(', str(exponent), ')))\n']
+    return ''.join(pieces)
+def pypareto(x, k,Xm):
+    """Return the Pareto density k*(Xm**k)/x**(k+1) at x, or 0 when x < Xm.
+
+    Unlike the gnuplot helpers around it, this returns a number, not a string.
+    """
+    if x<Xm:
+        return 0
+    scale = k*(Xm**k)
+    return scale/(x**(k+1))
+def exp(mean,shift,N,fname):
+    """Return a gnuplot definition of fname(x) as an N-scaled exponential PDF.
+
+    g(x) = N * l*exp(-l*(x-shift)) for x >= shift and 0 below, with l = 1/mean.
+    """
+    rate = 1.0/mean
+    shift_str = str(shift)
+    pieces = [fname, '(x)=(x<', shift_str, ')?0:(',
+              str(N*rate), '*exp(-abs(', str(rate),
+              '*(x-', shift_str, '))))\n']
+    return ''.join(pieces)
+def shiftedExp(mean,shift,N,fname):
+    """Return a gnuplot definition of fname(x) as a shifted exponential PDF.
+
+    g(x) = N * l*exp(-l*(x-shift))/(1+(1-exp(-l*shift))), with l = 1/mean.
+    """
+    rate = 1.0/mean
+    pieces = [fname, '(x)=', str(N*rate),
+              '*exp(-abs(', str(rate), '*(x-', str(shift),
+              ')))/(1+(1-exp(-', str(rate*shift), ')))\n']
+    return ''.join(pieces)
+def poisson(u,N,fname):
+    """Return a gnuplot definition of fname(x) as an N-scaled Poisson PMF."""
+    mean_str = str(u)
+    pieces = [fname, "(x) = ", str(N), "*(", mean_str,
+              "**x)*exp(-", mean_str, ")/gamma(x + 1)\n"]
+    return "".join(pieces)
+def normal(u,d,N,fname):
+    """Return a gnuplot definition of fname(x) as an N-scaled normal PDF.
+
+    u is the mean and d the standard deviation; N is truncated to an int
+    before it is divided by d.
+    """
+    amplitude = int(N)/d
+    two_var = 2*d*d
+    pieces = [fname, "(x)=", str(amplitude),
+              "*(exp(-((x-", str(u), ")**2)/", str(two_var),
+              "))/sqrt(2*pi)\n"]
+    return "".join(pieces)
+def usage():
+    """Print the command-line synopsis and exit with status 1."""
+    synopsis = "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] [-r <res in ms>] <list of filenames>"
+    print synopsis
+    sys.exit(1)
+def intermediate_filename(infile,shuffle,truncate,outdir):
+    """Return the path of the file the statistics are actually read from.
+
+    With neither shuffling nor truncation that is infile itself (made
+    absolute); otherwise it is a file in outdir named after infile with
+    '.<truncate>' and/or '.shuffled' appended.
+    """
+    if shuffle or truncate:
+        parts = [os.path.join(os.path.abspath(outdir),os.path.basename(infile))]
+        if truncate:
+            parts.append(str(truncate))
+        if shuffle:
+            parts.append('shuffled')
+        return '.'.join(parts)
+    return os.path.abspath(infile)
+def histogram_basefilename(infile,shuffle,truncate,res,outdir):
+    """Return the extension-less path for histogram output in outdir.
+
+    The name is infile's base name followed by '.<truncate>' (if set),
+    '.shuffled' (if set) and always '.res<res>'.
+    """
+    stem = os.path.join(os.path.abspath(outdir),os.path.basename(infile))
+    suffixes = []
+    if truncate:
+        suffixes.append(str(truncate))
+    if shuffle:
+        suffixes.append('shuffled')
+    suffixes.append('res' + str(res))
+    return '.'.join([stem] + suffixes)
+def getargs():
+    """Parse sys.argv; return (sort, truncate, graph, outdirname, filenames, k, res).
+
+    Options: -s shuffle, -g graph, -n <count> (kept as a digit string),
+    -k <float>, -d <output dir>, -r <resolution in ms, float>. Every other
+    argument is taken as an input file name. A missing or malformed option
+    value prints the usage text and exits (usage() does not return).
+    """
+    # [-n <truncate to # circuits>] [-s] <list of filenames>
+    k = 3
+    res = 100
+    sort = False
+    truncate = None
+    graph = False
+    outdirname = "."  # will write to current directory if not specified
+    filenames = []
+    args = sys.argv[1:]
+    if not args: usage()
+    i = 0
+    while i < len(args):
+        opt = args[i]
+        if opt in ('-n', '-k', '-d', '-r'):
+            # These options consume the following argument; make sure it
+            # exists instead of running off the end of the list.
+            if i + 1 >= len(args): usage()
+            i += 1
+            val = args[i]
+            if opt == '-n':
+                if not val.isdigit(): usage()
+                truncate = val
+            elif opt == '-d':
+                outdirname = val
+            else:
+                try:
+                    number = float(val)
+                except ValueError:
+                    usage()
+                if opt == '-k': k = number
+                else: res = number
+        elif opt == '-s': sort = True
+        elif opt == '-g': graph = True
+        else:
+            filenames.append(opt)
+        i += 1
+    return sort, truncate,graph,outdirname,filenames,k,res
+def shuffle(sort,truncate,filename,newfile):
+    """Write a shuffled and/or truncated copy of filename to newfile.
+
+    sort requests random ordering (sort -R); truncate, when set, keeps only
+    that many lines (head -n). Does nothing when neither is requested.
+    """
+    if not sort and not truncate: return
+    # Local import: only this function needs the shell-quoting helper.
+    import pipes
+    sortlocation = '/usr/local/bin/sort' #peculiarity of fallon's system
+    if not os.path.exists(sortlocation):
+        # fall back to whatever sort is on the PATH
+        sortlocation = 'sort'
+    # Quote everything spliced into the shell command so names containing
+    # spaces or shell metacharacters are passed through literally.
+    src = pipes.quote(filename)
+    dst = pipes.quote(newfile)
+    if sort and truncate:
+        cmd = sortlocation + ' -R ' + src + ' | head -n ' + pipes.quote(str(truncate)) + ' > ' + dst
+    elif sort:
+        cmd = sortlocation + ' -R ' + src + ' > ' + dst
+    else:
+        cmd = 'cat ' + src + ' | head -n ' + pipes.quote(str(truncate)) + ' > ' + dst
+    p = popen2.Popen4(cmd)
+    status = p.wait()
+    if status != 0:
+        print 'Warning: command failed with status',status,':',cmd
+if __name__ == "__main__":
+    sort, truncate,graph,dirname,filenames,k,res = getargs()
+    # make new directory
+    if not os.path.isdir(dirname):
+        print 'Making new directory:',dirname
+        os.mkdir(dirname)
+    else:
+        print 'Dir exists, not making a new one'
+
+    # Expand directory arguments into the '.buildtimes' files they contain
+    # (shallow). A fresh list is built because removing from and appending
+    # to a list while iterating over it skips entries.
+    inputs = []
+    for filename in filenames:
+        if os.path.isdir(filename):
+            for f in os.listdir(filename):
+                if f.endswith('.buildtimes'):
+                    inputs.append(os.path.join(filename,f))
+        else:
+            inputs.append(filename)
+
+    for filename in inputs:
+        print 'Processing',filename
+        print '------------------------------'
+        if not os.path.exists(filename):
+            print filename,'is not a valid path'
+            continue
+        newfile = intermediate_filename(filename,sort,truncate,dirname)
+        # shuffle, create new file
+        shuffle(sort,truncate,filename,newfile)
+
+        # create histogram from file
+        s = Stats(newfile)
+        histfilename = histogram_basefilename(filename,sort,truncate,res,dirname)
+        s.makehistogram(res,histfilename + '.hist')
+        mean = s.mean()
+        stddev = s.stddev()
+        median = s.median()
+        mode = s.mode() # relies on s.makehistogram for buckets
+        parK = s.paretoK(mode)
+        modeN = s.modeN(mode)
+        modeMean = s.modeMean(mode)
+        # verify sanity by integrating scaled distribution:
+        modeNint = trapz(map(lambda x: modeN* pypareto(x, parK, mode),
+                             xrange(1,200000)))
+
+        print 'Resolution of histogram:',res,'ms'
+        print 'Mean: '+str(mean)+', mode: '+str(mode)
+        print 'ParK: '+str(parK)
+        print 'ModeN: '+str(modeN)+" vs integrated: "+str(modeNint)
+        print '#successful runs:',len(s.values)
+        # get stats
+
+        if graph:
+            # plot histogram
+            # args: values, # bins, normalize y/n, width of bars
+            # res is a float when given with -r, but a bin count must be a
+            # whole number of at least one.
+            nbins = max(1, int(len(s.values) / res))
+            pylab.hist(s.values,nbins, normed=True,width=5)
+
+            #plot Pareto curve
+            X = pylab.arange(mode, max(s.values), 1)
+            Y = map(lambda x: pypareto(x, parK, mode), X)
+            pylab.plot(X,Y,'b-')
+
+            #save figure
+            pylab.savefig(histfilename + '.png')
+            pylab.clf()
Copied: torflow/trunk/CircuitAnalysis/OPAddon/README-op-addon (from rev 17873, torflow/trunk/README-op-addon)
--- torflow/trunk/CircuitAnalysis/OPAddon/README-op-addon (rev 0)
+++ torflow/trunk/CircuitAnalysis/OPAddon/README-op-addon 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,290 @@
+* For instructions on how to get OP-Addon, see section 9
+* Installation instructions and prerequisites can be found in section 10
+1. Introduction to OP-Addon:
+ This software is intended for anybody who is researching or experimenting
+ with the circuit creation mechanisms and path selection in Tor, as well as
+ for ambitious Tor users, who want to optimize their performance in user-tasks
+ or otherwise customize Tor's method of path selection at their own risk.
+ The OP-Addon is a controller for Tor (clients) that is written in Python and
+ can be applied to any locally running onion proxy that has a control port
+ configured. By making use of the Tor control protocol, it replaces Tor's
+ default path selection and circuit management by highly configurable and
+ customizable mechanisms. Users can freely configure the method of path
+ selection that is to be used, while the created circuits can either be
+ evaluated regarding their performance, or specifically be used to handle user
+ streams, e.g. for browsing the web. Additionally, the add-on can be used to
+ run simulations that can be useful to determine a degree of anonymity a
+ certain method of path selection can provide, when using the current
+ network status (see section 7.).
+ Currently implemented performance tests include a method of measuring Tor
+ latencies that is based on violating the exit policy of the last hop in a
+ path. Using this method, it is possible to measure latencies of complete
+ n-hop circuits, as well as of single links between Tor routers (see sections
+ 5. & 6.). Further, OP-Addon can actively measure the throughput of created
+ circuits by explicitly requesting a number of bytes from a stream-server that
+ needs to be listening on the same host as OP-Addon. Such latency- and
+ throughput-tests can be used to compare the performance of circuits that were
+ created using different methods of path selection.
+ If you do not know what this is all about and plan to implement your own
+ application that creates circuits in a customized way or new measurings and
+ tests, please refer to section 8. The following sections will explain the
+ available features of OP-Addon that can be enabled and configured using
+ configuration options that are grouped into several sections (The file
+ 'pathrc.example' contains a commented example configuration).
+2. General configurations (sections HOST_PORT and CIRC_MANAGEMENT):
+ Since OP-Addon is a Tor controller, you will in any case need to provide the
+ host and port, where Tor is listening for control connections (defaults are
+ localhost and 9051). OP-Addon will make use of control port authentication,
+ as soon as a convenient way for doing this is found. The configuration option
+ 'idle_circuits' lets the user specify a number of circuits that shall be
+ created preemptively. OP-Addon will try to keep this amount of general
+ purpose circuits (allowing exit to port 80) available in the background at
+ every time. 'idle_circuits' can be set to any integer number between 0 and n.
+ If it is set to 0, OP-Addon will create the first circuit with regard to the
+ destination of the first incoming application stream.
+3. Evaluations and user mode (section EVALUATE):
+ In the most basic configuration, OP-Addon will create the configured amount
+ of circuits, and wait for incoming streams from applications to attach them.
+ If any user wants to specifically evaluate the performance of circuits, where
+ the paths were created using an arbitrary configuration, she can make use of
+ the option 'evaluate'.
+ If 'evaluate' is set to 'yes', one additionally needs to specify the options
+ 'num_rtt_tests' (int) and 'num_bw_tests' (int). These specify the number of
+ tests to perform on each successfully created circuit, before actively
+ closing it down again. The mean value of the results from the RTT-tests is
+ written to a file, together with the setup duration of the specific circuit
+ and the (optionally) actively measured throughput. Every single line of the
+ results-file contains values received from a circuit in the following order:
+ setup_duration throughput average_RTT
+ Note that there will be at most one bandwidth-test, even if 'num_bw_tests' is
+ set to a number greater than 1 and the script 'stream-server.pl' needs to be
+ run on the _same_ host as OP-Addon for measuring a circuit's throughput. The
+ add-on will then connect to this server, using the circuit that is to be
+ tested, and request a number of bytes that is then actively transferred. This
+ is implemented using a simple protocol, where the server parses its input and
+ uses the first occurring integer on a line as the amount of bytes to send to
+ the client (see 'stream-server.pl').
+ Further, the option 'num_records' is used to specify the total amount of
+ circuits that is to be tested, before terminating the actual evaluation.
+ Note that 'evaluate' is NOT useful for transporting user traffic at all,
+ since every circuit will be closed, as soon as all the tests have completed.
+ If 'evaluate' is set to 'no', OP-Addon is running in user mode. In user mode,
+ the script simply maintains the specified amount of circuits created with the
+ configured method of path selection at every time, waiting to handle incoming
+ user streams. One can optionally specify that circuits shall be 'pinged' with
+ any configurable frequency (see 5.), and hence a ranked list of the circuits
+ will be maintained. Incoming user streams are then attached to the first
+ suitable circuit on the sorted list. In both of the modes, OP-Addon will
+ record general circuit creation statistics about _all_ created circuits to a
+ file ('circ-setup-stats'), including the median and mean setup duration,
+ min/max values and the number of failures that occurred during circuit
+ setups, as well as on already established circuits.
+4. Basic path selection configuration (sections NODE_SELECTION and GEOIP):
+ The method of path selection that shall be used can be freely configured
+ using configuration options from the sections NODE_SELECTION and GEOIP.
+ Internally this is done by combining arbitrary restrictions on the selection
+ of single nodes, as well as on complete paths. It is possible to choose from
+ different node generators and node/path restrictions by changing options in
+ the configuration. Some of the implemented restrictions make use of
+ geographical data (using the geoip library for Python from
+ http://www.maxmind.com) to consider the location of routers while choosing.
+ This can be used to ensure a specific geographic (non-)diversity of the
+ routers in a path, especially lower and upper bounds regarding the diversity
+ of routers in paths. But it is also possible to apply any non-geographic
+ restrictions, like explicitly specifying an exit node to be used, or the
+ length of the generated paths, as a basic example of a path restriction. The
+ following is a list of already implemented generators and restrictions that
+ can be configured using the following options from the config-file:
+ General:
+ * pathlen: specify the number of hops to be used in paths
+ * min_bw: specify a min-value for advertised bandwidth of routers
+ * exit_node: explicitly specify an exit node by its nickname or IDhex
+ * use_guards: choose guards on the entry positions (yes|no)
+ NodeGenerators:
+ * uniform: choose nodes uniformly (yes|no), can be combined with
+ * ordered_exits: choose exits from an ordered list
+ * uniform=no --> weighted selection regarding advertised bandwidths
+ GeoIP:
+ * unique_country:
+ - 'yes' will enforce distinct countries for all hops in a path
+ - 'no' will put all hops in the same country,
+ - comment out means do not care about countries
+ * entry_, middle_, exit_country: specify countries for positions
+ * continent_crossings:
+ - 0-n specifies the max number of continent hops in a single path
+ - comment this out to choose all hops on different continents
+ * ocean_crossings:
+ - 0-n specifies the max number of ocean crossings in a single path.
+ This is done by grouping the continents in three groups and
+ considering crossings between these groups:
+ 1. North and South America
+ 2. Europe, Africa and Asia
+ 3. Oceania
+ - comment out to not care about ocean crossings
+ * TODO: echelon (entry in the sender`s, exit in the destination`s country)
+ * TODO: excludes (list of countries to exclude from path selection)
+ To extend these path selection features or to add new restrictions to be
+ applied to single nodes or complete paths, one can easily design and
+ implement new Node or PathRestrictions using the existing interfaces from
+ TorFlow.
+5. Latency measurements (section RTT):
+ It is possible to use OP-Addon to measure latencies of complete circuits, as
+ well as of single links between routers. By setting 'ping_circs' to 'yes',
+ OP-Addon will ping the complete circuits that are currently available with a
+ frequency that is specified by 'frequency' (in seconds given as float).
+ Additionally an initial interval needs to be specified, that shall be waited,
+ before triggering the first ping. Since most of the circuit creations need
+ less than 6 seconds, something like 10 seconds will be a safe value. Further
+ OP-Addon can be configured to close a circuit after n timeouts experienced
+ during measurement, where n is configured using 'timeout_limit'.
+ Measurements of RTTs are done by sending a relay connect cell, heading to a
+ destination that the exit policy of last router in the path will surely deny.
+ This destination is set in 'ping_dummy_*' options and the values in
+ pathrc.example are working well (localhost and port 100). Since OP-Addon will
+ try to connect somewhere over Tor, also the Tor SOCKS-host and -port need to
+ be specified (mostly localhost and 9050).
+6. Circuit creation based on measured latencies (section MODEL):
+ Because of the leaky-pipe circuit topology in Tor, it is possible to address
+ every hop in a created circuit as the exit node for a stream. OP-Addon
+ implements a technique to measure and store RTTs of single links between
+ routers, by using every hop in a path as the exit once. The subtracted
+ results of these measurements are stored in a graph model that represents the
+ currently known Tor subnet of a client. Setting 'network_model' to 'yes' will
+ enable these measurements, as well as circuit creation from the network model.
+ The 'max_rtt' option lets users specify a maximum RTT value to choose only
+ paths below this threshold (seconds given as float, e.g. 0.5). The actual
+ selection from all suitable paths, that are currently found in the model, is
+ done in a probabilistic way, weighting path proposals by their (summed up)
+ latencies, combined with the minimum advertised bandwidth of the routers in
+ the path. Using another option ('min_proposals'), OP-Addon will begin to
+ create circuits from the model only if there are 'min_proposals'
+ suitable path proposals found, satisfying the configured threshold on RTTs.
+ If the intention is to grow a network model only, without creating circuits
+ based on it, set 'min_ratio' to 1. 'min_ratio' defines the ratio of available
+ circuits that were *not* created based on measurings. Setting it to 0.5 will
+ enforce that at most 50% of the circuits in the pool were created from the
+ model at every time. This can ensure steady growing of the network model,
+ while already choosing paths from it for building circuits. Setting
+ 'min_ratio' to 0 will lead to circuits created from the model only. This
+ might be useful, if one wants to use a model, but not to extend or refresh it
+ anymore. The regular circuits are created using the parameters defined in
+ section 4.
+7. Using OP-Addon to run simulations:
+ Another feature of OP-Addon is to run simulations using any given method of
+ path selection, by specifying the argument '--simulate' plus a number 'n' to
+ specify the number of paths that shall be generated. When running a
+ simulation, OP-Addon simply generates n paths employing the method of path
+ selection that is given by the configuration file, without actually creating
+ any circuits. The control connection to the Tor process is therefore used
+ only for querying the list of all currently known routers. An example
+ simulation (generating 100000 paths) can be run by typing:
+ ./op-addon pathrc.example --simulate 100000
+ Any algorithm can be specified to be used in the simulation, even those that
+ choose paths from a given network model. Afterwards, the created paths are
+ evaluated with regard to the degree of anonymity they would provide, e.g.~the
+ anonymity would be poor, if the same path would be chosen 100000 times.
+ Since nodes are mostly not chosen uniformly, it is necessary to calculate
+ empirical probabilities, to determine the actual distribution of the nodes to
+ the positions in paths. If many paths are created, this makes it possible to
+ empirically measure the quality of protection certain methods of path
+ selection can provide. Much more work could be done here to introduce
+ additional methods for analyzing the generated paths regarding several
+ possible attacks.
+8. Implementing custom tests and measurings:
+ Anybody who wants/needs to implement his/her own custom measurings or
+ performance tests, probably will need to write an event handler that extends
+ from the existing classes in PathSupport.py, similar to the PingHandler
+ contained in OP-Addon. Therefore consider CircuitHandler, which is a class
+ that simply maintains a pool of circuits of configurable size, created with
+ any given method depending on the configuration. The StreamHandler class is
+ extending from the CircuitHandler and generally handles the attaching of
+ streams to circuits from the pool. You therefore might want to extend from
+ the StreamHandler for implementing your own tests.
+9. Instructions to get OP-Addon:
+ OP-Addon is part of the 'TorFlow' project that is hosted within the Tor
+ subversion. To check out the latest revision, 'cd' to the directory where
+ you want to install and type:
+ svn checkout https://tor-svn.freehaven.net/svn/torflow/trunk torflow
+10. Prerequisites and instructions to run OP-Addon:
+ Note that Linux is the only operating system, that OP-Addon was tested on
+ until now, but it might also run on other platforms. Let me know, if you
+ experimented with Windows or any other OS.
+ To run OP-Addon, you will need a Python interpreter and a running Tor client
+ with the ControlPort set (control port authentication is currently not yet
+ supported). Note that if you plan to measure latencies of single links
+ between routers, you need to compile the Tor client from source and to apply
+ a patch that allows to measure the latency from the proxy to the first hop
+ ('one-hop.diff' is included in the distribution in the '/tordiffs'-folder).
+ To make use of the complete functionalities, it is further necessary to
+ install the following Python libraries:
+ - GeoIP [http://www.maxmind.com/app/python]
+ - NetworkX [https://networkx.lanl.gov]
+ - SocksiPy [get it from http://socksipy.sourceforge.net/]
+ On Debian systems, the first two libraries can be installed by simply running:
+ apt-get install python-geoip networkx
+ To run OP-Addon, simply 'cd' to the installation directory and start the
+ script by calling:
+ ./op-addon.py [config-file]
+ If no config-file is given, OP-Addon will try to find 'pathrc.example',
+ which is included in the distribution. It is intended to be copied and
+ modified though.
+(c) 2007 Johannes Renner (renner <AT> i4.informatik.rwth-aachen.de)
Copied: torflow/trunk/CircuitAnalysis/OPAddon/TODO-op-addon (from rev 17873, torflow/trunk/TODO-op-addon)
--- torflow/trunk/CircuitAnalysis/OPAddon/TODO-op-addon (rev 0)
+++ torflow/trunk/CircuitAnalysis/OPAddon/TODO-op-addon 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,25 @@
+TODO-lists regarding OP-Addon:
+Implementation tasks:
+ - Perform DNS requests within OP-Addon to make 'echelon' possible for given
+ URLs. Currently 'echelon' is working for given IPs only.
+ - This needs also integration into circuit management: If there is currently
+ a circuit available fulfilling the echelon-property regarding the current
+ request, then use this circuit and do not create a new one. Else create a
+ new circuit in the echelon-style.
+ - Add port-history learning to StreamHandler or CircuitHandler and/or
+ port-preconfiguring to be able to configure which ports will be needed.
+ - Validate any given configurations.
+ - Add a convenient method of control port authentication.
+ - Modify OP-Addon to _not_ measure latencies to the first hop, to make
+ one-hop.diff obsolete (would it still be useful?).
+ - Modify OP-Addon to make it possible to connect to hidden services?
+ - Implement new events in TorCtl.py (GUARD, DESCCHANGED, STATUS_*, ...)?
+Research tasks:
+ - What is a beneficial network-model and how long does it take to learn it?
+ - What is a reasonable method of analyzing large numbers of generated paths
+ to empirically determine the degree of anonymity 'd' provided by the
+ path-selection method in use?
+ - Ideally this method would consider _all_ aspects that somehow influence
+ anonymity of users. Collect these!
Copied: torflow/trunk/CircuitAnalysis/OPAddon/op-addon.py (from rev 17873, torflow/trunk/op-addon.py)
--- torflow/trunk/CircuitAnalysis/OPAddon/op-addon.py (rev 0)
+++ torflow/trunk/CircuitAnalysis/OPAddon/op-addon.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,1364 @@
+ Copyright (C) 2007,2008 Johannes Renner
+ Contact: renner <AT> i4.informatik.rwth-aachen.de
+import os
+import re
+import sys
+import copy
+import math
+import time
+import random
+import socket
+import threading
+import Queue
+import ConfigParser
+from TorCtl import *
+from TorCtl.TorUtil import plog, sort_list
+## CONFIGURATION ##############################################################
+# Set the version
+VERSION = "0.01"
+# Path to the data directory
+DATADIR = "data/"
+# Our IP-address
+# (None at this point; PingHandler.stream_status_event compares stream
+# targets against it to recognise bandwidth-test streams)
+IP = None
+# Simulation mode
+# NOTE(review): no statement follows this comment in this extract; a
+# simulation flag is presumably assigned here -- confirm against the
+# repository copy.
+# Try to get the config-file from the commandline first
+# No argument at all: fall back to the example configuration
+if len(sys.argv) == 1:
+ CONFIG_FILE = "pathrc.example"
+# Exactly one argument: it is the config-file
+elif len(sys.argv) == 2:
+ CONFIG_FILE = sys.argv[1]
+# Check if '--simulate' is given
+elif len(sys.argv) == 3 or len(sys.argv) == 4:
+ if sys.argv[2] == "--simulate":
+ CONFIG_FILE = sys.argv[1]
+ else:
+ plog("ERROR", "Unknown argument: '" + sys.argv[2] + "' exiting.")
+ # NOTE(review): the process exits with status 0 although an error was logged
+ sys.exit(0)
+ # NOTE(review): the branch header guarding the next two lines is not
+ # visible in this extract (presumably a top-level 'else:' for any other
+ # argument count) -- confirm against the repository copy.
+ plog("ERROR", "Too many arguments, exiting.")
+ sys.exit(0)
+# Set some defaults for string-variables that can be None
+string_defaults = {"use_exit":None, "entry_country":None,
+ "middle_country":None, "exit_country":None}
+# The defaults above are handed to the parser, so config.get() on these
+# options yields None when the config-file does not set them
+config = ConfigParser.SafeConfigParser(string_defaults)
+if os.path.exists(CONFIG_FILE):
+ plog("INFO", "Loading configuration from '" + CONFIG_FILE + "'")
+ config.read(CONFIG_FILE)
+ # NOTE(review): as above, the 'else:' that should guard the next two lines
+ # is not visible in this extract -- confirm against the repository copy.
+ plog("ERROR", "Config file '" + CONFIG_FILE + "' does not exist, exiting.")
+ sys.exit(0)
+# Different configuration sections
+# NOTE(review): only RTT is defined in this extract, but MODEL, EVALUATE,
+# GEOIP and NODE_SELECTION are used below as section names; their
+# definitions are presumably adjacent to this line -- confirm.
+RTT = "RTT"
+# Measure RTTs of circuits
+ping_circs = config.getboolean(RTT, "ping_circs")
+# Default; only re-read from the config in the ping_circs section below
+network_model = False
+# NOTE(review): indentation is flattened in this extract, so it cannot be
+# told from here which of the following settings are read only when
+# ping_circs (or network_model) is enabled -- confirm before relying on
+# any of these names being defined.
+if ping_circs:
+ # SocksiPy, only needed when circuits are pinged
+ import socks
+ # Hosts and ports to use for ping streams
+ socks_host = config.get(RTT, "socks_host")
+ socks_port = config.getint(RTT, "socks_port")
+ # Choose randomly from a set of hosts/ports?
+ # Streams to this host/port pair are treated as pings by
+ # PingHandler.stream_status_event
+ ping_dummy_host = config.get(RTT, "ping_dummy_host")
+ ping_dummy_port = config.getint(RTT, "ping_dummy_port")
+ # Sleep interval between working loads in sec
+ initial_interval = config.getfloat(RTT, "initial_interval")
+ frequency = config.getfloat(RTT, "frequency")
+ # Close a circ after n timeouts
+ # (PingHandler only enforces the limit when it is > 0)
+ timeout_limit = config.getint(RTT, "timeout_limit")
+ # Set to True to measure RTTs of partial circuits,
+ # also enables circuit creation from the model
+ network_model = config.getboolean(MODEL, "network_model")
+ if network_model:
+ # NetworkX provides the graph behind NetworkModel
+ import networkx
+ # RTT-threshold when creating circs from the model
+ max_rtt = config.getfloat(MODEL, "max_rtt")
+ # Minimum number of proposals to choose from
+ min_proposals = config.getint(MODEL, "min_proposals")
+ # Min ratio of traditionally created circs
+ # ensures growing of the explored subnet
+ min_ratio = config.getfloat(MODEL, "min_ratio")
+ # Testing mode: Collect latencies of circuits and links in the
+ # network. Close circuits after num_xx_tests measures and involve
+ # a FileHandler to write data to a file
+ EVAL_MODE = config.getboolean(EVALUATE, "evaluate")
+ num_rtt_tests = config.getint(EVALUATE, "num_rtt_tests")
+ num_bw_tests = config.getint(EVALUATE, "num_bw_tests")
+ # Number of lines in the data file at which PingHandler.log_circuit exits
+ num_records = config.getint(EVALUATE, "num_records")
+def get_geoip_config():
+    """ Build the GeoIP path restrictions described in the config-file.
+
+    Returns a GeoIPSupport.GeoIPConfig, or None if 'use_geoip' is disabled.
+    The three crossing/uniqueness options are optional and are passed on as
+    None when the config-file does not contain them. """
+    if not config.getboolean(GEOIP, "use_geoip"):
+        return None
+
+    def optional(getter, option):
+        # Read 'option' with the given typed getter, or None if it is absent
+        if config.has_option(GEOIP, option):
+            return getter(GEOIP, option)
+        return None
+
+    unique = optional(config.getboolean, "unique_countries")
+    continent_limit = optional(config.getint, "max_continent_crossings")
+    ocean_limit = optional(config.getint, "max_ocean_crossings")
+    return GeoIPSupport.GeoIPConfig(
+        unique,
+        continent_limit,
+        ocean_limit,
+        entry_country = config.get(GEOIP, "entry_country"),
+        middle_country = config.get(GEOIP, "middle_country"),
+        exit_country = config.get(GEOIP, "exit_country"),
+        excludes = None)
+# Configure the SelectionManager here!!
+# Do NOT modify this object directly after it is handed to
+# PathBuilder, Use PathBuilder.schedule_selmgr instead.
+# Every node-selection parameter is read from the NODE_SELECTION section
+# of the config-file; the GeoIP restrictions come from get_geoip_config()
+# and are None when GeoIP is disabled.
+__selmgr = PathSupport.SelectionManager(
+ pathlen= config.getint(NODE_SELECTION, "pathlen"),
+ order_exits = config.getboolean(NODE_SELECTION, "order_exits"),
+ percent_fast = config.getint(NODE_SELECTION, "percent_fast"),
+ percent_skip = config.getint(NODE_SELECTION, "percent_skip"),
+ min_bw = config.getint(NODE_SELECTION, "min_bw"),
+ use_all_exits = config.getboolean(NODE_SELECTION, "use_all_exits"),
+ uniform = config.getboolean(NODE_SELECTION, "uniform"),
+ # 'use_exit' has a None default registered in string_defaults
+ use_exit = config.get(NODE_SELECTION, "use_exit"),
+ use_guards = config.getboolean(NODE_SELECTION, "use_guards"),
+ geoip_config = get_geoip_config())
+## Connection #################################################################
+class Connection(PathSupport.Connection):
+    """ Connection that hands out this module's Circuit objects
+        (the RTT-aware subclass) instead of the PathSupport default.
+        TODO: add the circuit class to be used """
+    def _launch(self, new_circ):
+        """ Ask Tor to build new_circ's path as a fresh circuit (id 0)
+            and remember the id Tor assigned. """
+        new_circ.circ_id = self.extend_circuit(0, new_circ.id_path())
+        return new_circ
+    def build_circuit(self, pathlen, path_sel):
+        """ Let path_sel choose a path of pathlen hops and build it """
+        new_circ = Circuit()
+        new_circ.path = path_sel.build_path(pathlen)
+        # The exit is the hop at position pathlen
+        new_circ.exit = new_circ.path[pathlen-1]
+        return self._launch(new_circ)
+    def build_circuit_from_path(self, path):
+        """ Build a circuit along an explicit path (= router-objects),
+            used to build circuits from a NetworkModel """
+        new_circ = Circuit()
+        new_circ.path = path
+        # The last router of the given path acts as exit
+        new_circ.exit = path[len(path)-1]
+        return self._launch(new_circ)
+## Stats ######################################################################
+class Stats:
+    """ Statistics class that is used for recording stats.
+
+    Keeps every recorded value in 'values' and refreshes min, max, mean,
+    dev (sample standard deviation) and median after each add_value().
+    All figures are 0.0 while no value has been recorded. """
+    def __init__(self):
+        self.values = []
+        self.min = 0.0
+        self.max = 0.0
+        self.mean = 0.0
+        self.dev = 0.0
+        self.median = 0.0
+    def add_value(self, value):
+        """ Add a value to the stats and refresh all figures """
+        self.values.append(value)
+        # The first sample defines min and max. Using 0 as an 'unset'
+        # marker would lose a genuine 0 sample and hide negative maxima.
+        if len(self.values) == 1:
+            self.min = value
+            self.max = value
+        else:
+            if value < self.min: self.min = value
+            if value > self.max: self.max = value
+        # Refresh everything
+        self.mean = self._mean()
+        self.dev = self._dev()
+        self.median = self._median()
+    def _mean(self):
+        """ Return the arithmetic mean of the values (0.0 if empty) """
+        if not self.values:
+            return 0.0
+        # Starting the sum at 0.0 forces float division for int samples
+        return sum(self.values, 0.0)/len(self.values)
+    def _dev(self):
+        """ Return the sample stddev (n-1 divisor); 0.0 below two values """
+        if len(self.values) < 2:
+            return 0.0
+        mean = self._mean()
+        squares = sum([(v-mean)**2.0 for v in self.values], 0.0)
+        return math.sqrt(squares/(len(self.values)-1))
+    def _median(self):
+        """ Return the median of the values (0.0 if empty); for an even
+            number of values this is the lower of the two middle ones """
+        if not self.values:
+            return 0.0
+        ordered = sorted(self.values)
+        # Explicit floor division keeps the index an integer
+        return ordered[(len(ordered)-1)//2]
+## CircuitBuildingStats #######################################################
+class CircuitBuildingStats(Stats):
+    """ Overall circuit statistics: the inherited Stats hold the durations
+        of successful buildups, two counters hold the failures """
+    def __init__(self):
+        Stats.__init__(self)
+        # Circuits that failed before they were built
+        self.failures_buildup = 0
+        # Circuits that failed after they had been built
+        self.failures_established = 0
+    def to_string(self):
+        """ Render the summary (three lines) that is written to a file """
+        pieces = [
+            "Successful circuit buildups: ",
+            str(len(self.values)), " records, median=", str(self.median),
+            " s, avg=", str(self.mean), " s",
+            ", dev=", str(self.dev), " s (min=", str(self.min),
+            " s, max=", str(self.max), " s)\n",
+            "Failures during circuit buildups: ",
+            str(self.failures_buildup), "\n",
+            "Failures on established circuits: ",
+            str(self.failures_established)]
+        return "".join(pieces)
+## FileHandler ################################################################
+class FileHandler:
+    """ FileHandler class for writing/appending collected data to a file.
+
+    The file is opened for each operation and is always closed again
+    before the operation returns, also when it fails. The most recently
+    used (closed) file object stays available as 'filehandle'. """
+    def __init__(self, filename):
+        self.filename = filename
+    def _put_line(self, mode, line):
+        """ Open the file in the given mode and write line + newline """
+        self.filehandle = open(self.filename, mode)
+        try:
+            self.filehandle.write(line + "\n")
+        finally:
+            self.filehandle.close()
+    def write(self, line):
+        """ Replace the file's content with the single given line """
+        self._put_line('w', line)
+    def append(self, line):
+        """ Add the given line to the end of the file """
+        self._put_line('a', line)
+    def get_line_count(self):
+        """ Return the number of lines currently in the file.
+            Raises IOError if the file cannot be opened. """
+        self.filehandle = open(self.filename)
+        try:
+            return len(self.filehandle.readlines())
+        finally:
+            # Do not leak one descriptor per call
+            self.filehandle.close()
+## Circuit & Stream ###########################################################
+class Circuit(PathSupport.Circuit):
+ """ Circuit class extended to RTTs and related stats """
+ # NOTE(review): indentation is flattened in this extract; confirm the
+ # nesting of the statements below against the repository copy.
+ def __init__(self):
+ PathSupport.Circuit.__init__(self)
+ # RTT stuff
+ # part_rtts is keyed by hop number; the key None holds the RTT of the
+ # complete circuit (see PingHandler.record_ping)
+ self.part_rtts = {} # Dict of partial RTTs, pathlen 3: 1-2-None
+ self.current_rtt = None # Double (sec): current ranking of this circ
+ self.stats = Stats() # Stats about total RTT, contains the history
+ # Counters and flags
+ self.age = 0 # Age in rounds
+ self.timeout_counter = 0 # Timeout limit
+ self.rtt_created = False # Created from the model
+ # XXX: BW stuff
+ self.bw = 0
+ self.bw_tested = False
+ def add_rtt(self, rtt):
+ """ Add a new value and refresh stats and current """
+ # Set current circuit-ranking
+ # The first sample is taken as it is; every later sample is averaged
+ # 50/50 with the previous ranking, so the influence of an old
+ # measurement halves with each new one.
+ if self.current_rtt == None:
+ self.current_rtt = rtt
+ else:
+ # Weight the current value with the previous
+ self.current_rtt = (self.current_rtt * 0.5) + (rtt * 0.5)
+ plog("DEBUG", "Computing new current RTT from " + str(rtt) + " to " +
+ str(self.current_rtt))
+ # Add a new RTT to the stats
+ self.stats.add_value(rtt)
+ # Increase the age
+ # (age therefore counts the total-RTT samples recorded so far)
+ self.age += 1
+ def to_string(self):
+ """ Create a current string representation """
+ s = "Circuit " + str(self.circ_id) + ": "
+ # One " nickname(country)" entry per router of the path
+ for r in self.path: s += " " + r.nickname + "(" + str(r.country_code) + ")"
+ if not self.built: s += " (not yet built)"
+ else: s += " (age=" + str(self.age) + ")"
+ # NOTE(review): this is a truthiness test, so a current_rtt of exactly 0
+ # is treated like 'not measured yet'.
+ if self.current_rtt:
+ s += ": " "RTT [current (median/mean/dev)]: "
+ s += str(self.current_rtt) + " (" + str(self.stats.median) + "/"
+ s += str(self.stats.mean) + "/" + str(self.stats.dev) + ")"
+ # A trailing '*' marks circuits that were created from the model
+ if self.rtt_created: s += "*"
+ if self.bw > 0: s+= "\n\t --> bw = " + str(self.bw) + " byte/s"
+ return s
+class Stream(PathSupport.Stream):
+    """ Stream that additionally remembers the hop it pings and the
+        time of its last bandwidth event """
+    def __init__(self, sid, host, port, kind):
+        PathSupport.Stream.__init__(self, sid, host, port, kind)
+        # Timestamp of the last stream_bw event
+        self.bw_timestamp = None
+        # Hop this ping is exited at; None means the complete circuit
+        self.hop = None
+## NetworkModel ###############################################################
+class TorLink:
+ """ This class contains infos about a link: source, destination, RTT
+ plus: rtt_history, methods to compute stats, etc. """
+ # NOTE(review): indentation is flattened in this extract; confirm the
+ # nesting of the statements below against the repository copy.
+ def __init__(self, src, dest, rtt=0):
+ # Set src and dest
+ # (NetworkModel.add_link passes router ids here; None is the root)
+ self.src = src
+ self.dest = dest
+ # The current value
+ self.current_rtt = None
+ # Set the RTT
+ # current_rtt is still None here, so the given rtt (0 by default)
+ # becomes the initial value without any averaging
+ self.add_rtt(rtt)
+ def add_rtt(self, rtt):
+ # Compute new current value from the last
+ # First sample: take it as it is. Later samples: 50/50 average with
+ # the previous value.
+ if self.current_rtt == None:
+ self.current_rtt = rtt
+ else:
+ self.current_rtt = (self.current_rtt * 0.5) + (rtt * 0.5)
+ plog("DEBUG", "Computing new current RTT from " + str(rtt) + " to " +
+ str(self.current_rtt))
+class PathProposal:
+    """ A candidate path found in the model, together with its links,
+        its expected RTT and the scores used for ranking """
+    def __init__(self, links, path):
+        # List of TorLink objects along the path
+        self.links = links
+        # The first entry of 'path' is the ROOT, drop it
+        self.path = path[1:]
+        # Expected RTT: the current RTTs of all links added up
+        self.rtt = sum([link.current_rtt for link in self.links], 0.0)
+        # Scores and index are filled in when the proposals are ranked
+        self.rtt_score = 0          # RTT score
+        self.bw_score = 0           # BW score
+        self.min_bw = 0             # Minimum bw of routers in path
+        self.ranking_index = None   # Index computed from bw and RTT
+    def to_string(self):
+        """ Render all links and the expected RTT for printing """
+        rendered = [str(link.src) + "--" + link.dest +
+                    " (" + str(link.current_rtt) + ") " + ", "
+                    for link in self.links]
+        return "".join(rendered) + "--> " + str(self.rtt) + " sec"
+class NetworkModel:
+ """ This class is used to record measured RTTs of single links in a model
+ of the 'currently explored subnet' (undirected graph) """
+    def __init__(self, routers):
+        """ Constructor: pass the list of routers.
+
+        Tries to load a previously stored graph from the data directory
+        and falls back to a new graph that only contains the root node
+        (None). 'routers' is kept by reference, not copied. """
+        self.pickle_path = DATADIR + "network-model.pickle"
+        self.logfile = None           # FileHandler(DATADIR + "proposals")
+        self.proposals = []           # Current list of path proposals
+        self.prefixes = {}            # Prefixes for DFS
+        self.routers = routers        # Link to the router-list
+        self.target_host = None
+        self.target_port = None
+        self.max_rtt = 0
+        try:
+            # NOTE: the graph is unpickled, so the file in the data
+            # directory must come from a trusted source.
+            self.graph = self.load_graph()
+            # A loaded graph has no proposals generated for it yet
+            self.up_to_date = False
+        except Exception:
+            # Any load problem means starting over with an empty model.
+            # KeyboardInterrupt and SystemExit are deliberately not
+            # swallowed here.
+            plog("INFO", "Could not load a model, creating a new one ..")
+            self.graph = networkx.XGraph(name="Tor Subnet")
+            # None is the root node every path starts from
+            self.graph.add_node(None)
+            self.up_to_date = True
+        self.print_info()
+        plog("INFO", "NetworkModel initiated")
+    def save_graph(self):
+        """ Pickle the graph to self.pickle_path and log how long it took """
+        began = time.time()
+        networkx.write_gpickle(self.graph, self.pickle_path)
+        message = "Stored Tor-graph to '" + self.pickle_path
+        message += "' in " + str(time.time()-began) + " sec"
+        plog("INFO", message)
+    def load_graph(self):
+        """ Unpickle the graph stored at self.pickle_path and return it;
+            errors raised by the read are passed on to the caller """
+        loaded = networkx.read_gpickle(self.pickle_path)
+        message = "Loaded Tor-graph from '" + self.pickle_path + "'"
+        plog("INFO", message)
+        return loaded
+    def add_link(self, src, dest, rtt):
+        """ Store a link between the router-ids src and dest in the graph;
+            the edge carries a new TorLink initialised with the given RTT """
+        link = TorLink(src, dest, rtt)
+        self.graph.add_edge(src, dest, link)
+ def add_circuit(self, c):
+ """ Check if we can compute RTTs of single links for a circuit
+ and store these in the model """
+ # c.part_rtts maps a hop number to the RTT measured up to that hop; the
+ # key None holds the RTT of the complete circuit. The RTT of a single
+ # link is the difference between two consecutive measurements.
+ # NOTE(review): indentation is flattened in this extract, so the nesting
+ # below (in particular where 'self.up_to_date = False' belongs) must be
+ # confirmed against the repository copy.
+ # Get the length
+ path_len = len(c.path)
+ # Go through the path
+ for i in xrange(1,path_len):
+ if i in c.part_rtts:
+ # First hop --> add Link from Root to 1
+ # (the root node of the graph is None)
+ if i == 1:
+ link_rtt = c.part_rtts[i]
+ self.add_link(None, c.path[i-1].idhex, link_rtt)
+ # Handle i -- (i+1)
+ if i+1 in c.part_rtts:
+ link_rtt = c.part_rtts[i+1] - c.part_rtts[i]
+ # Only strictly positive differences are stored
+ if link_rtt > 0:
+ plog("INFO", "Computed link-RTT " + str(i) + ": " + str(link_rtt))
+ self.add_link(c.path[i-1].idhex, c.path[i].idhex, link_rtt)
+ else:
+ # NOTE(review): a difference of exactly 0 also ends up here
+ plog("WARN", "Negative link-RTT " + str(i) + ": " + str(link_rtt))
+ # Handle (n-1) -- n
+ elif None in c.part_rtts:
+ # We have a total value
+ link_rtt = c.part_rtts[None] - c.part_rtts[i]
+ if link_rtt > 0:
+ plog("INFO", "Computed link-RTT " + str(i) + ": " + str(link_rtt))
+ self.add_link(c.path[i-1].idhex, c.path[i].idhex, link_rtt)
+ else:
+ plog("WARN", "Negative link-RTT " + str(i) + ": " + str(link_rtt))
+ # Proposals generated earlier no longer reflect the graph
+ self.up_to_date = False
+ def delete_node(self, idhex):
+ """ Delete a router from the model """
+ # idhex: id of the router, as used for the nodes of the graph.
+ # NOTE(review): indentation is flattened in this extract; presumably
+ # everything below is guarded by the membership test -- confirm.
+ if idhex in self.graph:
+ # Delete links first
+ edges = self.graph.edge_boundary(idhex)
+ for e in edges:
+ self.graph.delete_edge(e)
+ # Then remove the node
+ self.graph.delete_node(idhex)
+ plog("INFO", "Deleted node with ID " + idhex + " from the model")
+ # Proposals generated earlier may still contain the deleted router
+ self.up_to_date = False
+    def update(self):
+        """ Drop every node from the graph whose router is not part of
+            the current router-list any more """
+        for node_id in self.graph.nodes():
+            if node_id in self.routers:
+                continue
+            # The root node (None) is never in the router-list, keep it
+            if not node_id:
+                continue
+            plog("INFO", "Router with ID " + node_id +
+                 " is not known, deleting node ..")
+            self.delete_node(node_id)
+        plog("INFO", "Updated model with current router-list")
+    def set_target(self, host, port, max_rtt=0):
+        """ Set destination and RTT-limit used for generating paths;
+            nothing happens if all three are the same as before """
+        same_target = self.target_host == host and self.target_port == port
+        if same_target and self.max_rtt == max_rtt:
+            return
+        self.target_host = host
+        self.target_port = port
+        self.max_rtt = max_rtt
+        # The existing proposals were generated for the old settings
+        self.up_to_date = False
+        plog("INFO", "Set the target to "+self.target_host+":"+
+             str(self.target_port))
+    def generate_proposals(self):
+        """ Rebuild the list of proposals with a search from the root """
+        # Forget routers that have left the router-list first
+        self.update()
+        # Start with no proposals and no DFS prefixes
+        self.prefixes.clear()
+        self.proposals = []
+        began = time.time()
+        # The search starts at the root node (None) with an empty path
+        self.visit(None, [])
+        self.up_to_date = True
+        count = str(len(self.proposals))
+        plog("INFO", "Generating " + count +
+             " proposals took " + str(time.time()-began) +
+             " seconds [max_rtt=" + str(self.max_rtt) + "]")
+    def get_link_info(self, path):
+        """ Return the edge data of every pair of neighbouring ids in
+            'path' (a list of ids), in path order """
+        neighbours = zip(path, path[1:])
+        return [self.graph.get_edge(a, b) for (a, b) in neighbours]
+ def visit(self, node, path, i=1):
+ """ Recursive Depth-First-Search: Maybe use some existing methods """
+ # node: id of the node to visit (None is the root)
+ # path: ids visited so far on this branch; the node is appended to it
+ # i: recursion depth, used as key into self.prefixes
+ # Matching paths are appended to self.proposals; nothing is returned.
+ # NOTE(review): indentation is flattened in this extract; confirm the
+ # nesting of the branches below against the repository copy.
+ if node not in path:
+ path.append(node)
+ # Root -- Exit
+ # The path starts with the root, so 4 entries are root + 3 routers
+ if len(path) == 4:
+ # This could be an option
+ if "Exit" in self.routers[node].flags:
+ # XXX: Performance problem?
+ if self.routers[node].will_exit_to(self.target_host, self.target_port):
+ p = PathProposal(self.get_link_info(path), path)
+ # A max_rtt of 0 (or less) disables the RTT filter
+ if self.max_rtt > 0:
+ if p.rtt <= self.max_rtt:
+ self.proposals.append(p)
+ else: self.proposals.append(p)
+ else:
+ self.prefixes[i] = path
+ # The graph is also a dict
+ for n in self.graph[node]:
+ if n not in self.prefixes[i]:
+ # Each branch continues on its own copy of the prefix
+ self.visit(n, copy.copy(self.prefixes[i]), i+1)
+    def keys_to_routers(self, keys):
+        """ See if we know the routers specified by keys and return them.
+
+        keys: sequence of router ids.
+        Returns the router objects in the order of 'keys' if every id is
+        known, else None. Ids that are not known are also deleted from
+        the graph (best effort). """
+        routers = []
+        for key in keys:
+            if key in self.routers:
+                routers.append(self.routers[key])
+            else:
+                plog("INFO", "We do not know about a router having ID " + key)
+                try:
+                    # This object is the model itself. It has no 'model'
+                    # attribute, so going through self.model always failed
+                    # and the stale node was never deleted.
+                    self.delete_node(key)
+                except Exception:
+                    plog("ERROR", "Could not delete router with ID " + key)
+        if len(routers) == len(keys):
+            return routers
+        # At least one router is unknown
+        return None
+    def _set_min_bw(self):
+        """ Find the smallest advertised bw of the routers in each proposal.
+
+        Sets min_bw on every proposal whose routers are all known and
+        drops the other proposals from self.proposals (in place). """
+        kept = []
+        for p in self.proposals:
+            # Get the routers
+            r_path = self.keys_to_routers(p.path)
+            if r_path:
+                # Find min(bw_i)
+                p.min_bw = min([r.bw for r in r_path])
+                kept.append(p)
+            else:
+                plog("DEBUG", "Could not find the routers, removed ..")
+        # Removing entries from the list while looping over it would skip
+        # the proposal after each removed one. Filter first, then replace
+        # the content of the same list object.
+        self.proposals[:] = kept
+    def update_ranking(self, rtt_weight, bw_weight):
+        """ Compute a ranking for each path proposal using
+            measured RTTs and bandwidth from the descriptors.
+
+        rtt_weight, bw_weight: factors for the RTT score and the bandwidth
+        score; a weight of 0 (or less) skips the scoring for that
+        criterion. Afterwards self.proposals is sorted by ranking_index.
+        Does nothing if there are no proposals to rank. """
+        start = time.time()
+        if bw_weight > 0:
+            # This may also drop proposals with unknown routers
+            self._set_min_bw()
+        if not self.proposals:
+            # The log lines below index the first and last proposal
+            plog("DEBUG", "No proposals available, nothing to rank")
+            return
+        # High bandwidths get high scores
+        if bw_weight > 0:
+            sort_list(self.proposals, lambda x: x.min_bw)
+            plog("DEBUG", "MIN_BWs of proposals between: " +
+                 str(self.proposals[0].min_bw) + " and " +
+                 str(self.proposals[-1].min_bw))
+            # Scores 1..n in ascending order of bandwidth
+            score = 1
+            for p in self.proposals:
+                p.bw_score = score
+                score += 1
+        # Low Latencies get high scores
+        if rtt_weight > 0:
+            sort_list(self.proposals, lambda x: x.rtt)
+            plog("DEBUG", "RTTs of proposals between: " +
+                 str(self.proposals[0].rtt) + " and " +
+                 str(self.proposals[-1].rtt))
+            # Scores n..1 in ascending order of RTT
+            score = len(self.proposals)
+            for p in self.proposals:
+                p.rtt_score = score
+                score -= 1
+        # Compute weights from both of the values
+        for p in self.proposals:
+            # Calculate ranking index based on both scores
+            p.ranking_index = (rtt_weight*p.rtt_score)+(bw_weight*p.bw_score)
+        sort_list(self.proposals, lambda x: x.ranking_index)
+        plog("DEBUG", "Ranking indices of proposals between: " +
+             str(self.proposals[0].ranking_index) + " and " +
+             str(self.proposals[-1].ranking_index))
+        plog("INFO", "Updating ranking indices of proposals took "
+             + str(time.time()-start) + " sec")
+    def weighted_selection(self, weight):
+        """ Select a proposal in a probabilistic way.
+
+        weight: function mapping a proposal to its weight; the chance of
+        a proposal being chosen is proportional to it.
+        Returns the chosen proposal, or None if nothing can be chosen
+        (no proposals, or the weights do not add up to a positive sum).
+        NOTE(review): random.randint needs integer bounds, so the weights
+        are expected to add up to an integer -- confirm for the callers. """
+        # Compute the sum of weights
+        total = 0
+        for p in self.proposals:
+            total += weight(p)
+        plog("DEBUG", "Sum of all weights is " + str(total))
+        if total <= 0:
+            # randint(0, total-1) would raise on an empty range
+            plog("DEBUG", "Nothing to choose from")
+            return None
+        # Choose a random number from [0,sum-1]
+        i = random.randint(0, total-1)
+        plog("DEBUG", "Chosen random number is " + str(i))
+        # Go through the proposals and subtract
+        for p in self.proposals:
+            i -= weight(p)
+            if i < 0:
+                plog("DEBUG", "Chosen object with ranking " +
+                     str(weight(p)))
+                return p
+        return None
+    def print_info(self):
+        """ Print the graph info and the proposals (only a notice if there
+            are more than 50) and log them to the logfile if there is one """
+        chunks = [str(self.graph.info())]
+        chunks.extend(["\nProposal: " + p.to_string() for p in self.proposals])
+        out = "".join(chunks)
+        # Only print them out if there are not too many
+        if len(self.proposals) <= 50:
+            print(out)
+        else:
+            plog("INFO", "Currently " + str(len(self.proposals)) +
+                 " proposals [max_rtt=" + str(self.max_rtt) +
+                 "]! Not printing them out ..")
+        # The logfile always gets the complete text
+        if self.logfile: self.logfile.write(out)
+## PingHandler ################################################################
+class PingHandler(PathSupport.StreamHandler):
+ """ This class extends the general StreamHandler to handle ping-requests """
+ def __init__(self, c, selmgr, num_circs, RouterClass, use_model=False):
+ # c, selmgr, RouterClass: handed on unchanged to StreamHandler.__init__
+ # num_circs: number of circuits requested
+ # use_model: if True, a NetworkModel is attached
+ # NOTE(review): indentation is flattened in this extract; confirm the
+ # extent of the if/else below against the repository copy.
+ # Different loggers for recording statistics
+ self.circ_stats = CircuitBuildingStats() # record setup-durations
+ self.stats_logger = FileHandler(DATADIR + "circ-setup-stats")
+ self.setup_logger = None # FileHandler(DATADIR + "circ-setup-durations")
+ self.testing_logger = FileHandler(DATADIR + "circ-data")
+ self.bw_queue = Queue.Queue() # circ_ids to bw-test
+ # Queue containing circs to be tested
+ self.ping_queue = Queue.Queue() # (circ_id, hop)-pairs
+ if use_model:
+ # The base class is initialised with 0 circuits here; num_circuits is
+ # set afterwards, once the model exists
+ PathSupport.StreamHandler.__init__(self, c, selmgr, 0, RouterClass)
+ self.model = NetworkModel(self.routers)
+ self.num_circuits = num_circs
+ self.check_circuit_pool()
+ else:
+ self.model = None
+ PathSupport.StreamHandler.__init__(self, c, selmgr, num_circs, RouterClass)
+ # Sorted circuit list
+ self.sorted_circs = []
+ # Start the Pinger
+ # (as a daemon thread)
+ self.pinger = Pinger(self)
+ self.pinger.setDaemon(True)
+ self.pinger.start()
+    def refresh_sorted_list(self):
+        """ Rebuild self.sorted_circs: all circuits, ordered by their
+            current RTT """
+        def rtt_key(circ):
+            rtt = circ.current_rtt
+            # Circuits without a measurement are ranked like an RTT of 10
+            if rtt is None:
+                return 10
+            return rtt
+        all_circs = self.circuits.values()
+        self.sorted_circs = sort_list(all_circs, rtt_key)
+        plog("DEBUG", "Refreshed sorted list of circuits")
+    def print_circuits(self, list=None):
+        """ Print one line per circuit; uses the given (sorted) list if
+            it is non-empty and all known circuits otherwise """
+        circs = list or self.circuits.values()
+        plog("INFO", "We have " + str(len(circs)) + " circuits:")
+        for circ in circs:
+            line = "+ " + circ.to_string()
+            print(line)
+    def log_circuit(self, circ):
+        """ Append one tab-separated record for a finished circuit to the
+            data file: setup duration, bandwidth/1024 and mean RTT.
+            Exits once the file holds num_records lines. """
+        fields = [str(circ.setup_duration), str(circ.bw/1024),
+                  str(circ.stats.mean)]
+        self.testing_logger.append("\t".join(fields))
+        line_count = self.testing_logger.get_line_count()
+        if line_count < num_records:
+            return
+        plog("INFO", "Enough records, exiting. (line_count = " +
+             str(line_count) + ")")
+        # TODO: How to kill the main thread from here?
+        sys.exit(1)
+ def start_round(self):
+ """ schedule_immediate from pinger before triggering the initial ping """
+ # Refreshes the sorted list and enqueues circuits for RTT measurement.
+ # While bandwidth tests are enabled (EVAL_MODE and num_bw_tests > 0),
+ # the bw_tested / rtt_created flags decide whether a circuit is enqueued.
+ # NOTE(review): indentation is flattened in this extract, so it cannot
+ # be told from here which 'if' each 'elif'/'else' below belongs to --
+ # confirm against the repository copy.
+ print("")
+ self.refresh_sorted_list()
+ # TODO: Check if there are any circs, else set 'frequency' to 10?
+ circs = self.circuits.values()
+ for c in circs:
+ # XXX: First test BW, then enqueue for RTTs
+ if EVAL_MODE and num_bw_tests > 0:
+ if self.model:
+ if c.rtt_created and c.bw_tested:
+ self.enqueue_circ(c)
+ elif not c.rtt_created:
+ self.enqueue_circ(c)
+ elif not c.bw_tested:
+ pass
+ else:
+ self.enqueue_circ(c)
+ else:
+ self.enqueue_circ(c)
+    def enqueue_circ(self, c):
+        """ Queue the RTT measurements for a circuit; circuits that are
+            not built yet are ignored """
+        if not c.built:
+            return
+        circ_id = c.circ_id
+        if self.model:
+            # With a model, every single hop is measured as well
+            for hop in xrange(1, len(c.path)):
+                self.ping_queue.put((circ_id, hop))
+                plog("DEBUG", "Enqueued circuit " + str(circ_id) + " hop " + str(hop))
+        # Hop None stands for the whole circuit
+        self.ping_queue.put((circ_id, None))
+        plog("DEBUG", "Enqueued circuit " + str(circ_id) + " hop None")
+ def attach_ping(self, stream):
+ """ Attach a ping stream to its circuit """
+ # Takes the next (circ_id, hop) pair from the ping queue and attaches the
+ # stream to that circuit. If the circuit is gone, not usable, or the
+ # attach fails, the method calls itself again to try the next pair.
+ # An empty queue ends the round: the stream is closed and a new round
+ # is started.
+ # NOTE(review): indentation is flattened in this extract; confirm the
+ # nesting below against the repository copy.
+ if self.ping_queue.empty():
+ # This round has finished
+ plog("INFO", "Queue is empty --> round has finished, closing stream "
+ + str(stream.strm_id))
+ self.close_stream(stream.strm_id, 5)
+ # Print information
+ self.print_circuits(self.sorted_circs)
+ if self.model:
+ self.model.print_info()
+ # Enqueue again all circs
+ self.start_round()
+ else:
+ # Get the info and extract
+ ping_info = self.ping_queue.get()
+ circ_id = ping_info[0]
+ hop = ping_info[1]
+ # Set circ to stream
+ stream.circ = circ_id
+ try:
+ # Get the circuit
+ if circ_id in self.circuits:
+ circ = self.circuits[circ_id]
+ if circ.built and not circ.closed:
+ # Remember the hop so that record_ping can file the RTT under it
+ stream.hop = hop
+ self.c.attach_stream(stream.strm_id, circ.circ_id, hop)
+ # Don't use pending for pings
+ else:
+ plog("WARN", "Circuit not built or closed")
+ self.attach_ping(stream)
+ else:
+ # Go to next test if circuit is gone or we get an ErrorReply
+ plog("WARN", "Circuit " + str(circ_id) +
+ " does not exist anymore --> passing")
+ self.attach_ping(stream)
+ except TorCtl.ErrorReply, e:
+ plog("WARN", "Error attaching stream " + str(stream.strm_id) +
+ " :" + str(e.args))
+ self.attach_ping(stream)
+ def record_ping(self, s):
+ """ Record a ping from a stream event (DETACHED or CLOSED) """
+ # The RTT is the time between the moment stored in attached_at (set on
+ # SENTCONNECT in stream_status_event) and the arrival of this event.
+ # NOTE(review): indentation is flattened in this extract; confirm the
+ # nesting below against the repository copy.
+ # No timeout, this is a successful ping: measure here
+ hop = self.streams[s.strm_id].hop
+ rtt = s.arrived_at-self.streams[s.strm_id].attached_at
+ plog("INFO", "Measured RTT: " + str(rtt) + " sec")
+ # Save RTT to circuit
+ # (keyed by hop; None is the measurement of the complete circuit)
+ self.circuits[s.circ_id].part_rtts[hop] = rtt
+ if hop == None:
+ # This is a total circuit measuring
+ self.circuits[s.circ_id].add_rtt(rtt)
+ plog("DEBUG", "Added RTT to history: " +
+ str(self.circuits[s.circ_id].stats.values))
+ # EVAL_MODE: close if num_rtt_tests is reached
+ # NOTE(review): EVAL_MODE itself is not tested on the line below in
+ # this extract -- confirm.
+ if self.circuits[s.circ_id].age == num_rtt_tests:
+ plog("DEBUG", "Closing circ " + str(s.circ_id) +
+ ": num_rtt_tests is reached")
+ # Save stats to a file for generating plots etc.
+ if self.model:
+ if self.circuits[s.circ_id].rtt_created:
+ self.log_circuit(self.circuits[s.circ_id])
+ else:
+ self.log_circuit(self.circuits[s.circ_id])
+ # Close the circuit
+ self.close_circuit(s.circ_id)
+ # Resort only if this is for the complete circ
+ self.refresh_sorted_list()
+ if self.model:
+ # Add the links of this circuit to the model
+ self.model.add_circuit(self.circuits[s.circ_id])
+    def handle_bw_test(self, s):
+        """ Handle special streams to measure the bandwidth of circs.
+
+        s: stream event of a bandwidth-test stream.
+        NEW: attach the stream to the next circuit from the bw queue.
+        SUCCEEDED: remember when the stream got connected.
+        CLOSED with reason DONE: compute the bandwidth and store it on
+        the circuit.
+        DETACHED with remote reason EXITPOLICY or TIMEOUT: close stream
+        and circuit. """
+        output = [s.event_name, str(s.strm_id), s.status, str(s.circ_id),
+                  s.target_host, str(s.target_port)]
+        if s.reason: output.append("REASON=" + s.reason)
+        if s.remote_reason: output.append("REMOTE_REASON=" + s.remote_reason)
+        plog("DEBUG", " ".join(output))
+        # NEW
+        if s.status == "NEW":
+            stream = Stream(s.strm_id, s.target_host, s.target_port, s.status)
+            self.streams[s.strm_id] = stream
+            # Set next circ_id to stream
+            stream.circ = self.bw_queue.get()
+            try:
+                if stream.circ in self.circuits:
+                    circ = self.circuits[stream.circ]
+                    if circ.built and not circ.closed:
+                        self.c.attach_stream(stream.strm_id, circ.circ_id)
+                    else:
+                        plog("WARN", "Circuit not built or closed")
+                        self.close_stream(s.strm_id, 5)
+                else:
+                    # Go to next test if circuit is gone or we get an
+                    # ErrorReply. The id to report is the one taken from
+                    # the queue; no 'circ_id' variable exists in this
+                    # method.
+                    plog("WARN", "Circuit " + str(stream.circ) +
+                         " does not exist anymore --> closing stream")
+                    # Close stream, XXX: Reason?
+                    self.close_stream(s.strm_id, 5)
+            except TorCtl.ErrorReply, e:
+                plog("WARN", "Error attaching stream " + str(stream.strm_id) +
+                     " :" + str(e.args))
+                self.close_stream(s.strm_id, 5)
+        if s.status == "SUCCEEDED":
+            self.streams[s.strm_id].attached_at = s.arrived_at
+        # DONE
+        if s.status == "CLOSED" and s.reason == "DONE":
+            stream = self.streams[s.strm_id]
+            # Since bytes are counted from events, use the timestamp
+            # of the last stream_bw event for computing the lifespan
+            lifespan = stream.lifespan(stream.bw_timestamp)
+            plog("INFO", "Lifespan is " + str(lifespan))
+            # Compute bandwidth
+            total_bytes = stream.bytes_read + stream.bytes_written
+            plog("DEBUG", "Total number of bytes (read+written) is " + str(total_bytes))
+            bw = total_bytes/float(lifespan)
+            plog("INFO", "Got bandwidth: " + str(bw))
+            self.circuits[s.circ_id].bw = bw
+            self.circuits[s.circ_id].bw_tested = True
+        if s.status == "DETACHED":
+            if s.remote_reason in ["EXITPOLICY","TIMEOUT"]:
+                # Close circuit and stream
+                self.close_stream(s.strm_id, 5)
+                self.close_circuit(s.circ_id)
+ def stream_status_event(self, s):
+ """ Identify different kinds of streams and treat them differently """
+ # Streams to (ping_dummy_host, ping_dummy_port) are pings and are handled
+ # here. Everything else goes to handle_bw_test (target is IP:8041), is
+ # only logged (Tor-internal streams), or is passed to the StreamHandler
+ # implementation.
+ # NOTE(review): indentation is flattened in this extract; confirm the
+ # nesting below (in particular inside the DETACHED branch) against the
+ # repository copy.
+ # Separate pings from others
+ if not (s.target_host == ping_dummy_host and
+ s.target_port == ping_dummy_port):
+ # TODO: Handle echelon here?
+ # - perform DNS request (or use REMAP?)
+ # - determine destination country
+ # - check if there is already a circuit with exit node
+ # in destination country
+ # Catch bandwidth-streams
+ if s.target_host == IP and s.target_port == 8041:
+ return self.handle_bw_test(s)
+ # Try to catch Tor internal streams
+ elif s.source_addr == "(Tor_internal):0":
+ return plog("DEBUG", "New internal stream")
+ # This is NO test: call the underlying method
+ else:
+ return PathSupport.StreamHandler.stream_status_event(self, s)
+ # Construct debugging output
+ output = [s.event_name, str(s.strm_id), s.status, str(s.circ_id),
+ s.target_host+':'+str(s.target_port)]
+ if s.reason: output.append("REASON=" + s.reason)
+ if s.remote_reason: output.append("REMOTE_REASON=" + s.remote_reason)
+ plog("DEBUG", " ".join(output))
+ if s.status == "NEW":
+ # Set up the stream object
+ stream = Stream(s.strm_id, s.target_host, s.target_port, s.status)
+ self.streams[s.strm_id] = stream
+ self.attach_ping(stream)
+ elif s.status == "SENTCONNECT":
+ # Measure here, means set attached_at on the stream
+ # (record_ping measures the RTT from this point in time)
+ self.streams[s.strm_id].attached_at = s.arrived_at
+ elif s.status == "DETACHED":
+ if (s.reason == "TIMEOUT"):
+ self.circuits[s.circ_id].timeout_counter += 1
+ plog("DEBUG", str(self.circuits[s.circ_id].timeout_counter) +
+ " timeout(s) on circuit " + str(s.circ_id))
+ # A timeout_limit of 0 (or less) never closes circuits here
+ if timeout_limit > 0:
+ if self.circuits[s.circ_id].timeout_counter >= timeout_limit and\
+ not self.circuits[s.circ_id].closed:
+ # Close the circuit
+ plog("DEBUG", "Reached limit on timeouts --> closing circuit "
+ + str(s.circ_id))
+ self.close_circuit(s.circ_id)
+ # Set RTT for this circ to None
+ self.circuits[s.circ_id].current_rtt = None
+ else:
+ # No timeout: Record the result
+ self.record_ping(s)
+ # Close the stream
+ self.close_stream(s.strm_id, 5)
+ # CLOSED + END is also a ping, some routers send it when
+ # measuring RTT to a single hop, better measure on FAILED?
+ elif s.status == "CLOSED":
+ if s.reason == "END":
+ # Only record
+ self.record_ping(s)
+ def circ_status_event(self, c):
+ """ Override this to record statistics on circuit-setups and -failures """
+ # c: circuit event. Events for circuits that are not in self.circuits
+ # are passed straight to the CircuitHandler implementation.
+ # Failure counters and setup durations are kept in self.circ_stats, and
+ # the summary file is rewritten through self.stats_logger.
+ # NOTE(review): indentation is flattened in this extract, so it cannot
+ # be told from here which 'if' each 'else' below belongs to (and so
+ # whether circuits that were not created from the model are counted
+ # while a model is in use) -- confirm against the repository copy.
+ if c.circ_id not in self.circuits:
+ return PathSupport.CircuitHandler.circ_status_event(self, c)
+ # Catch FAILED/CLOSED now: circ will be removed
+ elif c.status == "FAILED" or c.status == "CLOSED":
+ circ = self.circuits[c.circ_id]
+ # Setup a message for logging
+ # (the message starts with "FAILED" for CLOSED events as well)
+ message = ["FAILED"]
+ if c.reason: message.append("REASON=" + c.reason)
+ if c.remote_reason: message.append("REMOTE_REASON=" + c.remote_reason)
+ if not circ.built:
+ if self.setup_logger:
+ self.setup_logger.append(" ".join(message) + ": " +
+ str(circ.extend_times))
+ # Increase counter and write circ_stats to file
+ if self.model:
+ if circ.rtt_created:
+ self.circ_stats.failures_buildup += 1
+ self.stats_logger.write(self.circ_stats.to_string())
+ else:
+ self.circ_stats.failures_buildup += 1
+ self.stats_logger.write(self.circ_stats.to_string())
+ # A reason of REQUESTED is not counted as a failure
+ elif not c.reason == "REQUESTED":
+ # Increase *other* counter and write stats to file
+ if self.model:
+ if circ.rtt_created:
+ self.circ_stats.failures_established += 1
+ self.stats_logger.write(self.circ_stats.to_string())
+ else:
+ self.circ_stats.failures_established += 1
+ self.stats_logger.write(self.circ_stats.to_string())
+ # Call the underlying method in any case
+ PathSupport.CircuitHandler.circ_status_event(self, c)
+ if c.status == "FAILED" or c.status == "CLOSED":
+ self.refresh_sorted_list()
+ # Log something on BUILT
+ if c.status == "BUILT":
+ circ = self.circuits[c.circ_id]
+ if self.setup_logger:
+ self.setup_logger.append(str(circ.extend_times))
+ # Add duration to circ_stats and write file
+ if self.model:
+ if circ.rtt_created:
+ self.circ_stats.add_value(circ.setup_duration)
+ self.stats_logger.write(self.circ_stats.to_string())
+ else:
+ self.circ_stats.add_value(circ.setup_duration)
+ self.stats_logger.write(self.circ_stats.to_string())
+ self.refresh_sorted_list()
+ # XXX: Initialize a bw-test here
+ if EVAL_MODE and num_bw_tests > 0:
+ if self.model:
+ # Only test bandwidth on rtt_created circs
+ if circ.rtt_created:
+ self.start_bw_test(c.circ_id)
+ else: self.start_bw_test(c.circ_id)
+ def start_bw_test(self, circ_id):
+ """ Perform a bandwidth-test on circuit with given circ_id """
+ plog("INFO", "Starting BW-test on circuit " + str(circ_id))
+ # Enqueue the circuit
+ self.bw_queue.put(circ_id)
+ # Start the stream-thread (512 KB = 524288)
+ bw_tester = BwTester(1000000)
+ bw_tester.setDaemon(True)
+ bw_tester.start()
+ def stream_bw_event(self, s):  # s: stream bandwidth event; reads strm_id and arrived_at
+ """ Record the timestamp of the last stream_bw event to any stream """
+ if not s.strm_id in self.streams:  # stream we do not track: only warn
+ plog("WARN", "BW event for unknown stream id: "+str(s.strm_id))
+ else:
+ self.streams[s.strm_id].bw_timestamp = s.arrived_at  # remember when this stream last reported bandwidth
+ PathSupport.PathBuilder.stream_bw_event(self, s)  # NOTE(review): indentation lost in this view; confirm whether this also runs for unknown streams
+ def build_circuit(self, host, port):  # host, port: target passed on to the model or to the base class
+ """ Override from CircuitHandler to support circuit-creation from model """
+ if self.model:
+ circ = None  # NOTE(review): never read again in this method
+ # This is to ensure expansion of the model:
+ # Check the ratio we would get if we added a circ from the model
+ trad = self.get_trad_circs()  # circuits with rtt_created == False
+ ratio = trad/(len(self.circuits.values())+1.)  # "+1." counts the circuit about to be added and forces float division
+ plog("DEBUG","Expected Ratio: " + str(ratio) +
+ " >= " + str(min_ratio) + " ?")
+ if ratio >= min_ratio:
+ if self.create_circ_from_model(host, port):  # True means a circuit was created from a proposal
+ return
+ plog("INFO", "Not enough proposals [min_proposals=" + str(min_proposals) + "]")  # NOTE(review): nesting lost; confirm this is only logged after a failed model attempt
+ # Create a circuit using the backup-method (the base class implementation)
+ plog("INFO", "Creating circuit with the backup-method")
+ PathSupport.CircuitHandler.build_circuit(self, host, port)
+ def create_circ_from_model(self, host, port):  # returns True once a circuit was built from a proposal; otherwise falls off the end (None)
+ # Set the target
+ self.model.set_target(host, port, max_rtt)
+ if not self.model.up_to_date:  # regenerate proposals only when the model says they are stale
+ self.model.generate_proposals()
+ plog("DEBUG", "Current number of proposals is "+
+ str(len(self.model.proposals)))
+ if len(self.model.proposals) >= min_proposals:
+ # TODO: Set weights for single scores here!
+ self.model.update_ranking(1, 0)
+ # As long as there are enough proposals left to choose from
+ while len(self.model.proposals) >= min_proposals:
+ # Uniform:
+ # choice = random.choice(self.model.proposals)
+ # Fastest First:
+ # proposals = sort_list(self.model.proposals, lambda x: x.rtt)
+ # choice = proposals[0]
+ # Probabilistic selection (weighted by ranking_index):
+ choice = self.model.weighted_selection(lambda x: x.ranking_index)
+ # Convert ids to routers
+ r_path = self.model.keys_to_routers(choice.path)
+ if r_path and self.path_is_ok(r_path):  # skip empty paths and paths path_is_ok() rejects
+ plog("INFO", "Chosen proposal: " + choice.to_string())
+ try:
+ circ = self.c.build_circuit_from_path(r_path)
+ circ.rtt_created = True  # mark the circuit as created from the model
+ self.circuits[circ.circ_id] = circ
+ plog("INFO", "Created circ from model: " + str(circ.circ_id))
+ return True
+ except TorCtl.ErrorReply, e:
+ plog("NOTICE", "Error building circuit: " + str(e.args))
+ else:  # NOTE(review): indentation lost; if this pairs with the "if", a proposal whose build raised ErrorReply is never removed and may be chosen again
+ self.model.proposals.remove(choice)
+ # Helper functions ==========================================================
+ def get_trad_circs(self):
+ """ Count the circuits with rtt_created == False """
+ trad_circs = 0
+ for c in self.circuits.values():
+ if c.rtt_created == False:
+ trad_circs += 1
+ return trad_circs
+ def path_is_ok(self, path):  # path: list of routers to compare against c.path of every known circuit
+ """ Check if there is currently a circuit with the given path (Routers) """
+ if path:
+ for c in self.circuits.values():
+ if c.path == path:  # an existing circuit already uses exactly this path
+ plog("ERROR", "Proposed circuit already exists")
+ return False
+ return True  # NOTE(review): indentation lost; confirm whether an empty path yields True or None
+## Pinger #####################################################################
+class Pinger(threading.Thread):
+ """ Separate thread that triggers the Socks4-connections for pings """
+ def __init__(self, ping_handler):
+ self.handler = ping_handler # the PingHandler
+ threading.Thread.__init__(self)
+ def run(self):
+ """ The run()-method """
+ time.sleep(initial_interval)  # wait initial_interval seconds before the first round
+ self.handler.schedule_immediate(lambda x: x.start_round())  # hand the handler a callback that calls start_round()
+ while self.isAlive():
+ self.ping()
+ time.sleep(frequency)  # pause "frequency" seconds (presumably once per loop pass; indentation lost)
+ # Python < 2.5 has no combined "try .. except .. finally ..", hence the nested try blocks below
+ def ping(self):
+ """ Create a connection to dummy_host/_port using Socks4 """
+ s = None
+ try:
+ try:
+ s = socks.socksocket()
+ s.setproxy(socks.PROXY_TYPE_SOCKS4, socks_host, socks_port)
+ s.connect((ping_dummy_host, ping_dummy_port))
+ except socks.Socks4Error, e:
+ # Ignored on purpose: this error is expected to happen
+ # print("Got Exception: " + str(e))
+ pass
+ finally:
+ # Close the socket if it was created
+ if s: s.close()
+## BW-Tester ##################################################################
+class BwTester(threading.Thread):
+ """ Thread that connects to our own IP and downloads a stream """
+ def __init__(self, bytes):
+ self.bytes = bytes # Amount of bytes to request
+ threading.Thread.__init__(self) # Call the thread-constructor
+ def run(self):
+ """ The run()-method """
+ self.run_test()
+ # No "try .. except .. finally .." in Python < 2.5 !
+ def run_test(self):
+ """ Create a connection to stream-server.pl using SOCKS4 """
+ s = None
+ try:
+ try:
+ s = socks.socksocket()
+ s.setproxy(socks.PROXY_TYPE_SOCKS4, socks_host, socks_port)
+ s.connect((IP, 8041))
+ plog("INFO", "Connected to " + IP)
+ # Request bytes
+ s.send(str(self.bytes) + "\n")
+ plog("INFO", "Sent request for " + str(self.bytes) + " bytes")
+ byte_counter = 0
+ while 1:
+ buffer = s.recv(4096)
+ if buffer:
+ #plog("INFO", "Received " + str(len(buffer)) + " bytes")
+ byte_counter += len(buffer)
+ if byte_counter >= self.bytes:
+ plog("INFO", "Received " + str(byte_counter) + " bytes in total")
+ s.send("close\n")
+ break
+ except socks.Socks4Error, e:
+ print("Got Exception: " + str(e))
+ finally:
+ # Close the socket if open
+ if s: s.close()
+## End of Classes #############################################################
+def connect():
+ """ Return a connection to Tor's control port """
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((config.get(GENERAL, "control_host"),
+ config.getint(GENERAL, "control_port")))
+ conn = Connection(sock)
+ conn.authenticate()
+ #conn.debug(file("control.log", "w"))
+ except socket.error, e:
+ plog("ERROR", "Could not connect to Tor process .. running?")
+ sys.exit(-1)
+ return conn
+def setup_location(conn):
+ """ Setup a router object representing this proxy """
+ #global path_config
+ global IP
+ try:
+ # Try to determine our IP
+ info = conn.get_info("address")
+ IP = info["address"]
+ # Get the country_code
+ country_code = GeoIPSupport.get_country(IP)
+ plog("INFO", "Our IP address is " + str(IP) + " [" + str(country_code) + "]")
+ except:
+ plog("ERROR", "Could not get our IP and country")
+ return False
+ # Here we could set the current entry-country
+ # path_config.entry_country = country_code
+ return True
+def configure(conn):  # conn: the control connection (startup() passes the result of connect())
+ """ Set events and options """
+ conn.set_events([TorCtl.EVENT_TYPE.STREAM,  # NOTE(review): the list is never closed in this excerpt; the remaining event types and the closing "])" appear to be missing
+ # Set options: We attach streams now & build circuits ourselves
+ conn.set_option("__DisablePredictedCircuits", "1")
+ conn.set_option("__LeaveStreamsUnattached", "1")
+def startup(argv):  # argv is accepted but not used in the lines visible here
+ # Connect to Tor process
+ conn = connect()
+ # Setup our location
+ setup_location(conn)  # NOTE(review): the True/False result is ignored
+ # Configure myself
+ configure(conn)
+ # Get the size of the circuit-pool from config
+ num_circs = config.getint(CIRC_MANAGEMENT, "idle_circuits")
+ # Create the EventHandler (NOTE(review): no conn.set_event_handler(...) call is visible in this excerpt; confirm how the handler gets registered)
+ if ping_circs:
+ if network_model:
+ handler = PingHandler(conn, __selmgr, num_circs,
+ GeoIPSupport.GeoIPRouter, True)  # extra True is only passed when network_model is set; presumably enables the model, confirm against PingHandler
+ else:
+ handler = PingHandler(conn, __selmgr, num_circs,
+ GeoIPSupport.GeoIPRouter)
+ else:
+ # No pings, only a StreamHandler
+ handler = PathSupport.StreamHandler(conn, __selmgr, num_circs,
+ GeoIPSupport.GeoIPRouter)
+ # Go to sleep to be able to get killed from the commandline
+ # TODO: Do this only if *not* in testing_mode?
+ try:
+ while True:
+ time.sleep(60)  # the main thread only idles here until Ctrl-C
+ except KeyboardInterrupt:
+ # XXX: Schedule this?
+ if ping_circs:
+ if network_model:
+ handler.model.save_graph()  # save the model graph before shutting down
+ cleanup(conn)
+ sys.exit(1)  # NOTE(review): exit status is 1 even for this regular shutdown path
+def cleanup(conn):
+ """ To be called on exit """
+ # TODO: Stop other threads and close circuits
+ plog("INFO", "Cleaning up...")
+ conn.set_option("__LeaveStreamsUnattached", "0")
+ conn.set_option("__DisablePredictedCircuits", "0")
+ conn.close()
+def simulate(n):  # n: number of paths to generate
+ """ Simulate circuit creations """
+ plog("INFO", "Running a simulation ..")
+ # Connect to Tor process
+ conn = connect()
+ setup_location(conn)  # NOTE(review): the True/False result is ignored
+ # The generated paths
+ path_list = []
+ # Instantiate a PathBuilder
+ path_builder = PathSupport.PathBuilder(conn, __selmgr, GeoIPSupport.GeoIPRouter)
+ plog("INFO", "Generating "+str(n)+" paths")
+ if network_model:
+ model = NetworkModel(path_builder.routers)
+ model.set_target("", 80, max_rtt)  # empty host, port 80
+ model.generate_proposals()
+ # TODO: Set weights for single scores (RTT, advertised BW) here!
+ model.update_ranking(1, 0)
+ while n > 0:
+ # Probabilistic selection (weighted by ranking_index)
+ choice = model.weighted_selection(lambda x: x.ranking_index)
+ # Convert ids to routers
+ path = model.keys_to_routers(choice.path)
+ path_list.append(path)
+ n -= 1
+ else:
+ while n > 0:
+ path = path_builder.build_path()
+ path_list.append(path)
+ n -= 1
+ if n%1000 == 0:  # progress message every 1000 paths (presumably inside the loop above; indentation lost)
+ plog("INFO", str(time.localtime())+": Still "+str(n)+" paths to create --")
+ # Evaluate the generated paths and exit
+ evaluate(path_list)
+ cleanup(conn)
+ sys.exit(1)  # NOTE(review): exits with status 1 even after a successful run
+def evaluate(path_list):  # path_list: list of paths; indexes 0, 1 and 2 are read as entry, middle and exit
+ """ Currently evaluates lists of 3-hop paths only """
+ import sets
+ entries = sets.Set()
+ middles = sets.Set()
+ exits = sets.Set()
+ ee_combinations = {}  # (entry, exit) -> number of paths using that pair
+ # Count occurrences of routers on single positions and
+ # different combinations of [entry,exit]
+ for p in path_list:
+ entries.add(p[0])
+ middles.add(p[1])
+ exits.add(p[2])
+ if not ee_combinations.has_key((p[0], p[2])):
+ ee_combinations[(p[0], p[2])] = 1
+ else:
+ ee_combinations[(p[0], p[2])] += 1
+ # General logging
+ logfile = FileHandler(DATADIR+"simulation")
+ output = [str(len(entries)), str(len(middles)), str(len(exits))]
+ logfile.append(str(len(path_list))+" paths: "+" - ".join(output))
+ # Verbose about numbers of chosen nodes
+ plog("INFO", "Different nodes [entry/middle/exit]: "+"/".join(output))
+ # And combinations of entries and exits
+ plog("INFO", "Different [entry,exit]-combinations: " +
+ str(len(ee_combinations)))
+ # Get list of the counters and sort it
+ counters = ee_combinations.values()
+ sort_list(counters, lambda x: x)  # NOTE(review): return value unused; assumes sort_list sorts counters in place, confirm
+ # Log probabilities
+ probs = []
+ output = ""
+ for i in counters:
+ if i > 0:
+ # Calculate probability from counter i
+ prob = float(i)/len(path_list)
+ # Add it to the list
+ probs.append(prob)
+ # And add a new line to the output
+ line = str(i)+"\t"+str(prob)+"\n"
+ output += line
+ prob_logger = FileHandler(DATADIR+"ee_probs")
+ prob_logger.write(output)
+ # Determine entropies
+ m_entropy = get_max_entropy(len(path_list))
+ entropy = get_entropy(probs)
+ d = entropy/m_entropy  # NOTE(review): divides by zero when len(path_list) == 1, because the maximum entropy of a single path is 0
+ plog("INFO", "Maximum entropy: "+str(m_entropy))
+ plog("INFO", "Entropy of this sample: "+str(entropy))
+ plog("INFO", "Degree of anonymity: "+str(d))
+ # Calculate percentiles from the sorted list by grouping equal counters into runs
+ percentile_logger = FileHandler(DATADIR+"percentiles")
+ percentile_logger.write("")  # presumably empties the file before the append() calls below; confirm FileHandler semantics
+ percents = []
+ i = counters.pop(0)  # i: counter value of the current run
+ n = 1  # n: how many counters in a row had the value i
+ while len(counters)>0:
+ new = counters.pop(0)
+ if new == i:
+ n += 1
+ else:
+ percentile = (float(n*i)/len(path_list))*100  # share of all paths covered by this run, in percent
+ percents.append(percentile)
+ prob = float(i)/len(path_list)
+ plog("DEBUG", str(percentile)+
+ " percent of the paths having ee_prob = "+str(prob))
+ percentile_logger.append(str(percentile)+"\t"+str(prob))
+ i = new
+ n = 1
+ percentile = (float(n*i)/len(path_list))*100  # report the last run, which the loop above never reaches
+ percents.append(percentile)
+ prob = float(i)/len(path_list)
+ plog("DEBUG", str(percentile)+
+ " percent of the paths having ee_prob = "+str(prob))
+ percentile_logger.append(str(percentile)+"\t"+str(prob))
+ # Checking percentiles: the sum is only logged
+ sum = reduce(lambda x, y: x+y, percents, 0.0)  # local name shadows the builtin sum() from here on
+ plog("DEBUG", "(Sum of percentiles is "+str(sum)+")")
+def get_entropy(probs):
+ """ Return the entropy of a given list of probabilities """
+ # Check if the sum is 1
+ sum = reduce(lambda x, y: x+y, probs, 0.0)
+ plog("DEBUG", "(Sum of probs is "+str(sum)+")")
+ # Compute the entropy
+ entropy = -reduce(lambda x, y: x+(y*math.log(y,2)), probs, 0.0)
+ return entropy
+def get_max_entropy(n):
+ """ Calculate the maximum entropy in a sample of size n """
+ sum = 0.0
+ p = 1/float(n)
+ for i in range(1,n+1):
+ sum += p*math.log(p,2)
+ max_entropy = -sum
+ return max_entropy
+if __name__ == '__main__':
+ plog("INFO", "Starting OP-Addon v" + VERSION)
+ if len(sys.argv) == 3:
+ simulate(10)  # with exactly three argv entries, 10 paths are simulated
+ else:
+ simulate(int(sys.argv[3]))  # otherwise the 4th argv entry is the number of paths
+ else:  # NOTE(review): this second else has no matching "if" in this excerpt; a guarding condition line appears to be missing
+ startup(sys.argv)
Copied: torflow/trunk/CircuitAnalysis/OPAddon/pathrc.example (from rev 17873, torflow/trunk/pathrc.example)
--- torflow/trunk/CircuitAnalysis/OPAddon/pathrc.example (rev 0)
+++ torflow/trunk/CircuitAnalysis/OPAddon/pathrc.example 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,123 @@
+# Set the host and port where Tor is
+# listening for control-connections
+control_host =
+control_port = 9051
+# Size of the maintained pool of circuits
+idle_circuits = 3
+# TODO: Configure ports to use
+# Number of hops to be used in paths and
+# a minimum-value for advertised bandwidth
+pathlen = 3
+min_bw = 1024
+# Percentiles
+percent_fast = 100
+percent_skip = 0
+use_all_exits = yes
+# UniformGenerator with optionally ordered exits,
+# 'uniform = no' --> bandwidth-weighted selection
+uniform = no
+order_exits = no
+# Make use of guard-nodes (yes|no) or a specific
+# exit node (nickname or IDHex) for every path
+use_guards = yes
+#use_exit = xyz
+# Use GeoIP
+# yes|no
+use_geoip = no
+# yes|no for unique|equal country codes
+# ! comment out to not care about country codes
+unique_countries = yes
+# Maximum number of continent crossings: 0-n
+# ! comment out to enforce distinct continents
+# ! set >= pathlen to not care about continent crossings
+max_continent_crossings = 2
+# Maximum number of ocean crossings: 0-n
+# ! comment out to not care about ocean crossings
+max_ocean_crossings = 1
+# If echelon is set, OP-Addon will try to find an
+# exit in the destination country of the current
+# request (exit_country may be used as backup)
+# yes|no
+# TODO: echelon = yes
+# Set country codes for single positions
+#entry_country = DE
+#middle_country = RU
+#exit_country = US
+# TODO: excludes = [".."]
+# Frequently ping the latencies of complete circuits
+# yes|no
+ping_circs = yes
+# Tor socks-properties
+socks_host =
+socks_port = 9050
+# Host- and port-dummies to be used
+# for ping-connections
+ping_dummy_host =
+ping_dummy_port = 100
+# Time interval to wait before triggering
+# pings and frequency of pings in seconds (float)
+initial_interval = 10
+frequency = 5
+# Close a circuit after n timeouts during measurements
+# Set to 0 to never close circs (int)
+timeout_limit = 1
+# Set to 'yes' to measure latencies of single links
+# and enable circuit creation from the model
+# yes|no
+network_model = no
+# Min ratio of circs created with the backup-method,
+# controls growing of the model (float in [0,1])
+# 0: no growing
+# 1: growing only
+min_ratio = 0.5
+# RTT-threshold in seconds when creating circs (float):
+# 0: no threshold, choose from all proposals
+max_rtt = 0
+# Minimum number of proposals to choose from (int)
+min_proposals = 100
+# Evaluation mode: close every circuit after measuring performance
+# yes|no
+evaluate = no
+# Number of latency-tests per circuit (int: 0-n)
+num_rtt_tests = 3
+# Number of bandwidth-tests per circuit (int: 0 or 1)
+# Requires stream-server.pl listening on the same host
+num_bw_tests = 0
+# Total amount of circuits to test (int)
+num_records = 300
Deleted: torflow/trunk/CircuitAnalysis/buildtimes.py
--- torflow/trunk/CircuitAnalysis/buildtimes.py 2009-01-05 16:04:10 UTC (rev 17913)
+++ torflow/trunk/CircuitAnalysis/buildtimes.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -1,214 +0,0 @@
-#!/usr/bin/env python
-# uses metatroller to collect circuit build times for 5% slices of guard nodes
-# [OUTPUT] one directory, with three files: StatsHandler aggregate stats file, file with all circuit events (for detailed reference), file with just buildtimes
-import socket,sys,time,getopt,os
-from TorCtl.TorUtil import meta_port,meta_host,control_port,control_host
-from TorCtl.StatsSupport import StatsHandler
-from TorCtl import PathSupport, TorCtl
-__selmgr = PathSupport.SelectionManager(
- pathlen=3,
- order_exits=True,
- percent_fast=80,
- percent_skip=0,
- min_bw=1024,
- use_all_exits=True,
- uniform=True,
- use_exit=None,
- use_guards=True,
- restrict_guards=True)
-class Connection(PathSupport.Connection):
- """ thread quits when required number of circuits found, otherwise identical"""
- def __init__(self,s):
- PathSupport.Connection.__init__(self,s)
- def _loop(self):
- while 1:
- try:
- isEvent, reply = self._read_reply()
- except:
- self._err(sys.exc_info())
- return
- if isEvent:
- if self._handler is not None:
- self._eventQueue.put((time.time(), reply))
- else:
- cb = self._queue.get() # atomic..
- cb(reply)
- if self._handler is not None:
- if self._handler.circ_failed + self._handler.circ_built >= self._handler.nstats:
- print 'Finished gathering',self._handler.circ_failed + self._handler.circ_built,'circuits'
- print self._handler.circ_failed,'failed',self._handler.circ_built,'built'
- return
-class StatsGatherer(StatsHandler):
- def __init__(self,c, selmgr,basefile_name,nstats):
- StatsHandler.__init__(self,c, selmgr)
- self.detailfile = open(basefile_name + '.detail','w')
- self.buildtimesfile = open(basefile_name + '.buildtimes','w')
- self.circ_built = 0
- self.nstats = nstats
- # sometimes relevant CircEvents occur before the circ_id is
- # added to self.circuits, which means they get discarded
- # we track them in self.othercircs: a dictionary of list of events
- self.othercircs = {}
- def circ_event_str(self,now,circ_event):
- """ returns an string summarizing the circuit event"""
- output = [circ_event.event_name, str(circ_event.circ_id),
- circ_event.status]
- if circ_event.path:
- output.append(",".join(circ_event.path))
- if circ_event.reason:
- output.append("REASON=" + circ_event.reason)
- if circ_event.remote_reason:
- output.append("REMOTE_REASON=" + circ_event.remote_reason)
- output = [now]+ output
- outstr = ' '.join(output) + '\n'
- return outstr
- def add_missed_events(self,circ_id):
- """ if there are events for a circuit that were missed, add them"""
- if circ_id in self.othercircs:
- for e_str in self.othercircs[circ_id]:
- self.detailfile.write(e_str)
- self.detailfile.flush()
- # now in self.circuits, so can delete it from self.othercircs
- del self.othercircs[circ_id]
- def circ_status_event(self, circ_event):
- """ handles circuit status event """
- now = time.time()
- now = '%3.10f' % now
- if circ_event.circ_id in self.circuits.keys():
- self.add_missed_events(circ_event.circ_id)
- if circ_event.status == 'EXTENDED':
- extend_time = circ_event.arrived_at-self.circuits[circ_event.circ_id].last_extended_at
- self.circuits[circ_event.circ_id].extend_times.append(extend_time)
- self.circuits[circ_event.circ_id].last_extended_at = circ_event.arrived_at
- if circ_event.status == 'BUILT':
- circ = self.circuits[circ_event.circ_id]
- buildtime = reduce(lambda x,y:x+y,circ.extend_times,0.0)
- self.buildtimesfile.write(str(circ.circ_id) + '\t' + str(buildtime) + '\n')
- self.buildtimesfile.flush()
- outstr = self.circ_event_str(now,circ_event)
- self.detailfile.write(outstr)
- self.detailfile.flush()
- # check to see if done gathering data
- if circ_event.status == 'BUILT': self.circ_built += 1
- else:
- #eventstr =
- #if circ_event.circ_id in self.othercircs.keys():
- if circ_event.circ_id not in self.othercircs.keys():
- self.othercircs[circ_event.circ_id] = []
- self.othercircs[circ_event.circ_id] += [self.circ_event_str(now,circ_event)]
- StatsHandler.circ_status_event(self,circ_event)
-def getdata(filename,ncircuits):
- """ starts stat gathering thread """
- s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
- s.connect((control_host,control_port))
- c = Connection(s)
- c.authenticate() # also launches thread...
- h = StatsGatherer(c,__selmgr,filename,ncircuits)
- c.set_event_handler(h)
- c.set_events([TorCtl.EVENT_TYPE.STREAM,
- return c
-def setargs():
- ncircuits = ""
- dirname = ""
- filename = ""
- if len(sys.argv[1:]) < 3:
- usage()
- sys.exit(2)
- try:
- opts,args = getopt.getopt(sys.argv[1:],"p:n:d:")
- except getopt.GetoptError,err:
- print str(err)
- usage()
- ncircuits=None
- percentile=None
- dirname=""
- for o,a in opts:
- if o == '-n':
- if a.isdigit(): ncircuits = int(a)
- else: usage()
- elif o == '-d': dirname = a #directory where output files go
- elif o == '-p':
- if a.isdigit(): percentile = int(a)
- else: usage()
- else:
- assert False, "Bad option"
- return ncircuits,percentile,dirname
-def usage():
- print 'usage: statscontroller.py [-p <#percentile>] -n <# circuits> -d <output dir name>'
- sys.exit(1)
-def guardslice(p,ncircuits,dirname):
- print 'Making new directory:',dirname
- if not os.path.isdir(dirname):
- os.mkdir(dirname)
- else:
- print 'Directory',dirname,'exists, not making a new one.'
- print 'Guard percentiles:',p,'to',p+5
- print '#Circuits',ncircuits
- basefile_name = dirname + '/' + str(p) + '-' + str(p+5) + '.' + str(ncircuits)
- aggfile_name = basefile_name + '.agg'
- __selmgr.percent_fast = p+5
- __selmgr.percent_skip = p
- c = getdata(basefile_name,ncircuits)
- for i in xrange(0,ncircuits):
- print 'Building circuit',i
- try:
- # XXX: hrmm.. race conditions on the path_selectior members
- # for the event handler thread?
- # Probably only if streams end up coming in during this test..
- circ = c.build_circuit(__selmgr.pathlen,__selmgr.path_selector)
- c._handler.circuits[circ.circ_id] = circ
- except TorCtl.ErrorReply,e:
- plog("NOTICE","Error building circuit: " + str(e.args))
- while True:
- time.sleep(1)
- if c._handler.circ_built + c._handler.circ_failed >= ncircuits:
- print 'Done gathering stats for slice',p,'to',p+5,'on',ncircuits
- print c._handler.circ_built,'built',c._handler.circ_failed,'failed'
- break
- c._handler.write_stats(aggfile_name)
-def main():
- ncircuits,p,dirname = setargs()
- if p is None:
- # do all
- for p in xrange(0,100,5):
- guardslice(p,ncircuits,dirname)
- else:
- guardslice(p,ncircuits,dirname)
-if __name__ == '__main__':
- main()
Deleted: torflow/trunk/CircuitAnalysis/numpy_pareto.py
--- torflow/trunk/CircuitAnalysis/numpy_pareto.py 2009-01-05 16:04:10 UTC (rev 17913)
+++ torflow/trunk/CircuitAnalysis/numpy_pareto.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -1,59 +0,0 @@
-import numpy
-import pylab
-import matplotlib
-def loadbuildtimes():
- f = open('40k_r1/45-50.40000.buildtimes')
- vals = []
- for line in f:
- line = line.split('\t')
- vals += [float(line[1].strip())*1000]
- vals.sort()
- vals.reverse()
- return vals
-def pareto(x,k,Xm):
- return k*(Xm**k)/(x**(k+1))
-#get buildtime data (in ms)
-Z = loadbuildtimes()
-# plot histogram.
-# args: values, number of bins, normalize y/n, width of bars
-pylab.hist(Z,len(Z) / 100.0, normed=True, width=5)
-#pareto parameters (taken from output of ./shufflebt.py buildtimes)
-#Resolution of histogram: 100 ms
-#Mean: 5746.8020777, mode: 1600
-#ParK: 0.918058347945
-#ModeN: 32775 vs integrated: 32394.9483089
-#successful runs: 41712
-k = 0.687880881456
-Xm = 1800
-n = 28921
-# args to a range: x start, x end
-X = pylab.arange(Xm, max(Z), 1) # max(Z), 0.1) # x values from 1 to max(Z) in increments of 0.1 (can adjust this to look at different parts of the graph)
-Y = map(lambda x: pareto(x,k,Xm), X) #pareto(x) (units: #measurements with value x)
-# verify sanity by integrating scaled distribution:
-modeNint = numpy.trapz(map(lambda x: n*pareto(x, k, Xm),
- xrange(Xm,200000)))
-print modeNint
-print n*pareto(Xm, k, Xm)
-#draw pareto curve
-# X values plotted against Y values, will appear as blue circles b:blue o:circle
-#save figure
Deleted: torflow/trunk/CircuitAnalysis/shufflebt.py
--- torflow/trunk/CircuitAnalysis/shufflebt.py 2009-01-05 16:04:10 UTC (rev 17913)
+++ torflow/trunk/CircuitAnalysis/shufflebt.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -1,337 +0,0 @@
-#!/usr/bin/env python
-# shufflebt.py
-# (c) Fallon Chen 2008
-# Shuffles a list of build times and produces a pdf of n of those buildtimes,
-# which are put into res (defaults to 100)ms blocks.
-# Requires gnuplot 4.2 and a version coreutils that provides sort -R
-# "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] <list of filenames>"
-# if outdir is not specified, the script will write files to the current directory
-# if a directory is given instead of a list of filenames, all files postfixed with '.buildtimes' will be processed
-import getopt,sys,os
-import popen2
-import math,copy
-from scipy.integrate import *
-from numpy import trapz
-import numpy
-import pylab
-import matplotlib
-class Stats:
- def __init__(self,file):
- self.f = open(file)
- self.values = []
- for line in self.f:
- line = line.split('\t')
- self.values += [float(line[1]) * 1000]
- self.f.close()
- self.buckets = {}
- def mean(self):
- # Borrowed from TorUtil
- if len(self.values) > 0:
- sum = reduce(lambda x,y: x+y,self.values,0.0)
- return sum/len(self.values)
- else:
- return 0.0
- def stddev(self):
- # Borrowed from TorUtil
- if len(self.values) > 1:
- mean = self.mean()
- sum = reduce(lambda x,y: x + ((y-mean)**2.0),self.values,0.0)
- s = math.sqrt(sum/(len(self.values)-1))
- return s
- else:
- return 0.0
- def median(self):
- if len(self.values) > 0:
- values = copy.copy(self.values)
- values.sort()
- return values[(len(values) - 1)/2]
- else:
- return 0.0
- def mode(self): # Requires makehistogram runs first
- counts = {}
- greatest_val = 0
- greatest_idx = 0
- for v in self.buckets.keys():
- if self.buckets[v] > greatest_val:
- greatest_idx = v
- greatest_val = self.buckets[v]
- return greatest_idx
- def pyhist(self,res,histname):
- bins = len(self.values) / res
- print 'bins:',bins
- x = matplotlib.numerix.arange(1,7000, 0.01)
- S = pypareto(x,0.918058347945, 1600.0, 32775.0)
- #pylab.hist(self.values,bins=bins,normed=False, width=1)
- #(n,bins) = numpy.histogram(self.values,bins=bins,normed=False)
- #pylab.plot(bins,n )
- pylab.plot(x,S, 'bo')
- #pylab.show()
- pylab.savefig(histname + '.png')
- # XXX: This doesn't seem to work for small #s of circuits
- def makehistogram(self,res,histname):
- #res = res /1000.0 # convert ms to s
- values = copy.copy(self.values)
- values.sort()
- count = 0
- i = 1
- self.buckets = {}
- for v in values:
- if v < res * i: count += 1
- else:
- count += 1
- self.buckets[int(res * i)] = count
- #self.buckets[int(res * i * 10)] = count
- i += 1
- count = 0
- f = open(histname,'w')
- f.write('#build time <\t#circuits\n')
- sortedkeys = self.buckets.keys()
- sortedkeys.sort()
- for b in sortedkeys:
- towrite = str(b) + '\t' + str(self.buckets[b]) + '\n'
- f.write(towrite)
- f.close()
- def paretoK(self, Xm):
- n = 0
- log_sum = 0
- X = min(self.values)
- for x in self.values:
- if x < Xm: continue
- n += 1
- log_sum += math.log(x)
- return n/(log_sum - n*math.log(Xm))
- # Calculate the mean beyond a mode value
- def modeMean(self, Xm):
- n = 0
- tot = 0
- for x in self.values:
- if x < Xm: continue
- n += 1
- tot += x
- return tot/n
- def modeN(self, Xm):
- n = 0
- for x in self.values:
- if x < Xm: continue
- n += 1
- return n
- def maxlikelihood(self,k):
- # theta estimator for gamma PDF
- # maxlikelihood estimator
- # theta = sum(values) / N*k
- return 10*sum(self.values)/(k * len(self.values))
- def bayesian(self,k):
- # bayesian estimator for gamma PDF
- # y = sum(values)
- # theta = y/(Nk - 1) +/- y^2/((Nk-1)^2(Nk -2))
- y = sum(self.values) * 10
- N = len(self.values)
- mean = y/(N*k - 1)
- sdev = (y*y)/((N*k - 1)* (N*k - 1) * (N*k - 2))
- plus = mean + sdev
- minus = mean - sdev
- return plus,minus
-## Functions that return a gnuplot function string for a given distribution
-def gamma(k,theta, N,fname):
- # gnuplot string for gamma PDF
- # g(x,k,B) = (x**(k - 1) * B**k * exp(-B*x))/gamma(k)
- B = 1.0/theta
- ps = fname + '(x) = '+str(N)+'*((x**' + str(k-1) + ')*(' +str(B**k)+ ')*(exp(-' + str(B) +'*x)))' +'/gamma('+str(k)+')\n'
- return ps
-def pareto(k,Xm,N,fname):
- # gnuplot string for shifted, normalized exponential PDF
- # g(x,k,B) = (N * k*(Xm**k)/x**(k+1)))
- ps = fname+'(x)=(x<='+str(Xm)+') ? 0 : (('+str((N*k)*(Xm**k))+')/((x)**('+str(k+1)+')))\n'
- #ps = fname+'(x)='+str(N*k*(Xm**k))+'/x**('+str(k+1)+')\n'
- return ps
-def pypareto(x, k,Xm):
- # gnuplot string for shifted, normalized exponential PDF
- # g(x,k,B) = (N * k*(Xm**k)/x**(k+1)))
- if x<Xm: return 0
- else: return ((((k)*(Xm**k)))/((x)**((k+1))))
-def exp(mean,shift,N,fname):
- # gnuplot string for normalized exponential PDF
- # g(x,k,B) = N * l*exp(-l*(x-shift))
- l = 1.0/mean
- ps = fname+'(x)=(x<'+str(shift)+')?0:('+str(N*l)+'*exp(-abs('+str(l)+'*(x-'+str(shift)+'))))\n'
- return ps
-def shiftedExp(mean,shift,N,fname):
- # gnuplot string for shifted, normalized exponential PDF
- # g(x,k,B) = N * l*exp(-l*(x-shift))/(1+(1-exp(-l*shift)))
- l = 1.0/mean
- ps = fname+'(x)='+str(N*l)+'*exp(-abs('+str(l)+'*(x-'+str(shift)+')))/(1+(1-exp(-'+str(l*shift)+')))\n'
- return ps
-def poisson(u,N,fname):
- ps = fname + "(x) = " + str(N) + "*(" + str(u) + "**x)*exp(-"+str(u)+")/gamma(x + 1)\n"
- return ps
-def normal(u,d,N,fname):
- ps = fname + "(x)="+str(int(N)/d)+"*(exp(-((x-"+str(u)+ ")**2)/"+str(2*d*d)+"))/sqrt(2*pi)\n"
- return ps
-def usage():
- print "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] [-r <res in ms>] <list of filenames>"
- sys.exit(1)
-def intermediate_filename(infile,shuffle,truncate,outdir):
- if not shuffle and not truncate: return os.path.abspath(infile)
- intermediate = [os.path.join(os.path.abspath(outdir),os.path.basename(infile))]
- if truncate: intermediate.append(str(truncate))
- if shuffle:
- intermediate.append('shuffled')
- return '.'.join(intermediate)
-def histogram_basefilename(infile,shuffle,truncate,res,outdir):
- name = [os.path.join(os.path.abspath(outdir),os.path.basename(infile))]
- if truncate: name.append(str(truncate))
- if shuffle: name.append('shuffled')
- name.append('res' + str(res))
- return '.'.join(name)
-def getargs():
- # [-n <truncate to # circuits>] [-s] <list of filenames>
- k = 3
- res = 100
- sort =False
- truncate = None
- graph = False
- outdirname = "." # will write to current directory if not specified
- filenames = []
- if len(sys.argv) < 2: usage()
- else:
- arglen = len(sys.argv[1:])
- i = 0
- while (arglen - i) > 0:
- if sys.argv[i+1] == '-s': sort = True
- elif sys.argv[i+1] == '-n':
- if not sys.argv[i + 2].isdigit(): usage()
- truncate = sys.argv[i+2]
- i += 1
- elif sys.argv[i + 1] == '-g': graph = True
- elif sys.argv[i + 1] == '-k':
- k = float(sys.argv[i + 2])
- i += 1
- elif sys.argv[i+1] == '-d':
- outdirname = sys.argv[i + 2]
- i += 1
- elif sys.argv[i+1] == '-r':
- res = float(sys.argv[i+2])
- i += 1
- else:
- filenames += [sys.argv[i+1]]
- i += 1
- return sort, truncate,graph,outdirname,filenames,k,res
-def shuffle(sort,truncate,filename,newfile):
- if not sort and truncate is None: return
- sortlocation = '/usr/local/bin/sort' #peculiarity of fallon's system
- #sortlocation = 'sort'
- if sort and truncate:
- cmd = sortlocation + ' -R ' + filename + ' | head -n ' + truncate + ' > ' + newfile
- elif sort and not truncate:
- cmd = sortlocation + ' -R ' + filename + ' > ' + newfile
- elif not sort and truncate:
- cmd = 'cat ' + filename + ' | head -n ' + truncate + ' > ' + newfile
- p = popen2.Popen4(cmd)
- p.wait()
-if __name__ == "__main__":
- sort, truncate,graph,dirname,filenames,k,res = getargs()
- # make new directory
- print 'Making new directory:',dirname
- if not os.path.isdir(dirname):
- os.mkdir(dirname)
- else:
- print 'Dir exists, not making a new one'
- for filename in filenames:
- if os.path.isdir(filename):
- # shallow add of files in dir
- for f in os.listdir(filename):
- if f[-11:] == '.buildtimes':
- filenames += [os.path.join(filename,f)]
- filenames.remove(filename)
- for filename in filenames:
- print 'Processing',filename
- print '------------------------------'
- if not os.path.exists(filename):
- print filename,'is not a valid path'
- continue
-# if truncate and sort or truncate and not sort:
-# newfile = os.path.join(dirname, os.path.basename(filename) + '.' + truncate + '.shuffled')
-# elif sort and not truncate:
-# newfile = os.path.join(dirname , os.path.basename(filename) + '.shuffled')
-# else:
-# newfile = filename
- newfile = intermediate_filename(filename,sort,truncate,dirname)
- # shuffle, create new file
- shuffle(sort,truncate,filename,newfile)
- # create histogram from file
- s = Stats(newfile)
- histfilename = histogram_basefilename(filename,sort,truncate,res,dirname)
- s.makehistogram(res,histfilename + '.hist')
- mean = s.mean()
- stddev = s.stddev()
- median = s.median()
- mode = s.mode() # relies on s.makehistogram for buckets
- parK = s.paretoK(mode)
- modeN = s.modeN(mode)
- modeMean = s.modeMean(mode)
- # verify sanity by integrating scaled distribution:
- modeNint = trapz(map(lambda x: modeN* pypareto(x, parK, mode),
- xrange(1,200000)))
- print 'Resolution of histogram:',res,'ms'
- print 'Mean: '+str(mean)+', mode: '+str(mode)
- print 'ParK: '+str(parK)
- print 'ModeN: '+str(modeN)+" vs integrated: "+str(modeNint)
- print '#successful runs:',len(s.values)
- # get stats
- if graph:
- # plot histogram
- # args: values, # bins, normalize y/n, width of bars
- pylab.hist(s.values,len(s.values) / res, normed=True,width=5)
- #plot Pareto curve
- X = pylab.arange(mode, max(s.values), 1)
- Y = map(lambda x: pypareto(x, parK, mode), X)
- n = len(s.values)
- pylab.plot(X,Y,'b-')
- #save figure
- pylab.savefig(histfilename + '.png')
- pylab.clf()
Copied: torflow/trunk/NodeMonitors/bw-informer.py (from rev 17873, torflow/trunk/bw-informer.py)
--- torflow/trunk/NodeMonitors/bw-informer.py (rev 0)
+++ torflow/trunk/NodeMonitors/bw-informer.py 2009-01-05 16:27:59 UTC (rev 17914)
@@ -0,0 +1,387 @@
+ RWTH Aachen University, Informatik IV
+ Copyright (C) 2007 Johannes Renner
+ Contact: renner at i4.informatik.rwth-aachen.de
+# Addon for onion routers:
+# Shall provide information about available bandwidth on single
+# TLS-connections as well as globally available bandwidth for
+# requesting clients in an anonymity-preserving way (?).
+# TODO: Make the document be served by Tor via HTTP
+import re
+import sys
+import sched
+import time
+import socket
+import atexit
+import threading
+import traceback
+from TorCtl import *
+from TorCtl.TorUtil import control_port, control_host
+from TorCtl.TorUtil import *
+# Set the version here
+VERSION = "0.0-alpha"
+# Move these to a config file:
+# Listen host and port (passed to server.bind() in start_server())
+listen_host = ""
+listen_port = 9053
+# Duration of single measuring interval (seconds)
+interval = 20
+# Alpha for computing new max values, let max
+# decrease slowly if no traffic or not topped:
+# new_max = max(curr, old_max*alpha + curr*(1-alpha))
+alpha = .9999
+# Minimum 'available' bandwidth (byte/sec)
+# to show up on the document
+available_min = 0
+# Global variable marks the start of an interval
+# (do_work() computes 'elapsed' from it and then resets it)
+start = time.time()
+# Overall start time
+total_start = time.time()
+# Variable that contains the status-document
+# (replaced by create_document(), sent to clients by client_thread())
+bw_status = "no status document available yet :(\r\n"
+# Dictionary that contains all stats:
+# nickname/endpoint string -> LinkBandwidthStats
+stats = {}
+# Acquired by the event handler, read_routers() and do_work()
+# around their accesses to 'stats'
+stats_lock = threading.Lock()
+# Dicts that contain mappings, filled by read_routers():
+# ns.idhex -> ns.nickname and ns.nickname -> ns.idhex
+key_to_name = {}
+name_to_key = {}
+# We use the same class for recording global stats and link stats
+class LinkBandwidthStats(TorCtl.Router):
+    """Byte counters and throughput estimates for one link.
+
+    The same class is used for the per-link entries in 'stats' and for
+    the single node-wide instance 'global_stats'.
+    """
+
+    def __init__(self, r=None):
+        """Create zeroed stats, optionally "promoting" an existing object.
+
+        r -- optional object whose attribute dictionary is adopted by this
+             instance (shared, not copied).  Without it only 'down' is
+             initialised, to 0.
+        """
+        if r:
+            self.__dict__ = r.__dict__
+        else:
+            self.down = 0
+        # NOTE(review): indentation was lost in this copy of the file.  The
+        # counters are placed outside the if/else because promoted instances
+        # are stored in 'stats' by read_routers() and later get update()
+        # called on them, which needs every counter below.
+        # Total counters
+        self.tot_age = 0
+        self.tot_count = 0
+        self.tot_ncircs = 0
+        self.tot_read = 0
+        self.tot_written = 0
+        self.tot_bytes = 0           # total read + written
+        # Interval stats
+        self.int_read = 0            # count bytes read & written ..
+        self.int_written = 0         # in the last interval
+        self.int_bytes = 0           # sum of both, gets set on update()
+        self.curr_throughput = 0.0   # avg throughput for the last interval
+        self.max_throughput = 0.0    # throughput max-value
+        self.available = 0.0         # max - avg
+
+    def read(self, bytes_read):
+        """Add bytes_read to the total and the current-interval read counters."""
+        self.tot_read += bytes_read
+        self.int_read += bytes_read
+
+    def written(self, bytes_written):
+        """Add bytes_written to the total and the current-interval write counters."""
+        self.tot_written += bytes_written
+        self.int_written += bytes_written
+
+    # Reset all of the interval counters
+    def reset_interval_counters(self):
+        """Zero int_read, int_written and int_bytes."""
+        self.int_read = 0
+        self.int_written = 0
+        self.int_bytes = 0
+
+    # Most important method here
+    def update(self, elapsed):
+        """Close the current interval and recompute the throughput figures.
+
+        elapsed -- length of the interval in seconds.  A non-positive value
+                   gives a current throughput of 0.0 instead of raising
+                   ZeroDivisionError (or producing a negative rate).
+        """
+        # Compute the interval-bytes read+written
+        self.int_bytes = self.int_read + self.int_written
+        # Compute total bytes
+        self.tot_bytes = self.tot_read + self.tot_written
+        # Compute avg interval throughput
+        if elapsed > 0:
+            self.curr_throughput = self.int_bytes / float(elapsed)
+        else:
+            self.curr_throughput = 0.0
+        # Max handling ..
+        if self.curr_throughput > self.max_throughput:
+            # We have a new max!
+            self.max_throughput = self.curr_throughput
+            plog("DEBUG", self.nickname + " reached new max: " +
+                 str(self.max_throughput) + " byte/sec")
+        else:
+            # Decrease the max-value using alpha-formula
+            self.max_throughput = max(
+                self.curr_throughput,
+                self.max_throughput*alpha + self.curr_throughput*(1-alpha))
+        # Compute the difference as 'available'
+        # TODO: Add the frac part from the approaches
+        self.available = self.max_throughput - self.curr_throughput
+        # Reset the counters
+        self.reset_interval_counters()
+
+
+# Special instance of LinkBandwidthStats for recording of bw-events
+global_stats = LinkBandwidthStats()
+global_stats.nickname = "Global stats"
+# We need an EventHandler
+# extend from TorCtl.EventHandler
+class LinkHandler(TorCtl.EventHandler):
+ # Event handler that adds BW events to global_stats and ORCONN
+ # events to the per-endpoint entries of 'stats'; NS/NEWDESC events
+ # refresh the router tables through read_routers().
+ # NOTE(review): leading whitespace was collapsed in this copy of the
+ # file, so statement nesting has to be inferred -- confirm against the
+ # repository version before changing any logic.
+ def __init__(self, conn):
+ # Set the connection
+ self.c = conn
+ TorCtl.EventHandler.__init__(self)
+ # Method to handle BW-events for recording total bw
+ def bandwidth_event(self, event):
+ #plog("NOTICE", "BW-Event: " + str(event.read) + " bytes read, " + str(event.written) + " bytes written")
+ # Zero/None counts are skipped
+ if event.read: global_stats.read(event.read)
+ if event.written: global_stats.written(event.written)
+ # Method to handle ORCONN-events
+ def or_conn_status_event(self, o):
+ # First normalise o.endpoint to the key used in 'stats':
+ # "$..." not in key_to_name -> "AllClients:HASH"
+ # "$..." in key_to_name -> the mapped nickname
+ # other name not in name_to_key -> "AllClients:IP"
+ # Count all clients as one:
+ # If o.endpoint is an idhash
+ if re.search(r"^\$", o.endpoint):
+ if o.endpoint not in key_to_name:
+ o.endpoint = "AllClients:HASH"
+ else: o.endpoint = key_to_name[o.endpoint]
+ # If it is no idhash and not in name_to_key
+ elif o.endpoint not in name_to_key:
+ plog("DEBUG", "IP? " + o.endpoint)
+ o.endpoint = "AllClients:IP"
+ if o.status == "NEW" or o.status == "LAUNCHED" or o.status == "CONNECTED":
+ plog("NOTICE", "Connection to " + o.endpoint + " is now " + o.status)
+ # If status is READ or WRITE
+ elif o.status == "READ" or o.status == "WRITE":
+ #plog("DEBUG", o.endpoint + ", read: " + str(o.read_bytes) + " wrote: " + str(o.wrote_bytes))
+ # NOTE(review): acquire()/release() are not paired with try/finally;
+ # an exception in between would leave stats_lock held.
+ stats_lock.acquire()
+ # If not in stats: add!
+ if o.endpoint not in stats:
+ stats[o.endpoint] = LinkBandwidthStats()
+ stats[o.endpoint].nickname = o.endpoint
+ plog("NOTICE", "+ Added " + o.endpoint + " to the stats")
+ # Add number of bytes to total and interval
+ if o.read_bytes: stats[o.endpoint].read(o.read_bytes)
+ if o.wrote_bytes: stats[o.endpoint].written(o.wrote_bytes)
+ stats_lock.release()
+ elif o.status == "CLOSED" or o.status == "FAILED":
+ # Don't record reasons!
+ stats_lock.acquire()
+ if o.endpoint not in stats:
+ # Add .. if there will be no traffic it will be removed in the next round
+ stats[o.endpoint] = LinkBandwidthStats()
+ stats[o.endpoint].nickname = o.endpoint
+ plog("NOTICE", "+ Added " + o.endpoint + " to the stats")
+ # Add 'running' to status
+ if o.status == "FAILED" and not stats[o.endpoint].down:
+ o.status = o.status + "(Running)"
+ # 'Total' stats
+ # NOTE(review): o.ncircs is added unguarded here although the debug
+ # code below treats it as possibly empty -- confirm it is never None.
+ stats[o.endpoint].tot_ncircs += o.ncircs
+ stats[o.endpoint].tot_count += 1
+ if o.age: stats[o.endpoint].tot_age += o.age
+ #if o.read_bytes: stats[o.endpoint].tot_read += o.read_bytes
+ #if o.wrote_bytes: stats[o.endpoint].tot_wrote += o.wrote_bytes
+ stats_lock.release()
+ # This is only for constructing debug output
+ # (each optional field becomes "NAME=value" or an empty string)
+ if o.age: age = "AGE="+str(o.age)
+ else: age = ""
+ if o.read_bytes: read = "READ="+str(o.read_bytes)
+ else: read = ""
+ if o.wrote_bytes: wrote = "WRITTEN="+str(o.wrote_bytes)
+ else: wrote = ""
+ if o.reason: reason = "REASON="+o.reason
+ else: reason = ""
+ if o.ncircs: ncircs = "NCIRCS="+str(o.ncircs)
+ else: ncircs = ""
+ plog("DEBUG", " ".join((o.event_name, o.endpoint, o.status, age, read, wrote, reason, ncircs)))
+ # NS-EventHandler methods
+ def ns_event(self, n):
+ # Re-read the routers listed in the event
+ read_routers(self.c, n.nslist)
+ def new_desc_event(self, d):
+ # One get_network_status() request per id in the event
+ for i in d.idlist: # Is this too slow?
+ read_routers(self.c, self.c.get_network_status("id/"+i))
+# Sort a list by a specified key
+def sort_list(list, key):
+    """Sort `list` in place, highest key(item) first, and return it.
+
+    list -- the list to sort (modified in place)
+    key  -- function mapping an item to its sort value
+
+    Items with equal keys keep their relative order.
+    """
+    # key=/reverse= evaluates key() once per item instead of twice per
+    # comparison, and unlike a cmp function it also works on Python 3.
+    list.sort(key=key, reverse=True)
+    return list
+# Write document to file f
+def write_file(f):
+    """Write the current status document (global bw_status) to f, then close f.
+
+    f -- an open, writable file object; it is closed even if the write
+         fails, so the handle is never leaked.
+    """
+    try:
+        f.write(bw_status)
+    finally:
+        f.close()
+# Read the routers
+def read_routers(c, nslist):
+ # Record idhex<->nickname mappings for every entry of nslist and store a
+ # LinkBandwidthStats built from c.get_router(ns) in 'stats'.
+ # c -- connection object providing get_router()
+ # nslist -- iterable of entries with .idhex, .nickname and .flags
+ # NOTE(review): indentation was collapsed in this copy; in particular it
+ # is not visible whether "stats[r.nickname] = r" is inside the
+ # "if ns.nickname in stats" branch -- confirm against the repository.
+ # NOTE(review): the global statement is not required for item assignment.
+ global key_to_name, name_to_key
+ # Counts entries for which TorCtl.ErrorReply was raised; never read again
+ bad_key = 0
+ stats_lock.acquire()
+ for ns in nslist:
+ try:
+ key_to_name[ns.idhex] = ns.nickname
+ name_to_key[ns.nickname] = ns.idhex
+ r = LinkBandwidthStats(c.get_router(ns))
+ if ns.nickname in stats:
+ if stats[ns.nickname].idhex != r.idhex:
+ plog("NOTICE", "Router "+r.nickname+" has multiple keys: "
+ +stats[ns.nickname].idhex+" and "+r.idhex)
+ stats[r.nickname] = r # XXX: We get names only from ORCONN :(
+ except TorCtl.ErrorReply:
+ bad_key += 1
+ if "Running" in ns.flags:
+ plog("INFO", "Running router "+ns.nickname+"="+ns.idhex+" has no descriptor")
+ pass
+ # Any other error is printed and the loop moves on to the next entry
+ except:
+ traceback.print_exception(*sys.exc_info())
+ continue
+ stats_lock.release()
+# Update stats and reset every router's counters
+# (Requires stats_lock.acquire())
+def update_stats(elapsed):
+    """Run update(elapsed) on the global stats and on every entry of 'stats'.
+
+    elapsed -- length of the finished interval in seconds.
+    The caller is expected to hold stats_lock.
+    """
+    # Node-wide counters first ...
+    global_stats.update(elapsed)
+    # ... then each individual link
+    for link in stats.values():
+        link.update(elapsed)
+# Create the new status document
+# (Requires stats_lock.acquire())
+# TODO: Somehow compress the data:
+# - if available==max --> only deliver max?
+# - only deliver available?
+# - leave out links with available==0 ?
+# - No, avail==0 means new max, but not nothing available!
+# - clustering/classification?
+def create_document():
+    """Rebuild the global status document 'bw_status' from the current stats.
+
+    First line: "<available> <max> <current>" of the global stats.
+    Then one "<nickname> <available> <max> <current>" line per link, sorted
+    by available bandwidth (highest first); links below available_min and
+    the "AllClients:HASH" entry are left out.  Lines end in "\r\n".
+    The caller is expected to hold stats_lock.
+    """
+    global bw_status
+    by_available = lambda x: x.available
+    # Global figures head the document
+    pieces = [str(global_stats.available) + " ",
+              str(global_stats.max_throughput) + " ",
+              str(global_stats.curr_throughput) + "\r\n"]
+    for link in sort_list(stats.values(), by_available):
+        # Cutoff at available_min
+        if by_available(link) >= available_min and link.nickname != "AllClients:HASH":
+            pieces.append(link.nickname + " " + str(by_available(link)) + " ")
+            pieces.append(str(link.max_throughput) + " " + str(link.curr_throughput) + "\r\n")
+    # Critical: Exchange global bw_status document
+    bw_status = "".join(pieces)
+# This is the method where the main work is done
+# Schedule the call every 'interval' seconds
+def do_work(s):
+    """Close one measuring interval and schedule the next one.
+
+    s -- the sched.scheduler this function re-registers itself with.
+
+    Updates all stats, rebuilds the status document, writes it to
+    ./data/bw-informer/bw-document and restarts the interval clock.
+    """
+    global start
+    # Get the time and compute elapsed
+    now = time.time()
+    elapsed = now - start
+    stats_lock.acquire()
+    try:
+        # Update stats
+        update_stats(elapsed)
+        # Create the document
+        create_document()
+    finally:
+        # Release even when an update fails; otherwise every later
+        # acquire() of stats_lock (e.g. in the ORCONN handler) would block
+        # forever.
+        stats_lock.release()
+    # Write to file, TODO: Write to Tor-dir: data/status/
+    write_file(file("./data/bw-informer/bw-document", "w"))
+    # Some debugging
+    plog("INFO", "Created new document for the last interval (" + str(elapsed) + ") seconds\n") # + bw_status)
+    # Reschedule
+    start = time.time()
+    s.enter(interval, 1, do_work, (s,))
+# Run a scheduler that does work every interval
+def start_sched(c):
+    """Load the router list, then run do_work() every 'interval' seconds.
+
+    c -- connection object providing get_network_status()
+
+    Blocks in the scheduler loop until interrupted from the keyboard.
+    """
+    # Without this declaration the two assignments below only created
+    # locals, so the first interval seen by do_work() was measured from
+    # module import time and included the router download.
+    global start, total_start
+    # Get the network status
+    nslist = c.get_network_status()
+    read_routers(c, nslist)
+    # Setup scheduler
+    s = sched.scheduler(time.time, time.sleep)
+    start = time.time()
+    total_start = time.time()
+    s.enter(interval, 1, do_work, (s,))
+    try:
+        s.run()
+    except KeyboardInterrupt:
+        pass
+# run()-method for one client-request
+def client_thread(channel, details):
+    """Send the current status document to one client and close the socket.
+
+    channel -- connected socket returned by accept()
+    details -- the peer address pair (host, port) returned by accept()
+    """
+    try:
+        # send() may transmit only part of the buffer; sendall() keeps
+        # going until the whole document is out or an error is raised.
+        channel.sendall(bw_status)
+    finally:
+        # Close the socket even if the peer went away mid-transfer
+        channel.close()
+    plog("INFO", "Sent status to: " + details[0] + ":" + str(details[1]))
+# run()-method of the server-thread
+def start_server():
+    """Accept TCP clients on (listen_host, listen_port), one thread per client.
+
+    Each accepted connection is handed to client_thread().  The listening
+    socket is registered with atexit so that cleanup() closes it on exit.
+    """
+    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    server.bind((listen_host, listen_port))
+    server.listen(5)
+    atexit.register(cleanup, *(server,))
+    plog("INFO", "Listening on " + listen_host + ":" + str(listen_port))
+    # Have the server serve "forever":
+    while True:
+        channel, details = server.accept()
+        if not channel: break
+        # Hand the socket and address over as thread arguments.  A lambda
+        # would look up 'channel'/'details' only when the thread actually
+        # runs, by which time the loop may have rebound them to the next
+        # connection.
+        thr = threading.Thread(target=client_thread, args=(channel, details))
+        thr.setName("Client-Connection: " + details[0])
+        thr.start()
+# Close some given s (socket, connection, ...)
+def cleanup(x):
+    """Log that a socket/connection is being closed, then close it.
+
+    x -- any object with a close() method; registered as an atexit hook
+         for the server socket and for the Tor control connection.
+    """
+    plog("INFO", "Closing socket/connection")
+    x.close()
+# Main function
+def main(argv):
+ # Connect to the Tor control port, install the LinkHandler, start the
+ # status server in a daemon thread and then run the measuring scheduler.
+ # argv -- command-line arguments; not used in the visible code
+ plog("INFO", "bw-informer v" + VERSION)
+ # Create connection to Tor
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.connect((control_host, control_port))
+ c = TorCtl.Connection(s)
+ plog("INFO", "Successfully connected to running Tor process")
+ # Set LinkHandler here
+ c.set_event_handler(LinkHandler(c))
+ # Close connection on exit
+ atexit.register(cleanup, *(c,))
+ # Start the connection thread
+ c.launch_thread()
+ c.authenticate()
+ # Listen to some events
+ # NOTE(review): the set_events() call below is truncated in this copy of
+ # the file (the remaining list entries and the closing "])" are missing).
+ # LinkHandler implements BW, ORCONN, NS and NEWDESC callbacks, so those
+ # are presumably the events requested -- restore from the repository.
+ c.set_events([TorCtl.EVENT_TYPE.ORCONN,
+ # TODO: Set extra-info for descriptor here
+ # Start server thread
+ thr = threading.Thread(None, lambda: start_server())
+ thr.setName("BW-Server")
+ # Daemon thread: does not keep the process alive once main() returns
+ thr.setDaemon(1)
+ thr.start()
+ # Start the actual monitor here
+ # (blocks inside the scheduler loop of start_sched())
+ start_sched(c)
+# Program entry point
+if __name__ == '__main__':
+ main(sys.argv)
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# This is a "top-like" interface for Tor information
+# Its goal at the start is to just tell you basic information
+# In the future, you may be able to control Tor with it.
+# See this for some of the original ideas:
+# http://archives.seul.org/or/dev/Jan-2008/msg00005.html
+# A typical output of moniTor could look like this (with some fake data
+# for the purpose of this example):
+# ~ Name/ID: gabelmoo 6833 3D07 61BC F397 A587 A0C0 B963 E4A9 E99E C4D3
+# ~ Version: (r13077) on Linux x86_64
+# ~ Config: /home/tor/gabelmoo/torrc, Exit policy: no exit allowed
+# ~ IP address:, OR port: 443, Dir port: 80
+# ~ CPU: 9.0% this tor, 3.4% other processes, 87.6% idle
+# ~ Mem: 49.9% this tor, 2.0% other processes, 48.1% free
+# ~ Connections: 1090 OR conns, 320 Dir conns
+# ~ Bandwidth: 1.2 MB/s current, 1.3 MB/s avg
+# ~ Recent events (see also /home/tor/gabelmoo/monitor.log):
+# ~ 14:30:01 [warn] Consensus does not include configured authority 'moria
+# ~ 14:30:01 [warn] Consensus does not include configured authority 'ides'
+# ~ 14:30:01 [warn] 0 unknown, 0 missing key, 2 good, 0 bad, 1 no signatur
+# ~ 14:30:01 [warn] Not enough info to publish pending consensus
+__author__ = "Jacob Appelbaum"
+__version__ = "0.1-2008_01_16"
+__copyright__ = "http://www.torproject.org/Jacob Appelbaum 2008"
+import curses
+import time
+import sys
+import socket
+# Hack.. Can also set PYTHONPATH..
+# http://docs.python.org/tut/node8.html#searchPath
+from TorCtl import TorCtl, TorUtil
+from TorCtl.TorCtl import *
+# Parse authenticate string from file here
+#moniTorConf = "/etc/moniTor.conf"
+#authSecret = open(moniTorConf).read().strip()
+authSecret = ""
+def parse_config():
+    """Placeholder for reading the moniTor configuration.
+
+    Currently does nothing and returns None; the intended logic is kept
+    below as commented-out code.
+    """
+    #moniTorConf = "/etc/moniTor.conf"
+    #authSecret = open(moniTorConf).read().strip()
+    #authSecret = ""
+    return None
+def create_oracle(host,port):
+    """ Create a useful TorCtl object
+
+    host -- host name or address of the Tor control port
+    port -- control port number (integer)
+
+    Connects, starts the connection thread and authenticates with the
+    module-level authSecret.  Returns (connection, thread).
+    """
+    # Report the parameters actually used; the original printed the
+    # globals sh/sp, which only exist when the file runs as a script.
+    print "I'm going to connect to %s and connect to port %i" %(host,port)
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.connect((host,port))
+    oracle = Connection(s)
+    oracle_thread = oracle.launch_thread()
+    oracle.authenticate(authSecret)
+    return oracle, oracle_thread
+# Much like run_example from TorCtl
+def collect_status(oracle):
+    """ Query a fixed set of static GETINFO keys from the TorCtl object
+
+    oracle -- object whose get_info(key) returns a mapping containing key
+
+    Returns (static_info, static_keys): the key -> value dictionary and
+    the list of keys that were queried, in query order.
+    """
+    # add name/id, exit policy, or-port, dir-port
+    static_keys = ['version', 'config-file', 'address', 'fingerprint',
+                   'exit-policy/default', 'accounting/enabled']
+    static_info = {}
+    for name in static_keys:
+        static_info[name] = oracle.get_info(name)[name]
+    # Dynamic information can be collected by using our returned socket
+    return static_info, static_keys
+if __name__ == '__main__':
+ # Script body: connect to the control port, fetch the static info once,
+ # then print static and dynamic info in a loop.
+ # NOTE(review): indentation was collapsed in this copy, so it is not
+ # visible which statements belong to the else branch below.
+ # NOTE(review): as written, any command-line argument leads to the usage
+ # message and exit(1); the address is only ever the appended default.
+ if len(sys.argv) > 1:
+ print "Syntax: ",sys.argv[0]
+ sys.exit(1)
+ else:
+ sys.argv.append("localhost:9051")
+ parse_config()
+ sh,sp = parseHostAndPort(sys.argv[1])
+ torctl_oracle, torctl_oracle_thread = create_oracle(sh,sp)
+ static_info, static_keys, = collect_status(torctl_oracle)
+ # Number of connections, current bw
+ dynamic_keys = ['version', 'config-file', 'address', 'fingerprint']
+ torctl_oracle.set_event_handler(DebugEventHandler())
+ # NOTE(review): the set_events() call below is truncated in this copy
+ # (remaining list entries and the closing "])" are missing).
+ torctl_oracle.set_events([EVENT_TYPE.STREAM, EVENT_TYPE.CIRC,
+ while True:
+ # Populate the dynamic info each run
+ dynamic_info = dict([(key, torctl_oracle.get_info(key)[key]) for key in dynamic_keys])
+ # Now we can draw a few interesting things to the screen
+ for key in static_info:
+ print key + " is " + static_info[key]
+ for key in dynamic_info:
+ print key + " is " + dynamic_info[key]
+ time.sleep(1)
+ # So ghetto, so ghetto!
+ # NOTE(review): 'os' is not among the imports visible in this file;
+ # presumably it arrives through a star import -- verify.
+ os.system('clear')
@@ -0,0 +1,227 @@
+# Nodemon - Tor node monitor
+Nodemon - Tor node monitor
+import sys
+from TorCtl import *
+import socket
+import traceback
+import re
+from TorCtl.TorUtil import control_port, control_host
+from TorCtl.TorUtil import *
+import sched, time
+import thread
+class Reason:
+    """Counters for one close/failure reason of a router's connections."""
+
+    # Class-level defaults; "+=" on an instance creates a per-instance value
+    ncircs = 0
+    count = 0
+
+    def __init__(self, reason):
+        """Remember the reason string these counters belong to."""
+        self.reason = reason
+class RouterStats(TorCtl.Router):
+    """Per-router failure reasons and traffic counters.
+
+    NOTE(review): indentation was lost in this copy of the file.  The
+    counters carry no "self." and are read through instances elsewhere,
+    so they are taken to be class attributes -- confirm.
+    """
+
+    # Class-level defaults shared until an instance first does "+=" on them
+    tot_ncircs = 0
+    tot_count = 0
+    tot_read = 0
+    tot_wrote = 0
+    running_read = 0
+    running_wrote = 0
+    tot_age = 0
+
+    # Behold, a "Promotion Constructor"!
+    # Also allows null superclasses! Python is awesome
+    def __init__(self, r=None):
+        """Create stats, optionally adopting the attribute dict of r.
+
+        r -- optional object whose __dict__ becomes (is shared with) this
+             instance's __dict__; without it only 'down' is set, to 0.
+        """
+        if r:
+            self.__dict__ = r.__dict__
+        else:
+            self.down = 0
+        self.reasons = {} # For a fun time, move this outside __init__
+# Per-router stats: nickname/endpoint string -> RouterStats
+errors = {}
+# Acquired by read_routers(), the ORCONN handler and save_stats()
+# around their accesses to 'errors'
+errors_lock = thread.allocate_lock()
+# Filled by read_routers(): ns.idhex -> ns.nickname and the reverse
+key_to_name = {}
+name_to_key = {}
+# TODO: Move these to config file
+# NOTE(review): max_detach is not referenced anywhere in the visible code
+max_detach = 3
+def read_routers(c, nslist):
+ # Record idhex<->nickname mappings for every entry of nslist and store a
+ # RouterStats built from c.get_router(ns) in 'errors'.
+ # c -- connection object providing get_router()
+ # nslist -- iterable of entries with .idhex, .nickname and .flags
+ # NOTE(review): indentation was collapsed in this copy; it is not visible
+ # whether "errors[r.nickname] = r" is inside the
+ # "if ns.nickname in errors" branch -- confirm against the repository.
+ # Counts entries for which TorCtl.ErrorReply was raised; never read again
+ bad_key = 0
+ errors_lock.acquire()
+ for ns in nslist:
+ try:
+ key_to_name[ns.idhex] = ns.nickname
+ name_to_key[ns.nickname] = ns.idhex
+ r = RouterStats(c.get_router(ns))
+ if ns.nickname in errors:
+ if errors[ns.nickname].idhex != r.idhex:
+ plog("NOTICE", "Router "+r.nickname+" has multiple keys: "
+ +errors[ns.nickname].idhex+" and "+r.idhex)
+ errors[r.nickname] = r # XXX: We get names only from ORCONN :(
+ except TorCtl.ErrorReply:
+ bad_key += 1
+ if "Running" in ns.flags:
+ plog("INFO", "Running router "+ns.nickname+"="
+ +ns.idhex+" has no descriptor")
+ pass
+ # Any other error is printed and the loop moves on to the next entry
+ except:
+ traceback.print_exception(*sys.exc_info())
+ continue
+ errors_lock.release()
+# Make eventhandler
+class NodeHandler(TorCtl.EventHandler):
+ # Event handler that records ORCONN traffic and close/failure reasons in
+ # 'errors'; NS/NEWDESC events refresh the router tables.
+ # NOTE(review): leading whitespace was collapsed in this copy of the
+ # file, so statement nesting has to be inferred -- confirm against the
+ # repository version before changing any logic.
+ def __init__(self, c):
+ TorCtl.EventHandler.__init__(self)
+ self.c = c
+ def or_conn_status_event(self, o):
+ # First normalise o.endpoint to the key used in 'errors':
+ # "$..." not in key_to_name -> "AllClients:HASH"
+ # "$..." in key_to_name -> the mapped nickname
+ # other name not in name_to_key -> "AllClients:IP"
+ # XXX: Count all routers as one?
+ if re.search(r"^\$", o.endpoint):
+ if o.endpoint not in key_to_name:
+ o.endpoint = "AllClients:HASH"
+ else: o.endpoint = key_to_name[o.endpoint]
+ elif o.endpoint not in name_to_key:
+ plog("DEBUG", "IP? " + o.endpoint)
+ o.endpoint = "AllClients:IP"
+ # READ/WRITE events feed the running byte counters
+ if o.status == "READ" or o.status == "WRITE":
+ #plog("DEBUG", "Read: " + str(read) + " wrote: " + str(wrote))
+ errors_lock.acquire()
+ if o.endpoint not in errors:
+ plog("NOTICE", "Buh?? No "+o.endpoint)
+ errors[o.endpoint] = RouterStats()
+ errors[o.endpoint].nickname = o.endpoint
+ errors[o.endpoint].running_read += o.read_bytes
+ errors[o.endpoint].running_wrote += o.wrote_bytes
+ errors_lock.release()
+ # CLOSED/FAILED events are counted per reason and in the totals
+ if o.status == "CLOSED" or o.status == "FAILED":
+ errors_lock.acquire()
+ if o.endpoint not in errors:
+ plog("NOTICE", "Buh?? No "+o.endpoint)
+ errors[o.endpoint] = RouterStats()
+ errors[o.endpoint].nickname = o.endpoint
+ if o.status == "FAILED" and not errors[o.endpoint].down:
+ o.status = o.status + "(Running)"
+ # NOTE(review): this concatenation assumes o.reason is a string, while
+ # the debug code below treats it as possibly empty -- confirm.
+ o.reason = o.status+":"+o.reason
+ if o.reason not in errors[o.endpoint].reasons:
+ errors[o.endpoint].reasons[o.reason] = Reason(o.reason)
+ errors[o.endpoint].reasons[o.reason].ncircs += o.ncircs
+ errors[o.endpoint].reasons[o.reason].count += 1
+ errors[o.endpoint].tot_ncircs += o.ncircs
+ errors[o.endpoint].tot_count += 1
+ if o.age: errors[o.endpoint].tot_age += o.age
+ if o.read_bytes: errors[o.endpoint].tot_read += o.read_bytes
+ if o.wrote_bytes: errors[o.endpoint].tot_wrote += o.wrote_bytes
+ errors_lock.release()
+ # NOTE(review): presumably the else of the CLOSED/FAILED test, i.e. only
+ # those events reach the debug output below -- confirm nesting.
+ else: return
+ # Debug output: each optional field becomes "NAME=value" or ""
+ if o.age: age = "AGE="+str(o.age)
+ else: age = ""
+ if o.read_bytes: read = "READ="+str(o.read_bytes)
+ else: read = ""
+ if o.wrote_bytes: wrote = "WRITTEN="+str(o.wrote_bytes)
+ else: wrote = ""
+ if o.reason: reason = "REASON="+o.reason
+ else: reason = ""
+ if o.ncircs: ncircs = "NCIRCS="+str(o.ncircs)
+ else: ncircs = ""
+ plog("DEBUG",
+ " ".join((o.event_name, o.endpoint, o.status, age, read, wrote,
+ reason, ncircs)))
+ def ns_event(self, n):
+ # Re-read the routers listed in the event
+ read_routers(self.c, n.nslist)
+ def new_desc_event(self, d):
+ # One get_network_status() request per id in the event
+ for i in d.idlist: # Is this too slow?
+ read_routers(self.c, self.c.get_network_status("id/"+i))
+def bw_stats(key, f):
+    """Write one "nickname=value" line per router, highest value first.
+
+    key -- function mapping a router-stats object to the value that is
+           both sorted on and written
+    f   -- open, writable file object; it is closed on return, also when
+           a write fails
+
+    Reads the global 'errors'; save_stats() calls this while holding
+    errors_lock.
+    """
+    # key=/reverse= replaces the cmp-function sort: key() runs once per
+    # router while sorting and equal values keep their relative order.
+    routers = sorted(errors.values(), key=key, reverse=True)
+    try:
+        for r in routers:
+            f.write(r.nickname+"="+str(key(r))+"\n")
+    finally:
+        f.close()
+def save_stats(s):
+ # Write all ranking files under ./data/nodemon/ and re-register this
+ # function with the scheduler to run again in 60 seconds.
+ # s -- the sched.scheduler to re-register with
+ # Each bw_stats() call below writes one file and closes it.
+ # NOTE(review): the lock is held during all file I/O and is not released
+ # if any write raises; in that case the function is not rescheduled
+ # either.
+ errors_lock.acquire()
+ # Yes yes, adding + 0.005 to age is bloody.. but who cares,
+ # (the offset keeps the divisions below from dividing by a zero tot_age)
+ # 1. Routers sorted by bytes read
+ bw_stats(lambda x: x.tot_read, file("./data/nodemon/r_by_rbytes", "w"))
+ # 2. Routers sorted by bytes written
+ bw_stats(lambda x: x.tot_wrote, file("./data/nodemon/r_by_wbytes", "w"))
+ # 3. Routers sorted by tot bytes
+ bw_stats(lambda x: x.tot_read+x.tot_wrote,
+ file("./data/nodemon/r_by_tbytes", "w"))
+ # 4. Routers sorted by downstream bw
+ bw_stats(lambda x: x.tot_read/(x.tot_age+0.005),
+ file("./data/nodemon/r_by_rbw", "w"))
+ # 5. Routers sorted by upstream bw
+ bw_stats(lambda x: x.tot_wrote/(x.tot_age+0.005),
+ file("./data/nodemon/r_by_wbw", "w"))
+ # 6. Routers sorted by total bw
+ bw_stats(lambda x: (x.tot_read+x.tot_wrote)/(x.tot_age+0.005),
+ file("./data/nodemon/r_by_tbw", "w"))
+ # Routers sorted by the running (READ/WRITE event) byte counters
+ bw_stats(lambda x: x.running_read,
+ file("./data/nodemon/r_by_rrunbytes", "w"))
+ bw_stats(lambda x: x.running_wrote,
+ file("./data/nodemon/r_by_wrunbytes", "w"))
+ bw_stats(lambda x: x.running_read+x.running_wrote,
+ file("./data/nodemon/r_by_trunbytes", "w"))
+ # Reasons file: routers ordered by tot_ncircs (descending); when both
+ # compared routers have tot_ncircs == 0, by tot_count instead
+ f = file("./data/nodemon/reasons", "w")
+ routers = errors.values()
+ def notlambda(x, y):
+ if y.tot_ncircs or x.tot_ncircs:
+ return cmp(y.tot_ncircs, x.tot_ncircs)
+ else:
+ return cmp(y.tot_count, x.tot_count)
+ routers.sort(notlambda)
+ # One "nickname ncircs/count" line per router, followed by one
+ # tab-indented "reason ncircs/count" line per recorded reason
+ for r in routers:
+ f.write(r.nickname+" " +str(r.tot_ncircs)+"/"+str(r.tot_count)+"\n")
+ for reason in r.reasons.itervalues():
+ f.write("\t"+reason.reason+" "+str(reason.ncircs)+
+ "/"+str(reason.count)+"\n")
+ errors_lock.release()
+ f.close()
+ # Run again in 60 seconds
+ s.enter(60, 1, save_stats, (s,))
+def startmon(c):
+    """Load the router list, then run save_stats() from a scheduler.
+
+    c -- connection object providing get_network_status()
+
+    The first save is scheduled 60 seconds from now; the call blocks in
+    the scheduler loop.
+    """
+    read_routers(c, c.get_network_status())
+    scheduler = sched.scheduler(time.time, time.sleep)
+    scheduler.enter(60, 1, save_stats, (scheduler,))
+    scheduler.run()
+def main(argv):
+ # Connect to the Tor control port, install the NodeHandler and start the
+ # monitor loop.
+ # argv -- command-line arguments; not used in the visible code
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.connect((control_host,control_port))
+ c = TorCtl.Connection(s)
+ c.set_event_handler(NodeHandler(c))
+ c.launch_thread()
+ c.authenticate()
+ # NOTE(review): the set_events() call below is truncated in this copy of
+ # the file (remaining list entries and the closing "])" are missing).
+ # NodeHandler implements ORCONN, NS and NEWDESC callbacks, so those are
+ # presumably the events requested -- restore from the repository.
+ c.set_events([TorCtl.EVENT_TYPE.ORCONN,
+ # Blocks inside the scheduler loop of startmon()
+ startmon(c)
+if __name__ == '__main__':
+ main(sys.argv)
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+use strict;
+use IO::Socket::INET;
+# specify the port
+my $port = 8041;
+# create the socket; stop with the system error if the port cannot be used
+my $server = IO::Socket::INET->new(Listen=>100, LocalPort=>$port, Proto=>'tcp', Reuse=>'yes')
+    or die "cannot listen on port ".$port.": ".$!."\n";
+# maximum number of payload bytes written per output line
+my $line_count = 1000000;
+# children are never waited for: let the system reap them so that they
+# do not pile up as zombies
+$SIG{CHLD} = 'IGNORE';
+# print some startup-information
+print "pid ".$$.": listening on port ".$server->sockport."\n";
+# main loop: one forked child per accepted connection
+while(my $client = $server->accept) {
+    my $pid = fork();
+    if(!defined $pid) {
+        # fork failed: drop this client but keep serving (previously the
+        # parent itself ran the child branch in this case)
+        warn "pid ".$$.": fork failed: ".$!."\n";
+        close($client);
+        next;
+    }
+    if($pid) {
+        # parent
+        close($client);
+    } else {
+        # child
+        print "pid ".$$.": accepted connection from ".$client->peerhost."\n";
+        while(my $line = <$client>) {
+            if ($line =~ /(\d+)/) {
+                # a number N requests N bytes of "X", sent in lines of at
+                # most $line_count bytes, each terminated by CRLF
+                my $counter = $1;
+                while($counter>0) {
+                    my $send = ($counter>$line_count) ? $line_count : $counter;
+                    print $client "X" x $send;
+                    print $client "\r\n";
+                    $counter -= $send;
+                }
+            }
+            elsif ($line =~ m/close/) {
+                print "pid ".$$.": closing connection to ".$client->peerhost."\n";
+                close($client);
+                exit(0);
+            }
+        }
+        # client went away without sending "close"
+        close($client);
+        # leave here: otherwise the child would fall back into the accept
+        # loop above and act as a second server
+        exit(0);
+    }
+}
