[or-cvs] r15329: Modified to test out the gamma distribution; adjust k via th (torflow/branches/gsoc2008/tools/BTAnalysis)
fallon at seul.org
fallon at seul.org
Tue Jun 17 11:32:15 UTC 2008
Author: fallon
Date: 2008-06-17 07:32:15 -0400 (Tue, 17 Jun 2008)
New Revision: 15329
Modified:
torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
Log:
Modified to test out the gamma distribution; adjust k via the -k parameter.
Modified: torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
===================================================================
--- torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-06-17 10:51:59 UTC (rev 15328)
+++ torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-06-17 11:32:15 UTC (rev 15329)
@@ -17,6 +17,7 @@
for line in self.f:
self.values += [float(line[:-1])]
self.f.close()
+ self.buckets = {}
def mean(self):
# Borrowed from TorUtil
if len(self.values) > 0:
@@ -61,23 +62,49 @@
values.sort()
count = 0
i = 1
- buckets = {}
+ self.buckets = {}
for v in values:
if v < res * i: count += 1
else:
count += 1
- buckets[res * i] = count
+ self.buckets[res * i] = count
i += 1
count = 0
f = open(histname,'w')
f.write('#build time <\t#circuits\n')
- sortedkeys = buckets.keys()
+ sortedkeys = self.buckets.keys()
sortedkeys.sort()
for b in sortedkeys:
- towrite = str(b) + '\t' + str(buckets[b]) + '\n'
+ towrite = str(b) + '\t' + str(self.buckets[b]) + '\n'
f.write(towrite)
f.close()
+ def maxlikelihood(self,k):
+ # theta estimator for gamma PDF
+ # maxlikelihood estimator
+ # theta = sum(values) / N*k
+ return sum(self.values)/(k * len(self.values))
+
+ def bayesian(self,k):
+ # bayesian estimator for gamma PDF
+ # y = sum(values)
+ # theta = y/(Nk - 1) +/- y^2/((Nk-1)^2(Nk -2))
+ y = sum(self.values)
+ N = len(self.values)
+ mean = y/(N*k - 1)
+ sdev = (y*y)/((N*k - 1)* (N*k - 1) * (N*k - 2))
+ plus = mean + sdev
+ minus = mean - sdev
+ return plus,minus
+
+## Functions that return a gnuplot function string for a given distribution
+def gamma(k,theta, N,fname):
+ # gnuplot string for gamma PDF
+ # g(x,k,B) = (x**(k - 1) * B**k * exp(-B*x))/gamma(k)
+ B = 1.0/theta
+
+ ps = fname + '(x) = '+str(N)+'*((x**' + str(k-1) + ')*(' +str(B**k)+ ')*(exp(-' + str(B) +'*x)))' +'/gamma('+str(k)+')\n'
+ return ps
def poisson(u,N,fname):
ps = fname + "(x) = " + str(N) + "*(" + str(u) + "**x)*exp(-"+str(u)+")/gamma(x + 1)\n"
return ps
@@ -86,8 +113,9 @@
ps = fname + "(x)="+str(int(N)/d)+"*(exp(-((x-"+str(u)+ ")**2)/"+str(2*d*d)+"))/sqrt(2*pi)\n"
return ps
+
def usage():
- print "shufflebt.py -n <# circuits> -f <timefile> -d <outdirname>"
+ print "shufflebt.py -n <# circuits> -f <timefile> -d <outdirname> -k <k val.>"
def setargs():
@@ -98,7 +126,7 @@
usage()
sys.exit(2)
try:
- opts,args = getopt.getopt(sys.argv[1:],"n:f:d:")
+ opts,args = getopt.getopt(sys.argv[1:],"n:f:d:k:")
except getopt.GetoptError,err:
print str(err)
usage()
@@ -111,11 +139,12 @@
usage()
elif o == '-f': filename = a
elif o == '-d': dirname = a
+ elif o == '-k': k = int(a)
else:
assert False, "Bad option"
- return ncircuits,filename,dirname
+ return ncircuits,filename,dirname,k
if __name__ == "__main__":
- ncircuits,filename,dirname = setargs()
+ ncircuits,filename,dirname,k = setargs()
print 'Num. Circuits:[',ncircuits,'] Filename:[',filename,'] Dir. Name:[',dirname,']'
# make new directory
@@ -130,6 +159,7 @@
newfile = dirname + '/' + filename + '.' + ncircuits
cmd = 'sort -R ' + filename + ' | head -n ' + ncircuits + ' > ' + newfile
+
p = popen2.Popen4(cmd)
p.wait()
print 'Done'
@@ -149,31 +179,40 @@
# create gnuplot file
print 'Creating gnuplot plot file...',
plotname = newfile + '.plt'
- plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill solid 1.00 border -1\nset style histogram clustered gap 1 title offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits'\nset ylabel '# Circuits'\nset xlabel 'time (in 100ms)'\n"
- plotstr += "set label 'std dev=" + str(stddev) + "' at 450,1380\n"
+
+ plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill solid 1.00 border -1\nset style histogram clustered gap 1 title offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (in 100ms)'\n"
+ plotstr += "set label 'std dev=" + str(stddev) + "' at 170,15\n"
# FIXME: Hrmm... http://en.wikipedia.org/wiki/Skewness? Seems like a hack
# Or better: http://en.wikipedia.org/wiki/Gamma_distribution with k=3?
# Would make sense if this is the sum of 3 paretos for the individual
# hop distributions.
- plotstr += poisson(mean*10,ncircuits,'pmean')
- plotstr += poisson(median*10,ncircuits,'pmedian')
- plotstr += poisson(mode*10,ncircuits,'pmode')
- plotstr += normal(mean*10,stddev*10,ncircuits,'nmean')
- plotstr += normal(median*10,stddev*10,ncircuits,'nmedian')
- plotstr += normal(mode*10,stddev*10,ncircuits,'nmode')
+ # Theta estimations
+ maxliketheta = s.maxlikelihood(k)
+ baytheta = s.bayesian(k)
+
+ # N is the value to multipy the probabilities by
+ N = len(s.values)
+
+ #FIX? Other potential values of N: #circuits that match mode? median? mean?
+ #print 'Mean:',mean,'Median:', median,'Mode:', mode
+ #i = float("%3.1f" % mean) # crappy way of rounding
+ #N = s.buckets[i] # num. circuits that have buildtimes
+ #close to mean/median/mode from histogram
+
+ plotstr += gamma(k,maxliketheta,N, 'maxl')
+ plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
+ plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
+
plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
- plotstr += "nmean(x) title '" + "mean=" + str(mean) + "' ,\\\n"
- plotstr += "nmedian(x) title '" + "median=" + str(median) + "' ,\\\n"
- plotstr += "nmode(x) title '" + "mode=" + str(mode) + "' ,\\\n"
+ plotstr += "maxl(x) title '" + "max. likelihood', \\\n"
+ plotstr += "bayplus(x) title '" + "bayesian theta +', \\\n"
+ plotstr += "bayminus(x) title '" + "bayesian theta -' \n"
- plotstr += "pmean(x) title '" + "mean=" + str(mean) + "' ,\\\n"
- plotstr += "pmedian(x) title '" + "median=" + str(median) + "' ,\\\n"
- plotstr += "pmode(x) title '" + "mode=" + str(mode) + "' \\\n"
+
-
f = open(plotname,'w')
f.write(plotstr)
f.close()
More information about the tor-commits
mailing list