[or-cvs] r15478: Changed the command line. Now: -sort and truncate are separa (torflow/branches/gsoc2008/tools/BTAnalysis)
fallon at seul.org
fallon at seul.org
Thu Jun 26 20:29:11 UTC 2008
Author: fallon
Date: 2008-06-26 16:29:11 -0400 (Thu, 26 Jun 2008)
New Revision: 15478
Modified:
torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
Log:
Changed the command line. Now:
- sort and truncate are separate and optional
- graphing is optional
- we can input multiple data files.
For example:
./shufflebt.py -n 100 -s -d shuffledir file1 file2 file3
will provide shuffled, truncated files in shuffledir and output paretoK and mean for file1, file2, and file3.
Modified: torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
===================================================================
--- torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-06-26 19:34:48 UTC (rev 15477)
+++ torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-06-26 20:29:11 UTC (rev 15478)
@@ -4,8 +4,10 @@
# Shuffles a list of build times and produces a pdf of n of those buildtimes,
# which are put into 100ms blocks.
# Requires gnuplot 4.2 and a version coreutils that provides sort -R
-# usage: shufflebt.py -n <# circuits> -f <timefile> -d <outdirname>
-
+# "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] <list of filenames>"
+# if outdir is not specified, the script will write files to the current directory
+# FIXME: you need to be in the directory containing the timefile when you call
+# this script, or supply an absolute path
import getopt,sys,os
import popen2
import math,copy
@@ -15,7 +17,9 @@
self.f = open(file)
self.values = []
for line in self.f:
- self.values += [float(line[:-1])]
+ line = line.split('\t')
+ self.values += [float(line[1])]
+
self.f.close()
self.buckets = {}
def mean(self):
@@ -159,37 +163,56 @@
def usage():
- print "shufflebt.py -n <# circuits> -f <timefile> -d <outdirname> -k <k val.>"
+ print "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] <list of filenames>"
+ sys.exit(1)
+def getargs():
+ # [-n <truncate to # circuits>] [-s] <list of filenames>
+ k = 3
+ sort =False
+ truncate = None
+ graph = False
+ outdirname = "." # will write to current directory if not specified
+ filenames = []
+ if len(sys.argv) < 2: usage()
+ else:
+ arglen = len(sys.argv[1:])
+ i = 0
+ while (arglen - i) > 0:
+ if sys.argv[i+1] == '-s': sort = True
+ elif sys.argv[i+1] == '-n':
+ if not sys.argv[i + 2].isdigit(): usage()
+ truncate = sys.argv[i+2]
+ i += 1
+ elif sys.argv[i + 1] == '-g': graph = True
+ elif sys.argv[i + 1] == '-k':
+ k = sys.argv[i + 2]
+ i += 1
+ elif sys.argv[i+1] == '-d':
+ outdirname = sys.argv[i + 2]
+ i += 1
+ else:
+ filenames += [sys.argv[i+1]]
+ i += 1
-def setargs():
- ncircuits = ""
- dirname = ""
- filename = ""
- if len(sys.argv[1:]) < 4:
- usage()
- sys.exit(2)
- try:
- opts,args = getopt.getopt(sys.argv[1:],"n:f:d:k:")
- except getopt.GetoptError,err:
- print str(err)
- usage()
- sys.exit(2)
- for o,a in opts:
- if o == '-n':
- if a.isdigit():
- ncircuits = a
- else:
- usage()
- elif o == '-f': filename = a
- elif o == '-d': dirname = a
- elif o == '-k': k = float(a)
- else:
- assert False, "Bad option"
- return ncircuits,filename,dirname,k
+
+ return sort, truncate,graph,outdirname,filenames,k
+
+
+def shuffle(sort,truncate,filename,newfile):
+ if not sort and truncate is None: return
+ sortlocation = '/usr/local/bin/sort'
+ if sort and truncate:
+ cmd = sortlocation + ' -R ' + filename + ' | head -n ' + truncate + ' > ' + newfile
+ elif sort and not truncate:
+ cmd = sortlocation + ' -R ' + filename + ' > ' + newfile
+ elif not sort and truncate:
+ cmd = 'cat ' + filename + ' | head -n ' + truncate + ' > ' + newfile
+
+ p = popen2.Popen4(cmd)
+ p.wait()
if __name__ == "__main__":
- ncircuits,filename,dirname,k = setargs()
- print 'Num. Circuits:[',ncircuits,'] Filename:[',filename,'] Dir. Name:[',dirname,']'
+ sort, truncate,graph,dirname,filenames,k = getargs()
# make new directory
print 'Making new directory:',dirname
@@ -198,86 +221,83 @@
else:
print 'Dir exists, not making a new one'
- # shuffle, create new file
- print 'Shuffling...',
-
- newfile = dirname + '/' + filename + '.' + ncircuits
- cmd = 'sort -R ' + filename + ' | head -n ' + ncircuits + ' > ' + newfile
-
- p = popen2.Popen4(cmd)
- p.wait()
- print 'Done'
-
- # create histogram from file
- print 'Calculating statistics and creating histogram...',
- s = Stats(newfile)
- s.makehistogram(100,newfile,newfile + '.hist')
- mean = s.mean()
- stddev = s.stddev()
- median = s.median()
- mode = s.mode()/10.0 # relies on s.makehistogram for buckets
- parK = s.paretoK(mode)
- modeN = s.modeN(mode)
- modeMean = s.modeMean(mode)
- print 'Done.'
- print 'Mean: '+str(mean)+', mode: '+str(mode)
- print 'ParK: '+str(parK)
-
- # get stats
-
- # create gnuplot file
- print 'Creating gnuplot plot file...',
- plotname = newfile + '.plt'
-
- plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill solid 1.00 border -1\nset style histogram clustered gap 1 title offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (in 100ms)'\n"
- plotstr += "set label 'std dev=" + str(stddev) + "' at 170,15\n"
-
- # FIXME: Hrmm... http://en.wikipedia.org/wiki/Skewness? Seems like a hack
- # Or better: http://en.wikipedia.org/wiki/Gamma_distribution with k=3?
- # Would make sense if this is the sum of 3 paretos for the individual
- # hop distributions.
-
- # Theta estimations
- maxliketheta = s.maxlikelihood(k)
- baytheta = s.bayesian(k)
-
- # N is the value to multipy the probabilities by
- N = len(s.values)
-
- #FIX? Other potential values of N: #circuits that match mode? median? mean?
- #print 'Mean:',mean,'Median:', median,'Mode:', mode
- #i = float("%3.0f" % int(mean * 10)) # crappy way of rounding
- #i = int(mode * 10)
- #N = s.buckets[i] # num. circuits that have buildtimes
- #close to mean/median/mode from histogram
-
-# plotstr += gamma(k,maxliketheta,N, 'maxl')
-# plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
-# plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
-
- plotstr += pareto(parK,mode*10,modeN,'pareto')
- plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
-
- plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
-
- plotstr += "pareto(x) title '" + "Shifted Pareto', \\\n"
- plotstr += "expShifted(x) title '" + "Shifted Exp' \n"
-
-
- f = open(plotname,'w')
- f.write(plotstr)
- f.close()
- print 'Done'
-
- # plot the file
- print 'Plotting...',
- p = popen2.Popen4('gnuplot ' + plotname)
-# p = popen2.Popen4('gp4.2 ' + plotname)
-
- p.wait()
- for err in p.fromchild:
- print err
- print 'Done'
-
+ for filename in filenames:
+ print 'Processing',filename
+ if truncate and sort or truncate and not sort:
+ newfile = dirname + '/' + filename + '.' + truncate + '.shuffled'
+ elif sort and not truncate:
+ newfile = dirname + '/' + filename + '.shuffled'
+ else:
+ newfile = filename
+ # shuffle, create new file
+ shuffle(sort,truncate,filename,newfile)
+ # create histogram from file
+ s = Stats(newfile)
+ s.makehistogram(100,newfile,newfile + '.hist')
+ mean = s.mean()
+ stddev = s.stddev()
+ median = s.median()
+ mode = s.mode()/10.0 # relies on s.makehistogram for buckets
+ parK = s.paretoK(mode)
+ modeN = s.modeN(mode)
+ modeMean = s.modeMean(mode)
+ print 'Mean: '+str(mean)+', mode: '+str(mode)
+ print 'ParK: '+str(parK)
+
+ # get stats
+
+ if graph:
+ # create gnuplot file
+ ncircuits = str(len(s.values))
+ plotname = newfile + '.plt'
+
+ plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill solid 1.00 border -1\nset style histogram clustered gap 1 title offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (in 100ms)'\n"
+ plotstr += "set label 'std dev=" + str(stddev) + "' at 170,15\n"
+ # FIXME: Hrmm... http://en.wikipedia.org/wiki/Skewness? Seems like a hack
+ # Or better: http://en.wikipedia.org/wiki/Gamma_distribution with k=3?
+ # Would make sense if this is the sum of 3 paretos for the individual
+ # hop distributions.
+
+ # Theta estimations
+ maxliketheta = s.maxlikelihood(k)
+ baytheta = s.bayesian(k)
+
+ # N is the value to multipy the probabilities by
+ N = len(s.values)
+
+ #FIX? Other potential values of N: #circuits that match mode? median? mean?
+ #print 'Mean:',mean,'Median:', median,'Mode:', mode
+ #i = float("%3.0f" % int(mean * 10)) # crappy way of rounding
+ #i = int(mode * 10)
+ #N = s.buckets[i] # num. circuits that have buildtimes
+ #close to mean/median/mode from histogram
+
+ # plotstr += gamma(k,maxliketheta,N, 'maxl')
+ # plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
+ # plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
+
+ plotstr += pareto(parK,mode*10,modeN,'pareto')
+ plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
+
+ plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
+
+ plotstr += "pareto(x) title '" + "Shifted Pareto', \\\n"
+ plotstr += "expShifted(x) title '" + "Shifted Exp' \n"
+
+
+ f = open(plotname,'w')
+ f.write(plotstr)
+ f.close()
+
+ # plot the file
+ # p = popen2.Popen4('gnuplot ' + plotname)
+ p = popen2.Popen4('gp4.2 ' + plotname)
+
+ p.wait()
+ for err in p.fromchild:
+ print err
+
+
+
More information about the tor-commits
mailing list