[or-cvs] r15623: -Fixed (?) x-axis problem: x-axis of graph is now in ms hist (torflow/branches/gsoc2008/tools/BTAnalysis)
fallon at seul.org
fallon at seul.org
Thu Jul 3 10:32:23 UTC 2008
Author: fallon
Date: 2008-07-03 06:32:23 -0400 (Thu, 03 Jul 2008)
New Revision: 15623
Modified:
torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
Log:
-Fixed (?) x-axis problem:
x-axis of graph is now in ms
histogram indices are now in ms
Stats.values are now in ms (buildtimes are in s, so we multiply by 1000)
-pareto is off (far lower than the curve), though it seems to be more in line with the mode
Modified: torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
===================================================================
--- torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-07-03 10:25:01 UTC (rev 15622)
+++ torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-07-03 10:32:23 UTC (rev 15623)
@@ -17,7 +17,7 @@
self.values = []
for line in self.f:
line = line.split('\t')
- self.values += [float(line[1])]
+ self.values += [float(line[1]) * 1000]
self.f.close()
self.buckets = {}
@@ -47,17 +47,17 @@
def mode(self): # Requires makehistogram runs first
counts = {}
- greatest = 0
greatest_val = 0
+ greatest_idx = 0
for v in self.buckets.keys():
- if self.buckets[v] > greatest:
- greatest_val = v
- greatest = self.buckets[v]
- return greatest_val
+ if self.buckets[v] > greatest_val:
+ greatest_idx = v
+ greatest_val = self.buckets[v]
+ return greatest_idx
# XXX: This doesn't seem to work for small #s of circuits
- def makehistogram(self,res,ncircuits,histname):
- res = res /1000.0 # convert ms to s
+ def makehistogram(self,res,histname):
+ #res = res /1000.0 # convert ms to s
values = copy.copy(self.values)
values.sort()
count = 0
@@ -67,7 +67,8 @@
if v < res * i: count += 1
else:
count += 1
- self.buckets[int(res * i * 10)] = count
+ self.buckets[int(res * i)] = count
+ #self.buckets[int(res * i * 10)] = count
i += 1
count = 0
f = open(histname,'w')
@@ -136,6 +137,7 @@
# gnuplot string for shifted, normalized exponential PDF
# g(x,k,B) = (N * k*(Xm**k)/x**(k+1)))
ps = fname+'(x)=(x<'+str(Xm)+') ? 0 : ('+str(N*k*(Xm**k))+'/x**('+str(k+1)+'))\n'
+ #ps = fname+'(x)='+str(N*k*(Xm**k))+'/x**('+str(k+1)+')\n'
return ps
def exp(mean,shift,N,fname):
@@ -165,6 +167,24 @@
print "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] [-r <res in ms>] <list of filenames>"
sys.exit(1)
+def intermediate_filename(infile,shuffle,truncate,outdir):
+
+ if not shuffle and not truncate: return os.path.abspath(infile)
+
+ intermediate = [os.path.join(os.path.abspath(outdir),os.path.basename(infile))]
+ if truncate: intermediate.append(str(truncate))
+ if shuffle:
+ intermediate.append('shuffled')
+ return '.'.join(intermediate)
+
+def histogram_basefilename(infile,shuffle,truncate,res,outdir):
+ name = [os.path.join(os.path.abspath(outdir),os.path.basename(infile))]
+
+ if truncate: name.append(str(truncate))
+ if shuffle: name.append('shuffled')
+ name.append('res' + str(res))
+ return '.'.join(name)
+
def getargs():
# [-n <truncate to # circuits>] [-s] <list of filenames>
k = 3
@@ -235,30 +255,33 @@
for filename in filenames:
print 'Processing',filename
+ print '------------------------------'
if not os.path.exists(filename):
print filename,'is not a valid path'
continue
- if truncate and sort or truncate and not sort:
- newfile = os.path.join(dirname, os.path.basename(filename) + '.' + truncate + '.shuffled')
- elif sort and not truncate:
- newfile = os.path.join(dirname , os.path.basename(filename) + '.shuffled')
- else:
- newfile = filename
- print newfile
+# if truncate and sort or truncate and not sort:
+# newfile = os.path.join(dirname, os.path.basename(filename) + '.' + truncate + '.shuffled')
+# elif sort and not truncate:
+# newfile = os.path.join(dirname , os.path.basename(filename) + '.shuffled')
+# else:
+# newfile = filename
+ newfile = intermediate_filename(filename,sort,truncate,dirname)
# shuffle, create new file
shuffle(sort,truncate,filename,newfile)
# create histogram from file
s = Stats(newfile)
- if not sort and not truncate:
- histfilename = os.path.join(dirname ,os.path.basename(newfile )+ '.res' + str(res) + '.hist')
- else:
- histfilename = newfile + '.res' + str(res) +'.hist'
- s.makehistogram(res,newfile,histfilename)
+ histfilename = histogram_basefilename(filename,sort,truncate,res,dirname)
+# if not sort and not truncate:
+# histfilename = os.path.join(dirname ,os.path.basename(newfile )+ '.res' + str(res) + '.hist')
+# else:
+# histfilename = newfile + '.res' + str(res) +'.hist'
+ s.makehistogram(res,histfilename + '.hist')
mean = s.mean()
stddev = s.stddev()
median = s.median()
- mode = s.mode()/10.0 # relies on s.makehistogram for buckets
+ #mode = s.mode()/10.0 # relies on s.makehistogram for buckets
+ mode = s.mode() # relies on s.makehistogram for buckets
parK = s.paretoK(mode)
modeN = s.modeN(mode)
modeMean = s.modeMean(mode)
@@ -270,14 +293,16 @@
if graph:
# create gnuplot file
- if not sort and not truncate:
- newfile = os.path.join(dirname, newfile)
- plotname = newfile + '.plt'
+# if not sort and not truncate:
+# newfile = os.path.join(dirname, newfile)
+# plotname = newfile + '.plt'
+ plotname = histfilename + '.plt'
ncircuits = str(len(s.values))
-
- plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill solid 1.00 border -1\nset style histogram clustered gap 1 title offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (in " + str(res) + " ms)'\n"
- plotstr += "set label 'std dev=" + str(stddev) + "' at 170,15\n"
+ xtics = max(s.values) / 10.0
+ plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + histfilename + ".png'\nset style fill solid 1.00 border -1\nset style histogram clustered gap 1 title offset character 0, 0, 0\nset datafile missing '-'\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (ms)'\nset xtics " + str(xtics) + " \n"
+ plotstr += "set label 'std dev=" + str(stddev) + "' at 25000,100\n"
+
# FIXME: Hrmm... http://en.wikipedia.org/wiki/Skewness? Seems like a hack
# Or better: http://en.wikipedia.org/wiki/Gamma_distribution with k=3?
# Would make sense if this is the sum of 3 paretos for the individual
@@ -301,11 +326,13 @@
# plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
# plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
- plotstr += pareto(parK,mode*10,modeN,'pareto')
+ plotstr += pareto(parK,mode,modeN*10,'pareto')
plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
-
+
+ #plotstr += pareto(parK,mode*10,modeN,'pareto')
+ #plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
# plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
- plotstr += "plot '" + histfilename + "' using 2,\\\n"
+ plotstr += "plot '" + histfilename + ".hist' using 1:2 with boxes,\\\n"
plotstr += "pareto(x) title '" + "Shifted Pareto', \\\n"
plotstr += "expShifted(x) title '" + "Shifted Exp' \n"
More information about the tor-commits
mailing list