[tor-commits] [depictor/master] Update the parseOldConsensuses script
tom at torproject.org
tom at torproject.org
Wed Dec 22 02:26:09 UTC 2021
commit 97bea9d85284ef38b589cd75d599d39ef5cd64db
Author: Tom Ritter <tom at ritter.vg>
Date: Tue Dec 21 21:25:46 2021 -0500
Update the parseOldConsensuses script
Make print statements python3-compatible
DescriptorReader was removed from stem, replace it
Add bastet to the known dirauths
convert tabs to spaces and completely fuck up the diff
---
parseOldConsensuses.py | 528 +++++++++++++++++++++++++------------------------
1 file changed, 265 insertions(+), 263 deletions(-)
diff --git a/parseOldConsensuses.py b/parseOldConsensuses.py
index defa2f5..e72d7a9 100755
--- a/parseOldConsensuses.py
+++ b/parseOldConsensuses.py
@@ -18,293 +18,295 @@ import stem.util.conf
import stem.util.enum
from stem import Flag
-from stem.descriptor.reader import DescriptorReader
-from stem.util.lru_cache import lru_cache
def get_dirauths_in_tables():
- return "faravahar, gabelmoo, dizum, moria1, urras, maatuska, longclaw, tor26, dannenberg, turtles".split(", ")
+ return "faravahar, gabelmoo, dizum, moria1, urras, maatuska, longclaw, tor26, dannenberg, turtles, bastet".split(", ")
def get_dirauth_from_filename(filename):
- key = filename.split('-')
- if len(key) < 9:
- raise Exception("Strange filename: " + filename)
-
- key = key[-2]
- if key == "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97":
- return "faravahar"
- elif key == "ED03BB616EB2F60BEC80151114BB25CEF515B226":
- return "gabelmoo"
- elif key == "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58":
- return "dizum"
- elif key == "D586D18309DED4CD6D57C18FDB97EFA96D330566":
- return "moria1"
- elif key == "80550987E1D626E3EBA5E5E75A458DE0626D088C":
- return "urras"
- elif key == "49015F787433103580E3B66A1707A00E60F2D15B":
- return "maatuska"
- elif key == "23D15D965BC35114467363C165C4F724B64B4F66":
- return "longclaw"
- elif key == "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4":
- return "tor26"
- elif key == "0232AF901C31A04EE9848595AF9BB7620D4C5B2E" or key == "585769C78764D58426B8B52B6651A5A71137189A":
- return "dannenberg"
- elif key == "27B6B5996C426270A5C95488AA5BCEB6BCC86956":
- return "turtles"
- else:
- raise Exception("Unexpcected dirauth key: " + key + " " + filename)
+ key = filename.split('-')
+ if len(key) < 9:
+ raise Exception("Strange filename: " + filename)
+
+ key = key[-2]
+ if key == "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97":
+ return "faravahar"
+ elif key == "ED03BB616EB2F60BEC80151114BB25CEF515B226":
+ return "gabelmoo"
+ elif key == "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58":
+ return "dizum"
+ elif key == "D586D18309DED4CD6D57C18FDB97EFA96D330566":
+ return "moria1"
+ elif key == "80550987E1D626E3EBA5E5E75A458DE0626D088C":
+ return "urras"
+ elif key == "49015F787433103580E3B66A1707A00E60F2D15B":
+ return "maatuska"
+ elif key == "23D15D965BC35114467363C165C4F724B64B4F66":
+ return "longclaw"
+ elif key == "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4":
+ return "tor26"
+ elif key == "0232AF901C31A04EE9848595AF9BB7620D4C5B2E" or key == "585769C78764D58426B8B52B6651A5A71137189A":
+ return "dannenberg"
+ elif key == "27B6B5996C426270A5C95488AA5BCEB6BCC86956":
+ return "turtles"
+ elif key == "27102BC123E7AF1D4741AE047E160C91ADC76B21":
+ return "bastet"
+ else:
+ raise Exception("Unexpcected dirauth key: " + key + " " + filename)
def unix_time(dt):
return (dt - datetime.datetime.utcfromtimestamp(0)).total_seconds() * 1000.0
def ut_to_datetime(ut):
- return datetime.datetime.utcfromtimestamp(ut / 1000)
+ return datetime.datetime.utcfromtimestamp(ut / 1000)
def ut_to_datetime_format(ut):
- return ut_to_datetime(ut).strftime("%Y-%m-%d-%H-%M-%S")
+ return ut_to_datetime(ut).strftime("%Y-%m-%d-%H-%M-%S")
def get_time_from_filename(filename):
- voteTime = filename.split('-')
- if len(voteTime) < 7:
- raise Exception("Strange filename: " + filename)
+ voteTime = filename.split('-')
+ if len(voteTime) < 7:
+ raise Exception("Strange filename: " + filename)
- v = [int(x) for x in filename.split('-')[0:6]]
- voteTime = datetime.datetime(v[0], v[1], v[2], v[3], v[4], v[5])
- voteTime = unix_time(voteTime)
- return voteTime
+ v = [int(x) for x in filename.split('-')[0:6]]
+ voteTime = datetime.datetime(v[0], v[1], v[2], v[3], v[4], v[5])
+ voteTime = unix_time(voteTime)
+ return voteTime
def dirauth_relay_votes(directory, dirAuths, dbc):
- dirauth_columns = ""
- dirauth_columns_questions = ""
- for d in dirAuths:
- dirauth_columns += d + "_known integer, " + d + "_running integer, " + d + "_bwauth integer, "
- dirauth_columns_questions += ",?,?,?"
-
- dbc.execute("CREATE TABLE IF NOT EXISTS vote_data(date integer, " + dirauth_columns + "PRIMARY KEY(date ASC))")
- dbc.commit()
-
- votes = {}
- for root, dirs, files in os.walk(directory):
- for f in files:
- filepath = os.path.join(root, f)
- print filepath
-
- if '"' in f:
- raise Exception("Potentially malicious filename")
- elif "votes-" in f and ".tar" in f:
- continue
- elif "consensuses-" in f and ".tar" in f:
- continue
- elif "-vote-" not in f:
- continue
-
- voteTime = get_time_from_filename(f)
- if voteTime not in votes:
- votes[voteTime] = {}
-
- dirauth = get_dirauth_from_filename(f)
- if dirauth not in dirAuths:
- raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
- elif dirauth not in votes[voteTime]:
- votes[voteTime][dirauth] = {}
- else:
- print "Found two votes for dirauth " + dirauth + " and time " + filepath
-
- votes[voteTime][dirauth]['present'] = 1
- votes[voteTime][dirauth]['known'] = int(subprocess.check_output('egrep "^r " "' + filepath + '" | wc -l', shell=True))
- votes[voteTime][dirauth]['running'] = int(subprocess.check_output('egrep "^s " "' + filepath + '" | grep " Running" | wc -l', shell=True))
- votes[voteTime][dirauth]['bwlines'] = int(subprocess.check_output('grep Measured= "' + filepath + '" | wc -l', shell=True))
-
- for t in votes:
- print ut_to_datetime(t)
- print "\t", len(votes[t])
- for d in votes[t]:
- print "\t", d, votes[t][d]['bwlines'], votes[t][d]['running']
-
- insertValues = [t]
- for d in dirAuths:
- if d in votes[t]:
- insertValues.append(votes[t][d]['known'])
- insertValues.append(votes[t][d]['running'])
- insertValues.append(votes[t][d]['bwlines'])
- else:
- insertValues.append(None)
- insertValues.append(None)
- insertValues.append(None)
-
- dbc.execute("INSERT OR REPLACE INTO vote_data VALUES (?" + dirauth_columns_questions + ")", insertValues)
- dbc.commit()
+ dirauth_columns = ""
+ dirauth_columns_questions = ""
+ for d in dirAuths:
+ dirauth_columns += d + "_known integer, " + d + "_running integer, " + d + "_bwauth integer, "
+ dirauth_columns_questions += ",?,?,?"
+
+ dbc.execute("CREATE TABLE IF NOT EXISTS vote_data(date integer, " + dirauth_columns + "PRIMARY KEY(date ASC))")
+ dbc.commit()
+
+ votes = {}
+ for root, dirs, files in os.walk(directory):
+ for f in files:
+ filepath = os.path.join(root, f)
+ print(filepath)
+
+ if '"' in f:
+ raise Exception("Potentially malicious filename")
+ elif "votes-" in f and ".tar" in f:
+ continue
+ elif "consensuses-" in f and ".tar" in f:
+ continue
+ elif "-vote-" not in f:
+ continue
+
+ voteTime = get_time_from_filename(f)
+ if voteTime not in votes:
+ votes[voteTime] = {}
+
+ dirauth = get_dirauth_from_filename(f)
+ if dirauth not in dirAuths:
+ raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
+ elif dirauth not in votes[voteTime]:
+ votes[voteTime][dirauth] = {}
+ else:
+ print("Found two votes for dirauth " + dirauth + " and time " + filepath)
+
+ votes[voteTime][dirauth]['present'] = 1
+ votes[voteTime][dirauth]['known'] = int(subprocess.check_output('egrep "^r " "' + filepath + '" | wc -l', shell=True))
+ votes[voteTime][dirauth]['running'] = int(subprocess.check_output('egrep "^s " "' + filepath + '" | grep " Running" | wc -l', shell=True))
+ votes[voteTime][dirauth]['bwlines'] = int(subprocess.check_output('grep Measured= "' + filepath + '" | wc -l', shell=True))
+
+ for t in votes:
+ print(ut_to_datetime(t))
+ print("\t", len(votes[t]))
+ for d in votes[t]:
+ print("\t", d, votes[t][d]['bwlines'], votes[t][d]['running'])
+
+ insertValues = [t]
+ for d in dirAuths:
+ if d in votes[t]:
+ insertValues.append(votes[t][d]['known'])
+ insertValues.append(votes[t][d]['running'])
+ insertValues.append(votes[t][d]['bwlines'])
+ else:
+ insertValues.append(None)
+ insertValues.append(None)
+ insertValues.append(None)
+
+ dbc.execute("INSERT OR REPLACE INTO vote_data VALUES (?" + dirauth_columns_questions + ")", insertValues)
+ dbc.commit()
def bwauth_measurements(directory, dirAuths, dbc):
- #Find all the consensuses and votesrm
- votes = {}
- consensuses = {}
- for root, dirs, files in os.walk(directory):
- for f in files:
- filepath = os.path.join(root, f)
-
- if '"' in f:
- raise Exception("Potentially malicious filename")
- elif "votes-" in f and ".tar" in f:
- continue
- elif "consensuses-" in f and ".tar" in f:
- continue
-
- if "-consensus" in f:
- consensusTime = get_time_from_filename(f)
- if consensusTime not in consensuses:
- consensuses[consensusTime] = filepath
- else:
- print "Found two consensuses with the same time:", ut_to_datetime(consensusTime)
-
- #print "Consensus:", filepath
- elif "-vote-" in f:
- voteTime = get_time_from_filename(f)
-
- # Test to see if we already processed this one
- cur = dbc.cursor()
- cur.execute("SELECT * FROM bwauth_data WHERE date = ?", (voteTime,))
- if cur.fetchone():
- print "Skipping", f, "because we already processed it"
- continue
- elif voteTime not in votes:
- votes[voteTime] = {}
-
- dirauth = get_dirauth_from_filename(f)
-
- if dirauth not in dirAuths:
- raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
- elif dirauth not in votes[voteTime]:
- votes[voteTime][dirauth] = filepath
- else:
- print "Found two votes for dirauth " + dirauth + ":", filepath, "and", votes[voteTime][dirauth]
-
- #print "Vote:", dirauth, filepath
-
- #Make sure we have a consensus for each vote
- to_del = []
- for v in votes:
- if v not in consensuses:
- print "Have votes for time", ut_to_datetime(v), "but no consensus!"
- to_del.append(v)
- #sys.exit(1)
- for i in to_del:
- del votes[i]
-
- #Make the table
- bwauth_columns = ""
- bwauth_columns_questions = ""
- for d in dirAuths:
- bwauth_columns += d + "_above integer, " + d + "_shared integer, " + d + "_exclusive integer, " + d + "_below integer, " + d + "_unmeasured integer, "
- bwauth_columns_questions += ",?,?,?,?,?"
-
- dbc.execute("CREATE TABLE IF NOT EXISTS bwauth_data(date integer, " + bwauth_columns + "PRIMARY KEY(date ASC))")
- dbc.commit()
-
- reviewed = 0
- for v in votes:
- reviewed += 1
- print "Reviewing", consensuses[v], "(" + str(reviewed) + "/" + str(len(votes)) + ")"
-
- #Get the consensus data
- consensusRouters = {}
- with DescriptorReader(consensuses[v]) as reader:
- reader.register_skip_listener(my_listener)
- for relay in reader:
- consensusRouters[relay.fingerprint] = "Unmeasured" if relay.is_unmeasured else relay.bandwidth
-
- #The vote data
- bwauthVotes = {}
- for d in votes[v]:
- if d not in bwauthVotes:
- bwauthVotes[d] = {}
-
- measured_something = False
- with DescriptorReader(votes[v][d]) as reader:
- reader.register_skip_listener(my_listener)
- for relay in reader:
- if relay.measured:
- bwauthVotes[d][relay.fingerprint] = relay.measured
- measured_something = True
- if not measured_something:
- del bwauthVotes[d]
-
- #Now match them up and store the data
- thisConsensusResults = {}
- for r in consensusRouters:
- for d in bwauthVotes:
- had_any_value = False
- if d not in thisConsensusResults:
- thisConsensusResults[d] = {'unmeasured' : 0, 'above' : 0, 'below' : 0, 'exclusive' : 0 , 'shared' : 0}
-
- if consensusRouters[r] == "Unmeasured":
- continue
- elif r not in bwauthVotes[d]:
- had_any_value = True
- thisConsensusResults[d]['unmeasured'] += 1
- elif consensusRouters[r] < bwauthVotes[d][r]:
- had_any_value = True
- thisConsensusResults[d]['above'] += 1
- elif consensusRouters[r] > bwauthVotes[d][r]:
- had_any_value = True
- thisConsensusResults[d]['below'] += 1
- elif consensusRouters[r] == bwauthVotes[d][r] and \
- 1 == len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r]]):
- had_any_value = True
- thisConsensusResults[d]['exclusive'] += 1
- elif consensusRouters[r] == bwauthVotes[d][r] and \
- 1 != len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r] ]):
- had_any_value = True
- thisConsensusResults[d]['shared'] += 1
- else:
- print "What case am I in???"
- sys.exit(1)
-
- if not had_any_value:
- del thisConsensusResults[d]
-
- insertValues = [v]
- for d in dirAuths:
- if d in thisConsensusResults:
- insertValues.append(thisConsensusResults[d]['above'])
- insertValues.append(thisConsensusResults[d]['shared'])
- insertValues.append(thisConsensusResults[d]['exclusive'])
- insertValues.append(thisConsensusResults[d]['below'])
- insertValues.append(thisConsensusResults[d]['unmeasured'])
- else:
- insertValues.append(None)
- insertValues.append(None)
- insertValues.append(None)
- insertValues.append(None)
- insertValues.append(None)
-
- dbc.execute("INSERT OR REPLACE INTO bwauth_data VALUES (?" + bwauth_columns_questions + ")", insertValues)
- dbc.commit()
-
+ #Find all the consensuses and votesrm
+ votes = {}
+ consensuses = {}
+ for root, dirs, files in os.walk(directory):
+ for f in files:
+ filepath = os.path.join(root, f)
+
+ if '"' in f:
+ raise Exception("Potentially malicious filename")
+ elif "votes-" in f and ".tar" in f:
+ continue
+ elif "consensuses-" in f and ".tar" in f:
+ continue
+
+ if "-consensus" in f:
+ consensusTime = get_time_from_filename(f)
+ if consensusTime not in consensuses:
+ consensuses[consensusTime] = filepath
+ else:
+ print("Found two consensuses with the same time:", ut_to_datetime(consensusTime))
+
+ #print "Consensus:", filepath
+ elif "-vote-" in f:
+ voteTime = get_time_from_filename(f)
+
+ # Test to see if we already processed this one
+ cur = dbc.cursor()
+ cur.execute("SELECT * FROM bwauth_data WHERE date = ? AND faravahar_above IS NOT NULL", (voteTime,))
+ if cur.fetchone():
+ #print("Skipping", f, "because we already processed it")
+ continue
+ elif voteTime not in votes:
+ votes[voteTime] = {}
+
+ dirauth = get_dirauth_from_filename(f)
+
+ if dirauth not in dirAuths:
+ raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
+ elif dirauth not in votes[voteTime]:
+ votes[voteTime][dirauth] = filepath
+ else:
+ print("Found two votes for dirauth " + dirauth + ":", filepath, "and", votes[voteTime][dirauth])
+
+ #print "Vote:", dirauth, filepath
+
+ print("Found %s consensuses" % len(consensuses))
+ print("Found %s votes" % len(votes))
+
+ #Make sure we have a consensus for each vote
+ to_del = []
+ for v in votes:
+ if v not in consensuses:
+ print("Have votes for time", ut_to_datetime(v), "but no consensus!")
+ to_del.append(v)
+ #sys.exit(1)
+ for i in to_del:
+ del votes[i]
+
+ #Make the table
+ bwauth_columns = ""
+ bwauth_columns_questions = ""
+ for d in dirAuths:
+ bwauth_columns += d + "_above integer, " + d + "_shared integer, " + d + "_exclusive integer, " + d + "_below integer, " + d + "_unmeasured integer, "
+ bwauth_columns_questions += ",?,?,?,?,?"
+
+ dbc.execute("CREATE TABLE IF NOT EXISTS bwauth_data(date integer, " + bwauth_columns + "PRIMARY KEY(date ASC))")
+ dbc.commit()
+
+ reviewed = 0
+ for v in votes:
+ reviewed += 1
+ print("Reviewing", consensuses[v], "(" + str(reviewed) + "/" + str(len(votes)) + ")")
+
+ #Get the consensus data
+ consensusRouters = {}
+ reader = stem.descriptor.parse_file(consensuses[v])
+ for relay in reader:
+ consensusRouters[relay.fingerprint] = "Unmeasured" if relay.is_unmeasured else relay.bandwidth
+
+ #The vote data
+ bwauthVotes = {}
+ for d in votes[v]:
+ if d not in bwauthVotes:
+ bwauthVotes[d] = {}
+
+ measured_something = False
+ reader = stem.descriptor.parse_file(votes[v][d])
+ for relay in reader:
+ if relay.measured:
+ bwauthVotes[d][relay.fingerprint] = relay.measured
+ measured_something = True
+
+ if not measured_something:
+ del bwauthVotes[d]
+
+ #Now match them up and store the data
+ thisConsensusResults = {}
+ for r in consensusRouters:
+ for d in bwauthVotes:
+ had_any_value = False
+ if d not in thisConsensusResults:
+ thisConsensusResults[d] = {'unmeasured' : 0, 'above' : 0, 'below' : 0, 'exclusive' : 0 , 'shared' : 0}
+
+ if consensusRouters[r] == "Unmeasured":
+ continue
+ elif r not in bwauthVotes[d]:
+ had_any_value = True
+ thisConsensusResults[d]['unmeasured'] += 1
+ elif consensusRouters[r] < bwauthVotes[d][r]:
+ had_any_value = True
+ thisConsensusResults[d]['above'] += 1
+ elif consensusRouters[r] > bwauthVotes[d][r]:
+ had_any_value = True
+ thisConsensusResults[d]['below'] += 1
+ elif consensusRouters[r] == bwauthVotes[d][r] and \
+ 1 == len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r]]):
+ had_any_value = True
+ thisConsensusResults[d]['exclusive'] += 1
+ elif consensusRouters[r] == bwauthVotes[d][r] and \
+ 1 != len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r] ]):
+ had_any_value = True
+ thisConsensusResults[d]['shared'] += 1
+ else:
+ print("What case am I in???")
+ sys.exit(1)
+
+ if not had_any_value:
+ del thisConsensusResults[d]
+
+ insertValues = [v]
+ for d in dirAuths:
+ if d in thisConsensusResults:
+ insertValues.append(thisConsensusResults[d]['above'])
+ insertValues.append(thisConsensusResults[d]['shared'])
+ insertValues.append(thisConsensusResults[d]['exclusive'])
+ insertValues.append(thisConsensusResults[d]['below'])
+ insertValues.append(thisConsensusResults[d]['unmeasured'])
+ else:
+ insertValues.append(None)
+ insertValues.append(None)
+ insertValues.append(None)
+ insertValues.append(None)
+ insertValues.append(None)
+
+ dbc.execute("INSERT OR REPLACE INTO bwauth_data VALUES (?" + bwauth_columns_questions + ")", insertValues)
+ dbc.commit()
+
def my_listener(path, exception):
- print "Skipped!"
- print path
- print exception
+ print("Skipped!")
+ print(path)
+ print(exception)
def main(itype, directory):
- dirAuths = get_dirauths_in_tables()
- dbc = sqlite3.connect(os.path.join('data', 'historical.db'))
+ dirAuths = get_dirauths_in_tables()
+ dbc = sqlite3.connect(os.path.join('data', 'historical.db'))
- if itype == "dirauth_relay_votes":
- dirauth_relay_votes(directory, dirAuths, dbc)
- elif itype == "bwauth_measurements":
- bwauth_measurements(directory, dirAuths, dbc)
- else:
- print "Unknown ingestion type"
+ if itype == "dirauth_relay_votes":
+ dirauth_relay_votes(directory, dirAuths, dbc)
+ elif itype == "bwauth_measurements":
+ bwauth_measurements(directory, dirAuths, dbc)
+ else:
+ print("Unknown ingestion type")
if __name__ == '__main__':
- try:
- if len(sys.argv) != 3:
- print "Usage: ", sys.argv[0], "ingestion-type vote-directory"
- else:
- main(sys.argv[1], sys.argv[2])
- except:
- msg = "%s failed with:\n\n%s" % (sys.argv[0], traceback.format_exc())
- print "Error: %s" % msg
+ try:
+ if len(sys.argv) != 3:
+ print("Usage: ", sys.argv[0], "ingestion-type vote-directory")
+ else:
+ main(sys.argv[1], sys.argv[2])
+ except:
+ msg = "%s failed with:\n\n%s" % (sys.argv[0], traceback.format_exc())
+ print("Error: %s" % msg)
More information about the tor-commits
mailing list