[or-cvs] [bridgedb/master 2/4] Changed and cleaned up the unallocated-bridges-to-bucket-files feature a bit, according to the review comments by Nick.
nickm at torproject.org
nickm at torproject.org
Mon Sep 27 20:10:51 UTC 2010
Author: Christian Fromme <kaner at strace.org>
Date: Sat, 17 Jul 2010 17:01:32 +0200
Subject: Changed and cleaned up the unallocated-bridges-to-bucket-files feature a bit,
Commit: 5fbe1a5cca5e526890c8a31dbdad08d7385e980a
---
bridgedb.conf | 2 +-
lib/bridgedb/Bucket.py | 196 +++++++++++++++++++++++++++++++++++++++++++++++
lib/bridgedb/Dist.py | 159 --------------------------------------
lib/bridgedb/Main.py | 11 ++-
lib/bridgedb/Opt.py | 17 ++++
lib/bridgedb/Storage.py | 53 +++++++++++--
6 files changed, 266 insertions(+), 172 deletions(-)
create mode 100644 lib/bridgedb/Bucket.py
create mode 100644 lib/bridgedb/Opt.py
diff --git a/bridgedb.conf b/bridgedb.conf
index 0af8bbb..44422a7 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -140,4 +140,4 @@ EMAIL_INCLUDE_FINGERPRINTS=False
# HTTPS_SHARE : EMAIL_SHARE : RESERVED_SHARE
RESERVED_SHARE=2
-FILE_DISTRIBUTORS = {}
+FILE_BUCKETS = {}
diff --git a/lib/bridgedb/Bucket.py b/lib/bridgedb/Bucket.py
new file mode 100644
index 0000000..ab6d83b
--- /dev/null
+++ b/lib/bridgedb/Bucket.py
@@ -0,0 +1,196 @@
+"""
+This module is responsible for everything concerning file bucket bridge
+distribution. File bucket bridge distribution means that unallocated bridges
+are allocated to a certain pseudo-distributor and later written to a file.
+
+For example, the following is a dict of pseudo-distributors (also called
+'bucket identifiers') with numbers of bridges assigned to them:
+
+ FILE_BUCKETS = { "name1": 10, "name2": 15, "foobar": 3 }
+
+This configuration for buckets would result in 3 files being created for bridge
+distribution: name1-2010-07-17.brdgs, name2-2010-07-17.brdgs and
+foobar-2010-07-17.brdgs. The first file would contain 10 bridges from BridgeDB's
+'unallocated' pool. The second file would contain 15 bridges from the same pool
+and the third one similarly 3 bridges. These files can then be handed out to
+trusted parties via mail or fed to other distribution mechanisms such as
+twitter.
+
+Note that in BridgeDB slang, the _distributor_ would still be 'unallocated',
+even though in the database, there would now be 'name1', 'name2' or 'foobar'
+instead of 'unallocated'. This is why they are called pseudo-distributors.
+"""
+
+import time
+
+import bridgedb.Storage
+
+class BucketData:
+    """Value object describing one configured file bucket.
+    name      - bucket identifier, as given in the config
+    needed    - how many bridges the config requests for this bucket
+    allocated - how many bridges are counted for this bucket; starts at 0
+    """
+    def __init__(self, name, needed):
+        # The "*" wildcard is stored as a ridiculously high number, so the
+        # rest of the code only ever has to compare integers.
+        wanted = 1000000 if needed == "*" else needed
+        self.name = name
+        self.needed = int(wanted)
+        self.allocated = 0
+
+class BucketManager:
+    """BucketManager reads a number of file bucket identifiers from the config.
+    They're expected to be in the following format:
+
+        FILE_BUCKETS = { "name1": 10, "name2": 15, "foobar": 3 }
+
+    This syntax means that certain buckets ("name1", "name2" and so on)
+    are given a number of bridges (10, 15 and so on). Names can be anything.
+    The name will later be the prefix of the file that is written with the
+    assigned number of bridges in it. Instead of a number, a wildcard item
+    ("*") is allowed, too. This means that the corresponding bucket file
+    will get the maximum number of possible bridges (as many as are left in
+    the unallocated bucket).
+
+    The files will be written in ip:port format, one bridge per line.
+
+    The way this works internally is as follows:
+
+    First of all, the assignBridgesToBuckets() routine runs through
+    the database of bridges and looks up the 'distributor' field of each
+    bridge. Unallocated bridges are sent to a pool for later assignment.
+    Bridges already allocated to a file bucket are checked for whether
+    their bucket identifier still exists in the current config and whether
+    the bucket still needs them. If either the bucket identifier does not
+    exist anymore or too many bridges are currently assigned to it, the
+    bridge goes to the unallocated pool. Bridges of the 'https' and 'email'
+    distributors are left alone.
+
+    In the second step, assignBridgesToBuckets() assigns one bridge from
+    the unallocated pool to a known bucket identifier at a time until it
+    either runs out of bridges in the unallocated pool or no bucket needs
+    another bridge.
+
+    When all bridges are assigned in this way, they can then be dumped into
+    files by calling the dumpBridges() routine.
+    """
+
+    def __init__(self, cfg):
+        self.cfg = cfg
+        self.bucketList = []
+        self.unallocatedList = []
+        self.unallocated_available = False
+        self.db = bridgedb.Storage.Database(self.cfg.DB_FILE+".sqlite",
+                                            self.cfg.DB_FILE)
+
+    def __del__(self):
+        # self.db does not exist if opening the database failed in __init__
+        if getattr(self, "db", None) is not None:
+            self.db.close()
+
+    def _setDistributor(self, distributor, hex_key):
+        """Update a bridge's distributor and commit; roll back on any error"""
+        try:
+            self.db.updateDistributorForHexKey(distributor, hex_key)
+        except:
+            self.db.rollback()
+            raise
+        else:
+            self.db.commit()
+
+    def addToUnallocatedList(self, hex_key):
+        """Add a bridge by hex_key into the unallocated pool
+        """
+        self._setDistributor("unallocated", hex_key)
+        self.unallocatedList.append(hex_key)
+        self.unallocated_available = True
+
+    def isBucketIdentKnown(self, bucketIdent):
+        """Do we know this bucket identifier? Returns its BucketData or None
+        """
+        for d in self.bucketList:
+            if d.name == bucketIdent:
+                return d
+        return None
+
+    def assignUnallocatedBridge(self, bucket):
+        """Assign an unallocated bridge to a certain bucket. Returns True on
+        success, False if the unallocated pool is empty
+        """
+        if not self.unallocatedList:
+            self.unallocated_available = False
+            return False
+        # Take the bridge out of the pool and count it only after the
+        # database write succeeded, so a failed write loses no bridge
+        self._setDistributor(bucket.name, self.unallocatedList[-1])
+        self.unallocatedList.pop()
+        bucket.allocated += 1
+        self.unallocated_available = len(self.unallocatedList) > 0
+        return True
+
+    def assignBridgesToBuckets(self):
+        """Read file bucket identifiers from the configuration, sort them and
+        write necessary changes to the database
+        """
+        # Build the bucket list from scratch, so that a second call neither
+        # duplicates buckets nor pool entries
+        self.bucketList = [BucketData(k, v)
+                           for k, v in self.cfg.FILE_BUCKETS.items()]
+        self.unallocatedList = []
+        self.unallocated_available = False
+
+        # Loop through all bridges and sort out our distributors
+        for bridge in self.db.getAllBridges():
+            if bridge.distributor == "unallocated":
+                self.addToUnallocatedList(bridge.hex_key)
+                continue
+
+            # Check if we know this distributor
+            d = self.isBucketIdentKnown(bridge.distributor)
+            if d is not None:
+                # Does this distributor need another one?
+                if d.allocated < d.needed:
+                    d.allocated += 1
+                else:
+                    self.addToUnallocatedList(bridge.hex_key)
+            # Unknown, maybe an old entry. Free it, but NOT https/email ones!
+            elif bridge.distributor not in ("https", "email"):
+                self.addToUnallocatedList(bridge.hex_key)
+
+        # Loop through bucketList while we have and need unallocated
+        # bridges, assign one bridge at a time
+        while self.unallocated_available and len(self.bucketList) > 0:
+            # Iterate over a copy, because full buckets get removed
+            for d in list(self.bucketList):
+                if not self.unallocated_available:
+                    break  # Pool ran dry in the middle of a round
+                if d.allocated >= d.needed:
+                    # Bucket has enough bridges, remove it from the list
+                    self.bucketList.remove(d)
+                elif not self.assignUnallocatedBridge(d):
+                    print("Couldn't assign unallocated bridge to %s" % d.name)
+
+    def dumpBridges(self):
+        """Dump all known file distributors to files"""
+        # Dump https, email and unallocated, too. Use a separate list: adding
+        # them to cfg.FILE_BUCKETS would make them size-0 buckets later on.
+        bucketIds = list(self.cfg.FILE_BUCKETS.keys())
+        for ident in ("https", "email", "unallocated"):
+            if ident not in bucketIds:
+                bucketIds.append(ident)
+        today = time.strftime("%Y-%m-%d")
+        # Loop through all bucket identifiers and dump their bridges to files
+        for bucketId in bucketIds:
+            bForBucket = self.db.getBridgesForDistributor(bucketId)
+            # Skip empty (pseudo-)distributors, don't create a file for them
+            if len(bForBucket) < 1:
+                continue
+            fileName = bucketId + "-" + today + ".brdgs"
+            print("Dumping %d bridges for %s to %s" % (len(bForBucket), bucketId, fileName))
+            f = open(fileName, 'w')
+            try:
+                for bridge in bForBucket:
+                    f.write("%s:%s\n" % (bridge.address, bridge.or_port))
+            finally:
+                f.close()
diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py
index 99ab3b4..53fdda0 100644
--- a/lib/bridgedb/Dist.py
+++ b/lib/bridgedb/Dist.py
@@ -282,162 +282,3 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
else:
db.commit()
-
-class FileDistributorBean:
- """A file distributor bean
- """
- def __init__(self, name, needed):
- self.name = name
- if needed == "*":
- # Set to rediculously high number
- needed = 1000000
- self.needed = int(needed)
- self.allocated = 0
-
-class FileDistributor:
- """FileDistributor reads a number of file distributors from the config.
- They're expected to be in the following format:
-
- FILE_DISTRIBUTORS = { "name1": 10, "name2": 15, "foobar": 3 }
-
- This syntax means that certain distributors ("name1", "name2" and so on)
- are given a number of bridges (10, 15 and so on). Names can be anything.
- The name will later be the prefix of the file that is written with the
- assigned number of bridges in it. Instead of a number, a wildcard item
- ("*") is allowed, too. This means that that file distributor will get
- maximum bridges (as many as are left in the unallocated bucket).
-
- The files will be written in ip:port format, one bridge per line.
-
- The way this works internally is as follows:
-
- First of all, the assignBridgesToDistributors() routine runs through
- the database of bridges and looks up the 'distributor' field of each
- bridge. Unallocated bridges are sent to a pool for later assignement.
- Already allocated bridges for file distributors are sorted and checked.
- They're checked for whether the distributor still exists in the current
- config and also whether the number of assigned bridges is still valid.
- If either the distributor is not existing anymore or too many bridges
- are currently assigned to her, bridges will go to the unassigned pool.
-
- In the second step, after bridges are sorted and the unassigned pool is
- ready, the assignBridgesToDistributors() routine assigns one bridge
- from the unassigned pool to a known distributor at a time until it
- either runs out of bridges in the unallocated pool or the number of
- needed bridges for that distributor is fullfilled.
-
- When all bridges are assigned in this way, they then can then be dumped
- into files by calling the dumpBridges() routine.
- """
-
- def __init__(self, cfg):
- self.cfg = cfg
- self.distributorList = []
- self.unallocatedList = []
- self.unallocated_available = False
- self.db = bridgedb.Storage.Database(self.cfg.DB_FILE+".sqlite",
- self.cfg.DB_FILE)
-
- def __del__(self):
- self.db.close()
-
- def addToUnallocatedList(self, id):
- """Add a bridge by database id into the unallocated pool
- """
- try:
- self.db.updateDistributorForId("unallocated", id)
- except:
- self.db.rollback()
- raise
- else:
- self.db.commit()
- self.unallocatedList.append(id)
- self.unallocated_available = True
-
- def knownFileDistributor(self, distributor):
- """Do we know this distributor?
- """
- for d in self.distributorList:
- if d.name == distributor:
- return d
- return None
-
- def assignUnallocatedBridge(self, distributor):
- """Assign an unallocated bridge to a certain distributor
- """
- distributor.allocated += 1
- id = self.unallocatedList.pop()
- #print "ID: %d NAME: %s" % (id, distributor.name)
- try:
- self.db.updateDistributorForId(distributor.name, id)
- except:
- self.db.rollback()
- raise
- else:
- self.db.commit()
- if len(self.unallocatedList) < 1:
- self.unallocated_available = False
- return True
-
- def assignBridgesToDistributors(self):
- """Read file distributors from the configuration, sort them and write
- necessary changes to the database
- """
- # Build distributor list
- for k, v in self.cfg.FILE_DISTRIBUTORS.items():
- d = FileDistributorBean(k, v)
- self.distributorList.append(d)
-
- # Loop through all bridges and sort out our distributors
- allBridges = self.db.getAllBridges()
- for bridge in allBridges:
- distributor = bridge[4]
- if distributor == "unallocated":
- self.addToUnallocatedList(bridge[0])
- continue
-
- # Check if we know this distributor
- d = self.knownFileDistributor(distributor)
- if d is not None:
- # Does this distributor need another one?
- # We assume that d.allocated is 0 in the beginning
- if d.allocated < d.needed:
- d.allocated += 1
- else:
- self.addToUnallocatedList(bridge[0])
- # We don't know it. Maybe an old entry. Free it.
- else:
- # DON'T free https or email allocations!
- if distributor != "https" and distributor != "email":
- self.addToUnallocatedList(bridge[0])
-
- # Loop though distributorList while we have and need unallocated
- # bridges, assign one bridge at a time
- while self.unallocated_available and len(self.distributorList) > 0:
- for d in self.distributorList:
- if d.allocated < d.needed:
- if not self.assignUnallocatedBridge(d):
- print "Couldn't assign unallocated bridge to %s" % d.name
- else:
- # When we have enough bridges, remove from list
- self.distributorList.remove(d)
-
-
- def dumpBridges(self):
- """Dump all known file distributors to files
- """
- # Dump https, email and unreserved, too
- self.cfg.FILE_DISTRIBUTORS["https"] = 0
- self.cfg.FILE_DISTRIBUTORS["email"] = 0
- self.cfg.FILE_DISTRIBUTORS["unallocated"] = 0
- # Loop through all distributors and dump their bridges to files
- for distributor, _ in self.cfg.FILE_DISTRIBUTORS.items():
- fileName = distributor + "-" + time.strftime("%Y-%m-%d") + ".brdgs"
- f = open(fileName, 'w')
- f.write("Here are your bridges, %s:\n" % distributor)
- bForDistributor = self.db.getBridgesForDistributor(distributor)
- print "Dumping %d bridges for %s to %s" % (len(bForDistributor), distributor, fileName)
- for bridge in bForDistributor:
- line = "%s:%s" % (bridge[2], bridge[3])
- f.write(line + '\n')
- f.close
diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py
index 726137f..7bf8f98 100644
--- a/lib/bridgedb/Main.py
+++ b/lib/bridgedb/Main.py
@@ -19,6 +19,7 @@ import bridgedb.Time as Time
import bridgedb.Server as Server
import bridgedb.Storage
import bridgedb.Opt as Opt
+import bridgedb.Bucket as Bucket
class Conf:
"""A configuration object. Holds unvalidated attributes.
@@ -90,7 +91,7 @@ CONFIG = Conf(
RESERVED_SHARE=2,
- FILE_DISTRIBUTORS = {}
+ FILE_BUCKETS = {}
)
def configureLogging(cfg):
@@ -326,8 +327,8 @@ def run():
Parse the configuration, and start the servers.
"""
options, arguments = Opt.parseOpts()
-
configuration = {}
+
if options.testing:
configuration = CONFIG
elif not options.configfile:
@@ -340,9 +341,9 @@ def run():
configuration = C
if options.dumpbridges:
- fileDistributor = Dist.FileDistributor(configuration)
- fileDistributor.assignBridgesToDistributors()
- fileDistributor.dumpBridges()
+ bucketManager = Bucket.BucketManager(configuration)
+ bucketManager.assignBridgesToBuckets()
+ bucketManager.dumpBridges()
else:
startup(configuration)
diff --git a/lib/bridgedb/Opt.py b/lib/bridgedb/Opt.py
new file mode 100644
index 0000000..3649213
--- /dev/null
+++ b/lib/bridgedb/Opt.py
@@ -0,0 +1,17 @@
+# Parse command line args
+
+import optparse
+
+def parseOpts():
+    """Parse sys.argv and return optparse's (options, arguments) pair."""
+    parser = optparse.OptionParser()
+    flag = dict(action="store_true", default=False)
+    parser.add_option("-c", "--config", dest="configfile", metavar="FILE",
+                      default="./bridgedb.conf",
+                      help="set config file to FILE")
+    parser.add_option("-d", "--dump-bridges", dest="dumpbridges",
+                      help="dump reserved bridges into files", **flag)
+    parser.add_option("-t", "--testing", dest="testing",
+                      help="do some sanity tests", **flag)
+    parsed = parser.parse_args()
+    return parsed
diff --git a/lib/bridgedb/Storage.py b/lib/bridgedb/Storage.py
index d7346b5..2c64e83 100644
--- a/lib/bridgedb/Storage.py
+++ b/lib/bridgedb/Storage.py
@@ -139,6 +139,24 @@ SCHEMA1_SCRIPT = """
INSERT INTO Config VALUES ( 'schema-version', 1 );
"""
+class BridgeData:
+    """Plain record holding the information known about one bridge.
+
+    hex_key     - unique hex key identifying the bridge
+    address     - IP address of the bridge
+    or_port     - TCP port of the bridge
+    distributor - distributor (or pseudo-distributor) the bridge is
+                  announced through; "unallocated" unless given
+    first_seen  - when the bridge was first seen online ("" unless given)
+    last_seen   - when the bridge was last seen online ("" unless given)
+    """
+    def __init__(self, hex_key, address, or_port, distributor="unallocated",
+                 first_seen="", last_seen=""):
+        # Every argument is stored under the attribute of the same name
+        self.hex_key = hex_key
+        self.address, self.or_port = address, or_port
+        self.distributor = distributor
+        self.first_seen, self.last_seen = first_seen, last_seen
class Database:
def __init__(self, sqlite_fname, db_fname=None):
@@ -151,6 +169,9 @@ class Database:
def commit(self):
self._conn.commit()
+ def rollback(self):
+ self._conn.rollback()
+
def close(self):
self._cur.close()
self._conn.close()
@@ -205,20 +226,38 @@ class Database:
"(email,when_mailed) VALUES (?,?)", (addr, t))
def getAllBridges(self):
+ """Return a list of BridgeData value classes of all bridges in the
+ database
+ """
+ retBridges = []
cur = self._cur
- cur.execute("SELECT * FROM Bridges")
- return cur.fetchall()
+ cur.execute("SELECT hex_key, address, or_port, distributor, "
+ "first_seen, last_seen FROM Bridges")
+ for b in cur.fetchall():
+ bridge = BridgeData(b[0], b[1], b[2], b[3], b[4], b[5])
+ retBridges.append(bridge)
+
+ return retBridges
def getBridgesForDistributor(self, distributor):
+ """Return a list of BridgeData value classes of all bridges in the
+ database that are allocated to distributor 'distributor'
+ """
+ retBridges = []
cur = self._cur
- cur.execute("SELECT * FROM Bridges WHERE "
+ cur.execute("SELECT hex_key, address, or_port, distributor, "
+ "first_seen, last_seen FROM Bridges WHERE "
"distributor = ?", (distributor, ))
- return cur.fetchall()
+ for b in cur.fetchall():
+ bridge = BridgeData(b[0], b[1], b[2], b[3], b[4], b[5])
+ retBridges.append(bridge)
+
+ return retBridges
- def updateDistributorForId(self, distributor, id):
+ def updateDistributorForHexKey(self, distributor, hex_key):
cur = self._cur
- cur.execute("UPDATE Bridges SET distributor = ? WHERE id = ?",
- (distributor, id))
+ cur.execute("UPDATE Bridges SET distributor = ? WHERE hex_key = ?",
+ (distributor, hex_key))
def openDatabase(sqlite_file):
conn = sqlite3.Connection(sqlite_file)
--
1.7.1
More information about the tor-commits
mailing list