[tor-commits] [bridgedb/master] 4297 - parse or-addresses from bridge descriptors
aagbsn at torproject.org
aagbsn at torproject.org
Sat Mar 16 23:46:31 UTC 2013
commit 691db20224e4311444de85b592add60a54b9d38c
Author: aagbsn <aagbsn at extc.org>
Date: Tue Dec 6 13:30:49 2011 -0800
4297 - parse or-addresses from bridge descriptors
Updates parseDescFile to read or-address lines from bridge descriptors,
and adds a container class PortList.
parseDescFile calls the new parseORAddressLine, which parses or-address lines.
Valid address:portspec are added to each Bridge object in a dictionary
of or-address:PortList key:values.
class PortList is a container class that represents the or-address
PORTSPEC field, which is a list of ports and port ranges.
The implementation contains an efficient representation of port-ranges,
implements __contains__ for single ports and port-ranges, and will
also squash or merge port ranges that intersect.
The getPort() function returns a single valid port (as defined by the
PORTSPEC line). Each subsequent call to getPort() will iterate over the
valid ports, so that BridgeDB may return a single 'bridge' line for
inclusion in torrc and uniformly distribute the available ports.
getConfigLine may be passed 'selectFromORAddresses=True',
which will return a valid config line from the set of addresses
and ports. Filtering for IPv6 or IPv4 addresses is supported;
if there are no matches in or-addresses, BridgeDB will return
the primary ip:orport instead.
Also added is a generator 'getAllConfigLines()' which will return
every valid address:port (including expanding ranges of ports -
which may be a lot of output) combination a bridge supports, one
per line.
---
lib/bridgedb/Bridges.py | 279 ++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 262 insertions(+), 17 deletions(-)
diff --git a/lib/bridgedb/Bridges.py b/lib/bridgedb/Bridges.py
index 358e362..306b757 100644
--- a/lib/bridgedb/Bridges.py
+++ b/lib/bridgedb/Bridges.py
@@ -1,5 +1,3 @@
-# BridgeDB by Nick Mathewson.
-# Copyright (c) 2007-2009, The Tor Project, Inc.
# See LICENSE for licensing information
"""
@@ -15,6 +13,8 @@ import re
import sha
import socket
import time
+import ipaddr
+import random
import bridgedb.Storage
import bridgedb.Bucket
@@ -44,15 +44,13 @@ def is_valid_ip(ip):
False
"""
- if not re.match(r'(\d+)\.(\d+)\.(\d+)\.(\d+)', ip):
- # inet_aton likes "1.2" as a synonym for "0.0.1.2". We don't.
- return False
+ # ipaddr does not treat "1.2" as a synonym for "0.0.1.2"
try:
- socket.inet_aton(ip)
- except socket.error:
+ ipaddr.IPAddress(ip)
+ except ValueError:
+ # not a valid IPv4 or IPv6 address
return False
- else:
- return True
+ return True
def is_valid_fingerprint(fp):
"""Return true iff fp in the right format to be a hex fingerprint
@@ -67,6 +65,17 @@ def is_valid_fingerprint(fp):
else:
return True
def is_valid_or_address(or_address):
    """Return True iff or_address is a well-formed (ip, port) pair.

    or_address must unpack into exactly two items: ``ip`` must be accepted
    by is_valid_ip() and ``port`` must be an int between 1 and 65535
    inclusive.  Port sets and port ranges are not accepted here.

    Any other input -- including None or an object without a length --
    yields False instead of raising.
    """
    try:
        if len(or_address) != 2:
            return False
        ip, port = or_address
    except (TypeError, ValueError):
        # not a sized, two-item container at all
        return False

    # Exact type check on purpose: bool (an int subclass) is not a port.
    if type(port) is not int:
        return False
    if not (1 <= port <= 65535):
        return False
    # Cheap port checks run first; the address is validated last.
    return is_valid_ip(ip)
+
toHex = binascii.b2a_hex
fromHex = binascii.a2b_hex
@@ -105,12 +114,15 @@ class Bridge:
## no spaces.
## running,stable -- DOCDOC
## blockingCountries -- list of country codes blocking this bridge
- def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None):
+ def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None,
+ or_addresses=None):
"""Create a new Bridge. One of fingerprint and id_digest must be
set."""
self.nickname = nickname
self.ip = ip
self.orport = orport
+ if not or_addresses: or_addresses = {}
+ self.or_addresses = or_addresses
self.running = self.stable = None
self.blockingCountries = None
if id_digest is not None:
@@ -132,15 +144,63 @@ class Bridge:
def __repr__(self):
    """Return a piece of python that evaluates to this bridge.

    The or_addresses keyword is only included when the bridge has at
    least one or-address.
    """
    fields = (self.nickname, self.ip, self.orport, self.fingerprint)
    if not self.or_addresses:
        return "Bridge(%r,%r,%d,%r)" % fields
    return "Bridge(%r,%r,%d,%r,or_addresses=%s)" % (
        fields + (self.or_addresses,))
def getConfigLine(self, includeFingerprint=False,
                  selectFromORAddresses=False,
                  needIPv4=True, needIPv6=False):
    """Return a line describing this bridge for inclusion in a torrc.

    includeFingerprint -- append the bridge fingerprint to the line.
    selectFromORAddresses -- pick the address:port at random from
        self.or_addresses instead of using the primary ip:orport.
    needIPv4, needIPv6 -- address family to select from or_addresses.
        A bridge may have both families but only one is returned;
        needIPv4 takes precedence when both are set.

    The primary self.ip:self.orport is used whenever
    selectFromORAddresses is false, the bridge has no or-addresses, or
    none of its or-addresses belongs to the requested family.
    """
    # Start from the primary address so every path below ends up with a
    # usable ip/orport, including "or-addresses exist but none match".
    ip = self.ip
    orport = self.orport

    if selectFromORAddresses and self.or_addresses:
        # bridges may have both classes. we only return one.
        if needIPv4:
            wanted = ipaddr.IPv4Address
        elif needIPv6:
            wanted = ipaddr.IPv6Address
        else:
            wanted = None

        #XXX: we could instead have two lists of or-addresses
        candidates = []
        if wanted is not None:
            # a real list: random.choice() needs a sequence
            candidates = [(address, portlist)
                          for address, portlist in self.or_addresses.items()
                          if isinstance(address, wanted)]

        if candidates:
            address, portlist = random.choice(candidates)
            if isinstance(address, ipaddr.IPv6Address):
                # IPv6 literals are bracketed so the port stays unambiguous
                ip = "[%s]" % address
            else:
                ip = "%s" % address
            # getPort() hands out the PortList's ports one per call
            orport = portlist.getPort()

    if includeFingerprint:
        return "bridge %s:%d %s" % (ip, orport, self.fingerprint)
    return "bridge %s:%d" % (ip, orport)
+
def getAllConfigLines(self, includeFingerprint=False):
    """Generator. Yield every valid config line for this bridge.

    One line is produced for each port of each entry in
    self.or_addresses; IPv6 addresses are wrapped in square brackets.
    When includeFingerprint is true the fingerprint is appended.
    """
    # warning: a bridge with large port ranges may generate thousands
    # of lines of output
    for address, portlist in self.or_addresses.items():
        is_ipv6 = type(address) is ipaddr.IPv6Address
        host = ("[%s]" if is_ipv6 else "%s") % address

        for orport in portlist:
            if not includeFingerprint:
                yield "bridge %s:%d" % (host, orport)
            else:
                yield "bridge %s:%d %s" % (host, orport, self.fingerprint)
def assertOK(self):
assert is_valid_ip(self.ip)
@@ -164,10 +224,37 @@ class Bridge:
return False
def parseDescFile(f, bridge_purpose='bridge'):
- """Generator. Parses a cached-descriptors file 'f', and yields a Bridge
- object for every entry whose purpose matches bridge_purpose.
+ """Generator. Parses a cached-descriptors file 'f' and yields a Bridge object
+ for every entry whose purpose matches bridge_purpose.
+ This Generator understands the new descriptor format described in
+ 186-multiple-orports.txt
+
+ The new specification provides for specifying multiple ORports as well
+ as supporting new address format for IPv6 addresses.
+
+ The router descriptor "or-address" may occur zero, one, or multiple times.
+ parseDescFile adds each ADDRESS:PORTSPEC to the Bridge.or_addresses list.
+
+ The "or-address" should not duplicate the address:port pair from the "router"
+ description. (Should we try to catch this case?)
+
+ A node may not list more than 8 or-address lines.
+ (should we try to enforce this too?)
+
+ Here is the new format:
+
+ or-address SP ADDRESS ":" PORTLIST NL
+ ADDRESS = IP6ADDR | IP4ADDR
+ IPV6ADDR = an ipv6 address, surrounded by square brackets.
+ IPV4ADDR = an ipv4 address, represented as a dotted quad.
+ PORTLIST = PORTSPEC | PORTSPEC "," PORTLIST
+ PORTSPEC = PORT | PORT "-" PORT
+ PORT = a number between 1 and 65535 inclusive.
"""
+
nickname = ip = orport = fingerprint = purpose = None
+ num_or_address_lines = 0
+ or_addresses = {}
for line in f:
line = line.strip()
@@ -185,14 +272,172 @@ def parseDescFile(f, bridge_purpose='bridge'):
orport = int(items[3])
elif line.startswith("fingerprint "):
fingerprint = line[12:].replace(" ", "")
+ elif line.startswith("or-address "):
+ if num_or_address_lines < 8:
+ line = line[11:]
+ address,portlist = parseORAddressLine(line)
+ try:
+ or_addresses[address].add(portlist)
+ except KeyError:
+ or_addresses[address] = portlist
+ else:
+ logging.warn("Skipping extra or-address line "\
+ "from Bridge with ID %r" % id)
+ num_or_address_lines += 1
elif line.startswith("router-signature"):
purposeMatches = (purpose == bridge_purpose or
bridge_purpose is None)
if purposeMatches and nickname and ip and orport and fingerprint:
- b = Bridge(nickname, ip, orport, fingerprint)
+ b = Bridge(nickname, ip, orport, fingerprint,
+ or_addresses=or_addresses)
b.assertOK()
yield b
- nickname = ip = orport = fingerprint = purpose = None
+ nickname = ip = orport = fingerprint = purpose = None
+ num_or_address_lines = 0
+ or_addresses = {}
+
class PortList:
    """Container for the ports named by an or-address PORTLIST.

    Single ports are kept in the set ``self.ports``.  Port ranges are kept
    in ``self.ranges`` as a list of inclusive ``(low, high)`` tuples which
    is sorted and free of overlaps after every add().  Ranges that merely
    touch (1-5 and 6-10) are kept separate.  A single port that falls
    inside a range is not stored separately.

    Construct with no arguments (empty list), with a PORTLIST string such
    as ``"80,443,9000-9010"``, with a single port, with another PortList,
    or with two ports ``(low, high)`` describing a range.
    """

    def __init__(self, *args, **kwargs):
        # **kwargs is accepted for interface compatibility and is unused.
        self.ports = set()
        self.ranges = []
        # iterator used by getPort(); rebuilt lazily after each change
        self.portdispenser = None
        if len(args) == 1:
            spec = args[0]
            if isinstance(spec, str):
                self._add_portlist_string(spec)
            else:
                self.add(spec)
        elif len(args) == 2:
            low, high = args
            self.add(low, high)

    def _add_portlist_string(self, spec):
        """Add the ports described by a PORTLIST string.

        Only the first 16 comma-separated entries are considered.  Parsing
        stops at the first entry containing a non-integer; entries with
        more than one '-' are ignored.
        """
        # truncate per spec
        for entry in spec.split(',')[:16]:
            try:
                bounds = [int(x) for x in entry.split('-')]
            except ValueError:
                break
            if len(bounds) == 1:
                self.add(bounds[0])
            elif len(bounds) == 2:
                self.add(bounds[0], bounds[1])

    def _sanitycheck(self, val):
        """Raise AssertionError unless val is an int in 1..65535.

        Raised explicitly (not via the assert statement) so the check is
        still enforced when Python runs with -O.  The exception type is
        unchanged for existing callers.
        """
        if type(val) is not int or not (0 < val <= 65535):
            raise AssertionError("invalid port: %r" % (val,))

    def _in_ranges(self, port):
        """Return True if port lies inside one of the stored ranges."""
        return any(low <= port <= high for low, high in self.ranges)

    def __contains__(self, val1, val2=None):
        """Membership test for a single port or, when called directly with
        two arguments, for a port range.

        ``port in portlist`` is True if the port was added on its own or
        lies inside a stored range.  ``portlist.__contains__(a, b)`` is
        True only if both ends lie inside the same stored range.
        Raises AssertionError for values that are not valid ports.
        """
        self._sanitycheck(val1)
        # as in add(): a falsy or identical second value means "one port"
        if val2 and val2 != val1:
            self._sanitycheck(val2)
            return any(low <= val1 <= high and low <= val2 <= high
                       for low, high in self.ranges)
        return val1 in self.ports or self._in_ranges(val1)

    def add(self, val1, val2=None):
        """Add a single port, a port range, or the contents of a PortList.

        add(port) adds one port; add(a, b) adds the inclusive range
        between a and b in either order (a == b, or a falsy b, is treated
        as the single port a).  add(portlist) merges another PortList and
        ignores val2.  Raises AssertionError for invalid port values.
        """
        if isinstance(val1, PortList):
            # snapshot first so that portlist.add(portlist) is safe
            for low, high in list(val1.ranges):
                self.add(low, high)
            for port in list(val1.ports):
                self.add(port)
            return

        self._sanitycheck(val1)

        # add as a single port instead
        if val2 == val1:
            val2 = None
        if val2:
            self._sanitycheck(val2)
            self.ranges.append((min(val1, val2), max(val1, val2)))
            # reduce to largest continuous ranges
            self._squash()
        else:
            if val1 in self:
                return
            self.ports.add(val1)

        # reset port dispenser: the set of valid ports has changed
        self.portdispenser = None

    def getPort(self):
        """Return a single valid port.

        Successive calls walk through every port in iteration order and
        start over once the end is reached.  Raises StopIteration if the
        list is empty.
        """
        if self.portdispenser is None:
            self.portdispenser = iter(self)
        try:
            return next(self.portdispenser)
        except StopIteration:
            # exhausted: wrap around to the first port again
            self.portdispenser = iter(self)
            return next(self.portdispenser)

    def _squash(self):
        """Merge intersecting ranges and drop ports enclosed by a range."""
        merged = []
        for low, high in sorted(self.ranges):
            if merged and low <= merged[-1][1]:
                # intersection: extend the previous range if needed
                if high > merged[-1][1]:
                    merged[-1] = (merged[-1][0], high)
            else:
                merged.append((low, high))
        self.ranges = merged

        # drop enclosed ports
        self.ports = set(p for p in self.ports if not self._in_ranges(p))

    def __iter__(self):
        """Yield single ports in ascending order, then every port of each
        range in turn."""
        for port in sorted(self.ports):
            yield port
        for low, high in self.ranges:
            # ranges are inclusive; a plain counter avoids building a list
            port = low
            while port <= high:
                yield port
                port += 1

    def __str__(self):
        """Return the list as "p1, p2, low-high, ..." (single ports in
        ascending order first, then ranges)."""
        parts = ["%s" % port for port in sorted(self.ports)]
        parts.extend("%s-%s" % (low, high) for low, high in self.ranges)
        return ", ".join(parts)

    def __repr__(self):
        return "PortList('%s')" % self.__str__()
+
# Patterns for the ADDRESS ":" PORTLIST part of an or-address line.
# Group 1 is the address (without brackets for IPv6), group 2 the rest of
# the line.  Compiled once here rather than on every call.
_OR_ADDRESS_IPV4_RE = re.compile(r"((?:\d{1,3}\.?){4}):(.*$)")
_OR_ADDRESS_IPV6_RE = re.compile(r"\[([a-fA-F0-9:]+)\]:(.*$)")


def parseORAddressLine(line):
    """Parse the ADDRESS:PORTLIST part of an or-address line.

    ``line`` is the descriptor line with the leading "or-address " keyword
    already removed.  Returns ``(address, portlist)`` where address is the
    object returned by ipaddr.IPAddress() and portlist is a PortList built
    from the text after the colon.  Returns ``(None, None)`` when the line
    matches neither address form or ipaddr rejects the address.
    """
    # try regexp to discover ip version
    for regex in (_OR_ADDRESS_IPV4_RE, _OR_ADDRESS_IPV6_RE):
        m = regex.match(line)
        if not m:
            continue
        try:
            address = ipaddr.IPAddress(m.group(1))
        except ValueError:
            # looked like an address but is not a valid one (e.g. 999.1.1.1)
            return None, None
        return address, PortList(m.group(2))
    return None, None
def parseStatusFile(f):
"""DOCDOC"""
More information about the tor-commits
mailing list