[tor-commits] [fallback-scripts/master] script: Make most script variables configurable using env vars
teor at torproject.org
teor at torproject.org
Thu Aug 1 06:52:39 UTC 2019
commit 18a5ef477e88b96af7a7d63ece76cf65cc9fb061
Author: teor <teor at torproject.org>
Date: Thu Jun 27 14:28:29 2019 +1000
script: Make most script variables configurable using env vars
Tested using:
$ TOR_FB_MAX_FALLBACK_COUNT=10 ./updateFallbackDirs.py
Part of #29100.
---
updateFallbackDirs.py | 181 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 127 insertions(+), 54 deletions(-)
diff --git a/updateFallbackDirs.py b/updateFallbackDirs.py
index d492da5..229967f 100755
--- a/updateFallbackDirs.py
+++ b/updateFallbackDirs.py
@@ -3,19 +3,22 @@
# Usage:
#
# Regenerate the list:
-# $ FB_MODE=""
-# $ FB_DATE=`date -u "+%Y-%m-%d-%H-%M-%S"`
-# $ FB_COUNTRY=ZZ
-# $ FB_COMMIT=`git rev-parse --short=16 HEAD`
-# $ ./updateFallbackDirs.py $FB_MODE \
-# > fallback_dirs_"$FB_DATE"_"$FB_COUNTRY"_"$FB_COMMIT".inc \
-# 2> fallback_dirs_"$FB_DATE"_"$FB_COUNTRY"_"$FB_COMMIT".log
+# $ TOR_FB_MODE=""
+# $ TOR_FB_DATE=`date -u "+%Y-%m-%d-%H-%M-%S"`
+# $ TOR_FB_COUNTRY=ZZ
+# $ TOR_FB_COMMIT=`git rev-parse --short=16 HEAD`
+# $ ./updateFallbackDirs.py $TOR_FB_MODE \
+# > fallback_dirs_"$TOR_FB_DATE"_"$TOR_FB_COUNTRY"_"$TOR_FB_COMMIT".inc \
+# 2> fallback_dirs_"$TOR_FB_DATE"_"$TOR_FB_COUNTRY"_"$TOR_FB_COMMIT".log
# $ cp fallback_dirs_*.inc ../tor/src/app/config/fallback_dirs.inc
#
# Check the existing list:
-# $ FB_MODE="check_existing"
+# $ TOR_FB_MODE="check_existing"
# Then use the commands above.
#
+# Most script variables can be overridden using TOR_FB_* environment
+# variables.
+#
# This script should be run from a stable, reliable network connection,
# with no other network activity (and not over tor).
# If this is not possible, please disable:
@@ -40,6 +43,7 @@ import gzip
import hashlib
import json
import math
+import os
import os.path
import re
import string
@@ -70,6 +74,24 @@ except ImportError:
## Top-Level Configuration
+def getenv_conf(var_name, default_val, type_fn):
+  """Get var_name from the environment, using default_val if it is unset.
+     Cast the result using type_fn.
+
+     When type_fn is bool and the variable is set, the strings "", "0",
+     "false", "no", and "off" (in any case, ignoring surrounding whitespace)
+     are False, and every other string is True. bool() can't be applied
+     directly to the environment string, because every non-empty string is
+     True, including "False" and "0"."""
+  var_value = os.getenv(var_name)
+  if var_value is None:
+    # Unset: cast the default, exactly as before.
+    return type_fn(default_val)
+  if type_fn is bool:
+    return var_value.strip().lower() not in ('', '0', 'false', 'no', 'off')
+  return type_fn(var_value)
+
+def opt(type_fn):
+  """Higher-order function, which returns a function that converts a value
+     using type_fn, but returns None if the conversion fails.
+
+     type_fn is a one-argument conversion function, such as int or float."""
+  def opt_type_fn(var_value):
+    """Converts its argument var_value using the type_fn passed to the outer
+       function, and returns the result.
+       If the conversion fails, returns None."""
+    try:
+      return type_fn(var_value)
+    except (TypeError, ValueError):
+      # TypeError: var_value has an unsupported type, like int(None) when
+      # the default value is None.
+      # ValueError: var_value is a string that doesn't parse, like
+      # int("None") when the environment variable is set to "None".
+      return None
+  return opt_type_fn
+
# We use semantic versioning: https://semver.org
# In particular:
# * major changes include removing a mandatory field, or anything else that
@@ -77,32 +99,38 @@ except ImportError:
# * minor changes include adding a field,
# * patch changes include changing header comments or other unstructured
# content
+# These variables are not configurable, because format changes need a spec
+# and code update.
FALLBACK_FORMAT_VERSION = '2.0.0'
SECTION_SEPARATOR_BASE = '====='
SECTION_SEPARATOR_COMMENT = '/* ' + SECTION_SEPARATOR_BASE + ' */'
# Output all candidate fallbacks, or only output selected fallbacks?
-OUTPUT_CANDIDATES = False
+OUTPUT_CANDIDATES = getenv_conf('TOR_FB_OUTPUT_CANDIDATES',
+ False, bool)
# Perform DirPort checks over IPv4?
# Change this to False if IPv4 doesn't work for you, or if you don't want to
# download a consensus for each fallback
# Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
-PERFORM_IPV4_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else True
+PERFORM_IPV4_DIRPORT_CHECKS = getenv_conf('TOR_FB_PERFORM_IPV4_DIRPORT_CHECKS',
+ not OUTPUT_CANDIDATES, bool)
# Perform DirPort checks over IPv6?
-# If you know IPv6 works for you, set this to True
-# This will exclude IPv6 relays without an IPv6 DirPort configured
-# So it's best left at False until #18394 is implemented
-# Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
-PERFORM_IPV6_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else False
+# There are no IPv6 DirPorts in the Tor protocol, so we disable this option by
+# default. When #18394 is implemented, we'll be able to check IPv6 ORPorts.
+PERFORM_IPV6_DIRPORT_CHECKS = getenv_conf('TOR_FB_PERFORM_IPV6_DIRPORT_CHECKS',
+ False, bool)
# Must relays be running now?
-MUST_BE_RUNNING_NOW = (PERFORM_IPV4_DIRPORT_CHECKS
- or PERFORM_IPV6_DIRPORT_CHECKS)
+MUST_BE_RUNNING_NOW = getenv_conf('TOR_FB_MUST_BE_RUNNING_NOW',
+ (PERFORM_IPV4_DIRPORT_CHECKS
+ or PERFORM_IPV6_DIRPORT_CHECKS), bool)
# Clients have been using microdesc consensuses by default for a while now
-DOWNLOAD_MICRODESC_CONSENSUS = True
+DOWNLOAD_MICRODESC_CONSENSUS = (
+ getenv_conf('TOR_FB_DOWNLOAD_MICRODESC_CONSENSUS',
+ True, bool))
# If a relay delivers an invalid consensus, if it will become valid less than
# this many seconds in the future, or expired less than this many seconds ago,
@@ -126,44 +154,56 @@ DOWNLOAD_MICRODESC_CONSENSUS = True
# Clients on 0.3.5.5-alpha? and earlier also won't select guards from
# consensuses that have expired, but can bootstrap if they already have guards
# in their state file.
-REASONABLY_LIVE_TIME = 24*60*60
+REASONABLY_LIVE_TIME = getenv_conf('TOR_FB_REASONABLY_LIVE_TIME',
+ 24*60*60, int)
# Output fallback name, flags, bandwidth, and ContactInfo in a C comment?
-OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False
+OUTPUT_COMMENTS = getenv_conf('TOR_FB_OUTPUT_COMMENTS',
+ OUTPUT_CANDIDATES, bool)
# Output matching ContactInfo in fallbacks list?
# Useful if you're trying to contact operators
-CONTACT_COUNT = True if OUTPUT_CANDIDATES else False
+CONTACT_COUNT = getenv_conf('TOR_FB_CONTACT_COUNT',
+ OUTPUT_CANDIDATES, bool)
# How the list should be sorted:
# fingerprint: is useful for stable diffs of fallback lists
# measured_bandwidth: is useful when pruning the list based on bandwidth
# contact: is useful for contacting operators once the list has been pruned
-OUTPUT_SORT_FIELD = 'contact' if OUTPUT_CANDIDATES else 'fingerprint'
+OUTPUT_SORT_FIELD = getenv_conf('TOR_FB_OUTPUT_SORT_FIELD',
+ ('contact' if OUTPUT_CANDIDATES
+ else 'fingerprint'), str)
## OnionOO Settings
-ONIONOO = 'https://onionoo.torproject.org/'
+ONIONOO = getenv_conf('TOR_FB_ONIONOO',
+ 'https://onionoo.torproject.org/', str)
#ONIONOO = 'https://onionoo.thecthulhu.com/'
# Don't bother going out to the Internet, just use the files available locally,
# even if they're very old
-LOCAL_FILES_ONLY = False
+LOCAL_FILES_ONLY = getenv_conf('TOR_FB_LOCAL_FILES_ONLY',
+ False, bool)
-## Whitelist / Blacklist Filter Settings
+## Whitelist Filter Settings
-# The whitelist contains entries that are included if all attributes match
-# (IPv4, dirport, orport, id, and optionally IPv6 and IPv6 orport)
+# The whitelist contains entries that are included if one of the unique
+# attributes matches (IPv4, id, or IPv6 (optional))
# What happens to entries not in whitelist?
# When True, they are included, when False, they are excluded
-INCLUDE_UNLISTED_ENTRIES = True if OUTPUT_CANDIDATES else False
+INCLUDE_UNLISTED_ENTRIES = getenv_conf('TOR_FB_INCLUDE_UNLISTED_ENTRIES',
+ OUTPUT_CANDIDATES, bool)
-WHITELIST_FILE_NAME = 'fallback.whitelist'
-FALLBACK_FILE_NAME = '../tor/src/app/config/fallback_dirs.inc'
+WHITELIST_FILE_NAME = getenv_conf('TOR_FB_WHITELIST_FILE_NAME',
+ 'fallback.whitelist', str)
+FALLBACK_FILE_NAME = (
+ getenv_conf('TOR_FB_FALLBACK_FILE_NAME',
+ '../tor/src/app/config/fallback_dirs.inc', str))
-# The number of bytes we'll read from a filter file before giving up
-MAX_LIST_FILE_SIZE = 1024 * 1024
+# The number of bytes we'll read from the whitelist file before giving up
+MAX_LIST_FILE_SIZE = getenv_conf('TOR_FB_MAX_LIST_FILE_SIZE',
+ 1024 * 1024, int)
## Eligibility Settings
@@ -172,28 +212,39 @@ MAX_LIST_FILE_SIZE = 1024 * 1024
# meant that we had to rebuild the list more often. We want fallbacks to be
# stable for 2 years, so we set it to a few months.
#
-# If a relay changes address or port, that's it, it's not useful any more,
-# because clients can't find it
-ADDRESS_AND_PORT_STABLE_DAYS = 90
+# If a relay changes address or port, it's not useful any more,
+# because clients with the old hard-coded address and port can't find it
+ADDRESS_AND_PORT_STABLE_DAYS = (
+ getenv_conf('TOR_FB_ADDRESS_AND_PORT_STABLE_DAYS',
+ 90, int))
# We ignore relays that have been down for more than this period
-MAX_DOWNTIME_DAYS = 0 if MUST_BE_RUNNING_NOW else 7
+MAX_DOWNTIME_DAYS = getenv_conf('TOR_FB_MAX_DOWNTIME_DAYS',
+ 0 if MUST_BE_RUNNING_NOW else 7, int)
# FallbackDirs must have a time-weighted-fraction that is greater than or
# equal to:
# Mirrors that are down half the time are still useful half the time
-CUTOFF_RUNNING = .50
-CUTOFF_V2DIR = .50
+# (But we need 75% of the list to be up on average, or we start getting
+# fallback warnings from DocTor.)
+CUTOFF_RUNNING = getenv_conf('TOR_FB_CUTOFF_RUNNING',
+ .50, float)
+CUTOFF_V2DIR = getenv_conf('TOR_FB_CUTOFF_V2DIR',
+ .50, float)
# Guard flags are removed for some time after a relay restarts, so we ignore
# the guard flag.
-CUTOFF_GUARD = .00
+CUTOFF_GUARD = getenv_conf('TOR_FB_CUTOFF_GUARD',
+ .00, float)
# FallbackDirs must have a time-weighted-fraction that is less than or equal
# to:
# .00 means no bad exits
-PERMITTED_BADEXIT = .00
+PERMITTED_BADEXIT = getenv_conf('TOR_FB_PERMITTED_BADEXIT',
+ .00, float)
# older entries' weights are adjusted with ALPHA^(age in days)
-AGE_ALPHA = 0.99
+AGE_ALPHA = getenv_conf('TOR_FB_AGE_ALPHA',
+ .99, float)
# this factor is used to scale OnionOO entries to [0,1]
+# it's not configurable, because it's unlikely to change
ONIONOO_SCALE_ONE = 999.
## Fallback Count Limits
@@ -201,12 +252,23 @@ ONIONOO_SCALE_ONE = 999.
# The target for these parameters is 20% of the guards in the network
# This is around 200 as of October 2015
_FB_POG = 0.2
-FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else _FB_POG
+# None means no limit on the number of fallbacks.
+# Set env TOR_FB_FALLBACK_PROPORTION_OF_GUARDS="None" to have no limit.
+FALLBACK_PROPORTION_OF_GUARDS = (
+ getenv_conf('TOR_FB_FALLBACK_PROPORTION_OF_GUARDS',
+ None if OUTPUT_CANDIDATES else _FB_POG, opt(float)))
# Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
-MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 200
+# None means no limit on the number of fallbacks.
+# Set env TOR_FB_MAX_FALLBACK_COUNT="None" to have no limit.
+MAX_FALLBACK_COUNT = (
+ getenv_conf('TOR_FB_MAX_FALLBACK_COUNT',
+ None if OUTPUT_CANDIDATES else 200, opt(int)))
# Emit a C #error if the number of fallbacks is less than expected
-MIN_FALLBACK_COUNT = 0 if OUTPUT_CANDIDATES else MAX_FALLBACK_COUNT*0.5
+# Set to 0 to have no minimum.
+# With no maximum (None), there is no expected count, so default to 0.
+MIN_FALLBACK_COUNT = (
+  getenv_conf('TOR_FB_MIN_FALLBACK_COUNT',
+              0 if (OUTPUT_CANDIDATES or MAX_FALLBACK_COUNT is None)
+              else MAX_FALLBACK_COUNT*0.5, int))
# The maximum number of fallbacks on the same address, contact, or family
#
@@ -217,11 +279,16 @@ MIN_FALLBACK_COUNT = 0 if OUTPUT_CANDIDATES else MAX_FALLBACK_COUNT*0.5
#
# We also don't want too much of the list to go down if a single operator
# has to move all their relays.
-MAX_FALLBACKS_PER_IP = 1
-MAX_FALLBACKS_PER_IPV4 = MAX_FALLBACKS_PER_IP
-MAX_FALLBACKS_PER_IPV6 = MAX_FALLBACKS_PER_IP
-MAX_FALLBACKS_PER_CONTACT = 7
-MAX_FALLBACKS_PER_FAMILY = 7
+MAX_FALLBACKS_PER_IP = getenv_conf('TOR_FB_MAX_FALLBACKS_PER_IP',
+ 1, int)
+MAX_FALLBACKS_PER_IPV4 = getenv_conf('TOR_FB_MAX_FALLBACKS_PER_IPV4',
+ MAX_FALLBACKS_PER_IP, int)
+MAX_FALLBACKS_PER_IPV6 = getenv_conf('TOR_FB_MAX_FALLBACKS_PER_IPV6',
+ MAX_FALLBACKS_PER_IP, int)
+MAX_FALLBACKS_PER_FAMILY = getenv_conf('TOR_FB_MAX_FALLBACKS_PER_FAMILY',
+ 7, int)
+MAX_FALLBACKS_PER_CONTACT = getenv_conf('TOR_FB_MAX_FALLBACKS_PER_CONTACT',
+ MAX_FALLBACKS_PER_FAMILY, int)
## Fallback Bandwidth Requirements
@@ -229,7 +296,8 @@ MAX_FALLBACKS_PER_FAMILY = 7
# to make sure we aren't further overloading exits
# (Set to 1.0, because we asked that only lightly loaded exits opt-in,
# and the extra load really isn't that much for large relays.)
-EXIT_BANDWIDTH_FRACTION = 1.0
+EXIT_BANDWIDTH_FRACTION = getenv_conf('TOR_FB_EXIT_BANDWIDTH_FRACTION',
+ 1.0, float)
# If a single fallback's bandwidth is too low, it's pointless adding it
# We expect fallbacks to handle an extra 10 kilobytes per second of traffic
@@ -237,17 +305,22 @@ EXIT_BANDWIDTH_FRACTION = 1.0
#
# We convert this to a consensus weight before applying the filter,
# because all the bandwidth amounts are specified by the relay
-MIN_BANDWIDTH = 50.0 * 10.0 * 1024.0
+MIN_BANDWIDTH = getenv_conf('TOR_FB_MIN_BANDWIDTH',
+ 50.0 * 10.0 * 1024.0, float)
-# Clients will time out after 30 seconds trying to download a consensus
+# Clients will time out (or users will give up) after 30 seconds trying to
+# download a consensus
# So allow fallback directories half that to deliver a consensus
# The exact download times might change based on the network connection
# running this script, but only by a few seconds
# There is also about a second of python overhead
-CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
+CONSENSUS_DOWNLOAD_SPEED_MAX = (
+ getenv_conf('TOR_FB_CONSENSUS_DOWNLOAD_SPEED_MAX',
+ 15.0, float))
# If the relay fails a consensus check, retry the download
# This avoids delisting a relay due to transient network conditions
-CONSENSUS_DOWNLOAD_RETRY = True
+CONSENSUS_DOWNLOAD_RETRY = getenv_conf('TOR_FB_CONSENSUS_DOWNLOAD_RETRY',
+ True, bool)
## Parsing Functions
More information about the tor-commits
mailing list