[tor-commits] [stegotorus/master] Rough draft benchmarking tools.
zwol at torproject.org
Fri Jul 20 23:17:06 UTC 2012
commit 704c053fbae93353976e3f7abf585b6283edb3a9
Author: Zack Weinberg <zackw at panix.com>
Date: Sun Feb 12 20:36:56 2012 -0800
Rough draft benchmarking tools.
---
scripts/benchmark-plot.R | 7 +
scripts/benchmark.py | 410 +++++++++++++++++++++++++++++++++++
scripts/bm-fixedrate-cgi.c | 158 ++++++++++++++
scripts/bm-genfiles.py | 162 ++++++++++++++
scripts/bm-mcurl.c | 196 +++++++++++++++++
scripts/tool_urlglob.c | 516 ++++++++++++++++++++++++++++++++++++++++++++
scripts/tool_urlglob.h | 69 ++++++
7 files changed, 1518 insertions(+), 0 deletions(-)
diff --git a/scripts/benchmark-plot.R b/scripts/benchmark-plot.R
new file mode 100644
index 0000000..a58ae0a
--- /dev/null
+++ b/scripts/benchmark-plot.R
@@ -0,0 +1,7 @@
+#! /usr/bin/Rscript
+
+suppressPackageStartupMessages({
+ library(ggplot2)
+})
+
+lf.direct <- read.csv("bench-lf-direct.tab", header=TRUE)
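
benchmark-plot.R is only a stub so far: it expects per-benchmark tables such as bench-lf-direct.tab, while benchmark.py (below) emits a single combined CSV stream with a benchmark,relay,cap,obs,up,down header. The following is a minimal sketch of how that stream could be split into header-bearing .tab files; the bench-*.tab naming scheme and the helper itself are assumptions for illustration, not anything this commit defines.

    #! /usr/bin/python
    # Hypothetical helper (not part of this commit): split the combined CSV
    # that benchmark.py writes on stdout into one .tab file per
    # (benchmark, relay) pair, so an R script can read.csv() each of them.

    import csv
    import sys

    def split_report(infp, prefix="bench"):
        writers = {}        # (benchmark, relay) -> csv.writer
        files = {}
        reader = csv.reader(infp)
        header = next(reader)           # benchmark,relay,cap,obs,up,down
        for row in reader:
            key = (row[0], row[1])
            if key not in writers:
                fname = "%s-%s-%s.tab" % (prefix, row[0].replace('.', '-'), row[1])
                files[key] = open(fname, "w")
                writers[key] = csv.writer(files[key])
                writers[key].writerow(header)
            writers[key].writerow(row)
        for f in files.values():
            f.close()

    if __name__ == '__main__':
        split_report(sys.stdin)
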
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
new file mode 100755
index 0000000..c6487b6
--- /dev/null
+++ b/scripts/benchmark.py
@@ -0,0 +1,410 @@
+#! /usr/bin/python
+
+# Stegotorus benchmarking script.
+# Several different computers are involved:
+#
+# - the "client" is the machine you run this script on; the workload
+# generator will run there, as will the StegoTorus and Tor clients.
+#
+# - the "proxy" is a machine that you can ssh to with no password.
+# It will run the StegoTorus and Tor bridge servers.
+#
+# - the "target" is the HTTP server that will be contacted in various ways.
+#
+# bm-genfiles.py must have been run on this server to create file
+# trees named 'fixed' and 'pareto' which appear as direct children
+# of the root URL. bm-fixedrate-cgi.c must have been compiled for
+# that server and appear as /bm-fixedrate.cgi.
+#
+# Software you need on the client machine:
+#
+# bwm-ng: http://www.gropp.org/?id=projects&sub=bwm-ng
+# curl: http://curl.haxx.se/
+# httperf: http://www.hpl.hp.com/research/linux/httperf/
+# tsocks: http://tsocks.sourceforge.net/about.php
+# tor: https://torproject.org/
+# stegotorus: you already have it :)
+#
+# Software you need on the proxy machine:
+#
+# nylon: http://monkey.org/~marius/pages/?page=nylon
+# tor, stegotorus
+#
+# You configure this script by setting variables below.
+
+# Client host
+
+CLIENT_IP = "99.113.33.155"
+CLIENT_IFACE = "eth0"
+
+# Proxy host
+
+PROXY = "sandbox03.sv.cmu.edu"
+PROXY_IP = "209.129.244.30" # some things won't do DNS for this
+PROXY_PORT = "1080"
+PROXY_SSH_CMD = ("ssh", PROXY)
+
+# Target
+
+TARGET = "storustest.nfshost.com"
+
+# Fudge factors. For reasons not yet tracked down, bm-fixedrate
+# generates data slower than requested by a roughly constant factor;
+# this is the quick fix.
+
+FUDGE_FIXEDRATE = 2.5
+
+# Programs we need to run. Change these if any binary is not in the
+# default path or hasn't got the default name.
+# C_ - for the client. P_ - for the proxy.
+# You can NOT specify arguments here - if you need to do any
+# setup, write a wrapper script.
+
+C_bwm = "bwm-ng"
+C_curl = "curl"
+C_httperf = "httperf"
+C_storus = "stegotorus-wrapper"
+C_tor = "/usr/sbin/tor"
+C_tsocks = "/usr/lib/libtsocks.so"
+
+P_nylon = "nylon"
+P_storus = "stegotorus-wrapper"
+P_tor = "tor"
+P_python = "/usr/local/bin/python" # this must be an absolute path,
+ # it goes on a shebang line
+
+# ACTUAL PROGRAM STARTS HERE
+
+from types import MethodType
+import os
+import os.path
+import pickle
+import subprocess
+import sys
+import time
+
+def monitor(report, label, period):
+ """Monitor network utilization (bytes/sec up and down) for a
+ period of PERIOD seconds, writing the report to REPORT, labeling
+ each line with LABEL."""
+
+ bwm = subprocess.Popen((C_bwm, "-o", "csv", "-c", str(period), "-t", "1000",
+ "-u", "bytes", "-T", "rate", "-I", CLIENT_IFACE),
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ try:
+ n = 1
+ for line in bwm.stdout:
+ (stamp, iface, upbytes, dnbytes, rest) = line.split(';', 4)
+ if iface == 'total': continue
+
+ # convert to most compact possible form
+ upbytes = str(float(upbytes))
+ dnbytes = str(float(dnbytes))
+
+ report.write("%s,%d,%s,%s\n" % (label,n,upbytes,dnbytes))
+ n += 1
+ except:
+ bwm.terminate()
+ raise
+ finally:
+ bwm.wait()
+
+class ProxyProcess(object):
+ """A process running on the proxy host. It has a command line and
+ an optional config file. It is not expected to produce any output
+ (if it does, it will get dumped to this script's stdout/stderr) or
+ require any input (input is redirected from /dev/null). It is
+ expected to run until it is killed."""
+
+ @staticmethod
+ def prepare_remote():
+ remote_driver=r"""#! %s
+import pickle
+import signal
+import subprocess
+import sys
+import traceback
+
+wrote_rpid = False
+
+# Remote driver for proxy processes.
+try:
+ data = pickle.load(sys.stdin)
+ sys.stdin.close()
+ if data['cfgname']:
+ f = open(data['cfgname'], "w")
+ f.write(data['cfgdata'])
+ f.close()
+ proc = subprocess.Popen(data['args'], stdin=open("/dev/null", "r"),
+ stdout=2) # redirect child stdout to our stderr
+ sys.stdout.write(str(proc.pid) + "\n")
+ wrote_rpid = True
+ sys.stdout.close()
+ proc.wait()
+
+ # the process being killed by SIGTERM is normal
+ if proc.returncode != 0 and proc.returncode != -signal.SIGTERM:
+ raise subprocess.CalledProcessError(proc.returncode, data['args'][0])
+except:
+ traceback.print_exc()
+ if not wrote_rpid: sys.stdout.write("X\n")
+ sys.exit(1)
+
+sys.exit(0)
+""" % P_python
+ remote_setup=r"""newdriver=`mktemp ./driver.py.XXXXXX` || exit 1
+cat > "$newdriver"
+if cmp -s "$newdriver" driver.py
+then rm -f "$newdriver"
+else set -e; mv -f "$newdriver" driver.py; chmod +x driver.py
+fi
+"""
+ prep_worker = subprocess.Popen(PROXY_SSH_CMD + (remote_setup,),
+ stdin=subprocess.PIPE,
+ stdout=2)
+ prep_worker.communicate(remote_driver)
+ if prep_worker.returncode != 0:
+ raise subprocess.CalledProcessError(prep_worker.returncode,
+ 'remote_setup script')
+
+ def __init__(self, args, cfgname=None, cfgdata=None):
+ if ((cfgname is None or cfgdata is None) and
+ (cfgname is not None or cfgdata is not None)):
+ raise TypeError("either both or neither of cfgname and cfgdata"
+ " must be specified")
+
+ self._rpid = "X"
+
+ ProxyProcess.prepare_remote()
+ self._proc = subprocess.Popen(PROXY_SSH_CMD + ("./driver.py",),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ close_fds=True)
+ pickle.dump({ 'args' : args,
+ 'cfgname' : cfgname,
+ 'cfgdata' : cfgdata },
+ self._proc.stdin, 2)
+ self._proc.stdin.close()
+ self._rpid = self._proc.stdout.readline().strip()
+ if self._rpid == "X" or self._rpid == "":
+ self._rpid = "X"
+ self._proc.wait()
+ raise RuntimeError("failed to execute '%s' on proxy host"
+ % " ".join(args))
+
+ def terminate(self):
+ if self._rpid == "X": return
+ subprocess.check_call(PROXY_SSH_CMD + ("kill", self._rpid))
+
+ def kill(self):
+ if self._rpid == "X": return
+ subprocess.check_call(PROXY_SSH_CMD + ("kill", "-9", self._rpid))
+
+ # forward everything else to _proc; logic copied verbatim from
+ # http://code.activestate.com/recipes/519639-
+ # true-lieberman-style-delegation-in-python/
+ def __getattr__(self, aname):
+ target = self._proc
+ f = getattr(target, aname)
+ if isinstance(f, MethodType):
+ return MethodType(f.im_func, self, target.__class__)
+ else:
+ return f
+
+# Individual proxy-side test runners.
+def p_nylon():
+ return ProxyProcess((P_nylon, "-f", "-c", "nylon.conf"),
+ "nylon.conf",
+ """\
+[General]
+No-Simultaneous-Conn=10
+Log=0
+Verbose=0
+PIDfile=nylon.pid
+
+[Server]
+Port=%s
+Allow-IP=%s/32
+""" % (PROXY_PORT, CLIENT_IP))
+
+def p_tor_direct():
+ return ProxyProcess((P_tor, "--quiet", "-f", "tor-direct.conf"),
+ "tor-direct.conf",
+ """\
+ORPort %s
+SocksPort 0
+BridgeRelay 1
+PublishServerDescriptor 0
+ExitPolicy reject *:*
+DataDirectory .
+Log err stderr
+# unfortunately there doesn't seem to be any way to tell Tor to accept
+# OR connections from specific IP addresses only.
+""" % PROXY_PORT)
+
+class ClientProcess(subprocess.Popen):
+ """A process running on the local machine. This is probably doing
+ the meat of the work of some benchmark. Basically a shim around
+ subprocess.Popen to fix constructor arguments."""
+
+ def __init__(self, argv, envp=None):
+ if envp is not None:
+ env = os.environ.copy()
+ env.update(envp)
+ subprocess.Popen.__init__(self, argv,
+ stdin=open("/dev/null", "r"),
+ stdout=open("/dev/null", "w"),
+ stderr=subprocess.STDOUT, env=env)
+ else:
+ subprocess.Popen.__init__(self, argv,
+ stdin=open("/dev/null", "r"),
+ stdout=2)
+
+def c_tor_direct():
+ fp = open("tor-direct-client.conf", "w")
+ fp.write("""\
+ORPort 0
+SocksPort %s
+DataDirectory .
+Log err stderr
+Bridge %s:%s
+UseBridges 1
+SafeSocks 0
+""" % (PROXY_PORT, PROXY_IP, PROXY_PORT))
+ fp.close()
+ return ClientProcess((C_tor, "--quiet", "-f", "tor-direct-client.conf"))
+
+def c_curl(url, proxyhost):
+ return ClientProcess((C_curl, "-s", "--socks5-hostname",
+ proxyhost + ":" + PROXY_PORT,
+ url, "-o", "/dev/null"))
+
+def c_httperf(prefix, rate, proxyhost):
+ fp = open("tsocks.conf", "w")
+ fp.write("""\
+server = %s
+local = %s/255.255.255.255
+server_port = %s
+server_type = 5
+""" % (proxyhost, proxyhost, PROXY_PORT))
+ fp.close()
+ return ClientProcess((C_httperf, "--hog",
+ "--server=" + TARGET,
+ "--uri=" + prefix,
+ "--period=" + str(rate),
+ "--num-calls=5", "--num-conns=2000",
+ "--wset=10000,1"),
+ { 'LD_PRELOAD' : C_tsocks,
+ 'TSOCKS_CONF_FILE' :
+ os.path.join(os.getcwd(), "tsocks.conf") })
+
+# Benchmarks.
+
+def bench_fixedrate_direct(report):
+ client = None
+ proxy = None
+ try:
+ proxy = p_nylon()
+
+ for cap in range(10, 810, 10):
+ sys.stderr.write("fixedrate,direct,%d\n" % (cap * 1000))
+ try:
+ client = c_curl('http://' + TARGET + '/bm-fixedrate.cgi/' +
+ str(int(cap * 1000 * FUDGE_FIXEDRATE)),
+ PROXY)
+ monitor(report, "fixedrate,direct,%d" % (cap * 1000), 60)
+ finally:
+ if client is not None:
+ client.terminate()
+ client.wait()
+ client = None
+ finally:
+ if proxy is not None:
+ proxy.terminate()
+ proxy.wait()
+
+def bench_fixedrate_tor(report):
+ client = None
+ proxy = None
+ proxyl = None
+ try:
+ proxy = p_tor_direct()
+ proxyl = c_tor_direct()
+ time.sleep(5) # tor startup is slow
+
+ for cap in range(10,810,10):
+ sys.stderr.write("fixedrate,tor,%d\n" % (cap * 1000))
+ try:
+ client = c_curl('http://' + TARGET + '/bm-fixedrate.cgi/' +
+ str(int(cap * 1000 * FUDGE_FIXEDRATE)),
+ '127.0.0.1')
+ monitor(report, "fixedrate,tor,%d" % (cap * 1000), 60)
+ finally:
+ if client is not None:
+ client.terminate()
+ client.wait()
+ client = None
+ finally:
+ if proxy is not None:
+ proxy.terminate()
+ proxy.wait()
+ if proxyl is not None:
+ proxyl.terminate()
+ proxyl.wait()
+
+def bench_files_direct(report, prefix):
+ client = None
+ proxy = None
+ try:
+ proxy = p_nylon()
+
+ for cps in range(1,81):
+ sys.stderr.write("files.%s,direct,%d\n" % (prefix, cps))
+ try:
+ client = c_httperf(prefix, 1./cps, PROXY_IP)
+ monitor(report, "files.%s,direct,%d" % (prefix, cps), 60)
+ finally:
+ if client is not None:
+ client.terminate()
+ client.wait()
+ client = None
+ finally:
+ if proxy is not None:
+ proxy.terminate()
+ proxy.wait()
+
+def bench_files_tor(report, prefix):
+ client = None
+ proxy = None
+ proxyl = None
+ try:
+ proxy = p_tor_direct()
+ proxyl = c_tor_direct()
+ time.sleep(5) # tor startup is slow
+
+ for cps in range(1,81):
+ sys.stderr.write("files.%s,tor,%d\n" % (prefix, cps))
+ try:
+ client = c_httperf(prefix, 1./cps, '127.0.0.1')
+ monitor(report, "files.%s,tor,%d" % (prefix, cps), 60)
+ finally:
+ if client is not None:
+ client.terminate()
+ client.wait()
+ client = None
+ finally:
+ if proxy is not None:
+ proxy.terminate()
+ proxy.wait()
+ if proxyl is not None:
+ proxyl.terminate()
+ proxyl.wait()
+
+if __name__ == '__main__':
+ sys.stdout.write("benchmark,relay,cap,obs,up,down\n")
+ bench_fixedrate_direct(sys.stdout)
+ bench_fixedrate_tor(sys.stdout)
+ bench_files_direct(sys.stdout, "fixed")
+ bench_files_tor(sys.stdout, "fixed")
+ bench_files_direct(sys.stdout, "pareto")
+ bench_files_tor(sys.stdout, "pareto")
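
Each run above produces 60 one-second samples tagged with its benchmark, relay, and cap columns. As a quick way to eyeball the results without going through R, here is a sketch (an assumption, not part of the commit) that averages the up/down byte rates per run from the CSV on stdin:

    #! /usr/bin/python
    # Sketch only: average the per-second up/down samples of each
    # (benchmark, relay, cap) run in benchmark.py's output.

    import csv
    import sys
    from collections import defaultdict

    def summarize(infp, outfp):
        acc = defaultdict(lambda: [0.0, 0.0, 0])    # key -> [up, down, samples]
        reader = csv.reader(infp)
        next(reader)                                # skip the header row
        for bench, relay, cap, obs, up, down in reader:
            a = acc[(bench, relay, int(cap))]
            a[0] += float(up)
            a[1] += float(down)
            a[2] += 1
        outfp.write("benchmark,relay,cap,mean_up,mean_down\n")
        for (bench, relay, cap), (up, down, n) in sorted(acc.items()):
            outfp.write("%s,%s,%d,%.1f,%.1f\n"
                        % (bench, relay, cap, up / n, down / n))

    if __name__ == '__main__':
        summarize(sys.stdin, sys.stdout)
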
diff --git a/scripts/bm-fixedrate-cgi.c b/scripts/bm-fixedrate-cgi.c
new file mode 100644
index 0000000..2b48f98
--- /dev/null
+++ b/scripts/bm-fixedrate-cgi.c
@@ -0,0 +1,158 @@
+#define _XOPEN_SOURCE 600
+#define _POSIX_C_SOURCE 200112
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <errno.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+/* 1400 bytes is a safe figure for per-packet transmissible payload. */
+#define BLOCKSZ 1400
+
+
+#if __GNUC__ >= 3
+#define NORETURN void __attribute__((noreturn))
+#else
+#define NORETURN void
+#endif
+
+extern char **environ;
+
+static NORETURN
+error_400(const char *msg)
+{
+ char **p;
+ printf("Status: 400 Bad Request\nContent-Type: text/plain\n\n"
+ "400 Bad Request (%s)\nCGI environment dump follows:\n\n", msg);
+ for (p = environ; *p; p++)
+ puts(*p);
+ exit(0);
+}
+
+static NORETURN
+error_500(const char *syscall)
+{
+ printf("Status: 500 Internal Server Error\nContent-Type:text/plain\n\n"
+ "500 Internal Server Error: %s: %s\n",
+ syscall, strerror(errno));
+ exit(0);
+}
+
+static void
+generate(unsigned long rate, bool dryrun)
+{
+ double interval;
+ timer_t timerid;
+ struct sigevent sev;
+ struct itimerspec its;
+ sigset_t mask;
+ int sig;
+ char *data;
+ size_t bufsz = BLOCKSZ;
+
+ /* You send data at R bytes per second in 1400-byte blocks by
+ calling write() every 1/(R/1400) second. However, despite our
+ use of the high-resolution interval timers, we cannot count on
+ being scheduled more often than every 1/CLOCKS_PER_SEC seconds,
+ so if we need to send data faster than that, bump up the block
+ size instead. */
+ interval = 1./(rate/(double)BLOCKSZ);
+
+ if (interval < 1./CLOCKS_PER_SEC) {
+ interval = 1./CLOCKS_PER_SEC;
+ bufsz = rate / CLOCKS_PER_SEC;
+ }
+
+ its.it_value.tv_sec = lrint(floor(interval));
+ its.it_value.tv_nsec = lrint((interval - its.it_value.tv_sec) * 1e9);
+ its.it_interval.tv_sec = its.it_value.tv_sec;
+ its.it_interval.tv_nsec = its.it_value.tv_nsec;
+
+ if (dryrun) {
+ printf("Content-Type: text/plain\n\n"
+ "Goal %lu bytes per second:\n"
+ "would send %lu bytes every %f seconds\n"
+ " \" \" \" \" \" %lu sec + %lu nsec\n",
+ rate, bufsz, interval,
+ (unsigned long)its.it_value.tv_sec,
+ (unsigned long)its.it_value.tv_nsec);
+ return;
+ }
+
+ data = malloc(bufsz);
+ if (!data)
+ error_500("malloc");
+ memset(data, 0, bufsz);
+
+ fflush(stdout);
+ setvbuf(stdout, 0, _IONBF, 0);
+ fputs("Content-Type: application/octet-stream\n"
+ "Cache-Control: no-store,no-cache\n\n", stdout);
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGRTMIN);
+ if (sigprocmask(SIG_SETMASK, &mask, 0))
+ error_500("sigprocmask");
+
+ memset(&sev, 0, sizeof sev);
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGRTMIN;
+ sev.sigev_value.sival_ptr = &timerid;
+ if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+ error_500("timer_create");
+
+ if (timer_settime(timerid, 0, &its, 0))
+ error_500("timer_settime");
+
+ do {
+ size_t r, n = bufsz;
+ char *p = data;
+ do {
+ r = fwrite(p, 1, n, stdout);
+ if (r == 0)
+ exit(1);
+ n -= r;
+ p += r;
+ } while (n > 0);
+ } while (sigwait(&mask, &sig) == 0);
+}
+
+int
+main(void)
+{
+ unsigned long rate;
+ char *endp;
+ bool dryrun;
+ char *request_method = getenv("REQUEST_METHOD");
+ char *query_string = getenv("QUERY_STRING");
+ char *path_info = getenv("PATH_INFO");
+
+ if (!request_method || strcmp(request_method, "GET"))
+ error_400("method not supported");
+ if (query_string && strcmp(query_string, ""))
+ error_400("no query parameters accepted");
+
+ if (!path_info || path_info[0] != '/')
+ error_400("malformed or missing PATH_INFO");
+
+ rate = strtoul(path_info+1, &endp, 10);
+ if (endp == path_info+1)
+ error_400("missing rate (specify bytes per second)");
+
+ if (endp[0] == '\0')
+ dryrun = false;
+ else if (endp[0] == ';' && endp[1] == 'd' && endp[2] == '\0')
+ dryrun = true;
+ else
+ error_400("unrecognized extra arguments");
+
+ generate(rate, dryrun);
+ return 0;
+}
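
The CGI takes its target byte rate from PATH_INFO, so GET /bm-fixedrate.cgi/25000 streams zeroes at roughly 25 kB/s until the client hangs up, and /bm-fixedrate.cgi/25000;d reports what it would do instead of sending data. A client-side sanity check might look like the sketch below; the script, its name, and the 10-second measurement window are illustrative assumptions, not part of the commit.

    #! /usr/bin/python
    # Illustrative check for bm-fixedrate.cgi: read the stream for a while
    # and compare the observed byte rate against the requested one.

    import sys
    import time
    import urllib2

    def measure(base_url, rate, seconds=10):
        url = "%s/bm-fixedrate.cgi/%d" % (base_url, rate)
        resp = urllib2.urlopen(url)
        got = 0
        start = time.time()
        while time.time() - start < seconds:
            chunk = resp.read(1400)     # the CGI writes 1400-byte blocks
            if not chunk:
                break
            got += len(chunk)
        elapsed = time.time() - start
        resp.close()
        return got / elapsed

    if __name__ == '__main__':
        base, rate = sys.argv[1], int(sys.argv[2])
        observed = measure(base, rate)
        sys.stdout.write("requested %d B/s, observed %.0f B/s\n" % (rate, observed))

Run as, for example, python check-fixedrate.py http://<target> 25000 (the script name is hypothetical).
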
diff --git a/scripts/bm-genfiles.py b/scripts/bm-genfiles.py
new file mode 100755
index 0000000..dcd1030
--- /dev/null
+++ b/scripts/bm-genfiles.py
@@ -0,0 +1,162 @@
+#! /usr/bin/python
+
+"""Generate files for network performance testing.
+
+The default behavior is to generate 10,000 files all of which are
+exactly 3584 bytes long, because that is approximately how big
+Flickr's 75x75px JPEG thumbnails are. You can request a different
+size, or you can request that the file sizes instead follow a bounded
+Pareto distribution with tunable alpha.
+
+The files have names compatible with httperf's --wset mode. Since
+it insists on .html as a file suffix, the files are syntactically
+valid HTML. Their contents are word salad.
+
+There is one mandatory command line argument: the path to the root
+of the tree of files to generate. It is created if it doesn't
+already exist. If it already exists, its contents will be erased!
+(so don't use '.')"""
+
+from __future__ import division
+
+import argparse
+import errno
+import math
+import os
+import os.path
+import random
+import shutil
+import sys
+import textwrap
+
+def ensure_empty_dir(dpath):
+ todelete = []
+ try:
+ todelete = os.listdir(dpath)
+ except OSError, e:
+ # Don't delete a _file_ that's in the way.
+ # Don't try to create parent directories that are missing.
+ if e.errno != errno.ENOENT:
+ raise
+ os.mkdir(dpath)
+ return
+ for f in todelete:
+ p = os.path.join(dpath, f)
+ try:
+ os.remove(p)
+ except OSError, e:
+ if e.errno != errno.EISDIR and e.errno != errno.EPERM:
+ raise
+ shutil.rmtree(p)
+
+def ensure_parent_directories(path):
+ try:
+ os.makedirs(os.path.dirname(path))
+ except OSError, e:
+ if e.errno != errno.EEXIST:
+ raise
+
+def word_salad(f, words, seed, maxlen):
+ rng = random.Random(seed)
+ salad = []
+ slen = 0
+ while slen < maxlen - 1:
+ nl = rng.randint(1, min((maxlen - 1) - slen, len(words))) - 1
+ w = rng.choice(words[nl])
+ salad.append(w)
+ slen += len(w) + 1
+ salad = textwrap.fill(" ".join(salad), 78)
+ while len(salad) < maxlen-1:
+ salad += '.'
+ salad += '\n'
+ f.write(salad)
+
+def load_words():
+ words = [ [] for _ in xrange(15) ]
+ for w in open('/usr/share/dict/words'):
+ w = w.strip()
+ if w.endswith("'s"): continue
+ if len(w) > 15 or len(w) < 2: continue
+ words[len(w)-1].append(w)
+    # special-case words[0]: dict files list nearly every single letter as a word
+ words[0].extend(('a','I'))
+ return words
+
+FILE_PREFIX = '<!doctype html>\n<title>{0}</title>\n<p>\n'
+FILE_SUFFIX = '</p>\n'
+
+def create_one(parent, ctr, digits, words, filesize, seed, resume, progress):
+ label = format(ctr, '0'+str(digits)+'d')
+ fname = os.path.join(parent, *label) + '.html'
+ ensure_parent_directories(fname)
+
+ if os.path.exists(fname):
+ if not resume: raise RuntimeError('{0} already exists'.format(fname))
+ return
+
+ prefix = FILE_PREFIX.format(label)
+ suffix = FILE_SUFFIX
+ limit = filesize - (len(prefix) + len(suffix))
+ if limit <= 0:
+ raise TypeError("{0} bytes is too small to generate (minimum {1})"
+ .format(filesize, len(prefix)+len(suffix)))
+
+ if progress:
+ sys.stderr.write(fname + '\n')
+
+ f = open(fname, "w")
+ f.write(prefix)
+ word_salad(f, words, ctr+seed, limit)
+ f.write(suffix)
+
+def bounded_pareto(rng, alpha, L, H):
+ while True:
+ U = rng.random()
+ if U < 1: break
+ Ha = H**alpha
+ La = L**alpha
+ return int(round((-(U*Ha - U*La - Ha)/(Ha * La)) ** (-1/alpha)))
+
+if __name__ == '__main__':
+
+ default_filesize = 3584
+ default_filecount = 10000 # 0/0/0/0.html through 9/9/9/9.html
+
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument('directory',
+ help='directory to populate with files')
+ parser.add_argument('-c', '--count', type=int, default=default_filecount,
+ help='number of files to generate')
+ sg = parser.add_mutually_exclusive_group()
+ sg.add_argument('-s', '--size', type=int, default=default_filesize,
+ help='all files will be exactly SIZE bytes long')
+ sg.add_argument('-p', '--pareto', type=float,
+ metavar='ALPHA',
+ help='file sizes will follow a bounded Pareto distribution'
+ ' with parameter ALPHA')
+ parser.add_argument('-m', '--minsize', type=int, default=512,
+ help='minimum file size (only useful with -p)')
+ parser.add_argument('-M', '--maxsize', type=int, default=2*1024*1024,
+ help='maximum file size (only useful with -p)')
+ parser.add_argument('-S', '--seed', type=int, default=719,
+ help='seed for random number generator')
+ parser.add_argument('--resume', action='store_true',
+ help='resume an interrupted run where it left off')
+ parser.add_argument('--progress', action='store_true',
+ help='report progress')
+
+ args = parser.parse_args()
+ digits = len(str(args.count - 1))
+ rng = random.Random(args.seed)
+
+ words = load_words()
+ if not args.resume:
+ ensure_empty_dir(args.directory)
+
+ size = args.size
+ for i in xrange(args.count):
+ if args.pareto is not None:
+ size = bounded_pareto(rng, args.pareto, args.minsize, args.maxsize)
+ create_one(args.directory, i, digits, words, size, args.seed,
+ args.resume, args.progress)
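
The generated tree follows httperf's --wset naming: the file counter is zero-padded to as many digits as the largest index, and each digit becomes one directory level, so with the default 10,000 files entry 1234 lands at 1/2/3/4.html under the chosen root. A small illustration of that path computation, mirroring create_one above (the helper name and the 'pareto' directory argument are just for the example):

    #! /usr/bin/python
    # Illustration of the --wset-compatible layout used by bm-genfiles.py:
    # the zero-padded counter is splatted into one directory level per digit.

    import os.path

    def wset_path(parent, ctr, count):
        digits = len(str(count - 1))
        label = format(ctr, '0' + str(digits) + 'd')
        return os.path.join(parent, *label) + '.html'

    if __name__ == '__main__':
        # with the default count of 10000: 0/0/0/0.html ... 9/9/9/9.html
        for ctr in (0, 1234, 9999):
            print wset_path('pareto', ctr, 10000)
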
diff --git a/scripts/bm-mcurl.c b/scripts/bm-mcurl.c
new file mode 100644
index 0000000..ac24f3a
--- /dev/null
+++ b/scripts/bm-mcurl.c
@@ -0,0 +1,196 @@
+/* Use libcurl to retrieve many URLs, according to a wildcard pattern,
+ starting new connections at a constant rate until we hit a limit.
+
+ Command line arguments -- all are required, but 'proxy' may be an
+ empty string if you want direct connections:
+
+ bm-mcurl [-v] rate limit proxy url-pattern [url-pattern ...]
+
+ There is no output; it is assumed that you are monitoring traffic
+ externally. Passing -v turns on CURLOPT_VERBOSE debugging spew.
+ */
+
+#define _XOPEN_SOURCE 600
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/select.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <curl/curl.h>
+#include "tool_urlglob.h"
+
+#define NORETURN void __attribute__((noreturn))
+
+static bool verbose = false;
+
+static size_t
+discard_data(char *ptr, size_t size, size_t nmemb, void *userdata)
+{
+ return size * nmemb;
+}
+
+static size_t
+read_abort(void *ptr, size_t size, size_t nmemb, void *userdata)
+{
+ /* we don't do anything that should require this to be called,
+ so if it does get called, something is wrong */
+ return CURL_READFUNC_ABORT;
+}
+
+static CURL *
+setup_curl_easy_handle(char *proxy)
+{
+ CURL *h = curl_easy_init();
+ if (!h) abort();
+
+#define SET_OR_CRASH(h, opt, param) \
+ do { if (curl_easy_setopt(h, opt, param)) abort(); } while (0)
+
+ SET_OR_CRASH(h, CURLOPT_VERBOSE, (unsigned long)verbose);
+ SET_OR_CRASH(h, CURLOPT_NOPROGRESS, 1L);
+ SET_OR_CRASH(h, CURLOPT_FAILONERROR, 1L);
+ SET_OR_CRASH(h, CURLOPT_USERAGENT, "bm-mcurl/0.1");
+ SET_OR_CRASH(h, CURLOPT_ACCEPT_ENCODING, "");
+ SET_OR_CRASH(h, CURLOPT_AUTOREFERER, 1L);
+ SET_OR_CRASH(h, CURLOPT_FOLLOWLOCATION, 1L);
+ SET_OR_CRASH(h, CURLOPT_MAXREDIRS, 30L);
+
+ SET_OR_CRASH(h, CURLOPT_WRITEFUNCTION, discard_data);
+ SET_OR_CRASH(h, CURLOPT_WRITEDATA, NULL);
+ SET_OR_CRASH(h, CURLOPT_READFUNCTION, read_abort);
+ SET_OR_CRASH(h, CURLOPT_READDATA, NULL);
+
+ if (proxy && proxy[0]) {
+ SET_OR_CRASH(h, CURLOPT_PROXY, proxy);
+ SET_OR_CRASH(h, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
+ }
+#undef SET_OR_CRASH
+
+  return h;
+}
+
+static bool
+process_events_once(CURLM *multi, unsigned long timeout_max)
+{
+ struct timeval tv;
+ int rc; /* select() return code */
+
+ fd_set fdread;
+ fd_set fdwrite;
+ fd_set fdexcept;
+ int maxfd = -1;
+
+ unsigned long timeout = 1000000; /* one second - ultimate default */
+ long curl_tout_ms = -1;
+
+ /* get fd sets for all pending transfers */
+ FD_ZERO(&fdread);
+ FD_ZERO(&fdwrite);
+ FD_ZERO(&fdexcept);
+  curl_multi_fdset(multi, &fdread, &fdwrite, &fdexcept, &maxfd);
+
+ /* timeout */
+ if (timeout_max > 0 && timeout_max < timeout)
+ timeout = timeout_max;
+
+  curl_multi_timeout(multi, &curl_tout_ms);
+
+ if (curl_tout_ms >= 0) {
+ unsigned long curl_tout_us = ((unsigned long)curl_tout_ms) * 1000;
+ if (timeout > curl_tout_us)
+ timeout = curl_tout_us;
+ }
+
+  tv.tv_sec = timeout / 1000000;
+  if(tv.tv_sec >= 1) {
+    tv.tv_sec = 1;
+    tv.tv_usec = 0;
+  }
+  else
+    tv.tv_usec = timeout % 1000000;
+
+ do {
+ rc = select(maxfd+1, &fdread, &fdwrite, &fdexcept, &tv);
+ } while (rc == -1 && errno == EINTR);
+
+  if (rc >= 0) {
+    /* rc == 0 means select() timed out; give libcurl a chance to run
+       its own timeouts instead of treating that as an error */
+    int still_running;
+    curl_multi_perform(multi, &still_running);
+    return !!still_running;
+  } else
+    abort();
+}
+
+/* Note: this function must not return until we are ready to start
+ another connection. */
+static void
+queue_one(CURLM *multi, unsigned long rate, unsigned long limit,
+ char *proxy, char *url)
+{
+
+}
+
+static void
+run(unsigned long rate, unsigned long limit, char *proxy, char **urls)
+{
+ CURLM *multi;
+  curl_global_init(CURL_GLOBAL_ALL);
+ multi = curl_multi_init();
+ if (!multi) abort();
+
+  for (char **upat = urls; *upat; upat++) {
+    URLGlob *uglob;
+    int n;
+    if (glob_url(&uglob, *upat, &n, stderr))
+      continue;
+    do {
+      char *url;
+      if (glob_next_url(&url, uglob)) abort();
+      queue_one(multi, rate, limit, proxy, url); /* takes ownership */
+    } while (--n);
+ glob_cleanup(uglob);
+ }
+
+ /* spin the event loop until all outstanding transfers complete */
+ while (process_events_once(multi, 0));
+
+ curl_multi_cleanup(multi);
+}
+
+static NORETURN
+usage(const char *av0, const char *complaint)
+{
+ fprintf(stderr,
+ "%s\nusage: %s [-v] rate limit proxy url [url...]\n",
+ complaint, av0);
+ exit(2);
+}
+
+int
+main(int argc, char **argv)
+{
+ unsigned long rate;
+ unsigned long limit;
+ char *endp;
+
+ if (argv[1] && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--verbose"))) {
+ verbose = true;
+ argc--;
+ argv[1] = argv[0];
+ argv++;
+ }
+
+  if (argc < 5)
+    usage(argv[0], "not enough arguments");
+
+ rate = strtoul(argv[1], &endp, 10);
+ if (endp == argv[1] || *endp)
+    usage(argv[0], "rate must be a positive integer (connections per second)");
+
+ limit = strtoul(argv[2], &endp, 10);
+ if (endp == argv[2] || *endp)
+    usage(argv[0], "limit must be a positive integer (max outstanding requests)");
+
+ run(rate, limit, argv[3], argv+4);
+ return 0;
+}
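
bm-mcurl leans on curl's URL globbing (the tool_urlglob.c copy below), so one pattern can cover the whole tree that bm-genfiles.py lays out, e.g. http://storustest.nfshost.com/pareto/[0-9]/[0-9]/[0-9]/[0-9].html for the default 10,000 files. As a rough picture of what glob_url/glob_next_url iterate over, here is a Python rendering of just the numeric-range part of that syntax; the helper is an illustration only (sets, character ranges and :step suffixes are omitted), not how bm-mcurl itself expands patterns.

    #! /usr/bin/python
    # Python rendering of the [N-M] numeric-range globbing bm-mcurl gets
    # from tool_urlglob.c; a leading zero in N sets the zero-padding width.

    import itertools
    import re

    def expand_num_ranges(pattern):
        parts = re.split(r'\[(\d+)-(\d+)\]', pattern)
        literals = parts[0::3]                  # text between the ranges
        ranges = zip(parts[1::3], parts[2::3])  # (low, high) pairs
        choices = []
        for lo, hi in ranges:
            width = len(lo) if lo.startswith('0') else 0
            choices.append([str(n).zfill(width)
                            for n in range(int(lo), int(hi) + 1)])
        for combo in itertools.product(*choices):
            yield ''.join(lit + ch
                          for lit, ch in zip(literals, list(combo) + ['']))

    if __name__ == '__main__':
        pat = 'http://storustest.nfshost.com/pareto/[0-9]/[0-9]/[0-9]/[0-9].html'
        urls = list(expand_num_ranges(pat))
        print len(urls), 'URLs:', urls[0], '...', urls[-1]
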
diff --git a/scripts/tool_urlglob.c b/scripts/tool_urlglob.c
new file mode 100644
index 0000000..d714971
--- /dev/null
+++ b/scripts/tool_urlglob.c
@@ -0,0 +1,516 @@
+/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel at haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <curl/curl.h>
+
+#include "tool_urlglob.h"
+
+/* Minimal stand-ins for curl-internal helpers (normally provided by
+   tool_setup.h / curl_ctype.h) so this standalone copy builds outside
+   the curl source tree. */
+#ifndef ISALPHA
+#define ISALPHA(c) isalpha((unsigned char)(c))
+#endif
+#ifndef ISDIGIT
+#define ISDIGIT(c) isdigit((unsigned char)(c))
+#endif
+#ifndef Curl_safefree
+#define Curl_safefree(p) do { free(p); (p) = NULL; } while(0)
+#endif
+
+typedef enum {
+ GLOB_OK,
+ GLOB_NO_MEM,
+ GLOB_ERROR
+} GlobCode;
+
+/*
+ * glob_word()
+ *
+ * Input a full globbed string, set the fourth argument to the number of
+ * strings we get out of this. Return GlobCode.
+ */
+static GlobCode glob_word(URLGlob *, /* object anchor */
+ char *, /* globbed string */
+ size_t, /* position */
+ int *); /* returned number of strings */
+
+static GlobCode glob_set(URLGlob *glob, char *pattern,
+ size_t pos, int *amount)
+{
+ /* processes a set expression with the point behind the opening '{'
+ ','-separated elements are collected until the next closing '}'
+ */
+ URLPattern *pat;
+ GlobCode res;
+ bool done = false;
+ char* buf = glob->glob_buffer;
+
+ pat = &glob->pattern[glob->size / 2];
+ /* patterns 0,1,2,... correspond to size=1,3,5,... */
+ pat->type = UPTSet;
+ pat->content.Set.size = 0;
+ pat->content.Set.ptr_s = 0;
+ pat->content.Set.elements = NULL;
+
+ ++glob->size;
+
+ while(!done) {
+ switch (*pattern) {
+ case '\0': /* URL ended while set was still open */
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "unmatched brace at pos %zu\n", pos);
+ return GLOB_ERROR;
+
+ case '{':
+ case '[': /* no nested expressions at this time */
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "nested braces not supported at pos %zu\n", pos);
+ return GLOB_ERROR;
+
+ case ',':
+ case '}': /* set element completed */
+ *buf = '\0';
+ if(pat->content.Set.elements) {
+ char **new_arr = realloc(pat->content.Set.elements,
+ (pat->content.Set.size + 1) * sizeof(char*));
+ if(!new_arr) {
+ short elem;
+ for(elem = 0; elem < pat->content.Set.size; elem++)
+ Curl_safefree(pat->content.Set.elements[elem]);
+ Curl_safefree(pat->content.Set.elements);
+ pat->content.Set.ptr_s = 0;
+ pat->content.Set.size = 0;
+ }
+ pat->content.Set.elements = new_arr;
+ }
+ else
+ pat->content.Set.elements = malloc(sizeof(char*));
+ if(!pat->content.Set.elements) {
+ snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
+ return GLOB_NO_MEM;
+ }
+ pat->content.Set.elements[pat->content.Set.size] =
+ strdup(glob->glob_buffer);
+ if(!pat->content.Set.elements[pat->content.Set.size]) {
+ short elem;
+ for(elem = 0; elem < pat->content.Set.size; elem++)
+ Curl_safefree(pat->content.Set.elements[elem]);
+ Curl_safefree(pat->content.Set.elements);
+ pat->content.Set.ptr_s = 0;
+ pat->content.Set.size = 0;
+ snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
+ return GLOB_NO_MEM;
+ }
+ ++pat->content.Set.size;
+
+ if(*pattern == '}') {
+ /* entire set pattern completed */
+ int wordamount;
+
+ /* always check for a literal (may be "") between patterns */
+ res = glob_word(glob, ++pattern, ++pos, &wordamount);
+ if(res) {
+ short elem;
+ for(elem = 0; elem < pat->content.Set.size; elem++)
+ Curl_safefree(pat->content.Set.elements[elem]);
+ Curl_safefree(pat->content.Set.elements);
+ pat->content.Set.ptr_s = 0;
+ pat->content.Set.size = 0;
+ return res;
+ }
+
+ *amount = pat->content.Set.size * wordamount;
+
+ done = true;
+ continue;
+ }
+
+ buf = glob->glob_buffer;
+ ++pattern;
+ ++pos;
+ break;
+
+ case ']': /* illegal closing bracket */
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "illegal pattern at pos %zu\n", pos);
+ return GLOB_ERROR;
+
+ case '\\': /* escaped character, skip '\' */
+ if(pattern[1]) {
+ ++pattern;
+ ++pos;
+ }
+ /* intentional fallthrough */
+ default:
+ *buf++ = *pattern++; /* copy character to set element */
+ ++pos;
+ }
+ }
+ return GLOB_OK;
+}
+
+static GlobCode glob_range(URLGlob *glob, char *pattern,
+ size_t pos, int *amount)
+{
+ /* processes a range expression with the point behind the opening '['
+ - char range: e.g. "a-z]", "B-Q]"
+ - num range: e.g. "0-9]", "17-2000]"
+ - num range with leading zeros: e.g. "001-999]"
+ expression is checked for well-formedness and collected until the next ']'
+ */
+ URLPattern *pat;
+ char *c;
+ char sep;
+ char sep2;
+ int step;
+ int rc;
+ GlobCode res;
+ int wordamount = 1;
+
+ pat = &glob->pattern[glob->size / 2];
+ /* patterns 0,1,2,... correspond to size=1,3,5,... */
+ ++glob->size;
+
+ if(ISALPHA(*pattern)) {
+ /* character range detected */
+ char min_c;
+ char max_c;
+
+ pat->type = UPTCharRange;
+
+ rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
+
+ if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
+ /* the pattern is not well-formed */
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "error: bad range specification after pos %zu\n", pos);
+ return GLOB_ERROR;
+ }
+
+ /* check the (first) separating character */
+ if((sep != ']') && (sep != ':')) {
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "error: unsupported character (%c) after range at pos %zu\n",
+ sep, pos);
+ return GLOB_ERROR;
+ }
+
+ /* if there was a ":[num]" thing, use that as step or else use 1 */
+ pat->content.CharRange.step =
+ ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
+
+ pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
+ pat->content.CharRange.max_c = max_c;
+ }
+ else if(ISDIGIT(*pattern)) {
+ /* numeric range detected */
+ int min_n;
+ int max_n;
+
+ pat->type = UPTNumRange;
+ pat->content.NumRange.padlength = 0;
+
+ rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
+
+ if((rc < 2) || (min_n > max_n)) {
+ /* the pattern is not well-formed */
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "error: bad range specification after pos %zu\n", pos);
+ return GLOB_ERROR;
+ }
+ pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
+ pat->content.NumRange.max_n = max_n;
+
+ /* if there was a ":[num]" thing, use that as step or else use 1 */
+ pat->content.NumRange.step =
+ ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
+
+ if(*pattern == '0') {
+ /* leading zero specified */
+ c = pattern;
+ while(ISDIGIT(*c)) {
+ c++;
+ ++pat->content.NumRange.padlength; /* padding length is set for all
+ instances of this pattern */
+ }
+ }
+ }
+ else {
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "illegal character in range specification at pos %zu\n", pos);
+ return GLOB_ERROR;
+ }
+
+ c = (char*)strchr(pattern, ']'); /* continue after next ']' */
+ if(c)
+ c++;
+ else {
+ snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
+ return GLOB_ERROR; /* missing ']' */
+ }
+
+ /* always check for a literal (may be "") between patterns */
+
+ res = glob_word(glob, c, pos + (c - pattern), &wordamount);
+ if(res == GLOB_ERROR) {
+ wordamount = 1;
+ res = GLOB_OK;
+ }
+
+ if(!res) {
+ if(pat->type == UPTCharRange)
+ *amount = wordamount * (pat->content.CharRange.max_c -
+ pat->content.CharRange.min_c + 1);
+ else
+ *amount = wordamount * (pat->content.NumRange.max_n -
+ pat->content.NumRange.min_n + 1);
+ }
+
+ return res; /* GLOB_OK or GLOB_NO_MEM */
+}
+
+static GlobCode glob_word(URLGlob *glob, char *pattern,
+ size_t pos, int *amount)
+{
+ /* processes a literal string component of a URL
+ special characters '{' and '[' branch to set/range processing functions
+ */
+ char* buf = glob->glob_buffer;
+ size_t litindex;
+ GlobCode res = GLOB_OK;
+
+ *amount = 1; /* default is one single string */
+
+ while(*pattern != '\0' && *pattern != '{' && *pattern != '[') {
+ if(*pattern == '}' || *pattern == ']') {
+ snprintf(glob->errormsg, sizeof(glob->errormsg),
+ "unmatched close brace/bracket at pos %zu\n", pos);
+ return GLOB_ERROR;
+ }
+
+ /* only allow \ to escape known "special letters" */
+ if(*pattern == '\\' &&
+ (*(pattern+1) == '{' || *(pattern+1) == '[' ||
+ *(pattern+1) == '}' || *(pattern+1) == ']') ) {
+
+ /* escape character, skip '\' */
+ ++pattern;
+ ++pos;
+ }
+ *buf++ = *pattern++; /* copy character to literal */
+ ++pos;
+ }
+ *buf = '\0';
+ litindex = glob->size / 2;
+ /* literals 0,1,2,... correspond to size=0,2,4,... */
+ glob->literal[litindex] = strdup(glob->glob_buffer);
+ if(!glob->literal[litindex]) {
+ snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
+ return GLOB_NO_MEM;
+ }
+ ++glob->size;
+
+ switch (*pattern) {
+ case '\0':
+ /* singular URL processed */
+ break;
+
+ case '{':
+ /* process set pattern */
+ res = glob_set(glob, ++pattern, ++pos, amount);
+ break;
+
+ case '[':
+ /* process range pattern */
+ res = glob_range(glob, ++pattern, ++pos, amount);
+ break;
+ }
+
+ if(res)
+ Curl_safefree(glob->literal[litindex]);
+
+ return res;
+}
+
+int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
+{
+ /*
+ * We can deal with any-size, just make a buffer with the same length
+ * as the specified URL!
+ */
+ URLGlob *glob_expand;
+ int amount;
+ char *glob_buffer;
+ GlobCode res;
+
+ *glob = NULL;
+
+ glob_buffer = malloc(strlen(url) + 1);
+ if(!glob_buffer)
+ return CURLE_OUT_OF_MEMORY;
+
+ glob_expand = calloc(1, sizeof(URLGlob));
+ if(!glob_expand) {
+ Curl_safefree(glob_buffer);
+ return CURLE_OUT_OF_MEMORY;
+ }
+ glob_expand->size = 0;
+ glob_expand->urllen = strlen(url);
+ glob_expand->glob_buffer = glob_buffer;
+ glob_expand->beenhere = 0;
+
+ res = glob_word(glob_expand, url, 1, &amount);
+ if(!res)
+ *urlnum = amount;
+ else {
+ if(error && glob_expand->errormsg[0]) {
+ /* send error description to the error-stream */
+ fprintf(error, "curl: (%d) [globbing] %s",
+ (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT,
+ glob_expand->errormsg);
+ }
+ /* it failed, we cleanup */
+ Curl_safefree(glob_buffer);
+ Curl_safefree(glob_expand);
+ *urlnum = 1;
+ return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
+ }
+
+ *glob = glob_expand;
+ return CURLE_OK;
+}
+
+void glob_cleanup(URLGlob* glob)
+{
+ size_t i;
+ int elem;
+
+ for(i = glob->size - 1; i < glob->size; --i) {
+ if(!(i & 1)) { /* even indexes contain literals */
+ Curl_safefree(glob->literal[i/2]);
+ }
+ else { /* odd indexes contain sets or ranges */
+ if((glob->pattern[i/2].type == UPTSet) &&
+ (glob->pattern[i/2].content.Set.elements)) {
+ for(elem = glob->pattern[i/2].content.Set.size - 1;
+ elem >= 0;
+ --elem) {
+ Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
+ }
+ Curl_safefree(glob->pattern[i/2].content.Set.elements);
+ }
+ }
+ }
+ Curl_safefree(glob->glob_buffer);
+ Curl_safefree(glob);
+}
+
+int glob_next_url(char **globbed, URLGlob *glob)
+{
+ URLPattern *pat;
+ char *lit;
+ size_t i;
+ size_t j;
+ size_t len;
+ size_t buflen = glob->urllen + 1;
+ char *buf = glob->glob_buffer;
+
+ *globbed = NULL;
+
+ if(!glob->beenhere)
+ glob->beenhere = 1;
+ else {
+ bool carry = true;
+
+ /* implement a counter over the index ranges of all patterns,
+ starting with the rightmost pattern */
+ for(i = glob->size / 2 - 1; carry && (i < glob->size); --i) {
+ carry = false;
+ pat = &glob->pattern[i];
+ switch (pat->type) {
+ case UPTSet:
+ if((pat->content.Set.elements) &&
+ (++pat->content.Set.ptr_s == pat->content.Set.size)) {
+ pat->content.Set.ptr_s = 0;
+ carry = true;
+ }
+ break;
+ case UPTCharRange:
+ pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
+ (int)((unsigned char)pat->content.CharRange.ptr_c));
+ if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
+ pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
+ carry = true;
+ }
+ break;
+ case UPTNumRange:
+ pat->content.NumRange.ptr_n += pat->content.NumRange.step;
+ if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
+ pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
+ carry = true;
+ }
+ break;
+ default:
+ printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
+ return CURLE_FAILED_INIT;
+ }
+ }
+ if(carry) { /* first pattern ptr has run into overflow, done! */
+      /* TODO: verify if this should actually return CURLE_OK. */
+ return CURLE_OK; /* CURLE_OK to match previous behavior */
+ }
+ }
+
+ for(j = 0; j < glob->size; ++j) {
+ if(!(j&1)) { /* every other term (j even) is a literal */
+ lit = glob->literal[j/2];
+ len = snprintf(buf, buflen, "%s", lit);
+ buf += len;
+ buflen -= len;
+ }
+    else { /* the rest (j odd) are patterns */
+ pat = &glob->pattern[j/2];
+ switch(pat->type) {
+ case UPTSet:
+ if(pat->content.Set.elements) {
+ len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
+ snprintf(buf, buflen, "%s",
+ pat->content.Set.elements[pat->content.Set.ptr_s]);
+ buf += len;
+ buflen -= len;
+ }
+ break;
+ case UPTCharRange:
+ *buf++ = pat->content.CharRange.ptr_c;
+ break;
+ case UPTNumRange:
+ len = snprintf(buf, buflen, "%0*d",
+ pat->content.NumRange.padlength,
+ pat->content.NumRange.ptr_n);
+ buf += len;
+ buflen -= len;
+ break;
+ default:
+ printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
+ return CURLE_FAILED_INIT;
+ }
+ }
+ }
+ *buf = '\0';
+
+ *globbed = strdup(glob->glob_buffer);
+ if(!*globbed)
+ return CURLE_OUT_OF_MEMORY;
+
+ return CURLE_OK;
+}
diff --git a/scripts/tool_urlglob.h b/scripts/tool_urlglob.h
new file mode 100644
index 0000000..562b08e
--- /dev/null
+++ b/scripts/tool_urlglob.h
@@ -0,0 +1,69 @@
+#ifndef HEADER_CURL_TOOL_URLGLOB_H
+#define HEADER_CURL_TOOL_URLGLOB_H
+/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel at haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#include <stdio.h>  /* FILE, used in the glob_url() prototype */
+#include <stddef.h> /* size_t */
+
+typedef enum {
+ UPTSet = 1,
+ UPTCharRange,
+ UPTNumRange
+} URLPatternType;
+
+typedef struct {
+ URLPatternType type;
+ union {
+ struct {
+ char **elements;
+ short size;
+ short ptr_s;
+ } Set;
+ struct {
+ char min_c;
+ char max_c;
+ char ptr_c;
+ int step;
+ } CharRange;
+ struct {
+ int min_n;
+ int max_n;
+ short padlength;
+ int ptr_n;
+ int step;
+ } NumRange ;
+ } content;
+} URLPattern;
+
+typedef struct {
+ char *literal[10];
+ URLPattern pattern[9];
+ size_t size;
+ size_t urllen;
+ char *glob_buffer;
+ char beenhere;
+ char errormsg[80]; /* error message buffer */
+} URLGlob;
+
+int glob_url(URLGlob**, char*, int *, FILE *);
+int glob_next_url(char **, URLGlob *);
+void glob_cleanup(URLGlob* glob);
+
+#endif /* HEADER_CURL_TOOL_URLGLOB_H */