[or-cvs] r15497: Add a perl script that automatically queries http, https, an (website/trunk)
ioerror at seul.org
ioerror at seul.org
Fri Jun 27 10:35:38 UTC 2008
Author: ioerror
Date: 2008-06-27 06:35:38 -0400 (Fri, 27 Jun 2008)
New Revision: 15497
Added:
website/trunk/update-mirrors.pl
Log:
Add a perl script that automatically queries http, https, and ftp for their date of last update. This also includes much more metadata about each mirror. It isn't ready to be used, the main mirror page needs to be updated to incorporate the new fields that are often blank.
Added: website/trunk/update-mirrors.pl
===================================================================
--- website/trunk/update-mirrors.pl (rev 0)
+++ website/trunk/update-mirrors.pl 2008-06-27 10:35:38 UTC (rev 15497)
@@ -0,0 +1,646 @@
+#!/usr/bin/perl -w
+use warnings;
+use strict;
+use LWP::Simple;
+use LWP;
+use Date::Parse;
+
+# Announce the LWP version in use, then build the single user agent that
+# FetchDate() uses for every mirror request.
+print "Creating LWP agent ($LWP::VERSION)...\n";
+my %agentOptions = (
+    agent      => "Tor MirrorCheck Agent",
+    timeout    => 15,
+    keep_alive => 1,
+);
+my $lua = LWP::UserAgent->new(%agentOptions);
+
+# Remove every character that is not on the whitelist (ASCII letters,
+# digits, '-', ':', '+' and the space character) from the given string
+# and return what is left.
+sub sanitize {
+    my ($raw) = @_;
+    my $whitelist = '-a-zA-Z0-9: +';
+
+    # Work on a copy and delete anything outside the whitelisted class.
+    (my $scrubbed = $raw) =~ s/[^$whitelist]//g;
+
+    return $scrubbed;
+}
+
+# Fetch the last-update timestamp that a mirror publishes.
+#
+# Takes the base URL of a mirror, requests the trace file
+# project/trace/www.torproject.org beneath it, scrubs the response body
+# with sanitize() and parses the result with str2time().
+#
+# Returns the value produced by str2time() on success, or the string
+# "Unknown" when the request fails, the scrubbed body is empty, or the
+# body cannot be parsed as a date.
+sub FetchDate {
+    my $url = shift; # Base url for mirror
+    my $trace = "project/trace/www.torproject.org"; # Location of recent update info
+
+    # A base URL listed without a trailing slash would otherwise have the
+    # trace path glued straight onto the host name.
+    $url .= "/" unless $url =~ m{/\z};
+    $url = "$url$trace";
+
+    print "Fetching possible date from: $url\n";
+
+    # Direct class-method call rather than indirect-object "new CLASS".
+    my $request = HTTP::Request->new(GET => $url);
+    my $result = $lua->request($request);
+
+    unless ($result->is_success) {
+        print "Our request failed, we had no result.\n";
+        return "Unknown";
+    }
+
+    # The body comes from a remote host, so scrub it before parsing.
+    my $taint = $result->content;
+    my $content = sanitize($taint);
+    unless ($content) {
+        print "Unable to fetch date, empty content returned.\n";
+        return "Unknown";
+    }
+
+    my $date = str2time($content);
+    unless ($date) {
+        print "We've haven't fetched a date.\n";
+        return "Unknown";
+    }
+
+    print "We've fetched a date $date.\n";
+    return $date;
+}
+
+# This is the list of all known Tor mirrors
+# Add new mirrors to the bottom!
+#
+# Every key (mirrorNNN) must be unique: when a key is repeated in a hash
+# literal, Perl silently keeps only the last entry and the earlier mirror
+# disappears from the list.  The key itself is only used for lookups.
+#
+# Website mirror URLs must end in "/" because the trace file path is
+# appended directly to them.
+my %m = (
+    mirror000 => {
+        orgName => "cypherpunks.at",
+        isoCC => "AT",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.cypherpunks.at/",
+        rsyncWebsiteMirror => "rsync://tor.cypherpunks.at/tor",
+        httpDistMirror => "http://tor.cypherpunks.at/dist/",
+        rsyncDistMirror => "rsync: tor.cypherpunks.at::tor/dist/",
+        updateDate => "",
+    },
+
+    mirror001 => {
+        orgName => "depthstrike.com",
+        isoCC => "CA",
+        subRegion => "NS",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.depthstrike.com/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.depthstrike.com/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror002 => {
+        orgName => "hermetix.org",
+        isoCC => "CA",
+        subRegion => "QC",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.hermetix.org/",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.hermetix.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror003 => {
+        orgName => "Boinc.ch",
+        isoCC => "CH",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.boinc.ch/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.boinc.ch/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror004 => {
+        orgName => "anonymity.cn",
+        isoCC => "CN",
+        subRegion => "",
+        region => "Asia",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.anonymity.cn/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.anonymity.cn/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror005 => {
+        orgName => "bbs",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.blingblingsquad.net/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.blingblingsquad.net/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror006 => {
+        orgName => "Berapla",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://download.berapla.de/mirrors/tor/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror007 => {
+        orgName => "cybermirror",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.cybermirror.org/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.cybermirror.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror008 => {
+        orgName => "Spline",
+        isoCC => "DE",
+        subRegion => "FU",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://rem.spline.de/tor/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror009 => {
+        orgName => "mirror.bsdhost.eu",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://mirror.bsdhost.eu/www.torproject.org/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://mirror.bsdhost.eu/www.torproject.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror010 => {
+        orgName => "onionland",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://mirror.onionland.org/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "rsync: mirror.onionland.org::tor/",
+        httpDistMirror => "http://mirror.onionland.org/dist/",
+        rsyncDistMirror => "rsync: mirror.onionland.org::tor/dist/",
+        updateDate => "",
+    },
+
+    mirror011 => {
+        orgName => "plentyfact",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.plentyfact.net/",
+        ftpWebsiteMirror => "",
+        httpsWebsiteMirror => "https://tor.plentyfact.net/",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.plentyfact.net/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror012 => {
+        orgName => "loxal.net",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor-anonymizer.mirror.loxal.net/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor-anonymizer.mirror.loxal.net/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror013 => {
+        orgName => "centervenus.com",
+        isoCC => "DE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://www.centervenus.com/mirrors/tor/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror014 => {
+        orgName => "zdg-gmbh.eu",
+        isoCC => "DK",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.zdg-gmbh.eu/",
+        ftpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        httpDistMirror => "http://tor.zdg-gmbh.eu/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror015 => {
+        orgName => "CRAN",
+        isoCC => "FR",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.miroir-francais.fr/",
+        rsyncWebsiteMirror => "rsync: miroir-francais.fr::tor",
+        ftpWebsiteMirror => "ftp://miroir-francais.fr/pub/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror016 => {
+        orgName => "tor.newworldorder.com.es",
+        isoCC => "HU",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.newworldorder.com.es/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror017 => {
+        orgName => "amorphis.eu",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.amorphis.eu/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://tor.amorphis.eu/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror018 => {
+        orgName => "BIT BV",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://ftp.bit.nl/mirror/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "ftp://ftp.bit.nl/mirror/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror019 => {
+        orgName => "CCC",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.ccc.de/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://tor.ccc.de/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror020 => {
+        orgName => "kamagurka.org",
+        isoCC => "NL",
+        subRegion => "Haarlem",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.kamagurka.org/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://tor.kamagurka.org/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror021 => {
+        orgName => "OS Mirror",
+        isoCC => "NL",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.osmirror.nl/",
+        rsyncWebsiteMirror => "rsync: rsync.osmirror.nl::tor/",
+        ftpWebsiteMirror => "ftp://ftp.osmirror.nl/pub/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror022 => {
+        orgName => "Meulie.net",
+        isoCC => "NO",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.meulie.net/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror023 => {
+        orgName => "Swedish Linux Society",
+        isoCC => "SE",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://ftp.se.linux.org/crypto/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "ftp://ftp.se.linux.org/pub/crypto/tor/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror024 => {
+        orgName => "Ghirai.com",
+        isoCC => "UK",
+        subRegion => "London",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://www.ghirai.com/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror025 => {
+        orgName => "BJWOnline.com",
+        isoCC => "US",
+        subRegion => "California",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://mirror.bjwonline.com/tor/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror026 => {
+        orgName => "Libertarian Action Network",
+        isoCC => "",
+        subRegion => "",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "ftp://libertarianactivism.com/tor.eff.org/dist/",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror027 => {
+        orgName => "TheOnionRouter.com",
+        isoCC => "US",
+        subRegion => "Texas",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://www.theonionrouter.com/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "http://www.theonionrouter.com/dist/",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror028 => {
+        orgName => "Site2nd.org",
+        isoCC => "US",
+        subRegion => "Texas",
+        region => "North America",
+        ipv4 => "True",
+        ipv6 => "False",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.site2nd.org/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror029 => {
+        adminContact => "jeroen\@unfix.org",
+        orgName => "unfix",
+        isoCC => "CH",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "True",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.unfix.org/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+
+    mirror030 => {
+        adminContact => "jeroen\@unfix.org",
+        orgName => "sixx",
+        isoCC => "",
+        subRegion => "",
+        region => "Europe",
+        ipv4 => "True",
+        ipv6 => "True",
+        loadBalanced => "Unknown",
+        httpWebsiteMirror => "http://tor.sixxs.net/",
+        rsyncWebsiteMirror => "",
+        ftpWebsiteMirror => "",
+        httpDistMirror => "",
+        rsyncDistMirror => "",
+        updateDate => "",
+    },
+);
+
+# Report how many mirrors are configured, then probe each one for its
+# last-update time and store the result in that mirror's updateDate field.
+my $count = scalar keys %m;
+print "We have a total of $count mirrors\n";
+print "Fetching the last updated date for each mirror.\n";
+
+foreach my $server ( keys %m ) {
+
+    print "Attempting to fetch from $m{$server}{'orgName'}\n";
+
+    # Website mirror fields in order of preference, each paired with the
+    # protocol label used in the progress message.
+    my @candidates = (
+        [ 'httpWebsiteMirror',  'HTTP'  ],
+        [ 'httpsWebsiteMirror', 'HTTPS' ],
+        [ 'ftpWebsiteMirror',   'FTP'   ],
+    );
+
+    # Only the first configured URL is tried; there is no fallback to the
+    # next protocol when that request fails.
+    my ($choice) = grep { $m{$server}{ $_->[0] } } @candidates;
+
+    unless ($choice) {
+        print "We were unable to fetch or store anything. We still have the following: $m{$server}{'updateDate'}\n";
+        # Nothing was fetched, so skip the "fetched and stored" report.
+        next;
+    }
+
+    my ($field, $label) = @{$choice};
+    print "Attempt to fetch via $label.\n";
+    $m{$server}{'updateDate'} = FetchDate($m{$server}{$field});
+
+    print "We fetched and stored the following: $m{$server}{'updateDate'}\n";
+}
+
+
+# Dump every mirror record to STDOUT, most recently updated first.
+#
+# updateDate holds either a number stored from FetchDate(), the string
+# "Unknown", or the initial "".  Comparing the non-numeric values with
+# <=> triggers "isn't numeric" warnings, so each mirror gets an explicit
+# numeric rank first; non-numeric dates rank as 0 and therefore sort last.
+my %printRank = map {
+    my $stamp = $m{$_}{'updateDate'};
+    ( $_ => ( $stamp =~ /\A-?\d+(?:\.\d+)?\z/ ? $stamp : 0 ) )
+} keys %m;
+
+print "We sorted the following mirrors by their date of last update: \n";
+foreach my $server ( sort { $printRank{$b} <=> $printRank{$a} } keys %m ) {
+
+    print "\n";
+    print "Mirror $m{$server}{'orgName'}: \n";
+
+    # One "field = value" line per attribute, in alphabetical field order.
+    foreach my $attrib ( sort keys %{$m{$server}} ) {
+        print "$attrib = $m{$server}{$attrib}\n";
+    }
+}
+
+# Write one HTML table row per mirror to include/mirrors-table.wmi (a WML
+# include), sorted from the most recent known update to unknown update
+# times.  Dies if the file cannot be opened or closed.
+my $outFile = "include/mirrors-table.wmi";
+
+# Three-argument open with a lexical handle: the mode can never be taken
+# from the file name and the handle is not a package-global bareword.
+open(my $out, '>', $outFile) or die "Can't open $outFile: $!";
+
+# updateDate may be "Unknown" or "" instead of a number; give those a rank
+# of 0 so the numeric sort below does not warn and lists them last.
+my %tableRank = map {
+    my $stamp = $m{$_}{'updateDate'};
+    ( $_ => ( $stamp =~ /\A-?\d+(?:\.\d+)?\z/ ? $stamp : 0 ) )
+} keys %m;
+
+foreach my $server ( sort { $tableRank{$b} <=> $tableRank{$a} } keys %m ) {
+
+    # Country Organisation website mirror dist/ mirror
+    print {$out} "\n<tr>\n";
+    print {$out} " <td>$m{$server}{'isoCC'}</td>\n";
+    print {$out} " <td>$m{$server}{'orgName'}</td>\n";
+    print {$out} " <td>$m{$server}{'updateDate'}</td>\n";
+
+    # One cell per mirror URL type; a missing or empty URL becomes " - ".
+    foreach my $precious ( "httpWebsiteMirror", "ftpWebsiteMirror",
+                           "rsyncWebsiteMirror","httpDistMirror",
+                           "rsyncDistMirror" )
+    {
+        if ($m{$server}{$precious}) {
+            print {$out} " <td><a href=\"" . $m{$server}{$precious} . "\">" .
+                         "$m{$server}{$precious}</a></td>\n";
+        } else {
+            print {$out} " <td> - </td>\n";
+        }
+    }
+
+    print {$out} "</tr>\n";
+}
+
+# Buffered write errors only surface at close, so check it.
+close($out) or die "Can't close $outFile: $!";
Property changes on: website/trunk/update-mirrors.pl
___________________________________________________________________
Name: svn:executable
+ *
More information about the tor-commits
mailing list