[or-cvs] r15574: adapt edgeproxy to rewrite HTML for reverse HTTP proxy into Tor (blossom/trunk)
goodell at seul.org
goodell at seul.org
Mon Jun 30 22:16:51 UTC 2008
Author: goodell
Date: 2008-06-30 18:16:51 -0400 (Mon, 30 Jun 2008)
New Revision: 15574
Modified:
blossom/trunk/edgeproxy
Log:
adapt edgeproxy to rewrite HTML for reverse HTTP proxy into Tor
Modified: blossom/trunk/edgeproxy
===================================================================
--- blossom/trunk/edgeproxy 2008-06-30 22:15:35 UTC (rev 15573)
+++ blossom/trunk/edgeproxy 2008-06-30 22:16:51 UTC (rev 15574)
@@ -23,6 +23,7 @@
# fwdport -- act as proxy forwarder for dedicated services
use strict; # require declarations
+use Fcntl; # polipo hack
use Getopt::Long; # for option processing
use Net::hostent; # by-name interface for host info
use IO::Socket; # for creating server and client sockets
@@ -40,6 +41,7 @@
);
my $DEBUG = 0; # debug level
+my $PREFIX = undef; # proxy server host[:port]
($ME = $0) =~ s,.*/,,; # retain just basename of script name
@@ -56,8 +58,14 @@
"local=s" => \$LOCAL,
"service=s" => \$SERVICE,
"debug=s" => \$DEBUG,
+ "prefix=s" => \$PREFIX,
) or die <<EOUSAGE;
- usage: $0 [ --remote host ] [ --local interface ] [ --service service ] [ --debug level ]
+ usage: $0
+ [ --remote host ]
+ [ --local interface ]
+ [ --service service ]
+ [ --debug level ]
+ [ --prefix prefix ]
EOUSAGE
die "Need remote" unless $REMOTE;
die "Need local or service" unless $LOCAL || $SERVICE;
@@ -169,7 +177,9 @@
my $proxypath = "";
my $reverseproxy = undef;
+ my $serverhost = <READER>;
my $router = <READER>;
+ chomp $serverhost;
if($router) {
chomp $router;
} else {
@@ -192,6 +202,7 @@
my $length = undef;
my $content = "";
my $headers = "";
+ my $data = "";
my $type = "";
while(<$remote_server>) {
@@ -211,20 +222,29 @@
my $before = "$pre$host$port$post";
$host = append_exit($host, $router) if $router;
- $pre = "$proxypath$pre" if $reverseproxy;
+ if($PREFIX and $PREFIX ne "$host$port") {
+ $proxypath = "http://$PREFIX/";
+ $pre = "$proxypath$pre";
+ } elsif($reverseproxy) {
+ $pre = "$proxypath$pre";
+ }
my $after = "$pre$host$port$post";
log_info(2, "converting: Location: $before --> Location: $after");
$headers .= "Location: $after$rest\r\n";
- } elsif($reverseproxy and /^Location: \/(.*)\r$/) {
+ } elsif(/^Location: \/(.*)\r$/) {
my $before = "/$1";
- my $after = "$reverseproxy$before";
- log_info(2, "converting: Location: $before --> Location: $after");
-
- $headers .= "Location: $after\n";
+ if($PREFIX) {
+ $reverseproxy = "/http://$serverhost";
+ }
+ if($reverseproxy) {
+ my $after = "$reverseproxy$before";
+ log_info(2, "converting: Location: $before --> Location: $after");
+ $headers .= "Location: $after\n";
+ }
} elsif(/^\r$/) {
$headers .= $_;
last;
@@ -233,19 +253,50 @@
}
}
+ if($length) {
+ log_info(0, " data: $type [$length]");
+ read($remote_server, $data, $length) or die " error: $!";
+ } else {
+ # chunked transfer coding
+ # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
+
+ if($type) {
+ log_info(0, " data: $type [chunked]");
+ } else {
+ log_info(0, " data: unspecified type");
+ }
+ while(<$remote_server>) {
+ my ($d, $length) = ("", 0);
+ ($length = $_) =~ s/;.*$//;
+ $length =~ s/\r$//;
+ chomp $length;
+ $length = hex $length;
+ last unless $length;
+ while(length $d < $length) {
+ $d .= <$remote_server>;
+ };
+ $d =~ s/\r$//;
+ chomp $d;
+ $data .= $d;
+ }
+ $data .= "\r\n";
+ }
+
+ log_info(1, sprintf " data: received %d bytes", length $data);
+
if($html) {
log_info(0, " data: $type (recognized as HTML)");
my $next = "";
- while(<$remote_server>) {
- my $line = $_;
-
- unless($router or $reverseproxy) {
- $content .= $_;
+ foreach my $line (split /\n/, $data) {
+ unless($router or $reverseproxy or $PREFIX) {
+ $content .= $line;
next;
}
- $line = "$next $line";
+ my $space = "";
+ $space = " " if length $next;
+ $line = "$next$space$line";
chomp $line;
$next = "";
@@ -264,8 +315,10 @@
if($router) {
$host = append_exit($host, $router);
- log_info(1, "<$tag tag: $host>");
}
+ if($PREFIX and $PREFIX ne "$host$port") {
+ $proxypath = "http://$PREFIX/";
+ }
my $after = "$tag$attr$label=$quote$proxypath$pre$host$port";
@@ -273,20 +326,23 @@
$content .= "<$after";
$line = "$post$rest";
- } elsif($reverseproxy and $line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)\/(.*)$/i) {
+ } elsif($line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)\/(.*)$/i) {
+ my $after;
my ($tag, $attr, $label, $quote, $rest) = ($1, $2, $3, $4, $5);
-
my $before = "$tag$attr$label=$quote/";
# normalize
$tag =~ y/A-Z/a-z/;
- my $after = "$tag$attr$label=$quote$reverseproxy/";
-
- log_info(2, "converting: <$before --> <$after");
-
- $content .= "<$after";
- $line = $rest;
+ if($PREFIX) {
+ $reverseproxy = "/http://$serverhost";
+ }
+ if($reverseproxy) {
+ $after = "$tag$attr$label=$quote$reverseproxy/";
+ log_info(2, "converting: <$before --> <$after");
+ $content .= "<$after";
+ $line = $rest;
+ }
} elsif($line =~ /^(<.*?>)(.*)$/) {
$content .= $1;
$line = $2;
@@ -304,35 +360,23 @@
$content .= "\n";
}
- foreach my $line (split /\n/, $headers) {
- if($line =~ /^Content-Length: (\S+)\r$/i) {
- $length = length $content;
- print "Content-Length: $length\n";
- } else {
- print "$line\n";
+ # explicitly set Content-Length
+
+ my @headerlines = split /\n/, $headers;
+ my @newheaders = ();
+
+ foreach my $headerline (@headerlines) {
+ next if $headerline =~ /^Transfer-Encoding: chunked/;
+ unless($headerline =~ /^Content-Length: (\S+)\r$/i) {
+ push @newheaders, $headerline if length $headerline > 2;
}
}
- print $content;
-
- } elsif($length) {
- my $data = "";
- print $headers;
-
- log_info(0, " data: $type [$length]");
- read($remote_server, $data, $length) or die " error: $?";
- print "$data\n";
+ push @newheaders, sprintf "Content-Length: %d", length $content;
+ $headers = join "\n", @newheaders;
+ print "$headers\n\n$content";
} else {
- print $headers;
- if($type) {
- log_info(0, " data: $type");
- } else {
- log_info(0, " data: unspecified type");
- }
- while(<$remote_server>) {
- print;
- }
+ print "$headers$data";
}
-
kill('TERM', $kidpid); # kill my twin cause we're done
}
# this is the fork's child, the master's grandchild
@@ -356,7 +400,10 @@
$length = $1;
print;
log_info(1, " send: $_");
+ } elsif(/^Accept-Encoding: /i) {
+ log_info(1, " suppress: $_");
} elsif(/^Host: (\S+)\r$/i) {
+ my $h = $1;
my $router = "";
my $repl = $reverseproxy || $1;
if($repl =~ /\.[A-Za-z0-9-]+\.exit(:[0-9]+)?$/) {
@@ -369,7 +416,7 @@
}
log_info(0, "transmitting router: [$router]");
$router .= "+$reverseproxy" if $reverseproxy;
- print WRITER "$router\n";
+ print WRITER "$h\n$router\n";
close WRITER;
$repl = "Host: $repl\r\n";
@@ -464,4 +511,3 @@
$SIG{CHLD} = \&REAPER;
};
-
More information about the tor-commits mailing list