[or-cvs] r15574: adapt edgeproxy to rewrite HTML for reverse HTTP proxy into Tor (blossom/trunk)

goodell at seul.org goodell at seul.org
Mon Jun 30 22:16:51 UTC 2008


Author: goodell
Date: 2008-06-30 18:16:51 -0400 (Mon, 30 Jun 2008)
New Revision: 15574

Modified:
   blossom/trunk/edgeproxy
Log:
adapt edgeproxy to rewrite HTML for reverse HTTP proxy into Tor


Modified: blossom/trunk/edgeproxy
===================================================================
--- blossom/trunk/edgeproxy	2008-06-30 22:15:35 UTC (rev 15573)
+++ blossom/trunk/edgeproxy	2008-06-30 22:16:51 UTC (rev 15574)
@@ -23,6 +23,7 @@
 # fwdport -- act as proxy forwarder for dedicated services
 
 use strict;                 # require declarations
+use Fcntl;                  # polipo hack
 use Getopt::Long;           # for option processing
 use Net::hostent;           # by-name interface for host info
 use IO::Socket;             # for creating server and client sockets
@@ -40,6 +41,7 @@
 );
 
 my $DEBUG = 0;              # debug level
+my $PREFIX = undef;         # proxy server host[:port]
 
 ($ME = $0) =~ s,.*/,,;      # retain just basename of script name
 
@@ -56,8 +58,14 @@
         "local=s"     => \$LOCAL,
         "service=s"   => \$SERVICE,
         "debug=s"     => \$DEBUG,
+        "prefix=s"    => \$PREFIX,
     ) or die <<EOUSAGE;
-    usage: $0 [ --remote host ] [ --local interface ] [ --service service ] [ --debug level ]
+    usage: $0
+        [ --remote host ]
+        [ --local interface ]
+        [ --service service ]
+        [ --debug level ]
+        [ --prefix prefix ]
 EOUSAGE
     die "Need remote"                   unless $REMOTE;
     die "Need local or service"         unless $LOCAL || $SERVICE;
@@ -169,7 +177,9 @@
                 my $proxypath = "";
                 my $reverseproxy = undef;
 
+                my $serverhost = <READER>;
                 my $router = <READER>;
+                chomp $serverhost;
                 if($router) {
                     chomp $router;
                 } else {
@@ -192,6 +202,7 @@
                 my $length      = undef;
                 my $content     = "";
                 my $headers     = "";
+                my $data        = "";
                 my $type        = "";
 
                 while(<$remote_server>) {
@@ -211,20 +222,29 @@
                         my $before = "$pre$host$port$post";
 
                         $host = append_exit($host, $router) if $router;
-                        $pre = "$proxypath$pre" if $reverseproxy;
+                        if($PREFIX and $PREFIX ne "$host$port") {
+                            $proxypath = "http://$PREFIX/";
+                            $pre = "$proxypath$pre";
+                        } elsif($reverseproxy) {
+                            $pre = "$proxypath$pre";
+                        }
 
                         my $after = "$pre$host$port$post";
 
                         log_info(2, "converting: Location: $before --> Location: $after");
 
                         $headers .= "Location: $after$rest\r\n";
-                    } elsif($reverseproxy and /^Location: \/(.*)\r$/) {
+                    } elsif(/^Location: \/(.*)\r$/) {
                         my $before = "/$1";
-                        my $after = "$reverseproxy$before";
 
-                        log_info(2, "converting: Location: $before --> Location: $after");
-
-                        $headers .= "Location: $after\n";
+                        if($PREFIX) {
+                            $reverseproxy = "/http://$serverhost";
+                        }
+                        if($reverseproxy) {
+                            my $after = "$reverseproxy$before";
+                            log_info(2, "converting: Location: $before --> Location: $after");
+                            $headers .= "Location: $after\n";
+                        }
                     } elsif(/^\r$/) {
                         $headers .= $_;
                         last;
@@ -233,19 +253,50 @@
                     }
                 }
 
+                if($length) {
+                    log_info(0, "    data: $type [$length]");
+                    read($remote_server, $data, $length) or die "    error: $!";
+                } else {
+                    # chunked transfer coding
+                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
+
+                    if($type) {
+                        log_info(0, "    data: $type [chunked]");
+                    } else {
+                        log_info(0, "    data: unspecified type");
+                    }
+                    while(<$remote_server>) {
+                        my ($d, $length) = ("", 0);
+                        ($length = $_) =~ s/;.*$//;
+                        $length =~ s/\r$//;
+                        chomp $length;
+                        $length = hex $length;
+                        last unless $length;
+                        while(length $d < $length) {
+                            $d .= <$remote_server>;
+                        };
+                        $d =~ s/\r$//;
+                        chomp $d;
+                        $data .= $d;
+                    }
+                    $data .= "\r\n";
+                }
+
+                log_info(1, sprintf "    data: received %d bytes", length $data);
+
                 if($html) {
                     log_info(0, "    data: $type (recognized as HTML)");
                     my $next = "";
 
-                    while(<$remote_server>) {
-                        my $line = $_;
-
-                        unless($router or $reverseproxy) {
-                            $content .= $_;
+                    foreach my $line (split /\n/, $data) {
+                        unless($router or $reverseproxy or $PREFIX) {
+                            $content .= $line;
                             next;
                         }
 
-                        $line = "$next $line";
+                        my $space = "";
+                        $space = " " if length $next;
+                        $line = "$next$space$line";
                         chomp $line;
                         $next = "";
 
@@ -264,8 +315,10 @@
 
                                 if($router) {
                                     $host = append_exit($host, $router);
-                                    log_info(1, "<$tag tag: $host>");
                                 }
+                                if($PREFIX and $PREFIX ne "$host$port") {
+                                    $proxypath = "http://$PREFIX/";
+                                }
 
                                 my $after = "$tag$attr$label=$quote$proxypath$pre$host$port";
 
@@ -273,20 +326,23 @@
 
                                 $content .= "<$after";
                                 $line = "$post$rest";
-                            } elsif($reverseproxy and $line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)\/(.*)$/i) {
+                            } elsif($line =~ /^<(a|form|frame|img|input|link)([^>]+)(action|href|src)=(\'?\"?)\/(.*)$/i) {
+                                my $after;
                                 my ($tag, $attr, $label, $quote, $rest) = ($1, $2, $3, $4, $5);
-
                                 my $before = "$tag$attr$label=$quote/";
 
                                 # normalize
                                 $tag =~ y/A-Z/a-z/;
 
-                                my $after = "$tag$attr$label=$quote$reverseproxy/";
-
-                                log_info(2, "converting: <$before --> <$after");
-
-                                $content .= "<$after";
-                                $line = $rest;
+                                if($PREFIX) {
+                                    $reverseproxy = "/http://$serverhost";
+                                }
+                                if($reverseproxy) {
+                                    $after = "$tag$attr$label=$quote$reverseproxy/";
+                                    log_info(2, "converting: <$before --> <$after");
+                                    $content .= "<$after";
+                                    $line = $rest;
+                                }
                             } elsif($line =~ /^(<.*?>)(.*)$/) {
                                 $content .= $1;
                                 $line = $2;
@@ -304,35 +360,23 @@
                         $content .= "\n";
                     }
 
-                    foreach my $line (split /\n/, $headers) {
-                        if($line =~ /^Content-Length: (\S+)\r$/i) {
-                            $length = length $content;
-                            print "Content-Length: $length\n";
-                        } else {
-                            print "$line\n";
+                    # explicitly set Content-Length
+
+                    my @headerlines = split /\n/, $headers;
+                    my @newheaders = ();
+
+                    foreach my $headerline (@headerlines) {
+                        next if $headerline =~ /^Transfer-Encoding: chunked/;
+                        unless($headerline =~ /^Content-Length: (\S+)\r$/i) {
+                            push @newheaders, $headerline if length $headerline > 2;
                         }
                     }
-                    print $content;
-
-                } elsif($length) {
-                    my $data = "";
-                    print $headers;
-
-                    log_info(0, "    data: $type [$length]");
-                    read($remote_server, $data, $length) or die "    error: $?";
-                    print "$data\n";
+                    push @newheaders, sprintf "Content-Length: %d", length $content;
+                    $headers = join "\n", @newheaders;
+                    print "$headers\n\n$content";
                 } else {
-                    print $headers;
-                    if($type) {
-                        log_info(0, "    data: $type");
-                    } else {
-                        log_info(0, "    data: unspecified type");
-                    }
-                    while(<$remote_server>) {
-                        print;
-                    }
+                    print "$headers$data";
                 }
-
                 kill('TERM', $kidpid);          # kill my twin cause we're done
             }
             # this is the fork's child, the master's grandchild
@@ -356,7 +400,10 @@
                             $length = $1;
                             print;
                             log_info(1, "    send: $_");
+                        } elsif(/^Accept-Encoding: /i) {
+                            log_info(1, "    suppress: $_");
                         } elsif(/^Host: (\S+)\r$/i) {
+                            my $h = $1;
                             my $router = "";
                             my $repl = $reverseproxy || $1;
                             if($repl =~ /\.[A-Za-z0-9-]+\.exit(:[0-9]+)?$/) {
@@ -369,7 +416,7 @@
                             }
                             log_info(0, "transmitting router: [$router]");
                             $router .= "+$reverseproxy" if $reverseproxy;
-                            print WRITER "$router\n";
+                            print WRITER "$h\n$router\n";
                             close WRITER;
 
                             $repl = "Host: $repl\r\n";
@@ -464,4 +511,3 @@
     $SIG{CHLD} = \&REAPER;
 };
 
-



More information about the tor-commits mailing list