#!/usr/bin/perl # stfw: Straight Through the Firewall # stick this on your web site and all of a sudden you've got raw # buck-naked Google Thoughtcrime happening on your IP address. # See if they can block us all. PS: works for Google Cache too. # So put that in your pipe and smoke it. ## If you're seeing this Perl script instead of your ## ## search result, either (1) you didn't supply a "q" parameter ## ## in your URL (try the form below) or (2) too many people are ## ## using this copy of this script. Slow down, try back later, or ## ## save this script and send it to someone who has a web server ## ## that can see Google. ## # Copyright 2006, 2010 Don Marti # This is free software. You can redistribute it # and/or modify it under the same terms as Perl itself. # You will probably want to change the name of the script, and put # a basic search form on a page. Something like this: # # Set the "$FAIL_TIMEOUT" and "$SUCCESS_TIMEOUT" below to reduce the # rate at which this script will bug Google. # for background: # http://rconversation.blogs.com/rconversation/2010/01/google-puts-its-foot-down.html use strict; use CGI; use File::Slurp qw(slurp); use HTML::Parser (); use LWP::UserAgent; use Redis; use URI; use URI::QueryParam; my $FAIL_TIMEOUT = 1800; # how long to wait after a failed request my $SUCCESS_TIMEOUT = 6; # how long to wait after a successful request # Filter out tags that are less likely to leave traces # on the user's computer, but no guarantees. Need iframe # for cached pages (?) my %safe_tag = map { $_ => 1 } qw { head body title h1 h2 h3 table td th tr a img div iframe input form textarea u small b large em span nobr cite wbr p noscript br style link ol ul li span/ }; # Same with attributes -- for example we don't want to fire the # JavaScript stuff from the on.* attributes. my %safe_attr = map { $_ => 1 } qw { action class colspan rowspan href id maxlength method name nowrap rel size style title type valign value width }; # Allowed query parameters my %safe_param = map { $_ => 1 } qw { hl q start }; # Set a URL that will lead back here for clicking through more # result pages my $q = new CGI; my $here = $q->url(-relative=>1); # keep track of when we're between tags that we don't want to pass on my %in_sensitive_element; # Start building the text to be passed back to the browser my $clean = ''; # Some rate limiting... my $wait; my $redis = Redis->new; my $when = $redis->get($here); if ($redis->get($here) > time() or !defined($q->param('q'))) { print "Content-type: text/plain\n\n"; print slurp($0); exit(0); } # and let's not mess up the browser usage stats, so pass on # the real user-agent (browser fanboys leave this alone please) my $browser = LWP::UserAgent->new; $browser->agent($q->http('User-agent')); # set up a URI object for the outgoing search my $url = URI->new("http://www.google.com/search"); foreach my $qp ($q->param) { $url->query_param($qp, $q->param($qp)) if $safe_param{$qp}; } my $search = HTTP::Request->new(GET => $url); # do the HTTP request my $result = $browser->request($search); # Hey, Google, don't get mad at us, we'll go away for a while # if rate limited. if ($result->is_success()) { $redis->set($here, time() + $SUCCESS_TIMEOUT); } else { $redis->set($here, time() + $FAIL_TIMEOUT); } # make a parser object, parse out the stuff that's safe* to pass # on, send it back to the user my $p = HTML::Parser->new(api_version => 3, start_h => [\&start, "tagname, attr, text"], end_h => [\&end, "tagname, text"], default_h => [\&text, "text"], ); $p->parse($result->content); print $q->header(), $clean; exit(0); # this is the callback that gets called on an open tag. # try not to send sensitive stuff and do a couple special things # with images and links. sub start { my ($name, $attr, $text) = @_; if (total_sensitive() > 0) { return; } if (!exists($safe_tag{$name})) { # $in_sensitive_element{$name}++; # return; } elsif ($name eq 'img') { if(defined($attr->{'alt'})) { $clean .= $attr->{'alt'}; } return; } if (defined($attr->{'href'})) { $attr->{'href'} =~ s|^(http://[\d\.]+)?/search|$here|; } elsif (defined($attr->{'action'})) { $attr->{'action'} =~ s|^/search|$here|; } foreach my $a (keys(%$attr)) { if (!exists($safe_attr{$a})) { delete($attr->{$a}); } } $clean .= "<$name " . join (' ', map{qq{$_="$attr->{$_}"}} keys(%$attr)) . ">"; } # close tag. Keep track of whether we're in something sensitive. sub end { my ($name, $text) = @_; if ($in_sensitive_element{$name} > 0) { $in_sensitive_element{$name}--; } else { $clean .= $text; } } # build onto the result if we're not in a sensitive place sub text { if (total_sensitive() == 0) { $clean .= shift; } } sub total_sensitive { my $sum = 0; while ( my ($key, $value) = each (%in_sensitive_element)) { $sum += $value; } return($sum); }