#
# http://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
-# $Id: privoxy-log-parser.pl,v 1.92 2010/08/28 13:22:00 fabiankeil Exp $
+# $Id: privoxy-log-parser.pl,v 1.103 2010/11/08 17:53:29 fabiankeil Exp $
#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0,
CLI_OPTION_STATISTICS => 0,
CLI_OPTION_URL_STATISTICS_THRESHOLD => 0,
+ CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0,
SUPPRESS_SUCCEEDED_FILTER_ADDITIONS => 1,
SHOW_SCAN_INTRO => 0,
'action-bits-update' => 'light_red',
'configuration-line' => 'red',
'content-type' => 'yellow',
+ 'HOST' => HEADER_DEFAULT_COLOUR,
);
%h_colours = %h;
} elsif ($c =~ m/^(scan: )(\w+ .+ HTTP\/\d\.\d)/) {
- # scan: HTTP/1.1 200 OK
+ # scan: GET http://p.p/ HTTP/1.1
$c = $1 . highlight_request_line($2);
} elsif ($c =~ m/^(scan: )((?:HTTP\/\d\.\d|ICY) (\d+) (.*))/) {
- # Server response line
+ # scan: HTTP/1.1 200 OK
$req{$t}{'response_line'} = $2;
$req{$t}{'status_code'} = $3;
$req{$t}{'status_message'} = $4;
or $c =~ m/^Removing 'Connection: close' to imply keep-alive./
or $c =~ m/^keep-alive support is disabled/
or $c =~ m/^Continue hack in da house/
+ or $c =~ m/^Merged multiple header lines to:/
)
{
# XXX: Some of these may need highlighting
# Removing 'Connection: close' to imply keep-alive.
# keep-alive support is disabled. Crunching: Keep-Alive: 300.
# Continue hack in da house.
+ # Merged multiple header lines to: 'X-FORWARDED-PROTO: http X-HOST: 127.0.0.1'
} elsif ($c =~ m/^scanning headers for:/) {
$c =~ s@(?<= from )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<= to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ } elsif ($c =~ m/^Killed all-caps Host header line: HOST:/) {
+
+ # Killed all-caps Host header line: HOST: bestproxydb.com
+ $c = highlight_matched_host($c, '(?<=HOST: )[^\s]+');
+ $c = highlight_matched_pattern($c, 'HOST', 'HOST');
+
} else {
found_unknown_content($c);
$c =~ s@(?<=set to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=reading )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ } elsif ($c =~ m/^Reducing expected bytes to /) {
+
+ # Reducing expected bytes to 0. Marking the server socket tainted after throwing 4 bytes away.
+ $c =~ s@(?<=bytes to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=after throwing )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
} elsif ($c =~ m/^Waiting for up to /) {
# Waiting for up to 4999 bytes from the client.
$stats{requests}++;
$stats{crunches}++;
+
+ if ($c =~ m/^Redirected:/) {
+ # Redirected: http://www.example.org/http://p.p/
+ $stats{'fast-redirections'}++;
+
+ } elsif ($c =~ m/^Blocked:/) {
+ # Blocked: blogger.googleusercontent.com:443
+ $stats{'blocked'}++;
+ }
}
$stats{'method'}{$2}++;
$stats{'ressource'}{$3}++;
$stats{'http-version'}{$4}++;
+
+ } elsif ($c =~ m/^scan: Host: ([^\s]+)/) {
+
+ # scan: Host: p.p
+ $stats{'hosts'}{$1}++;
}
}
'empty-responses' => 0,
'empty-responses-on-new-connections' => 0,
'empty-responses-on-reused-connections' => 0,
+ 'fast-redirections' => 0,
+ 'blocked' => 0,
+ 'reused-connections' => 0,
+ 'server-keep-alive' => 0,
);
}
print "Client requests total: " . $stats{requests} . "\n";
print "Crunches: " . $stats{crunches} . " (" .
get_percentage($stats{requests}, $stats{crunches}) . ")\n";
+ print "Blocks: " . $stats{'blocked'} . " (" .
+ get_percentage($stats{requests}, $stats{'blocked'}) . ")\n";
+ print "Fast redirections: " . $stats{'fast-redirections'} . " (" .
+ get_percentage($stats{requests}, $stats{'fast-redirections'}) . ")\n";
print "Outgoing requests: " . $outgoing_requests . " (" .
get_percentage($stats{requests}, $outgoing_requests) . ")\n";
print "Server keep-alive offers: " . $stats{'server-keep-alive'} . " (" .
print "New outgoing connections: " . $new_connections . " (" .
get_percentage($stats{requests}, $new_connections) . ")\n";
print "Reused connections: " . $stats{'reused-connections'} . " (" .
- get_percentage($stats{requests}, $stats{'reused-connections'}) . ")\n";
+ get_percentage($stats{requests}, $stats{'reused-connections'}) .
+ "; server offers accepted: " .
+ get_percentage($stats{'server-keep-alive'}, $stats{'reused-connections'}) . ")\n";
print "Empty responses: " . $stats{'empty-responses'} . " (" .
get_percentage($stats{requests}, $stats{'empty-responses'}) . ")\n";
print "Empty responses on new connections: "
printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource;
}
}
+
+ if ($cli_options{'host-statistics-threshold'} == 0) {
+ print "Host statistics are disabled. Increase --host-statistics-threshold to enable them.\n";
+ } else {
+ print "Requested Hosts:\n";
+ foreach my $host (sort {$stats{'hosts'}{$b} <=> $stats{'hosts'}{$a}} keys %{$stats{'hosts'}}) {
+ if ($stats{'hosts'}{$host} < $cli_options{'host-statistics-threshold'}) {
+ print "Skipped statistics for Hosts below the treshold.\n";
+ last;
+ }
+ printf "%d : %s\n", $stats{'hosts'}{$host}, $host;
+ }
+ }
}
'Error' => \&gather_loglevel_error_stats,
'Fatal error' => \&handle_loglevel_ignore,
'Writing' => \&handle_loglevel_ignore,
+ 'Received' => \&handle_loglevel_ignore,
'Unknown log level' => \&handle_loglevel_ignore
);
'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS,
'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES,
'statistics' => CLI_OPTION_STATISTICS,
- 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
+ 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
+ 'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD,
);
GetOptions (
'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'},
'statistics' => \$cli_options{'statistics'},
'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'},
+ 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'},
'version' => sub { VersionMessage && exit(0) },
'help' => \&help,
) or exit(1);
Options and their default values if they have any:
[--accept-unknown-messages]
+ [--host-statistics-threshold $cli_options{'host-statistics-threshold'}]
[--html-output]
[--no-embedded-css]
[--no-msecs]
[B<--accept-unknown-messages>] Don't print warnings in case of unknown messages,
just don't highlight them.
+[B<--host-statistics-threshold>] Only show the request count for a host
+if it's above or equal to the given threshold. If the threshold is 0, host
+statistics are disabled.
+
[B<--html-output>] Use HTML and CSS for the syntax highlighting. If this option is
omitted, ANSI escape sequences are used unless B<--no-syntax-highlighting> is active.
This option is only intended to make embedding log excerpts in web pages easier.