#
# http://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
-# $Id: privoxy-log-parser.pl,v 1.82 2010/07/22 14:51:19 fabiankeil Exp $
+# $Id: privoxy-log-parser.pl,v 1.98 2010/11/06 13:27:45 fabiankeil Exp $
#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
# - Handle incomplete input without Perl warning about undefined variables.
# - Use generic highlighting function that takes a regex and the
# hash key as input.
+# - Add --compress and --decompress options.
#
-# Copyright (c) 2007-2009 Fabian Keil <fk@fabiankeil.de>
+# Copyright (c) 2007-2010 Fabian Keil <fk@fabiankeil.de>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0,
CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0,
CLI_OPTION_STATISTICS => 0,
+ CLI_OPTION_URL_STATISTICS_THRESHOLD => 0,
SUPPRESS_SUCCEEDED_FILTER_ADDITIONS => 1,
SHOW_SCAN_INTRO => 0,
'Gif-Deanimate' => 'blue',
Force => 'red',
Writing => 'light_green',
+ Received => 'yellow',
# ----------------------
URL => 'yellow',
path => 'brown',
update_header_highlight_regex($header);
}
- } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) {
+ } elsif ($c =~ m/^(scan: )(\w+ .+ HTTP\/\d\.\d)/) {
- # Client request line
- # Save for statistics (XXX: Not implemented yet)
- $req{$t}{'method'} = $2;
- $req{$t}{'destination'} = $3;
- $req{$t}{'http-version'} = $4;
-
- $c = highlight_request_line($1);
+ # scan: GET http://www.example.org/ HTTP/1.1
+ $c = $1 . highlight_request_line($2);
} elsif ($c =~ m/^(scan: )((?:HTTP\/\d\.\d|ICY) (\d+) (.*))/) {
$c =~ s@(?<=Received )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=expecting )(\d+)@$h{'Number'}$1$h{'Standard'}@;
- } elsif ($c =~ m/^Connection from/) {
+ } elsif ($c =~ m/^(Rejecting c|C)onnection from/) {
# Connection from 81.163.28.218 dropped due to ACL
- $c =~ s@(?<=^Connection from )((?:\d+\.?){4})@$h{'Number'}$1$h{'Standard'}@;
+ # Rejecting connection from 178.63.152.227. Maximum number of connections reached.
+ $c =~ s@(?<=onnection from )((?:\d+\.?){3}\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^(?:Reusing|Closing) server socket \d./ or
$c =~ m/^No additional client request/) {
$stats{requests}++;
$stats{crunches}++;
+
+ if ($c =~ m/^Redirected:/) {
+ # Redirected: http://www.example.org/http://p.p/
+ $stats{'fast-redirections'}++;
+
+ } elsif ($c =~ m/^Blocked:/) {
+ # Blocked: blogger.googleusercontent.com:443
+ $stats{'blocked'}++;
+ }
}
# A HTTP/1.1 response without Connection header implies keep-alive.
# Keeping the server header 'Connection: keep-alive' around.
$stats{'server-keep-alive'}++;
+
+ } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) {
+
+ # scan: GET http://www.example.org/ HTTP/1.1
+ $stats{'method'}{$2}++;
+ $stats{'ressource'}{$3}++;
+ $stats{'http-version'}{$4}++;
}
}
'empty-responses' => 0,
'empty-responses-on-new-connections' => 0,
'empty-responses-on-reused-connections' => 0,
+ 'fast-redirections' => 0,
+ 'blocked' => 0,
+ 'reused-connections' => 0,
+ 'server-keep-alive' => 0,
);
}
sub print_stats () {
our %stats;
+ our %cli_options;
my $new_connections = $stats{requests} - $stats{crunches} - $stats{'reused-connections'};
my $outgoing_requests = $stats{requests} - $stats{crunches};
print "Client requests total: " . $stats{requests} . "\n";
print "Crunches: " . $stats{crunches} . " (" .
get_percentage($stats{requests}, $stats{crunches}) . ")\n";
+ print "Blocks: " . $stats{'blocked'} . " (" .
+ get_percentage($stats{requests}, $stats{'blocked'}) . ")\n";
+ print "Fast redirections: " . $stats{'fast-redirections'} . " (" .
+ get_percentage($stats{requests}, $stats{'fast-redirections'}) . ")\n";
print "Outgoing requests: " . $outgoing_requests . " (" .
get_percentage($stats{requests}, $outgoing_requests) . ")\n";
print "Server keep-alive offers: " . $stats{'server-keep-alive'} . " (" .
print "New outgoing connections: " . $new_connections . " (" .
get_percentage($stats{requests}, $new_connections) . ")\n";
print "Reused connections: " . $stats{'reused-connections'} . " (" .
- get_percentage($stats{requests}, $stats{'reused-connections'}) . ")\n";
+ get_percentage($stats{requests}, $stats{'reused-connections'}) .
+ "; server offers accepted: " .
+ get_percentage($stats{'server-keep-alive'}, $stats{'reused-connections'}) . ")\n";
print "Empty responses: " . $stats{'empty-responses'} . " (" .
get_percentage($stats{requests}, $stats{'empty-responses'}) . ")\n";
print "Empty responses on new connections: "
$stats{'empty-responses-on-reused-connections'} . " (" .
get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) .
")\n";
+
+ if ($stats{method} eq 0) {
+ print "No response lines parsed yet yet.\n";
+ return;
+ }
+ print "Method distribution:\n";
+ foreach my $method (sort {$stats{'method'}{$b} <=> $stats{'method'}{$a}} keys %{$stats{'method'}}) {
+ printf "%8d : %-8s\n", $stats{'method'}{$method}, $method;
+ }
+ print "Client HTTP versions:\n";
+ foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) {
+ printf "%d : %s\n", $stats{'http-version'}{$http_version}, $http_version;
+ }
+
+ if ($cli_options{'url-statistics-threshold'} == 0) {
+ print "URL statistics are disabled. Increase --url-statistics-threshold to enable them.\n";
+ } else {
+ print "Requested URLs:\n";
+ foreach my $ressource (sort {$stats{'ressource'}{$b} <=> $stats{'ressource'}{$a}} keys %{$stats{'ressource'}}) {
+ if ($stats{'ressource'}{$ressource} < $cli_options{'url-statistics-threshold'}) {
+ print "Skipped statistics for URLs below the treshold.\n";
+ last;
+ }
+ printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource;
+ }
+ }
}
'Error' => \&handle_loglevel_error,
'Fatal error' => \&handle_loglevel_ignore,
'Writing' => \&handle_loglevel_ignore,
+ 'Received' => \&handle_loglevel_ignore,
'Unknown log level' => \&handle_loglevel_ignore,
);
'Error' => \&gather_loglevel_error_stats,
'Fatal error' => \&handle_loglevel_ignore,
'Writing' => \&handle_loglevel_ignore,
+ 'Received' => \&handle_loglevel_ignore,
'Unknown log level' => \&handle_loglevel_ignore
);
my $version_message;
$version_message .= 'Privoxy-Log-Parser ' . PRIVOXY_LOG_PARSER_VERSION . "\n";
- $version_message .= 'Copyright (C) 2007-2009 Fabian Keil <fk@fabiankeil.de>' . "\n";
+ $version_message .= 'Copyright (C) 2007-2010 Fabian Keil <fk@fabiankeil.de>' . "\n";
$version_message .= 'http://www.fabiankeil.de/sourcecode/privoxy-log-parser/' . "\n";
print $version_message;
'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS,
'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES,
'statistics' => CLI_OPTION_STATISTICS,
+ 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
);
GetOptions (
'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'},
'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'},
'statistics' => \$cli_options{'statistics'},
+ 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'},
'version' => sub { VersionMessage && exit(0) },
'help' => \&help,
) or exit(1);
[--shorten-thread-ids]
[--show-ineffective-filters]
[--statistics]
+ [--url-statistics-threshold $cli_options{'url-statistics-threshold'}]
[--title $cli_options{'title'}]
[--version]
see "perldoc $0" for more information
B<privoxy-log-parser> [B<--accept-unknown-messages>] [B<--html-output>]
[B<--no-msecs>] [B<--no-syntax-higlighting>] [B<--statistics>]
-[B<--shorten-thread-ids>] [B<--show-ineffective-filters>] [B<--version>]
+[B<--shorten-thread-ids>] [B<--show-ineffective-filters>]
+[B<--url-statistics-threshold>] [B<--version>]
=head1 DESCRIPTION
codes.
[B<--shorten-thread-ids>] Shorten the thread ids to a three-digit decimal number.
-Note that the mapping from thread ids to shortended ids is created at
-run-time and thus varies with the input.
+Note that the mapping from thread ids to shortened ids is created at run-time
+and thus varies with the input.
[B<--show-ineffective-filters>] Don't suppress log lines for filters
that didn't modify the content.
they very well might be. Also note that the results are pretty much guaranteed
to be incorrect if Privoxy and Privoxy-Log-Parser aren't in sync.
+[B<--url-statistics-threshold>] Only show the request count for a resource
+if it's above or equal to the given threshold. If the threshold is 0, URL
+statistics are disabled.
+
[B<--version>] Print version and exit.
=head1 EXAMPLES