#
# http://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
-# $Id: privoxy-log-parser.pl,v 1.85 2010/07/22 14:56:33 fabiankeil Exp $
+# $Id: privoxy-log-parser.pl,v 1.92 2010/08/28 13:22:00 fabiankeil Exp $
#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
# - Handle incomplete input without Perl warning about undefined variables.
# - Use generic highlighting function that takes a regex and the
# hash key as input.
+# - Add --compress and --decompress options.
#
# Copyright (c) 2007-2010 Fabian Keil <fk@fabiankeil.de>
#
CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0,
CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0,
CLI_OPTION_STATISTICS => 0,
+ CLI_OPTION_URL_STATISTICS_THRESHOLD => 0,
SUPPRESS_SUCCEEDED_FILTER_ADDITIONS => 1,
SHOW_SCAN_INTRO => 0,
update_header_highlight_regex($header);
}
- } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) {
+ } elsif ($c =~ m/^(scan: )(\w+ .+ HTTP\/\d\.\d)/) {
- # Client request line
- # Save for statistics (XXX: Not implemented yet)
- $req{$t}{'method'} = $2;
- $req{$t}{'destination'} = $3;
- $req{$t}{'http-version'} = $4;
-
- $c = highlight_request_line($1);
+ # scan: HTTP/1.1 200 OK
+ $c = $1 . highlight_request_line($2);
} elsif ($c =~ m/^(scan: )((?:HTTP\/\d\.\d|ICY) (\d+) (.*))/) {
$c =~ s@(?<=Received )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=expecting )(\d+)@$h{'Number'}$1$h{'Standard'}@;
- } elsif ($c =~ m/^Connection from/) {
+ } elsif ($c =~ m/^(Rejecting c|C)onnection from/) {
# Connection from 81.163.28.218 dropped due to ACL
- $c =~ s@(?<=^Connection from )((?:\d+\.?){4})@$h{'Number'}$1$h{'Standard'}@;
+ # Rejecting connection from 178.63.152.227. Maximum number of connections reached.
+ $c =~ s@(?<=onnection from )((?:\d+\.?){3}\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^(?:Reusing|Closing) server socket \d./ or
$c =~ m/^No additional client request/) {
# A HTTP/1.1 response without Connection header implies keep-alive.
# Keeping the server header 'Connection: keep-alive' around.
$stats{'server-keep-alive'}++;
+
+ } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) {
+
+ # scan: HTTP/1.1 200 OK
+ $stats{'method'}{$2}++;
+ $stats{'ressource'}{$3}++;
+ $stats{'http-version'}{$4}++;
}
}
sub print_stats () {
our %stats;
+ our %cli_options;
my $new_connections = $stats{requests} - $stats{crunches} - $stats{'reused-connections'};
my $outgoing_requests = $stats{requests} - $stats{crunches};
$stats{'empty-responses-on-reused-connections'} . " (" .
get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) .
")\n";
+
+ if ($stats{method} eq 0) {
+ print "No response lines parsed yet yet.\n";
+ return;
+ }
+ print "Method distribution:\n";
+ foreach my $method (sort {$stats{'method'}{$b} <=> $stats{'method'}{$a}} keys %{$stats{'method'}}) {
+ printf "%8d : %-8s\n", $stats{'method'}{$method}, $method;
+ }
+ print "Client HTTP versions:\n";
+ foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) {
+ printf "%d : %s\n", $stats{'http-version'}{$http_version}, $http_version;
+ }
+
+ if ($cli_options{'url-statistics-threshold'} == 0) {
+ print "URL statistics are disabled. Increase --url-statistics-threshold to enable them.\n";
+ } else {
+ print "Requested URLs:\n";
+ foreach my $ressource (sort {$stats{'ressource'}{$b} <=> $stats{'ressource'}{$a}} keys %{$stats{'ressource'}}) {
+ if ($stats{'ressource'}{$ressource} < $cli_options{'url-statistics-threshold'}) {
+ print "Skipped statistics for URLs below the treshold.\n";
+ last;
+ }
+ printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource;
+ }
+ }
}
'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS,
'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES,
'statistics' => CLI_OPTION_STATISTICS,
+ 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
);
GetOptions (
'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'},
'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'},
'statistics' => \$cli_options{'statistics'},
+ 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'},
'version' => sub { VersionMessage && exit(0) },
'help' => \&help,
) or exit(1);
[--shorten-thread-ids]
[--show-ineffective-filters]
[--statistics]
+ [--url-statistics-threshold $cli_options{'url-statistics-threshold'}]
[--title $cli_options{'title'}]
[--version]
see "perldoc $0" for more information
B<privoxy-log-parser> [B<--accept-unknown-messages>] [B<--html-output>]
[B<--no-msecs>] [B<--no-syntax-higlighting>] [B<--statistics>]
-[B<--shorten-thread-ids>] [B<--show-ineffective-filters>] [B<--version>]
+[B<--shorten-thread-ids>] [B<--show-ineffective-filters>]
+[B<--url-statistics-threshold>] [B<--version>]
=head1 DESCRIPTION
they very well might be. Also note that the results are pretty much guaranteed
to be incorrect if Privoxy and Privoxy-Log-Parser aren't in sync.
+[B<--url-statistics-threshold>] Only show the request count for a ressource
+if it's above or equal to the given threshold. If the threshold is 0, URL
+statistics are disabled.
+
[B<--version>] Print version and exit.
=head1 EXAMPLES