#
# http://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
-# $Id: privoxy-log-parser.pl,v 1.151 2012/12/24 15:53:26 fabiankeil Exp $
+# $Id: privoxy-log-parser.pl,v 1.161 2013/08/29 11:12:13 fabiankeil Exp $
#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
# hash key as input.
# - Add --compress and --decompress options.
#
-# Copyright (c) 2007-2012 Fabian Keil <fk@fabiankeil.de>
+# Copyright (c) 2007-2013 Fabian Keil <fk@fabiankeil.de>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
use Getopt::Long;
use constant {
- PRIVOXY_LOG_PARSER_VERSION => '0.7',
+ PRIVOXY_LOG_PARSER_VERSION => '0.8',
# Feel free to mess with these ...
DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black')
DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black')
CLI_OPTION_NO_SYNTAX_HIGHLIGHTING => 0,
CLI_OPTION_SHORTEN_THREAD_IDS => 0,
CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0,
- CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0,
CLI_OPTION_STATISTICS => 0,
CLI_OPTION_STRICT_CHECKS => 0,
CLI_OPTION_UNBREAK_LINES_ONLY => 0,
CLI_OPTION_URL_STATISTICS_THRESHOLD => 0,
CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0,
+ CLI_OPTION_SHOW_COMPLETE_REQUEST_DISTRIBUTION => 0,
SUPPRESS_SUCCEEDED_FILTER_ADDITIONS => 1,
SHOW_SCAN_INTRO => 0,
my $unknown = shift;
my $message;
- return if cli_option_is_set('accept-unknown-messages');
+ return unless cli_option_is_set('strict-checks');
return if ($unknown =~ /\[too long, truncated\]$/);
$rl = h('invalid-request') . $rl . h('Standard');
- } elsif ($rl =~ m/^([-\w]+) (.*) (HTTP\/\d\.\d)/) {
+ } elsif ($rl =~ m/^([-\w]+) (.*) (HTTP\/\d+\.\d+)/) {
# XXX: might not match in case of HTTP method fuzzing.
# XXX: save these: ($method, $path, $http_version) = ($1, $2, $3);
$c =~ s@(?<=Drained )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
- } elsif ($c =~ m/^Tainting client socket/) {
+ } elsif ($c =~ m/^Tainting client socket/ or
+ $c =~ m/^Failed to shutdown socket/) {
# Tainting client socket 7 due to unread data.
+ # Failed to shutdown socket 11: Connection reset by peer
+
$c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^Shifting \d+ pipelined bytes/) {
# Reloading configuration file '/usr/local/etc/privoxy/config'
$c =~ s@(?<=loading configuration file \')([^\']*)@$h{'file'}$1$h{'Standard'}@;
- } elsif ($c =~ m/^Loading (actions|filter) file: /) {
+ } elsif ($c =~ m/^Loading (actions|filter|trust) file: /) {
# Loading actions file: /usr/local/etc/privoxy/default.action
# Loading filter file: /usr/local/etc/privoxy/default.filter
+ # Loading trust file: /usr/local/etc/privoxy/trust
+
$c =~ s@(?<= file: )(.*)$@$h{'file'}$1$h{'Standard'}@;
} elsif ($c =~ m/^exiting by signal/) {
$thread_data{$thread}{'new_connection'} = 0;
$stats{'reused-connections'}++;
+
+ } elsif ($c =~ m/^Closing client socket \d+. .* Requests received: (\d+)\.$/) {
+
+ # Closing client socket 12. Keep-alive: 1. Socket alive: 1. Data available: 0. \
+ # Configuration file change detected: 0. Requests received: 14.
+
+ $stats{'client-requests-on-connection'}{$1}++;
+ $stats{'closed-client-connections'}++;
}
}
'blocked' => 0,
'reused-connections' => 0,
'server-keep-alive' => 0,
+ 'closed-client-connections' => 0,
);
+ $stats{'client-requests-on-connection'}{1} = 0;
}
sub get_percentage ($$) {
our %cli_options;
my $new_connections = $stats{requests} - $stats{crunches} - $stats{'reused-connections'};
my $outgoing_requests = $stats{requests} - $stats{crunches};
+ my $client_requests_checksum = 0;
if ($stats{requests} eq 0) {
print "No requests yet.\n";
$stats{'empty-responses-on-reused-connections'} . " (" .
get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) .
")\n";
+ print "Client connections: " . $stats{'closed-client-connections'} . "\n";
+
+ my $lines_printed = 0;
+ print "Client requests per connection distribution:\n";
+ foreach my $client_requests (sort {
+ $stats{'client-requests-on-connection'}{$b} <=> $stats{'client-requests-on-connection'}{$a}}
+ keys %{$stats{'client-requests-on-connection'}
+ })
+ {
+ my $count = $stats{'client-requests-on-connection'}{$client_requests};
+ $client_requests_checksum += $count * $client_requests;
+ if ($cli_options{'show-complete-request-distribution'} or ($lines_printed < 10)) {
+ printf "%8d: %d\n", $count, $client_requests;
+ $lines_printed++;
+ }
+ }
+ unless ($cli_options{'show-complete-request-distribution'}) {
+ printf "Enable --show-complete-request-distribution to get less common numbers as well.\n";
+ }
+ # Due to log rotation we may not have a complete picture for all the requests
+ printf "Improperly accounted requests: ~%d\n", abs($stats{requests} - $client_requests_checksum);
if ($stats{method} eq 0) {
print "No response lines parsed yet yet.\n";
(undef, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5);
# Skip LOG_LEVEL_CLF
- next if ($time_stamp eq "-");
+ next if (not defined($log_level) or $time_stamp eq "-");
if (defined($log_level_handlers{$log_level})) {
my $version_message;
$version_message .= 'Privoxy-Log-Parser ' . PRIVOXY_LOG_PARSER_VERSION . "\n";
- $version_message .= 'Copyright (C) 2007-2010 Fabian Keil <fk@fabiankeil.de>' . "\n";
$version_message .= 'http://www.fabiankeil.de/sourcecode/privoxy-log-parser/' . "\n";
print $version_message;
'no-msecs' => CLI_OPTION_NO_MSECS,
'shorten-thread-ids' => CLI_OPTION_SHORTEN_THREAD_IDS,
'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS,
- 'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES,
'statistics' => CLI_OPTION_STATISTICS,
'strict-checks' => CLI_OPTION_STRICT_CHECKS,
'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
'unbreak-lines-only' => CLI_OPTION_UNBREAK_LINES_ONLY,
'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD,
+ 'show-complete-request-distribution' => CLI_OPTION_SHOW_COMPLETE_REQUEST_DISTRIBUTION,
);
GetOptions (
'no-msecs' => \$cli_options{'no-msecs'},
'shorten-thread-ids' => \$cli_options{'shorten-thread-ids'},
'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'},
- 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'},
'statistics' => \$cli_options{'statistics'},
'strict-checks' => \$cli_options{'strict-checks'},
'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'},
'url-statistics-threshold=i'=> \$cli_options{'url-statistics-threshold'},
'host-statistics-threshold=i'=> \$cli_options{'host-statistics-threshold'},
+ 'show-complete-request-distribution' => \$cli_options{'show-complete-request-distribution'},
'version' => sub { VersionMessage && exit(0) },
'help' => \&help,
) or exit(1);
print << " EOF"
Options and their default values if they have any:
- [--accept-unknown-messages]
[--host-statistics-threshold $cli_options{'host-statistics-threshold'}]
[--html-output]
[--no-embedded-css]
[--no-syntax-highlighting]
[--shorten-thread-ids]
[--show-ineffective-filters]
+ [--show-complete-request-distribution]
[--statistics]
[--unbreak-lines-only]
[--url-statistics-threshold $cli_options{'url-statistics-threshold'}]
=head1 SYNOPSIS
-B<privoxy-log-parser> [B<--accept-unknown-messages>] [B<--html-output>]
+B<privoxy-log-parser> [B<--html-output>]
[B<--no-msecs>] [B<--no-syntax-higlighting>] [B<--statistics>]
[B<--shorten-thread-ids>] [B<--show-ineffective-filters>]
[B<--url-statistics-threshold>] [B<--version>]
=head1 OPTIONS
-[B<--accept-unknown-messages>] Don't print warnings in case of unknown messages,
-just don't highlight them.
-
[B<--host-statistics-threshold>] Only show the request count for a host
if it's above or equal to the given threshold. If the threshold is 0, host
statistics are disabled.
[B<--show-ineffective-filters>] Don't suppress log lines for filters
that didn't modify the content.
+[B<--show-complete-request-distribution>] Show the complete client request
+distribution in the B<--statistics> output. Without this option only the
+ten most common numbers are shown.
+
[B<--statistics>] Gather various statistics instead of syntax highlighting
log messages. This is an experimental feature, if the results look wrong
they very well might be. Also note that the results are pretty much guaranteed
input data and abort if it is unexpected, even if it doesn't affect the
results. Significantly slows the parsing down and is not expected to catch
any problems that matter.
+When highlighting, print warnings in case of unknown messages which can't be
+properly highlighted.
[B<--unbreak-lines-only>] Tries to fix lines that got messed up by a broken or
interestingly configured mail client and thus are no longer recognized properly.