# A parser for Privoxy log messages. For incomplete documentation run
# perldoc privoxy-log-parser(.pl), for fancy screenshots see:
#
-# http://www.fabiankeil.de/sourcecode/privoxy-log-parser/
-#
-# $Id: privoxy-log-parser.pl,v 1.155 2013/01/16 16:29:13 fabiankeil Exp $
+# https://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
# hash key as input.
# - Add --compress and --decompress options.
#
-# Copyright (c) 2007-2012 Fabian Keil <fk@fabiankeil.de>
+# Copyright (c) 2007-2017 Fabian Keil <fk@fabiankeil.de>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
use Getopt::Long;
use constant {
- PRIVOXY_LOG_PARSER_VERSION => '0.7',
+ PRIVOXY_LOG_PARSER_VERSION => '0.9',
# Feel free to mess with these ...
DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black')
DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black')
CLI_OPTION_NO_SYNTAX_HIGHLIGHTING => 0,
CLI_OPTION_SHORTEN_THREAD_IDS => 0,
CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0,
- CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0,
CLI_OPTION_STATISTICS => 0,
CLI_OPTION_STRICT_CHECKS => 0,
CLI_OPTION_UNBREAK_LINES_ONLY => 0,
my $unknown = shift;
my $message;
- return if cli_option_is_set('accept-unknown-messages');
+ return unless cli_option_is_set('strict-checks');
return if ($unknown =~ /\[too long, truncated\]$/);
$rl = h('invalid-request') . $rl . h('Standard');
- } elsif ($rl =~ m/^([-\w]+) (.*) (HTTP\/\d\.\d)/) {
+ } elsif ($rl =~ m/^([-\w]+) (.*) (HTTP\/\d+\.\d+)/) {
# XXX: might not match in case of HTTP method fuzzing.
# XXX: save these: ($method, $path, $http_version) = ($1, $2, $3);
} elsif ($c =~ m/^Blocked:/) {
# Blocked: blogger.googleusercontent.com:443
$stats{'blocked'}++;
+
+ } elsif ($c =~ m/^Connection timeout:/) {
+ # Connection timeout: http://c.tile.openstreetmap.org/18/136116/87842.png
+ $stats{'connection-timeout'}++;
+
+ } elsif ($c =~ m/^Connection failure:/) {
+ # Connection failure: http://127.0.0.1:8080/
+ $stats{'connection-failure'}++;
}
}
my ($c, $thread) = @_;
our %stats;
+ our %cli_options;
if ($c =~ m/^A HTTP\/1\.1 response without/ or
$c =~ m/^Keeping the server header 'Connection: keep-alive' around./)
# scan: HTTP/1.1 200 OK
$stats{'method'}{$2}++;
- $stats{'resource'}{$3}++;
+ if ($cli_options{'url-statistics-threshold'} != 0) {
+ $stats{'resource'}{$3}++;
+ }
$stats{'http-version'}{$4}++;
- } elsif ($c =~ m/^scan: Host: ([^\s]+)/) {
+ } elsif ($cli_options{'host-statistics-threshold'} != 0 and
+ $c =~ m/^scan: Host: ([^\s]+)/) {
# scan: Host: p.p
$stats{'hosts'}{$1}++;
'empty-responses-on-reused-connections' => 0,
'fast-redirections' => 0,
'blocked' => 0,
+ 'connection-failure' => 0,
+ 'connection-timeout' => 0,
'reused-connections' => 0,
'server-keep-alive' => 0,
'closed-client-connections' => 0,
get_percentage($stats{requests}, $stats{'blocked'}) . ")\n";
print "Fast redirections: " . $stats{'fast-redirections'} . " (" .
get_percentage($stats{requests}, $stats{'fast-redirections'}) . ")\n";
+ print "Connection timeouts: " . $stats{'connection-timeout'} . " (" .
+ get_percentage($stats{requests}, $stats{'connection-timeout'}) . ")\n";
+ print "Connection failures: " . $stats{'connection-failure'} . " (" .
+ get_percentage($stats{requests}, $stats{'connection-failure'}) . ")\n";
print "Outgoing requests: " . $outgoing_requests . " (" .
get_percentage($stats{requests}, $outgoing_requests) . ")\n";
print "Server keep-alive offers: " . $stats{'server-keep-alive'} . " (" .
unless ($cli_options{'show-complete-request-distribution'}) {
printf "Enable --show-complete-request-distribution to get less common numbers as well.\n";
}
- printf "Unaccounted requests: ~%d\n", $stats{requests} - $client_requests_checksum;
+ # Due to log rotation we may not have a complete picture for all the requests
+ printf "Improperly accounted requests: ~%d\n", abs($stats{requests} - $client_requests_checksum);
- if ($stats{method} eq 0) {
- print "No response lines parsed yet yet.\n";
- return;
- }
- print "Method distribution:\n";
- foreach my $method (sort {$stats{'method'}{$b} <=> $stats{'method'}{$a}} keys %{$stats{'method'}}) {
- printf "%8d : %-8s\n", $stats{'method'}{$method}, $method;
+ if (exists $stats{method}) {
+ print "Method distribution:\n";
+ foreach my $method (sort {$stats{'method'}{$b} <=> $stats{'method'}{$a}} keys %{$stats{'method'}}) {
+ printf "%8d : %-8s\n", $stats{'method'}{$method}, $method;
+ }
+ } else {
+ print "Method distribution unknown. No response headers parsed yet. Is 'debug 8' enabled?\n";
}
print "Client HTTP versions:\n";
foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) {
(undef, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5);
# Skip LOG_LEVEL_CLF
- next if ($time_stamp eq "-" or not defined($log_level));
+ next if (not defined($log_level) or $time_stamp eq "-");
if (defined($log_level_handlers{$log_level})) {
my $version_message;
$version_message .= 'Privoxy-Log-Parser ' . PRIVOXY_LOG_PARSER_VERSION . "\n";
- $version_message .= 'Copyright (C) 2007-2010 Fabian Keil <fk@fabiankeil.de>' . "\n";
- $version_message .= 'http://www.fabiankeil.de/sourcecode/privoxy-log-parser/' . "\n";
+ $version_message .= 'https://www.fabiankeil.de/sourcecode/privoxy-log-parser/' . "\n";
print $version_message;
}
'no-msecs' => CLI_OPTION_NO_MSECS,
'shorten-thread-ids' => CLI_OPTION_SHORTEN_THREAD_IDS,
'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS,
- 'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES,
'statistics' => CLI_OPTION_STATISTICS,
'strict-checks' => CLI_OPTION_STRICT_CHECKS,
'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
'no-msecs' => \$cli_options{'no-msecs'},
'shorten-thread-ids' => \$cli_options{'shorten-thread-ids'},
'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'},
- 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'},
'statistics' => \$cli_options{'statistics'},
'strict-checks' => \$cli_options{'strict-checks'},
'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'},
print << " EOF"
Options and their default values if they have any:
- [--accept-unknown-messages]
[--host-statistics-threshold $cli_options{'host-statistics-threshold'}]
[--html-output]
[--no-embedded-css]
=head1 SYNOPSIS
-B<privoxy-log-parser> [B<--accept-unknown-messages>] [B<--html-output>]
+B<privoxy-log-parser> [B<--html-output>]
[B<--no-msecs>] [B<--no-syntax-highlighting>] [B<--statistics>]
[B<--shorten-thread-ids>] [B<--show-ineffective-filters>]
[B<--url-statistics-threshold>] [B<--version>]
=head1 OPTIONS
-[B<--accept-unknown-messages>] Don't print warnings in case of unknown messages,
-just don't highlight them.
-
[B<--host-statistics-threshold>] Only show the request count for a host
if it's above or equal to the given threshold. If the threshold is 0, host
statistics are disabled.
input data and abort if it is unexpected, even if it doesn't affect the
results. Significantly slows the parsing down and is not expected to catch
any problems that matter.
+When highlighting, print warnings in case of unknown messages which can't be
+properly highlighted.
[B<--unbreak-lines-only>] Tries to fix lines that got messed up by a broken or
interestingly configured mail client and thus are no longer recognized properly.