#
# http://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
-# $Id: privoxy-log-parser.pl,v 1.110 2011/01/14 19:47:53 fabiankeil Exp $
+# $Id: privoxy-log-parser.pl,v 1.135 2012/07/27 17:40:31 fabiankeil Exp $
#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
Force => 'red',
Writing => 'light_green',
Received => 'yellow',
+ Actions => 'yellow',
# ----------------------
URL => 'yellow',
path => 'brown',
# XXX: The Rolling Stones reference has to go.
###############################################################
- my $colour = shift @_;
+ my $colour = shift;
return "" if cli_option_is_set('no-syntax-highlighting');
# Takes a string and returns a span element
###############################################################
- my $type = shift @_;
+ my $type = shift;
my $code;
if ($type =~ /Standard/) {
# markup should always be semantically correct.
###############################################################
- my $type = shift @_;
+ my $type = shift;
my $code;
if ($type =~ /Standard/) {
my $message = shift;
if (LOG_UNPARSED_LINES_TO_EXTRA_FILE) {
- open(my $errorlog_fd, ">>" . ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . " failed";
+ open(my $errorlog_fd, ">>", ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . " failed";
print $errorlog_fd $message;
close($errorlog_fd);
}
or $c =~ m/^keep-alive support is disabled/
or $c =~ m/^Continue hack in da house/
or $c =~ m/^Merged multiple header lines to:/
+ or $c =~ m/^Added header: /
+ or $c =~ m/^Enlisting (?:sorted|left-over) header/
)
{
# XXX: Some of these may need highlighting
# keep-alive support is disabled. Crunching: Keep-Alive: 300.
# Continue hack in da house.
# Merged multiple header lines to: 'X-FORWARDED-PROTO: http X-HOST: 127.0.0.1'
+ # Added header: Content-Encoding: deflate
+ # Enlisting sorted header User-Agent: Mozilla/5.0 (X11; SunOS i86pc; rv:10.0.3) Gecko/20100101 Firefox/10.0.3
+ # Enlisting left-over header Connection: close
} elsif ($c =~ m/^scanning headers for:/) {
# Adding dynamic re_filter job s@^(?:\w*)\s+.*\s+HTTP/\d\.\d\s*@IP-ADDRESS: $origin@D\
# to filter client-ip-address succeeded.
+ } elsif ($c =~ m/^Compressed content from /) {
+
+ # Compressed content from 29258 to 8630 bytes. Compression level: 3
+ $content =~ s@(?<=from )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $content =~ s@(?<=to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $content =~ s@(?<=level: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
} elsif ($c =~ m/^Reading in filter/) {
return '' unless SHOW_FILTER_READIN_IN;
# XXX: assume the same?
$c = highlight_matched_url($c, '(?<=assuming that \")[^"]*');
+ } elsif ($c =~ m/^Percent-encoding redirect/) {
+
+ # Percent-encoding redirect URL: http://www.example.org/\x02
+ $c = highlight_matched_url($c, '(?<=redirect URL: ).*');
+
} else {
found_unknown_content($c);
} elsif ($content =~ m/^(?:No GIF header found|failed while parsing)/) {
# No GIF header found (XXX: Did I ever commit this?)
- # failed while parsing 195 134747048 (XXX: never commited)
+ # failed while parsing 195 134747048 (XXX: never committed)
# Ignore these for now
return '' if SUPPRESS_ACCEPTED_CONNECTIONS;
$c = highlight_matched_host($c, '(?<=connection from ).*');
+ } elsif ($c =~ m/^Closing client socket/) {
+
+ # Closing client socket 5. Keep-alive: 0, Socket alive: 1. Data available: 0.
+ $c = highlight_matched_pattern($c, 'Number', '(?<=socket )\d+');
+ $c = highlight_matched_pattern($c, 'Number', '(?<=Keep-alive: )\d+');
+ $c = highlight_matched_pattern($c, 'Number', '(?<=Socket alive: )\d+');
+ $c = highlight_matched_pattern($c, 'Number', '(?<=available: )\d+');
+
} elsif ($c =~ m/^write header to: .* failed:/) {
# write header to: 10.0.0.1 failed: Broken pipe
$c =~ s@(?<=Timeout is: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=Assumed latency: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
- } elsif ($c =~ m/^Stopped waiting for the request line./) {
+ } elsif ($c =~ m/^Stopped waiting for the request line/ or
+ $c =~ m/^No request line on socket \d received in time/ or
+ $c =~ m/^The client side of the connection on socket \d/) {
# Stopped waiting for the request line. Timeout: 121.
+ # Privoxy 3.0.19 and later:
+ # No request line on socket 5 received in time. Timeout: 1.
+ # The client side of the connection on socket 5 got closed \
+ # without sending a complete request line.
$c =~ s@(?<=Timeout: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^Waiting for \d/) {
# Waiting for up to 4999 bytes from the client.
$c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ } elsif ($c =~ m/^Stopping to watch the client socket/) {
+
+ # Stopping to watch the client socket. There's already another request waiting.
+ # Privoxy 3.0.20 and later:
+ # Stopping to watch the client socket 5. There's already another request waiting.
+ $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
} elsif ($c =~ m/^Looks like we / or
$c =~ m/^Unsetting keep-alive flag/ or
$c =~ m/^No connections to wait/ or
$c =~ m/^The server still wants to talk, but the client hung up on us./ or
$c =~ m/^The server didn't specify how long the connection will stay open/ or
$c =~ m/^There might be a request body. The connection will not be kept alive/ or
- $c =~ m/^Stopping to watch the client socket. There's already another request waiting./ or
$c =~ m/^Done reading from the client\.$/) {
# Looks like we reached the end of the last chunk. We better stop reading.
# The server still wants to talk, but the client hung up on us.
# The server didn't specify how long the connection will stay open. Assume it's only a second.
# There might be a request body. The connection will not be kept alive.
- # Stopping to watch the client socket. There's already another request waiting.
# Done reading from the client\.
} else {
} elsif ($c =~ m/^Decompress(ing deflated|ion didn)/ or
$c =~ m/^Compressed content detected/ or
+ $c =~ m/^SDCH-compressed content detected/ or
$c =~ m/^Tagger/
) {
# Decompressing deflated iob: 117
# Decompression didn't result in any content.
# Compressed content detected, content filtering disabled. Consider recompiling Privoxy\
# with zlib support or enable the prevent-compression action.
+ # SDCH-compressed content detected, content filtering disabled.\
+ # Consider suppressing SDCH offers made by the client.
# Tagger 'complete-url' created empty tag. Ignored.
# Ignored for now
# Request from 10.0.0.1 denied. limit-connect{,} doesn't allow CONNECT requests to port 443.
# Request from 10.0.0.1 marked for blocking. limit-connect{,} doesn't allow CONNECT requests to port 443.
+ # 3.0.18 and later:
+ # Request from 10.0.0.1 marked for blocking. limit-connect{0} doesn't allow CONNECT requests to www.example.org:443
# Malformed server response detected. Downgrading to HTTP/1.0 impossible.
$c =~ s@(?<=Request from )([^\s]*)@$h{'ip-address'}$1$h{'Standard'}@;
$c =~ s@(denied|blocking)@$h{'warning'}$1$h{'Standard'}@;
$c =~ s@(CONNECT)@$h{'method'}$1$h{'Standard'}@;
$c =~ s@(?<=to port )(\d+)@$h{'port'}$1$h{'Standard'}@;
+ $c =~ s@(?<=to )([^\s]+)@$h{'request_'}$1$h{'Standard'}@;
} elsif ($c =~ m/^Status code/) {
my $c = shift;
- if ($c =~ m/^Empty server or forwarder response received on socket \d+./) {
+ if ($c =~ m/^(?:Empty|No) server or forwarder response received on socket \d+\./) {
# Empty server or forwarder response received on socket 4.
# Empty server or forwarder response received on socket 3. \
# Closing client socket 15 without sending data.
+ # Used by Privoxy 3.0.18 and later:
+ # No server or forwarder response received on socket 8. \
+ # Closing client socket 10 without sending data.
+
$c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Didn't receive data in time:/) {
+
+ # Didn't receive data in time: a.fsdn.com:443
+ $c =~ s@(?<=in time: )(.*)@$h{'destination'}$1$h{'Standard'}@;
}
+
# XXX: There are probably more messages that deserve highlighting.
return $c;
# scan: HTTP/1.1 200 OK
$stats{'method'}{$2}++;
- $stats{'ressource'}{$3}++;
+ $stats{'resource'}{$3}++;
$stats{'http-version'}{$4}++;
} elsif ($c =~ m/^scan: Host: ([^\s]+)/) {
sub get_percentage ($$) {
    # Formats $small as a percentage of $big with two decimal places.
    #
    # Parameters (positional):
    #   $big   - the total the share is measured against
    #   $small - the share itself
    #
    # Returns "0.00%" when $small is zero (even if $big is zero as well),
    # "NaN" when only $big is zero, and otherwise a string like "25.00%".
    my ($big, $small) = @_;

    # If small is 0 the percentage is always 0%.
    # Make sure it works even if big is 0 as well.
    # The comparison is numeric on purpose: a string comparison against 0
    # does not recognise zeros spelled "0.0" or "00".
    return "0.00%" if ($small == 0);

    # Prevent division by zero. Comparing numerically keeps values such
    # as "0.0" from slipping past this guard into the division below.
    # XXX: Is this still supposed to be reachable?
    return "NaN" if ($big == 0);

    return sprintf("%.2f%%", $small / $big * 100);
}
print "URL statistics are disabled. Increase --url-statistics-threshold to enable them.\n";
} else {
print "Requested URLs:\n";
- foreach my $ressource (sort {$stats{'ressource'}{$b} <=> $stats{'ressource'}{$a}} keys %{$stats{'ressource'}}) {
- if ($stats{'ressource'}{$ressource} < $cli_options{'url-statistics-threshold'}) {
+ foreach my $resource (sort {$stats{'resource'}{$b} <=> $stats{'resource'}{$a}} keys %{$stats{'resource'}}) {
+ if ($stats{'resource'}{$resource} < $cli_options{'url-statistics-threshold'}) {
print "Skipped statistics for URLs below the treshold.\n";
last;
}
- printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource;
+ printf "%d : %s\n", $stats{'resource'}{$resource}, $resource;
}
}
sub print_non_clf_message ($) {
my $content = shift;
- my $msec_string = "." . $req{$t}{'msecs'} unless $no_msecs_mode;
+ my $msec_string = $no_msecs_mode ? '' : '.' . $req{$t}{'msecs'};
my $line_start = $html_output_mode ? '' : $h{"Standard"};
return if DEBUG_SUPPRESS_LOG_MESSAGES;
'Fatal error' => \&handle_loglevel_ignore,
'Writing' => \&handle_loglevel_ignore,
'Received' => \&handle_loglevel_ignore,
+ 'Actions' => \&handle_loglevel_ignore,
'Unknown log level' => \&handle_loglevel_ignore,
);
'Fatal error' => \&handle_loglevel_ignore,
'Writing' => \&handle_loglevel_ignore,
'Received' => \&handle_loglevel_ignore,
+ 'Actions' => \&handle_loglevel_ignore,
'Unknown log level' => \&handle_loglevel_ignore
);
# Log level other than LOG_LEVEL_CLF?
if (m/^(\d{4}-\d{2}-\d{2}|\w{3} \d{2}) (\d\d:\d\d:\d\d)\.?(\d+)? (?:Privoxy\()?([^\)\s]*)[\)]? ([\w -]*): (.*?)\r?$/ or
# LOG_LEVEL_CLF?
- m/^((?:\d+\.\d+\.\d+\.\d+|[:\d]+)) - - \[(.*)\] "(.*)" (\d+) (\d+)/) {
+ m/^((?:\d+\.\d+\.\d+\.\d+)) - - \[(.*)\] "(.*)" (\d+) (\d+)/) {
$log_messages_reached = 1;
print "\n";
} else {
# Wrapped message
+ $_ = "\n". $_ if /^(?:\d+\.\d+\.\d+\.\d+)/;
+ $_ = " " . $_;
}
s@<BR>$@@;
print;
print "\n" unless $log_messages_reached;
}
+ print "\n";
}
sub VersionMessage {
'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'},
'statistics' => \$cli_options{'statistics'},
'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'},
- 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'},
- 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'},
+ 'url-statistics-threshold=i'=> \$cli_options{'url-statistics-threshold'},
+ 'host-statistics-threshold=i'=> \$cli_options{'host-statistics-threshold'},
'version' => sub { VersionMessage && exit(0) },
'help' => \&help,
) or exit(1);
they very well might be. Also note that the results are pretty much guaranteed
to be incorrect if Privoxy and Privoxy-Log-Parser aren't in sync.
-[B<--unbreak-lines-only] Tries to fix lines that got messed up by a broken or
+[B<--unbreak-lines-only>] Tries to fix lines that got messed up by a broken or
interestingly configured mail client and thus are no longer recognized properly.
Only fixes some breakage, but may be good enough or at least better than nothing.
Doesn't do anything else, so you probably want to pipe the output into
B<privoxy-log-parser> again.
-[B<--url-statistics-threshold>] Only show the request count for a ressource
+[B<--url-statistics-threshold>] Only show the request count for a resource
if it's above or equal to the given threshold. If the threshold is 0, URL
statistics are disabled.