# hash key as input.
# - Add --compress and --decompress options.
#
-# Copyright (c) 2007-2020 Fabian Keil <fk@fabiankeil.de>
+# Copyright (c) 2007-2022 Fabian Keil <fk@fabiankeil.de>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
use Getopt::Long;
use constant {
- PRIVOXY_LOG_PARSER_VERSION => '0.9.1',
+ PRIVOXY_LOG_PARSER_VERSION => '0.9.5',
# Feel free to mess with these ...
DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black')
DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black')
HEADER_DEFAULT_COLOUR => 'yellow',
REGISTER_HEADERS_WITH_THE_SAME_COLOUR => 1,
+ CLI_OPTION_DETECT_INACTIVITY => 0,
CLI_OPTION_DEFAULT_TO_HTML_OUTPUT => 0,
CLI_OPTION_TITLE => 'Privoxy-Log-Parser in da house',
+ CLI_OPTION_INACTIVITY_THRESHOLD => 100,
+ CLI_OPTION_KEEP_DATE => 0,
CLI_OPTION_NO_EMBEDDED_CSS => 0,
CLI_OPTION_NO_MSECS => 0,
CLI_OPTION_NO_SYNTAX_HIGHLIGHTING => 0,
CLI_OPTION_STRICT_CHECKS => 0,
CLI_OPTION_UNBREAK_LINES_ONLY => 0,
CLI_OPTION_URL_STATISTICS_THRESHOLD => 0,
+ CLI_OPTION_PASSED_REQUEST_STATISTICS_THRESHOLD => 0,
CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0,
CLI_OPTION_SHOW_COMPLETE_REQUEST_DISTRIBUTION => 0,
my $header_highlight_regex = '';
my $html_output_mode;
+my $keep_date_mode;
my $no_msecs_mode; # XXX: should probably be removed
my $shorten_thread_ids;
my $line_end;
'Re-Filter' => 'purple',
Connect => 'brown',
Request => 'light_cyan',
+ Tagging => 'purple',
CGI => 'light_green',
Redirect => 'cyan',
Error => 'light_red',
'pcrs-delimiter' => 'light_red',
'ignored' => 'light_red',
'action-bits-update' => 'light_red',
+ 'http-downgrade' => 'light_red',
'configuration-line' => 'red',
'content-type' => 'yellow',
'HOST' => HEADER_DEFAULT_COLOUR,
+ 'tls-version' => 'pink',
+ 'cipher-suite' => 'light_cyan',
);
%h_colours = %h;
my ($method, $url, $http_version);
#GET http://images.sourceforge.net/sfx/icon_warning.gif HTTP/1.1
- if ($rl =~ m/Invalid request/) {
+ if ($rl =~ m/Invalid request/ or $rl =~ m/Failed reading chunked client body/) {
$rl = h('invalid-request') . $rl . h('Standard');
return '';
}
- $c =~ s@(?<=\(size )(\d+)\)(?= with)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=\(size )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=\(new size )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=produced )(\d+)(?= hits)@$h{'Number'}$1$h{'Standard'}@;
$c .= $req{$t}{'content_size_change'} . $h{'Standard'} . ")";
$content = $c;
+ } elsif ($c =~ m/^filtering request body from client /) {
+
+ # filtering request body from client 127.0.0.1 (size 958) with 'null-filter' produced 0 hits (new size 958).
+
+ $c =~ s@(?<=from client )([^\s]+)@$h{'ip-address'}$1$h{'Standard'}@;
+ $c =~ s@(?<=\(size )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@([^\s]+?)(\'? produced)@$h{'filter'}$1$h{'Standard'}$2@;
+ $c =~ s@(?<=\(new size )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=produced )(\d+)(?= hits)@$h{'Number'}$1$h{'Standard'}@;
+ $content = $c;
+
} elsif ($c =~ /\.{3}$/
and $c =~ m/^(?:re_)?filtering \'?(.*?)\'? \(size (\d*)\) with (?:filter )?\'?([^\s]*?)\'? ?\.{3}$/) {
return '' unless SHOW_FILTER_READIN_IN;
+ } elsif ($c =~ m/^Decompression didn't result/) {
+
+ # Decompression didn't result in any content.
+
+ # Nothing to highlight.
+
} else {
found_unknown_content($content);
return $content;
}
+sub handle_loglevel_tagging($) {
+
+ my $c = shift;
+
+ if ($c =~ /^Tagger \'([^\']*)\' added tag \'([^\']*)\'/ or
+ $c =~ m/^Adding tag \'([^\']*)\' created by header tagger \'([^\']*)\'/) {
+
+ # Adding tag 'GET request' created by header tagger 'method-man' (XXX: no longer used)
+ # Tagger 'revalidation' added tag 'REVALIDATION-REQUEST'. No action bit update necessary.
+ # Tagger 'revalidation' added tag 'REVALIDATION-REQUEST'. Action bits updated accordingly.
+
+ # XXX: Save tag and tagger
+
+ $c =~ s@(?<=^Tagger \')([^\']*)@$h{'tagger'}$1$h{'Standard'}@;
+ $c =~ s@(?<=added tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@;
+ $c =~ s@(?<=Action bits )(updated)@$h{'action-bits-update'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ /^Enlisting tag/) {
+
+ # Enlisting tag 'forward-directly' for client 127.0.0.1.
+
+ $c =~ s@(?<=tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@;
+ $c = highlight_matched_host($c, '[^\s]+(?=\.$)');
+
+ } elsif ($c =~ /^Tag/) {
+
+ # Tag 'change-tor-socks-port' for client 127.0.0.1 expired 1 seconds ago. Deleting it.
+
+ $c =~ s@(?<=Tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@;
+ $c =~ s@(?<=expired )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c = highlight_matched_host($c, '(?<=client )[^\s]+');
+
+ } elsif ($c =~ /^Evaluating/) {
+
+ # Evaluating tag 'change-tor-socks-port' for client 127.0.0.1. End of life 1613162302.
+
+ $c =~ s@(?<=tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@;
+ $c = highlight_matched_host($c, '(?<=client )[^\s]+(?=\.)');
+ $c =~ s@(?<=life )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ /^Client tag/) {
+
+ # Client tag 'forward-directly' matches
+
+ $c =~ s@(?<=tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@;
+
+ }
+
+ return $c;
+}
+
sub handle_loglevel_redirect($) {
my $c = shift;
# Percent-encoding redirect URL: http://www.example.org/\x02
$c = highlight_matched_url($c, '(?<=redirect URL: ).*');
+ } elsif ($c =~ m/^Rewrite detected:/) {
+
+ # Rewrite detected: GET http://10.0.0.2:88/blah.txt HTTP/1.1
+ # Rewrite detected: GET https://www.electrobsd.org/CommonJS/ajax/libs/jquery/3.4.1/jquery.min.js HTTP/1.1
+ $c = highlight_matched_request_line($c, '(?<=^Rewrite detected: ).*');
+
+ } elsif ($c =~ m/^Rewritten request line results in downgrade to http/) {
+
+ # Rewritten request line results in downgrade to http
+ $c =~ s@(downgrade)@$h{'http-downgrade'}$1$h{'Standard'}@;
+
} else {
found_unknown_content($c);
# Highlight crunch reason
foreach my $reason (keys %reason_colours) {
- $content =~ s@($reason)@$reason_colours{$reason}$1$h{'Standard'}@g;
+ # Crunch: Blocked: https://capture.condenastdigital.com/track?_o=cne&[...]&dim2=%7B%22adBlocked%[...]
+ $content =~ s@($reason)@$reason_colours{$reason}$1$h{'Standard'}@;
}
if ($content =~ m/\[too long, truncated\]$/) {
# [...]&filter... [too long, truncated]
$content = highlight_matched_pattern($content, 'request_', '^.*(?=\.\.\. \[too long, truncated\]$)');
+ } elsif ($content =~ m/Certificate error:/) {
+
+ # Certificate error: ASN date error, current date after: https://expired.badssl.com/
+ $content = highlight_matched_pattern($content, 'request_', 'https://.*');
+
} else {
# Blocked: http://ads.example.org/
$c = highlight_matched_host($c, '(?<=for )[^\s]+');
$c =~ s@(?<=in slot )(\d+)@$h{'Number'}$1$h{'Standard'}@;
- } elsif ($c =~ m/^Socket/) {
+ } elsif ($c =~ m/^Socket \d+ (already|closed)/) {
# Socket 16 already forgotten or never remembered.
+ # Socket 9 closed while waiting for client headers
$c =~ s@(?<=Socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^The connection to/) {
$c =~ s@(?<=Assumed latency: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^Stopped waiting for the request line/ or
- $c =~ m/^No request line on socket \d received in time/ or
+ $c =~ m/^No request line on socket \d+ received in time/ or
$c =~ m/^The client side of the connection on socket \d/) {
# Stopped waiting for the request line. Timeout: 121.
# Connection from 81.163.28.218 dropped due to ACL
# Rejecting connection from 178.63.152.227. Maximum number of connections reached.
- $c =~ s@(?<=onnection from )((?:\d+\.?){3}\d+)@$h{'Number'}$1$h{'Standard'}@;
+ # Connection from 192.168.2.1 on 127.0.1.1:8118 (socket 3) dropped due to ACL
+ $c = highlight_matched_host($c, '(?<=onnection from )[\d.:]+');
+ $c = highlight_matched_host($c, '(?<=on )[\d.:]+');
+ $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^(?:Reusing|Closing) server socket / or
$c =~ m/^No additional client request/) {
# Reusing server socket 7 connected to www.privoxy.org. Total requests: 2.
# Closing server socket 6 connected to d.asset.soup.io. Keep-alive: 0.\
# Tainted: 1. Socket alive: 1. Timeout: 60. Configuration file change detected: 0.
+ # Reusing server socket 35 connected to nl.wikipedia.org. Requests already sent: 5.
$c =~ s@(?<= socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c = highlight_matched_host($c, '(?<=for )[^\s]+(?=\.)');
for my $number_pattern ('requests', 'Keep-alive', 'Tainted', ' alive', 'Timeout', 'detected') {
$c = highlight_matched_pattern($c, 'Number', '(?<='. $number_pattern . ': )\d+');
}
+ $c =~ s@(?<=already sent: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
} elsif ($c =~ m/^Connected to /) {
} elsif ($c =~ m/^Optimistically sending /) {
# Optimistically sending 318 bytes of client headers intended for www.privoxy.org
+ # Optimistically sending 318 bytes of client headers intended for www.privoxy.org.
$c =~ s@(?<=sending )(\d+)@$h{'Number'}$1$h{'Standard'}@;
- $c = highlight_matched_host($c, '(?<=for )[^\s]+');
+ if ($c =~ /\.$/) {
+ $c = highlight_matched_host($c, '[^\s]+(?=\.)');
+ } else {
+ $c = highlight_matched_host($c, '(?<=for )[^\s]+');
+ }
} elsif ($c =~ m/^Stopping to watch the client socket/) {
$c =~ s@(?<=Shifting )(\d+)@$h{'Number'}$1$h{'Standard'}@;
$c =~ s@(?<=by )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ } elsif ($c =~ m/^Flushed (\d+) bytes of request body while expecting (\d+)/) {
+
+ # Flushed 30 bytes of request body while expecting 30
+ $c =~ s@(?<=Flushed )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=expecting )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Performing the TLS\/SSL handshake with client. Hash of host:/) {
+
+ # Performing the TLS/SSL handshake with client. Hash of host: bab5296b25e256c7b06b92b17b56bcae
+ $c = highlight_matched_host($c, '(?<=Hash of host: ).+');
+
+ } elsif ($c =~ m/^Forwarding \d+ bytes of encrypted POST data/) {
+
+ # Forwarding 1954 bytes of encrypted POST data
+ $c =~ s@(?<=Forwarding )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Forwarded the last \d+ bytes/) {
+
+ # Forwarded the last 1954 bytes
+ $c =~ s@(?<=the last )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Waiting for the next client connection. Currently active threads:/) {
+
+ # Waiting for the next client connection. Currently active threads: 30
+ $c =~ s@(?<=threads: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Data arrived in time on client socket/) {
+
+ # Data arrived in time on client socket 6. Requests so far: 3
+ $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=Requests so far: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Dropping the client connection on socket/) {
+
+ # Dropping the client connection on socket 71. The server connection has not been established yet.
+ # Dropping the client connection on socket 23 with server socket 24 connected to \
+ # www.reddit.com. The forwarder has changed.
+ $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=server socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c = highlight_matched_host($c, '(?<=connected to )[^ ]+(?=\.)');
+
+ } elsif ($c =~ m/^The client socket \d+ has become unusable while the server/) {
+
+ # The client socket 16 has become unusable while the server socket 24 is still open.
+ $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=server socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^The last \d+ bytes of the request body have been read/) {
+
+ # The last 12078 bytes of the request body have been read
+ $c =~ s@(?<=The last )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Flushed \d+ bytes of request body/) {
+
+ # Flushed 3153 bytes of request body
+ $c =~ s@(?<=Flushed )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Complete client request followed by/) {
+
+ # Complete client request followed by 59 bytes of pipelined data received.
+ $c =~ s@(?<=followed by )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^The peer notified us that the connection on socket/) {
+
+ # The peer notified us that the connection on socket 11 is going to be closed
+ $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Client socket \d is no longer usable/) {
+
+ # Client socket 7 is no longer usable. The server socket has been closed.
+ $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Socket timeout \d+ reached/) {
+
+ # Socket timeout 3 reached: http://127.0.0.1:20000/no-filter/chunked-content/36
+ $c =~ s@(?<=timeout )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c = highlight_matched_url($c, "(?<=reached: ).*")
+
+ } elsif ($c =~ m/^Prepared to read up to /) {
+
+ # Prepared to read up to 157 bytes of encrypted request body from the client.
+ $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Forwarding \d+ bytes /) {
+
+ # Forwarding 157 bytes of encrypted request body.
+ $c =~ s@(?<=Forwarding )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Buffering encrypted client body/) {
+
+ # Buffering encrypted client body. Prepared to read up to 2236 bytes.
+ $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^The last \d+ bytes of the encrypted request body have been read/) {
+
+ # The last 6945 bytes of the encrypted request body have been read.
+ $c =~ s@(?<=The last )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Reducing the chunk offset from/) {
+
+ # Reducing the chunk offset from 1096654 to 32704 after discarding 1063950 bytes to make room in the buffer.
+ # Reducing the chunk offset from 16219 to 128 after flushing 16091 bytes.
+ $c =~ s@(?<=\d to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=offset from )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=after discarding )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=after flushing )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Client socket \d+ is no longer usable/) {
+
+ # Client socket 21 is no longer usable. The server socket has been closed.
+ $c =~ s@(?<=Client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^(Client|Server) successfully connected over/) {
+
+ # Server successfully connected over TLSv1.3 (TLS_AES_256_GCM_SHA384).
+ # Client successfully connected over TLSv1.3 (TLS_AES_128_GCM_SHA256).
+ $c =~ s@(?<=connected over )(TLSv\d\.\d)@$h{'tls-version'}$1$h{'Standard'}@;
+ $c =~ s@(?<=\()([^)]+)@$h{'cipher-suite'}$1$h{'Standard'}@;
+
} elsif ($c =~ m/^Looks like we / or
$c =~ m/^Unsetting keep-alive flag/ or
$c =~ m/^No connections to wait/ or
# Didn't receive data in time: a.fsdn.com:443
$c =~ s@(?<=in time: )(.*)@$h{'destination'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Sending data on socket \d+ over TLS/) {
+
+ # Sending data on socket 33 over TLS/SSL failed: no TLS/SSL errors detected
+ $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^Chunk size \d+ exceeds buffered data left/) {
+
+ # Chunk size 291 exceeds buffered data left. Already digested 69894 of 69957 buffered bytes.
+ $c =~ s@(?<=size )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=digested )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ $c =~ s@(?<=of )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^The socks connection timed out after/) {
+
+ # The socks connection timed out after 60 seconds.
+ $c =~ s@(?<=after )(\d+)@$h{'Number'}$1$h{'Standard'}@;
}
# XXX: There are probably more messages that deserve highlighting.
return $c;
}
+sub handle_loglevel_received($) {
+
+ my $c = shift;
+
+ if ($c =~ m/^TLS from socket/) {
+ # TLS from socket 3: \x16\xda\xe2\xa2;\x0d\x0a
+
+ $c =~ s@(?<=TLS from socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^from socket/) {
+ # from socket 3: HEAD http://p.p/ HTTP/1.1\x0d\x0aHost: p.p\x0d\x0aUser-Agent: curl/7.85.0\x0d\x0aAccept: */*\x0d\x0aProxy-Connection: Keep-Alive\x0d\x0a\x0d\x0a
+
+ $c =~ s@(?<=from socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ }
+
+ return $c;
+}
+
+sub handle_loglevel_writing($) {
+
+ my $c = shift;
+
+ if ($c =~ m/^to socket/) {
+ # to socket 11: HTTP/1.1 200 Connection established\x0d\x0a\x0d\x0a
+
+ $c =~ s@(?<=to socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+ } elsif ($c =~ m/^TLS on socket /) {
+ # TLS on socket 9: o~\xfcS[\xfa\x8f\xd6\x96\xe6_\xc7$\x1b[...]
+
+ $c =~ s@(?<=TLS on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+ }
+
+ return $c;
+}
sub handle_loglevel_ignore($) {
return shift;
our %cli_options;
# +0200] "GET https://www.youtube.com/watch?v=JmcA9LIIXWw HTTP/1.1" 200 68004
- $content =~ m/^[+-]\d{4}\] "(\w+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/;
+ # +0200] "VERSION-CONTROL http://p.p/ HTTP/1.1" 200 2787
+ $content =~ m/^[+-]\d{4}\] "([^ ]+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/;
$method = $1;
$resource = $2;
$http_version = $3;
$status_code = $4;
$size = $5;
+ $stats{requests_clf}++;
+
unless (defined $method) {
- print("Failed to parse: $content\n");
+ # +0200] "Invalid request" 400 0
+ return if ($content =~ m/^[+-]\d{4}\] "Invalid request"/);
+ # +0100] "Failed reading chunked client body" 400 0
+ return if ($content =~ m/^[+-]\d{4}\] "Failed reading chunked client body"/);
+ # +0100] "GET https://securepubads.g.doubleclick.net/gampad/ads?gd[...]... [too long, truncated]
+ if ($content =~ m/\[too long, truncated\]$/) {
+ print("Skipped LOG_LEVEL_CLF message that got truncated by Privoxy. Statistics will be inprecise.\n");
+ } else {
+ print("Failed to parse: $content\n");
+ }
return;
}
$stats{'method'}{$method}++;
$stats{'http-version'}{$http_version}++;
if ($cli_options{'host-statistics-threshold'} != 0) {
- $resource =~ m@(?:http[s]://)([^/]+)/?@;
+ $resource =~ m@(?:https?://)?([^/]+)/?@;
$stats{'hosts'}{$1}++;
}
$stats{'content-size-total'} += $size;
}
sub gather_loglevel_request_stats($$) {
- my $c = shift;
+ my $request_url = shift;
my $thread = shift;
our %stats;
+ our %cli_options;
$stats{requests}++;
+ if ($cli_options{'passed-request-statistics-threshold'} != 0) {
+ # If the request get blocked we'll decrement
+ # in gather_loglevel_crunch_stats()
+ chomp $request_url;
+ $stats{'passed-request-url'}{$request_url}++;
+ }
}
sub gather_loglevel_crunch_stats($$) {
my $c = shift;
my $thread = shift;
our %stats;
+ our %cli_options;
$stats{crunches}++;
# Connection failure: http://127.0.0.1:8080/
$stats{'connection-failure'}++;
}
+ if ($cli_options{'passed-request-statistics-threshold'} != 0) {
+ $c =~ m/^[^:]+: (.*)/;
+ if ($stats{'passed-request-url'}{$1}) {
+ $stats{'passed-request-url'}{$1}-- ;
+ if ($stats{'passed-request-url'}{$1} == 0) {
+ delete($stats{'passed-request-url'}{$1});
+ }
+ }
+ }
}
sub init_stats() {
our %stats = (
requests => 0,
+ requests_clf => 0,
crunches => 0,
'server-keep-alive' => 0,
'reused-connections' => 0,
our %stats;
our %cli_options;
my $new_connections = $stats{requests} - $stats{crunches} - $stats{'reused-connections'};
- my $outgoing_requests = $stats{requests} - $stats{crunches};
my $client_requests_checksum = 0;
+ my $requests_total;
+
+ if ($stats{requests_clf} && $stats{requests}
+ && $stats{requests_clf} != $stats{requests}) {
+ print "Inconsistent request counts: " . $stats{requests} . "/" . $stats{requests_clf} . "\n";
+ }
+
+ # To get the total number of requests we can use either the number
+ # of Common-Log-Format lines or the number of "Request:" messages.
+ # We prefer the number of CLF lines if available because using
+ # it works when analysing old log files from Privoxy versions before 3.0.29.
+ # In Privoxy 3.0.28 and earlier "Request:" messages excluded
+ # crunched messages.
+ $requests_total = $stats{requests_clf} ? $stats{requests_clf} : $stats{requests};
- if ($stats{requests} eq 0) {
+ if ($requests_total eq 0) {
print "No requests yet.\n";
return;
}
- print "Client requests total: " . $stats{requests} . "\n";
- print "Crunches: " . $stats{crunches} . " (" .
- get_percentage($stats{requests}, $stats{crunches}) . ")\n";
- print "Blocks: " . $stats{'blocked'} . " (" .
- get_percentage($stats{requests}, $stats{'blocked'}) . ")\n";
- print "Fast redirections: " . $stats{'fast-redirections'} . " (" .
- get_percentage($stats{requests}, $stats{'fast-redirections'}) . ")\n";
- print "Connection timeouts: " . $stats{'connection-timeout'} . " (" .
- get_percentage($stats{requests}, $stats{'connection-timeout'}) . ")\n";
- print "Connection failures: " . $stats{'connection-failure'} . " (" .
- get_percentage($stats{requests}, $stats{'connection-failure'}) . ")\n";
- print "Outgoing requests: " . $outgoing_requests . " (" .
- get_percentage($stats{requests}, $outgoing_requests) . ")\n";
+ print "Client requests total: " . $requests_total . "\n";
+ if ($stats{crunches}) {
+ my $outgoing_requests = $requests_total - $stats{crunches};
+ print "Crunches: " . $stats{crunches} . " (" .
+ get_percentage($requests_total, $stats{crunches}) . ")\n";
+ print "Blocks: " . $stats{'blocked'} . " (" .
+ get_percentage($requests_total, $stats{'blocked'}) . ")\n";
+ print "Fast redirections: " . $stats{'fast-redirections'} . " (" .
+ get_percentage($requests_total, $stats{'fast-redirections'}) . ")\n";
+ print "Connection timeouts: " . $stats{'connection-timeout'} . " (" .
+ get_percentage($requests_total, $stats{'connection-timeout'}) . ")\n";
+ print "Connection failures: " . $stats{'connection-failure'} . " (" .
+ get_percentage($requests_total, $stats{'connection-failure'}) . ")\n";
+ print "Outgoing requests: " . $outgoing_requests . " (" .
+ get_percentage($requests_total, $outgoing_requests) . ")\n";
+ } else {
+ print "No crunches detected. Is 'debug 1024' enabled?\n";
+ }
+
print "Server keep-alive offers: " . $stats{'server-keep-alive'} . " (" .
- get_percentage($stats{requests}, $stats{'server-keep-alive'}) . ")\n";
+ get_percentage($requests_total, $stats{'server-keep-alive'}) . ")\n";
print "New outgoing connections: " . $new_connections . " (" .
- get_percentage($stats{requests}, $new_connections) . ")\n";
- print "Reused connections: " . $stats{'reused-connections'} . " (" .
- get_percentage($stats{requests}, $stats{'reused-connections'}) .
+ get_percentage($requests_total, $new_connections) . ")\n";
+ print "Reused server connections: " . $stats{'reused-connections'} . " (" .
+ get_percentage($requests_total, $stats{'reused-connections'}) .
"; server offers accepted: " .
get_percentage($stats{'server-keep-alive'}, $stats{'reused-connections'}) . ")\n";
print "Empty responses: " . $stats{'empty-responses'} . " (" .
- get_percentage($stats{requests}, $stats{'empty-responses'}) . ")\n";
+ get_percentage($requests_total, $stats{'empty-responses'}) . ")\n";
print "Empty responses on new connections: "
. $stats{'empty-responses-on-new-connections'} . " (" .
- get_percentage($stats{requests}, $stats{'empty-responses-on-new-connections'})
+ get_percentage($requests_total, $stats{'empty-responses-on-new-connections'})
. ")\n";
print "Empty responses on reused connections: " .
$stats{'empty-responses-on-reused-connections'} . " (" .
- get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) .
+ get_percentage($requests_total, $stats{'empty-responses-on-reused-connections'}) .
")\n";
print "Client connections: " . $stats{'closed-client-connections'} . "\n";
if ($stats{'content-size-total'}) {
- print "Bytes transfered excluding headers: " . $stats{'content-size-total'} . "\n";
+ print "Bytes of content transferred to the client: " . $stats{'content-size-total'} . "\n";
}
my $lines_printed = 0;
print "Client requests per connection distribution:\n";
printf "Enable --show-complete-request-distribution to get less common numbers as well.\n";
}
# Due to log rotation we may not have a complete picture for all the requests
- printf "Improperly accounted requests: ~%d\n", abs($stats{requests} - $client_requests_checksum);
+ printf "Improperly accounted requests: ~%d\n", abs($requests_total - $client_requests_checksum);
if (exists $stats{method}) {
print "Method distribution:\n";
} else {
print "Method distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n";
}
- print "Client HTTP versions:\n";
- foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) {
- printf "%8d : %-8s\n", $stats{'http-version'}{$http_version}, $http_version;
+ if (exists $stats{'http-version'}) {
+ print "Client HTTP versions:\n";
+ foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) {
+ printf "%8d : %-8s\n", $stats{'http-version'}{$http_version}, $http_version;
+ }
+ } else {
+ print "HTTP version distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n";
}
if (exists $stats{'status-code'}) {
- print "HTTP status codes:\n";
+ print "HTTP status codes according to 'debug 512' (status codes sent by the server may differ):\n";
foreach my $status_code (sort {$stats{'status-code'}{$b} <=> $stats{'status-code'}{$a}} keys %{$stats{'status-code'}}) {
printf "%8d : %-8d\n", $stats{'status-code'}{$status_code}, $status_code;
}
}
}
+ if ($cli_options{'passed-request-statistics-threshold'} == 0) {
+ print "Passed request statistics are disabled. Increase --passed-request-statistics-threshold to enable them.\n";
+ } else {
+ print "Requested requests that were passed:\n";
+ foreach my $passed_url (sort {$stats{'passed-request-url'}{$b} <=> $stats{'passed-request-url'}{$a}}
+ keys %{$stats{'passed-request-url'}}) {
+ if ($stats{'passed-request-url'}{$passed_url} < $cli_options{'passed-request-statistics-threshold'}) {
+ print "Skipped statistics for passed URLs below the treshold.\n";
+ last;
+ }
+ printf "%d : %s\n", $stats{'passed-request-url'}{$passed_url}, $passed_url;
+ }
+ }
if ($cli_options{'host-statistics-threshold'} == 0) {
print "Host statistics are disabled. Increase --host-statistics-threshold to enable them.\n";
} else {
sub print_non_clf_message($) {
my $content = shift;
+ my $date_string = $keep_date_mode ? $req{$t}{'day'} . ' ' : '';
my $msec_string = $no_msecs_mode ? '' : '.' . $req{$t}{'msecs'};
my $line_start = $html_output_mode ? '' : $h{"Standard"};
return if DEBUG_SUPPRESS_LOG_MESSAGES;
print $line_start
+ . $date_string
. $time_colours[$time_colour_index % 2]
. $req{$t}{'time-stamp'}
. $msec_string
'Force' => \&handle_loglevel_force,
'Error' => \&handle_loglevel_error,
'Fatal error' => \&handle_loglevel_ignore,
- 'Writing' => \&handle_loglevel_ignore,
- 'Received' => \&handle_loglevel_ignore,
+ 'Writing' => \&handle_loglevel_writing,
+ 'Received' => \&handle_loglevel_received,
+ 'Tagging' => \&handle_loglevel_tagging,
'Actions' => \&handle_loglevel_ignore,
'Unknown log level' => \&handle_loglevel_ignore,
);
sub stats_loop() {
- my ($day, $time_stamp, $msecs, $thread, $log_level, $content);
+ my ($day, $time_stamp, $thread, $log_level, $content);
my $strict_checks = cli_option_is_set('strict-checks');
my %log_level_handlers = (
'Connect:' => \&gather_loglevel_connect_stats,
'Redirect:' => \&handle_loglevel_ignore,
'Unknown log level:' => \&handle_loglevel_ignore,
'Writing:' => \&handle_loglevel_ignore,
+ 'Tagging:' => \&handle_loglevel_ignore,
);
while (<>) {
}
+# Convert a timestamp like 18:07:28.733 into milliseconds
+sub time_stamp_to_msecs($) {
+ my $time_stamp = shift;
+
+ if ($time_stamp =~ /(\d\d):(\d\d):(\d\d)\.(\d{3})/) {
+ my ($hours, $minutes, $seconds, $msecs) = ($1, $2, $3, $4);
+
+ $msecs += $seconds * 1000;
+ $msecs += $minutes * 1000 * 60;
+ $msecs += $hours * 1000 * 60 * 60;
+
+ return $msecs;
+ }
+ return undef;
+}
+
+sub inactivity_detection_loop() {
+
+ our %cli_options;
+ my ($date, $time_stamp, $thread, $log_level, $content);
+ my ($msecs, $previous_msecs, $inactivity);
+ my $inactivity_threshold = $cli_options{'inactivity-threshold'};
+ my $previous_date;
+ my $log_messages_out_of_order = 0;
+
+ while (<>) {
+ ($date, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5);
+
+ next if (not defined($log_level));
+ next if ($time_stamp eq "-");
+ $msecs = time_stamp_to_msecs($time_stamp);
+ unless (defined $msecs) {
+ print "Failed to convert $time_stamp into milliseconds\n";
+ print "$_";
+ next;
+ }
+ unless (defined $previous_msecs) {
+ $previous_msecs = $msecs;
+ $previous_date = $date;
+ print "$_";
+ next;
+ }
+ $inactivity = $msecs - $previous_msecs;
+ if ($inactivity < 0) {
+ # This can happen if there's a high load in which case
+ # a Privoxy thread may be moved off schedule between
+ # getting the timestamp for the log message and actually
+ # writing it.
+ $log_messages_out_of_order++;
+ }
+ if ($inactivity > $inactivity_threshold) {
+ if ($previous_date eq $date) {
+ print "Detected inactivity: $inactivity msecs\n";
+ } else {
+ # While we could include the date in the timestamp
+ # we currently don't.
+ print "Detected date change. Timestamp difference ignored.\n";
+ }
+ }
+ print "$_";
+ $previous_msecs = $msecs;
+ $previous_date = $date;
+ }
+ if ($log_messages_out_of_order) {
+ print "At least $log_messages_out_of_order messages were written out of the chronological order.\n";
+ print "This can result in false positives. Consider sorting the log first.\n";
+ }
+}
+
sub unbreak_lines_only_loop() {
my $log_messages_reached = 0;
while (<>) {
sub get_cli_options() {
our %cli_options = (
+ 'detect-inactivity' => CLI_OPTION_DETECT_INACTIVITY,
+ 'inactivity-threshold' => CLI_OPTION_INACTIVITY_THRESHOLD,
'html-output' => CLI_OPTION_DEFAULT_TO_HTML_OUTPUT,
'title' => CLI_OPTION_TITLE,
+ 'keep-date' => CLI_OPTION_KEEP_DATE,
'no-syntax-highlighting' => CLI_OPTION_NO_SYNTAX_HIGHLIGHTING,
'no-embedded-css' => CLI_OPTION_NO_EMBEDDED_CSS,
'no-msecs' => CLI_OPTION_NO_MSECS,
'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
'unbreak-lines-only' => CLI_OPTION_UNBREAK_LINES_ONLY,
'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD,
+ 'passed-request-statistics-threshold' => CLI_OPTION_PASSED_REQUEST_STATISTICS_THRESHOLD,
'show-complete-request-distribution' => CLI_OPTION_SHOW_COMPLETE_REQUEST_DISTRIBUTION,
);
GetOptions (
+ 'detect-inactivity' => \$cli_options{'detect-inactivity'},
+ 'inactivity-threshold=i' => \$cli_options{'inactivity-threshold'},
'html-output' => \$cli_options{'html-output'},
'title' => \$cli_options{'title'},
+ 'keep-date' => \$cli_options{'keep-date'},
'no-syntax-highlighting' => \$cli_options{'no-syntax-highlighting'},
'no-embedded-css' => \$cli_options{'no-embedded-css'},
'no-msecs' => \$cli_options{'no-msecs'},
'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'},
'url-statistics-threshold=i'=> \$cli_options{'url-statistics-threshold'},
'host-statistics-threshold=i'=> \$cli_options{'host-statistics-threshold'},
+ 'passed-request-statistics-threshold=i' => \$cli_options{'passed-request-statistics-threshold'},
'show-complete-request-distribution' => \$cli_options{'show-complete-request-distribution'},
'version' => sub { VersionMessage && exit(0) },
'help' => \&help,
$html_output_mode = cli_option_is_set('html-output');
$no_msecs_mode = cli_option_is_set('no-msecs');
+ $keep_date_mode = cli_option_is_set('keep-date');
$shorten_thread_ids = cli_option_is_set('shorten-thread-ids');
$line_end = get_line_end();
}
print << " EOF"
Options and their default values if they have any:
+ [--detect-innactivity]
+ [--inactivity-threshold $cli_options{'inactivity-threshold'}]
[--host-statistics-threshold $cli_options{'host-statistics-threshold'}]
[--html-output]
[--no-embedded-css]
[--statistics]
[--unbreak-lines-only]
[--url-statistics-threshold $cli_options{'url-statistics-threshold'}]
+ [--passed-request-statistics-threshold $cli_options{'passed-request-statistics-threshold'}]
[--title $cli_options{'title'}]
[--version]
see "perldoc $0" for more information
set_background(DEFAULT_BACKGROUND);
prepare_our_stuff();
- print_intro();
-
# XXX: should explicitly reject incompatible argument combinations
if (cli_option_is_set('unbreak-lines-only')) {
unbreak_lines_only_loop();
} elsif (cli_option_is_set('statistics')) {
stats_loop();
+ } elsif (cli_option_is_set('detect-inactivity')) {
+ inactivity_detection_loop();
} else {
+ print_intro();
parse_loop();
+ print_outro();
}
-
- print_outro();
}
main();
=head1 SYNOPSIS
-B<privoxy-log-parser> [B<--html-output>]
+B<privoxy-log-parser> [B<--detect-inactivity>] [B<--inactivity-threshold msecs>]
+[B<--html-output>]
[B<--no-msecs>] [B<--no-syntax-higlighting>] [B<--statistics>]
[B<--shorten-thread-ids>] [B<--show-ineffective-filters>]
[B<--url-statistics-threshold>] [B<--version>]
=head1 OPTIONS
+[B<--detect-inactivity>] Instead of syntax highlighting, detect periods
+of log inactivity of more than the amount of milliseconds specified with
+the B<--inactivity-threshold> option. Mainly useful for debugging.
+
[B<--host-statistics-threshold>] Only show the request count for a host
if it's above or equal to the given threshold. If the threshold is 0, host
statistics are disabled.
-[B<--html-output>] Use HTML and CSS for the syntax highlighting. If this option is
+[B<--html-output>] Use HTML and CSS when syntax highlighting. If this option is
omitted, ANSI escape sequences are used unless B<--no-syntax-highlighting> is active.
This option is only intended to make embedding log excerpts in web pages easier.
It does not escape any input!
-[B<--no-msecs>] Don't expect milisecond resolution
+[B<--inactivity-threshold msecs>] Specifies the number of milliseconds between
+log messages to consider inactivity when running in [B<--detect-inactivity>]
+mode.
+
+[B<--keep-date>] Don't remove the date when printing highlighted log messages.
+Useful when parsing multiple log files at once.
+
+[B<--no-msecs>] Don't expect millisecond resolution
[B<--no-syntax-highlighting>] Disable syntax-highlighting. Useful when
the filtered output is piped into less in which case the ANSI control
codes don't work, or if the terminal itself doesn't support the control
codes.
+[B<--passed-request-statistics-threshold>] Only show the request count for
+a passed requests if it's above or equal to the given threshold. If the
+threshold is 0, passed request statistics are disabled.
+
[B<--shorten-thread-ids>] Shorten the thread ids to a three-digit decimal number.
Note that the mapping from thread ids to shortened ids is created at run-time
and thus varies with the input.
=head1 SEE ALSO
-privoxy(1)
+privoxy(8)
=head1 AUTHOR