X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=1ebd659790a2f65bdcc08e46bfde9574b0ae81a1;hb=4622b6894625897f18b58bb15d7486d296a9c4ba;hp=97a939cfd176039375975de16219a5ba90f6a3cc;hpb=3f41567fdabdf0e3032f006e7fa3a3d3aca48642;p=privoxy.git diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index 97a939cf..1ebd6597 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,7 +8,7 @@ # # http://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.117 2011/05/03 10:33:53 fabiankeil Exp $ +# $Id: privoxy-log-parser.pl,v 1.132 2012/07/23 12:40:08 fabiankeil Exp $ # # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting @@ -227,7 +227,7 @@ sub paint_it ($) { # XXX: The Rolling Stones reference has to go. ############################################################### - my $colour = shift @_; + my $colour = shift; return "" if cli_option_is_set('no-syntax-highlighting'); @@ -302,7 +302,7 @@ sub get_semantic_html_markup ($) { # Takes a string and returns a span element ############################################################### - my $type = shift @_; + my $type = shift; my $code; if ($type =~ /Standard/) { @@ -475,7 +475,7 @@ sub get_colour_html_markup ($) { # markup should always be semantically correct. ############################################################### - my $type = shift @_; + my $type = shift; my $code; if ($type =~ /Standard/) { @@ -572,7 +572,7 @@ sub log_parse_error ($) { my $message = shift; if (LOG_UNPARSED_LINES_TO_EXTRA_FILE) { - open(my $errorlog_fd, ">>" . ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . " failed"; + open(my $errorlog_fd, ">>", ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . 
" failed"; print $errorlog_fd $message; close($errorlog_fd); } @@ -913,6 +913,8 @@ sub handle_loglevel_header ($) { or $c =~ m/^keep-alive support is disabled/ or $c =~ m/^Continue hack in da house/ or $c =~ m/^Merged multiple header lines to:/ + or $c =~ m/^Added header: / + or $c =~ m/^Enlisting (?:sorted|left-over) header/ ) { # XXX: Some of these may need highlighting @@ -960,6 +962,9 @@ sub handle_loglevel_header ($) { # keep-alive support is disabled. Crunching: Keep-Alive: 300. # Continue hack in da house. # Merged multiple header lines to: 'X-FORWARDED-PROTO: http X-HOST: 127.0.0.1' + # Added header: Content-Encoding: deflate + # Enlisting sorted header User-Agent: Mozilla/5.0 (X11; SunOS i86pc; rv:10.0.3) Gecko/20100101 Firefox/10.0.3 + # Enlisting left-over header Connection: close } elsif ($c =~ m/^scanning headers for:/) { @@ -1195,9 +1200,10 @@ sub handle_loglevel_re_filter ($) { } elsif ($c =~ m/^Compressed content from /) { - # Compressed content from 29258 to 8630 bytes. + # Compressed content from 29258 to 8630 bytes. Compression level: 3 $content =~ s@(?<=from )(\d+)@$h{'Number'}$1$h{'Standard'}@; $content =~ s@(?<=to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $content =~ s@(?<=level: )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Reading in filter/) { @@ -1264,6 +1270,11 @@ sub handle_loglevel_redirect ($) { # XXX: assume the same? 
$c = highlight_matched_url($c, '(?<=assuming that \")[^"]*'); + } elsif ($c =~ m/^Percent-encoding redirect/) { + + # Percent-encoding redirect URL: http://www.example.org/\x02 + $c = highlight_matched_url($c, '(?<=redirect URL: ).*'); + } else { found_unknown_content($c); @@ -1523,10 +1534,17 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=Timeout is: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=Assumed latency: )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Stopped waiting for the request line./) { + } elsif ($c =~ m/^Stopped waiting for the request line/ or + $c =~ m/^No request line on socket \d received in time/ or + $c =~ m/^The client side of the connection on socket \d/) { # Stopped waiting for the request line. Timeout: 121. + # Privoxy 3.0.19 and later: + # No request line on socket 5 received in time. Timeout: 1. + # The client side of the connection on socket 5 got closed \ + # without sending a complete request line. $c =~ s@(?<=Timeout: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Waiting for \d/) { @@ -1651,6 +1669,13 @@ sub handle_loglevel_connect ($) { # Waiting for up to 4999 bytes from the client. $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Stopping to watch the client socket/) { + + # Stopping to watch the client socket. There's already another request waiting. + # Privoxy 3.0.20 and later: + # Stopping to watch the client socket 5. There's already another request waiting. + $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Looks like we / or $c =~ m/^Unsetting keep-alive flag/ or $c =~ m/^No connections to wait/ or @@ -1660,7 +1685,6 @@ sub handle_loglevel_connect ($) { $c =~ m/^The server still wants to talk, but the client hung up on us./ or $c =~ m/^The server didn't specify how long the connection will stay open/ or $c =~ m/^There might be a request body. 
The connection will not be kept alive/ or - $c =~ m/^Stopping to watch the client socket. There's already another request waiting./ or $c =~ m/^Done reading from the client\.$/) { # Looks like we reached the end of the last chunk. We better stop reading. @@ -1678,7 +1702,6 @@ sub handle_loglevel_connect ($) { # The server still wants to talk, but the client hung up on us. # The server didn't specify how long the connection will stay open. Assume it's only a second. # There might be a request body. The connection will not be kept alive. - # Stopping to watch the client socket. There's already another request waiting. # Done reading from the client\. } else { @@ -1865,14 +1888,24 @@ sub handle_loglevel_error ($) { my $c = shift; - if ($c =~ m/^Empty server or forwarder response received on socket \d+./) { + if ($c =~ m/^(?:Empty|No) server or forwarder response received on socket \d+\./) { # Empty server or forwarder response received on socket 4. # Empty server or forwarder response received on socket 3. \ # Closing client socket 15 without sending data. + # Used by Privoxy 3.0.18 and later: + # No server or forwarder response received on socket 8. \ + # Closing client socket 10 without sending data. + $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Didn't receive data in time:/) { + + # Didn't receive data in time: a.fsdn.com:443 + $c =~ s@(?<=in time: )(.*)@$h{'destination'}$1$h{'Standard'}@; } + # XXX: There are probably more messages that deserve highlighting. return $c; @@ -1979,7 +2012,7 @@ sub gather_loglevel_header_stats ($$) { # scan: HTTP/1.1 200 OK $stats{'method'}{$2}++; - $stats{'ressource'}{$3}++; + $stats{'resource'}{$3}++; $stats{'http-version'}{$4}++; } elsif ($c =~ m/^scan: Host: ([^\s]+)/) { @@ -2069,12 +2102,12 @@ sub print_stats () { print "URL statistics are disabled. 
Increase --url-statistics-threshold to enable them.\n"; } else { print "Requested URLs:\n"; - foreach my $ressource (sort {$stats{'ressource'}{$b} <=> $stats{'ressource'}{$a}} keys %{$stats{'ressource'}}) { - if ($stats{'ressource'}{$ressource} < $cli_options{'url-statistics-threshold'}) { + foreach my $resource (sort {$stats{'resource'}{$b} <=> $stats{'resource'}{$a}} keys %{$stats{'resource'}}) { + if ($stats{'resource'}{$resource} < $cli_options{'url-statistics-threshold'}) { print "Skipped statistics for URLs below the treshold.\n"; last; } - printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource; + printf "%d : %s\n", $stats{'resource'}{$resource}, $resource; } } @@ -2122,7 +2155,7 @@ sub print_clf_message () { sub print_non_clf_message ($) { my $content = shift; - my $msec_string = "." . $req{$t}{'msecs'} unless $no_msecs_mode; + my $msec_string = $no_msecs_mode ? '' : '.' . $req{$t}{'msecs'}; my $line_start = $html_output_mode ? '' : $h{"Standard"}; return if DEBUG_SUPPRESS_LOG_MESSAGES; @@ -2365,8 +2398,8 @@ sub get_cli_options () { 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'}, 'statistics' => \$cli_options{'statistics'}, 'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'}, - 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'}, - 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'}, + 'url-statistics-threshold=i'=> \$cli_options{'url-statistics-threshold'}, + 'host-statistics-threshold=i'=> \$cli_options{'host-statistics-threshold'}, 'version' => sub { VersionMessage && exit(0) }, 'help' => \&help, ) or exit(1); @@ -2500,7 +2533,7 @@ Only fixes some breakage, but may be good enough or at least better than nothing Doesn't do anything else, so you probably want to pipe the output into B<privoxy-log-parser> again.
-[B<--url-statistics-threshold>] Only show the request count for a ressource +[B<--url-statistics-threshold>] Only show the request count for a resource if it's above or equal to the given threshold. If the threshold is 0, URL statistics are disabled.