X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=aa0b626fba7bf1cf200be48133d8215732d87b8f;hp=79e61ab2ab3f77f2087665b8eb6891c3dd485fd3;hb=c0aea528879a3c5c549da4c586088644b36b9d88;hpb=efb73494ae9de717bde6603dcc95a8ed2a4549a5 diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index 79e61ab2..aa0b626f 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,7 +8,7 @@ # # http://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.123 2011/11/12 12:56:42 fabiankeil Exp $ +# $Id: privoxy-log-parser.pl,v 1.144 2012/10/29 11:59:08 fabiankeil Exp $ # # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting @@ -70,7 +70,6 @@ use constant { SHOW_FILTER_READIN_IN => 0, SUPPRESS_EMPTY_LINES => 1, SUPPRESS_SUCCESSFUL_CONNECTIONS => 1, - SUPPRESS_ACCEPTED_CONNECTIONS => 1, SUPPRESS_GIF_NOT_CHANGED => 1, SUPPRESS_NEED_TO_DE_CHUNK_FIRST => 1, @@ -139,6 +138,7 @@ sub prepare_our_stuff () { Force => 'red', Writing => 'light_green', Received => 'yellow', + Actions => 'yellow', # ---------------------- URL => 'yellow', path => 'brown', @@ -227,7 +227,7 @@ sub paint_it ($) { # XXX: The Rolling Stones reference has to go. ############################################################### - my $colour = shift @_; + my $colour = shift; return "" if cli_option_is_set('no-syntax-highlighting'); @@ -302,7 +302,7 @@ sub get_semantic_html_markup ($) { # Takes a string and returns a span element ############################################################### - my $type = shift @_; + my $type = shift; my $code; if ($type =~ /Standard/) { @@ -475,7 +475,7 @@ sub get_colour_html_markup ($) { # markup should always be semantically correct. ############################################################### - my $type = shift @_; + my $type = shift; my $code; if ($type =~ /Standard/) { @@ -572,7 +572,7 @@ sub log_parse_error ($) { my $message = shift; if (LOG_UNPARSED_LINES_TO_EXTRA_FILE) { - open(my $errorlog_fd, ">>" . ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . " failed"; + open(my $errorlog_fd, ">>", ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . " failed"; print $errorlog_fd $message; close($errorlog_fd); } @@ -914,6 +914,7 @@ sub handle_loglevel_header ($) { or $c =~ m/^Continue hack in da house/ or $c =~ m/^Merged multiple header lines to:/ or $c =~ m/^Added header: / + or $c =~ m/^Enlisting (?:sorted|left-over) header/ ) { # XXX: Some of these may need highlighting @@ -962,6 +963,8 @@ sub handle_loglevel_header ($) { # Continue hack in da house. # Merged multiple header lines to: 'X-FORWARDED-PROTO: http X-HOST: 127.0.0.1' # Added header: Content-Encoding: deflate + # Enlisting sorted header User-Agent: Mozilla/5.0 (X11; SunOS i86pc; rv:10.0.3) Gecko/20100101 Firefox/10.0.3 + # Enlisting left-over header Connection: close } elsif ($c =~ m/^scanning headers for:/) { @@ -1419,14 +1422,31 @@ sub handle_loglevel_connect ($) { $c = highlight_matched_host($c, '(?<=to )[^\s]+'); - } elsif ($c =~ m/^accepted connection from .*/ or + } elsif ($c =~ m/^[Aa]ccepted connection from .*/ or $c =~ m/^OK/) { - # accepted connection from 10.0.0.1 + # Privoxy 3.0.20: + # Accepted connection from 10.0.0.1 on socket 5 + # Privoxy between 3.0.20 and 3.0.6: + # accepted connection from 10.0.0.1( on socket 5)? # Privoxy 3.0.6 and earlier just say: # OK - return '' if SUPPRESS_ACCEPTED_CONNECTIONS; - $c = highlight_matched_host($c, '(?<=connection from ).*'); + $c = highlight_matched_host($c, '(?<=connection from )[^ ]*'); + $c = highlight_matched_pattern($c, 'Number', '(?<=socket )\d+'); + + } elsif ($c =~ m/^Closing client socket/) { + + # Closing client socket 5. Keep-alive: 0, Socket alive: 1. Data available: 0. + # Privoxy 3.0.20 and later + # Closing client socket 8. Keep-alive: 1. Socket alive: 0. Data available: 0. \ + # Configuration file change detected: 0. Requests received: 11. + + $c = highlight_matched_pattern($c, 'Number', '(?<=socket )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=Keep-alive: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=Socket alive: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=available: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=detected: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=received: )\d+'); } elsif ($c =~ m/^write header to: .* failed:/) { @@ -1531,10 +1551,17 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=Timeout is: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=Assumed latency: )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Stopped waiting for the request line./) { + } elsif ($c =~ m/^Stopped waiting for the request line/ or + $c =~ m/^No request line on socket \d received in time/ or + $c =~ m/^The client side of the connection on socket \d/) { # Stopped waiting for the request line. Timeout: 121. + # Privoxy 3.0.19 and later: + # No request line on socket 5 received in time. Timeout: 1. + # The client side of the connection on socket 5 got closed \ + # without sending a complete request line. $c =~ s@(?<=Timeout: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Waiting for \d/) { @@ -1559,15 +1586,18 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=received: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=read: )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Continuing buffering headers/) { + } elsif ($c =~ m/^Continuing buffering (?:server )?headers/) { # Continuing buffering headers. byte_count: 19. header_offset: 517. len: 536. $c =~ s@(?<=byte_count: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=header_offset: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=len: )(\d+)@$h{'Number'}$1$h{'Standard'}@; - # 3.0.15 and later: - # Continuing buffering headers. Bytes most recently read: %d. + # 3.0.15 up to 3.0.19: + # Continuing buffering headers. Bytes most recently read: 498. $c =~ s@(?<=read: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + # 3.0.20 and later: + # Continuing buffering server headers from socket 5. Bytes most recently read: 498. + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Received \d+ bytes while/) { @@ -1581,7 +1611,7 @@ sub handle_loglevel_connect ($) { # Rejecting connection from 178.63.152.227. Maximum number of connections reached. $c =~ s@(?<=onnection from )((?:\d+\.?){3}\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^(?:Reusing|Closing) server socket \d./ or + } elsif ($c =~ m/^(?:Reusing|Closing) server socket / or $c =~ m/^No additional client request/) { # Reusing server socket 4. Opened for 10.0.0.1. @@ -1589,9 +1619,17 @@ sub handle_loglevel_connect ($) { # No additional client request received in time. \ # Closing server socket 4, initially opened for 10.0.0.1. # No additional client request received in time on socket 29. + # Privoxy 3.0.20 and later + # Reusing server socket 7 connected to www.privoxy.org. Total requests: 2. + # Closing server socket 6 connected to d.asset.soup.io. Keep-alive: 0.\ + # Tainted: 1. Socket alive: 1. Timeout: 60. Configuration file change detected: 0. $c =~ s@(?<= socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; - $c = highlight_matched_host($c, '(?<=for )[^\s]+(?=\.$)'); + $c = highlight_matched_host($c, '(?<=for )[^\s]+(?=\.)'); + $c = highlight_matched_host($c, '(?<=connected to )[^\s]+(?=\.)'); + for my $number_pattern ('requests', 'Keep-alive', 'Tainted', ' alive', 'Timeout', 'detected') { + $c = highlight_matched_pattern($c, 'Number', '(?<='. $number_pattern . ': )\d+'); + } } elsif ($c =~ m/^Connected to /) { @@ -1610,16 +1648,19 @@ sub handle_loglevel_connect ($) { } elsif ($c =~ m/^Waiting for the next client request/ or $c =~ m/^The connection on server socket/ or - $c =~ m/^Client request arrived in time /) { + $c =~ m/^Client request (?:\d+ )?arrived in time /) { # Waiting for the next client request on socket 3. Keeping the server \ # socket 12 to a.fsdn.com open. # The connection on server socket 6 to upload.wikimedia.org isn't reusable. Closing. - # Used by Privoxy 3.0.18 and later: + # Privoxy 3.0.20 and later: + # Client request 4 arrived in time on socket 7. + # Used by Privoxy 3.0.18 and 3.0.19: # Client request arrived in time on socket 21. # Used by earlier version: # Client request arrived in time or the client closed the connection on socket 12. + $c =~ s@(?<=request )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=server socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c = highlight_matched_host($c, '(?<=to )[^\s]+'); @@ -1659,6 +1700,30 @@ sub handle_loglevel_connect ($) { # Waiting for up to 4999 bytes from the client. $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Optimistically sending /) { + + # Optimistically sending 318 bytes of client headers intended for www.privoxy.org + $c =~ s@(?<=sending )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c = highlight_matched_host($c, '(?<=for )[^\s]+'); + + } elsif ($c =~ m/^Stopping to watch the client socket/) { + + # Stopping to watch the client socket. There's already another request waiting. + # Privoxy 3.0.20 and later: + # Stopping to watch the client socket 5. There's already another request waiting. + $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Drained \d+ bytes before closing/) { + + # Drained 180 bytes before closing socket 6 + $c =~ s@(?<=Drained )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Tainting client socket/) { + + # Tainting client socket 7 due to unread data. + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Looks like we / or $c =~ m/^Unsetting keep-alive flag/ or $c =~ m/^No connections to wait/ or @@ -1668,7 +1733,6 @@ sub handle_loglevel_connect ($) { $c =~ m/^The server still wants to talk, but the client hung up on us./ or $c =~ m/^The server didn't specify how long the connection will stay open/ or $c =~ m/^There might be a request body. The connection will not be kept alive/ or - $c =~ m/^Stopping to watch the client socket. There's already another request waiting./ or $c =~ m/^Done reading from the client\.$/) { # Looks like we reached the end of the last chunk. We better stop reading. @@ -1686,7 +1750,6 @@ sub handle_loglevel_connect ($) { # The server still wants to talk, but the client hung up on us. # The server didn't specify how long the connection will stay open. Assume it's only a second. # There might be a request body. The connection will not be kept alive. - # Stopping to watch the client socket. There's already another request waiting. # Done reading from the client\. } else { @@ -1997,7 +2060,7 @@ sub gather_loglevel_header_stats ($$) { # scan: HTTP/1.1 200 OK $stats{'method'}{$2}++; - $stats{'ressource'}{$3}++; + $stats{'resource'}{$3}++; $stats{'http-version'}{$4}++; } elsif ($c =~ m/^scan: Host: ([^\s]+)/) { @@ -2026,7 +2089,15 @@ sub init_stats () { sub get_percentage ($$) { my $big = shift; my $small = shift; + + # If small is 0 the percentage is always 0%. + # Make sure it works even if big is 0 as well. + return "0.00%" if ($small eq 0); + + # Prevent division by zero. + # XXX: Is this still supposed to be reachable? return "NaN" if ($big eq 0); + return sprintf("%.2f%%", $small / $big * 100); } @@ -2087,12 +2158,12 @@ sub print_stats () { print "URL statistics are disabled. Increase --url-statistics-threshold to enable them.\n"; } else { print "Requested URLs:\n"; - foreach my $ressource (sort {$stats{'ressource'}{$b} <=> $stats{'ressource'}{$a}} keys %{$stats{'ressource'}}) { - if ($stats{'ressource'}{$ressource} < $cli_options{'url-statistics-threshold'}) { + foreach my $resource (sort {$stats{'resource'}{$b} <=> $stats{'resource'}{$a}} keys %{$stats{'resource'}}) { + if ($stats{'resource'}{$resource} < $cli_options{'url-statistics-threshold'}) { print "Skipped statistics for URLs below the treshold.\n"; last; } - printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource; + printf "%d : %s\n", $stats{'resource'}{$resource}, $resource; } } @@ -2140,7 +2211,7 @@ sub print_clf_message () { sub print_non_clf_message ($) { my $content = shift; - my $msec_string = "." . $req{$t}{'msecs'} unless $no_msecs_mode; + my $msec_string = $no_msecs_mode ? '' : '.' . $req{$t}{'msecs'}; my $line_start = $html_output_mode ? '' : $h{"Standard"}; return if DEBUG_SUPPRESS_LOG_MESSAGES; @@ -2202,6 +2273,7 @@ sub parse_loop () { 'Fatal error' => \&handle_loglevel_ignore, 'Writing' => \&handle_loglevel_ignore, 'Received' => \&handle_loglevel_ignore, + 'Actions' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore, ); @@ -2293,6 +2365,7 @@ sub stats_loop () { 'Fatal error' => \&handle_loglevel_ignore, 'Writing' => \&handle_loglevel_ignore, 'Received' => \&handle_loglevel_ignore, + 'Actions' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore ); @@ -2518,7 +2591,7 @@ Only fixes some breakage, but may be good enough or at least better than nothing Doesn't do anything else, so you probably want to pipe the output into B again. -[B<--url-statistics-threshold>] Only show the request count for a ressource +[B<--url-statistics-threshold>] Only show the request count for a resource if it's above or equal to the given threshold. If the threshold is 0, URL statistics are disabled.