X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=481455f56e0209f1d18d114961d09b85b51689e9;hp=b4fcffa0a061a62561f0b72c978b49df316cd057;hb=e0a1365d9a42490e6063603b2bde1c2f50915298;hpb=ed42b61a0666c729ded5c3748f562fdd8bf23292 diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index b4fcffa0..481455f5 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,7 +8,7 @@ # # http://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.116 2011/05/03 10:19:20 fabiankeil Exp $ +# $Id: privoxy-log-parser.pl,v 1.140 2012/10/21 13:00:36 fabiankeil Exp $ # # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting @@ -70,7 +70,6 @@ use constant { SHOW_FILTER_READIN_IN => 0, SUPPRESS_EMPTY_LINES => 1, SUPPRESS_SUCCESSFUL_CONNECTIONS => 1, - SUPPRESS_ACCEPTED_CONNECTIONS => 1, SUPPRESS_GIF_NOT_CHANGED => 1, SUPPRESS_NEED_TO_DE_CHUNK_FIRST => 1, @@ -139,6 +138,7 @@ sub prepare_our_stuff () { Force => 'red', Writing => 'light_green', Received => 'yellow', + Actions => 'yellow', # ---------------------- URL => 'yellow', path => 'brown', @@ -227,7 +227,7 @@ sub paint_it ($) { # XXX: The Rolling Stones reference has to go. ############################################################### - my $colour = shift @_; + my $colour = shift; return "" if cli_option_is_set('no-syntax-highlighting'); @@ -302,7 +302,7 @@ sub get_semantic_html_markup ($) { # Takes a string and returns a span element ############################################################### - my $type = shift @_; + my $type = shift; my $code; if ($type =~ /Standard/) { @@ -475,7 +475,7 @@ sub get_colour_html_markup ($) { # markup should always be semantically correct. ############################################################### - my $type = shift @_; + my $type = shift; my $code; if ($type =~ /Standard/) { @@ -572,7 +572,7 @@ sub log_parse_error ($) { my $message = shift; if (LOG_UNPARSED_LINES_TO_EXTRA_FILE) { - open(my $errorlog_fd, ">>" . ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . " failed"; + open(my $errorlog_fd, ">>", ERROR_LOG_FILE) || die "Writing " . ERROR_LOG_FILE . " failed"; print $errorlog_fd $message; close($errorlog_fd); } @@ -913,6 +913,8 @@ sub handle_loglevel_header ($) { or $c =~ m/^keep-alive support is disabled/ or $c =~ m/^Continue hack in da house/ or $c =~ m/^Merged multiple header lines to:/ + or $c =~ m/^Added header: / + or $c =~ m/^Enlisting (?:sorted|left-over) header/ ) { # XXX: Some of these may need highlighting @@ -960,6 +962,9 @@ sub handle_loglevel_header ($) { # keep-alive support is disabled. Crunching: Keep-Alive: 300. # Continue hack in da house. # Merged multiple header lines to: 'X-FORWARDED-PROTO: http X-HOST: 127.0.0.1' + # Added header: Content-Encoding: deflate + # Enlisting sorted header User-Agent: Mozilla/5.0 (X11; SunOS i86pc; rv:10.0.3) Gecko/20100101 Firefox/10.0.3 + # Enlisting left-over header Connection: close } elsif ($c =~ m/^scanning headers for:/) { @@ -1193,6 +1198,13 @@ sub handle_loglevel_re_filter ($) { # Adding dynamic re_filter job s@^(?:\w*)\s+.*\s+HTTP/\d\.\d\s*@IP-ADDRESS: $origin@D\ # to filter client-ip-address succeeded. + } elsif ($c =~ m/^Compressed content from /) { + + # Compressed content from 29258 to 8630 bytes. Compression level: 3 + $content =~ s@(?<=from )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $content =~ s@(?<=to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $content =~ s@(?<=level: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Reading in filter/) { return '' unless SHOW_FILTER_READIN_IN; @@ -1258,6 +1270,11 @@ sub handle_loglevel_redirect ($) { # XXX: assume the same? $c = highlight_matched_url($c, '(?<=assuming that \")[^"]*'); + } elsif ($c =~ m/^Percent-encoding redirect/) { + + # Percent-encoding redirect URL: http://www.example.org/\x02 + $c = highlight_matched_url($c, '(?<=redirect URL: ).*'); + } else { found_unknown_content($c); @@ -1405,14 +1422,31 @@ sub handle_loglevel_connect ($) { $c = highlight_matched_host($c, '(?<=to )[^\s]+'); - } elsif ($c =~ m/^accepted connection from .*/ or + } elsif ($c =~ m/^[Aa]ccepted connection from .*/ or $c =~ m/^OK/) { - # accepted connection from 10.0.0.1 + # Privoxy 3.0.20: + # Accepted connection from 10.0.0.1 on socket 5 + # Privoxy between 3.0.20 and 3.0.6: + # accepted connection from 10.0.0.1( on socket 5)? # Privoxy 3.0.6 and earlier just say: # OK - return '' if SUPPRESS_ACCEPTED_CONNECTIONS; - $c = highlight_matched_host($c, '(?<=connection from ).*'); + $c = highlight_matched_host($c, '(?<=connection from )[^ ]*'); + $c = highlight_matched_pattern($c, 'Number', '(?<=socket )\d+'); + + } elsif ($c =~ m/^Closing client socket/) { + + # Closing client socket 5. Keep-alive: 0, Socket alive: 1. Data available: 0. + # Privoxy 3.0.20 and later + # Closing client socket 8. Keep-alive: 1. Socket alive: 0. Data available: 0. \ + # Configuration file change detected: 0. Requests received: 11. + + $c = highlight_matched_pattern($c, 'Number', '(?<=socket )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=Keep-alive: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=Socket alive: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=available: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=detected: )\d+'); + $c = highlight_matched_pattern($c, 'Number', '(?<=received: )\d+'); } elsif ($c =~ m/^write header to: .* failed:/) { @@ -1517,10 +1551,17 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=Timeout is: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=Assumed latency: )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Stopped waiting for the request line./) { + } elsif ($c =~ m/^Stopped waiting for the request line/ or + $c =~ m/^No request line on socket \d received in time/ or + $c =~ m/^The client side of the connection on socket \d/) { # Stopped waiting for the request line. Timeout: 121. + # Privoxy 3.0.19 and later: + # No request line on socket 5 received in time. Timeout: 1. + # The client side of the connection on socket 5 got closed \ + # without sending a complete request line. $c =~ s@(?<=Timeout: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Waiting for \d/) { @@ -1545,15 +1586,18 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=received: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=read: )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Continuing buffering headers/) { + } elsif ($c =~ m/^Continuing buffering (?:server )?headers/) { # Continuing buffering headers. byte_count: 19. header_offset: 517. len: 536. $c =~ s@(?<=byte_count: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=header_offset: )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=len: )(\d+)@$h{'Number'}$1$h{'Standard'}@; - # 3.0.15 and later: - # Continuing buffering headers. Bytes most recently read: %d. + # 3.0.15 up to 3.0.19: + # Continuing buffering headers. Bytes most recently read: 498. $c =~ s@(?<=read: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + # 3.0.20 and later: + # Continuing buffering server headers from socket 5. Bytes most recently read: 498. + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Received \d+ bytes while/) { @@ -1596,16 +1640,19 @@ sub handle_loglevel_connect ($) { } elsif ($c =~ m/^Waiting for the next client request/ or $c =~ m/^The connection on server socket/ or - $c =~ m/^Client request arrived in time /) { + $c =~ m/^Client request (?:\d+ )?arrived in time /) { # Waiting for the next client request on socket 3. Keeping the server \ # socket 12 to a.fsdn.com open. # The connection on server socket 6 to upload.wikimedia.org isn't reusable. Closing. - # Used by Privoxy 3.0.18 and later: + # Privoxy 3.0.20 and later: + # Client request 4 arrived in time on socket 7. + # Used by Privoxy 3.0.18 and 3.0.19: # Client request arrived in time on socket 21. # Used by earlier version: # Client request arrived in time or the client closed the connection on socket 12. + $c =~ s@(?<=request )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=server socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c = highlight_matched_host($c, '(?<=to )[^\s]+'); @@ -1645,6 +1692,13 @@ sub handle_loglevel_connect ($) { # Waiting for up to 4999 bytes from the client. $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Stopping to watch the client socket/) { + + # Stopping to watch the client socket. There's already another request waiting. + # Privoxy 3.0.20 and later: + # Stopping to watch the client socket 5. There's already another request waiting. + $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Looks like we / or $c =~ m/^Unsetting keep-alive flag/ or $c =~ m/^No connections to wait/ or @@ -1654,7 +1708,6 @@ sub handle_loglevel_connect ($) { $c =~ m/^The server still wants to talk, but the client hung up on us./ or $c =~ m/^The server didn't specify how long the connection will stay open/ or $c =~ m/^There might be a request body. The connection will not be kept alive/ or - $c =~ m/^Stopping to watch the client socket. There's already another request waiting./ or $c =~ m/^Done reading from the client\.$/) { # Looks like we reached the end of the last chunk. We better stop reading. @@ -1672,7 +1725,6 @@ sub handle_loglevel_connect ($) { # The server still wants to talk, but the client hung up on us. # The server didn't specify how long the connection will stay open. Assume it's only a second. # There might be a request body. The connection will not be kept alive. - # Stopping to watch the client socket. There's already another request waiting. # Done reading from the client\. } else { @@ -1859,14 +1911,24 @@ sub handle_loglevel_error ($) { my $c = shift; - if ($c =~ m/^Empty server or forwarder response received on socket \d+./) { + if ($c =~ m/^(?:Empty|No) server or forwarder response received on socket \d+\./) { # Empty server or forwarder response received on socket 4. # Empty server or forwarder response received on socket 3. \ # Closing client socket 15 without sending data. + # Used by Privoxy 3.0.18 and later: + # No server or forwarder response received on socket 8. \ + # Closing client socket 10 without sending data. + $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Didn't receive data in time:/) { + + # Didn't receive data in time: a.fsdn.com:443 + $c =~ s@(?<=in time: )(.*)@$h{'destination'}$1$h{'Standard'}@; } + # XXX: There are probably more messages that deserve highlighting. return $c; @@ -1973,7 +2035,7 @@ sub gather_loglevel_header_stats ($$) { # scan: HTTP/1.1 200 OK $stats{'method'}{$2}++; - $stats{'ressource'}{$3}++; + $stats{'resource'}{$3}++; $stats{'http-version'}{$4}++; } elsif ($c =~ m/^scan: Host: ([^\s]+)/) { @@ -2002,7 +2064,15 @@ sub init_stats () { sub get_percentage ($$) { my $big = shift; my $small = shift; + + # If small is 0 the percentage is always 0%. + # Make sure it works even if big is 0 as well. + return "0.00%" if ($small eq 0); + + # Prevent division by zero. + # XXX: Is this still supposed to be reachable? return "NaN" if ($big eq 0); + return sprintf("%.2f%%", $small / $big * 100); } @@ -2063,12 +2133,12 @@ sub print_stats () { print "URL statistics are disabled. Increase --url-statistics-threshold to enable them.\n"; } else { print "Requested URLs:\n"; - foreach my $ressource (sort {$stats{'ressource'}{$b} <=> $stats{'ressource'}{$a}} keys %{$stats{'ressource'}}) { - if ($stats{'ressource'}{$ressource} < $cli_options{'url-statistics-threshold'}) { + foreach my $resource (sort {$stats{'resource'}{$b} <=> $stats{'resource'}{$a}} keys %{$stats{'resource'}}) { + if ($stats{'resource'}{$resource} < $cli_options{'url-statistics-threshold'}) { print "Skipped statistics for URLs below the treshold.\n"; last; } - printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource; + printf "%d : %s\n", $stats{'resource'}{$resource}, $resource; } } @@ -2116,7 +2186,7 @@ sub print_clf_message () { sub print_non_clf_message ($) { my $content = shift; - my $msec_string = "." . $req{$t}{'msecs'} unless $no_msecs_mode; + my $msec_string = $no_msecs_mode ? '' : '.' . $req{$t}{'msecs'}; my $line_start = $html_output_mode ? '' : $h{"Standard"}; return if DEBUG_SUPPRESS_LOG_MESSAGES; @@ -2178,6 +2248,7 @@ sub parse_loop () { 'Fatal error' => \&handle_loglevel_ignore, 'Writing' => \&handle_loglevel_ignore, 'Received' => \&handle_loglevel_ignore, + 'Actions' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore, ); @@ -2269,6 +2340,7 @@ sub stats_loop () { 'Fatal error' => \&handle_loglevel_ignore, 'Writing' => \&handle_loglevel_ignore, 'Received' => \&handle_loglevel_ignore, + 'Actions' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore ); @@ -2359,8 +2431,8 @@ sub get_cli_options () { 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'}, 'statistics' => \$cli_options{'statistics'}, 'unbreak-lines-only' => \$cli_options{'unbreak-lines-only'}, - 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'}, - 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'}, + 'url-statistics-threshold=i'=> \$cli_options{'url-statistics-threshold'}, + 'host-statistics-threshold=i'=> \$cli_options{'host-statistics-threshold'}, 'version' => sub { VersionMessage && exit(0) }, 'help' => \&help, ) or exit(1); @@ -2494,7 +2566,7 @@ Only fixes some breakage, but may be good enough or at least better than nothing Doesn't do anything else, so you probably want to pipe the output into B again. -[B<--url-statistics-threshold>] Only show the request count for a ressource +[B<--url-statistics-threshold>] Only show the request count for a resource if it's above or equal to the given threshold. If the threshold is 0, URL statistics are disabled.