X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=d987af0aa0ad6d6cb7e37848db47796ec52b5554;hb=7687e9f5bafbd2c3e55285dda4cc004f6a5def16;hp=62d19bd322d11406a824c45f41756704525191c0;hpb=58830411633115693355fc73eda0ce3355d64f9d;p=privoxy.git diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index 62d19bd3..d987af0a 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,7 +8,7 @@ # # http://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.98 2010/11/06 13:27:45 fabiankeil Exp $ +# $Id: privoxy-log-parser.pl,v 1.103 2010/11/08 17:53:29 fabiankeil Exp $ # # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting @@ -62,6 +62,7 @@ use constant { CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0, CLI_OPTION_STATISTICS => 0, CLI_OPTION_URL_STATISTICS_THRESHOLD => 0, + CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0, SUPPRESS_SUCCEEDED_FILTER_ADDITIONS => 1, SHOW_SCAN_INTRO => 0, @@ -177,6 +178,7 @@ sub prepare_our_stuff () { 'action-bits-update' => 'light_red', 'configuration-line' => 'red', 'content-type' => 'yellow', + 'HOST' => HEADER_DEFAULT_COLOUR, ); %h_colours = %h; @@ -808,12 +810,12 @@ sub handle_loglevel_header ($) { } elsif ($c =~ m/^(scan: )(\w+ .+ HTTP\/\d\.\d)/) { - # scan: HTTP/1.1 200 OK + # scan: GET http://p.p/ HTTP/1.1 $c = $1 . highlight_request_line($2); } elsif ($c =~ m/^(scan: )((?:HTTP\/\d\.\d|ICY) (\d+) (.*))/) { - # Server response line + # scan: HTTP/1.1 200 OK $req{$t}{'response_line'} = $2; $req{$t}{'status_code'} = $3; $req{$t}{'status_message'} = $4; @@ -909,6 +911,7 @@ sub handle_loglevel_header ($) { or $c =~ m/^Removing 'Connection: close' to imply keep-alive./ or $c =~ m/^keep-alive support is disabled/ or $c =~ m/^Continue hack in da house/ + or $c =~ m/^Merged multiple header lines to:/ ) { # XXX: Some of these may need highlighting @@ -955,6 +958,7 @@ sub handle_loglevel_header ($) { # Removing 'Connection: close' to imply keep-alive. # keep-alive support is disabled. Crunching: Keep-Alive: 300. # Continue hack in da house. + # Merged multiple header lines to: 'X-FORWARDED-PROTO: http X-HOST: 127.0.0.1' } elsif ($c =~ m/^scanning headers for:/) { @@ -1007,6 +1011,12 @@ sub handle_loglevel_header ($) { $c =~ s@(?<= from )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<= to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Killed all-caps Host header line: HOST:/) { + + # Killed all-caps Host header line: HOST: bestproxydb.com + $c = highlight_matched_host($c, '(?<=HOST: )[^\s]+'); + $c = highlight_matched_pattern($c, 'HOST', 'HOST'); + } else { found_unknown_content($c); @@ -1620,6 +1630,12 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=set to )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=reading )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Reducing expected bytes to /) { + + # Reducing expected bytes to 0. Marking the server socket tainted after throwing 4 bytes away. + $c =~ s@(?<=bytes to )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=after throwing )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Waiting for up to /) { # Waiting for up to 4999 bytes from the client. @@ -1944,6 +1960,11 @@ sub gather_loglevel_header_stats ($$) { $stats{'method'}{$2}++; $stats{'ressource'}{$3}++; $stats{'http-version'}{$4}++; + + } elsif ($c =~ m/^scan: Host: ([^\s]+)/) { + + # scan: Host: p.p + $stats{'hosts'}{$1}++; } } @@ -2035,6 +2056,19 @@ sub print_stats () { printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource; } } + + if ($cli_options{'host-statistics-threshold'} == 0) { + print "Host statistics are disabled. Increase --host-statistics-threshold to enable them.\n"; + } else { + print "Requested Hosts:\n"; + foreach my $host (sort {$stats{'hosts'}{$b} <=> $stats{'hosts'}{$a}} keys %{$stats{'hosts'}}) { + if ($stats{'hosts'}{$host} < $cli_options{'host-statistics-threshold'}) { + print "Skipped statistics for Hosts below the treshold.\n"; + last; + } + printf "%d : %s\n", $stats{'hosts'}{$host}, $host; + } + } } @@ -2270,7 +2304,8 @@ sub get_cli_options () { 'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS, 'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES, 'statistics' => CLI_OPTION_STATISTICS, - 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD, + 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD, + 'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD, ); GetOptions ( @@ -2284,6 +2319,7 @@ sub get_cli_options () { 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'}, 'statistics' => \$cli_options{'statistics'}, 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'}, + 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'}, 'version' => sub { VersionMessage && exit(0) }, 'help' => \&help, ) or exit(1); @@ -2304,6 +2340,7 @@ sub help () { Options and their default values if they have any: [--accept-unknown-messages] + [--host-statistics-threshold $cli_options{'host-statistics-threshold'}] [--html-output] [--no-embedded-css] [--no-msecs] @@ -2378,6 +2415,10 @@ will hide the "filter foo caused 0 hits" message. [B<--accept-unknown-messages>] Don't print warnings in case of unknown messages, just don't highlight them. +[B<--host-statistics-threshold>] Only show the request count for a host +if it's above or equal to the given threshold. If the threshold is 0, host +statistics are disabled. + [B<--html-output>] Use HTML and CSS for the syntax highlighting. If this option is omitted, ANSI escape sequences are used unless B<--no-syntax-highlighting> is active. This option is only intended to make embedding log excerpts in web pages easier.