X-Git-Url: http://www.privoxy.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=tools%2Fprivoxy-log-parser.pl;h=0fdc1d219ae2f1a96a9bc87e6773f2ee6de945ae;hb=04f80a552e0db8899ea01e9039ce5143693850f8;hp=962958e998312ac9d07849a27b65d013cc99aa3a;hpb=b14d86513b524f0cca9f72f2f5d73d6c2637dc2e;p=privoxy.git diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index 962958e9..0fdc1d21 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,7 +8,7 @@ # # http://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.79 2010/05/30 16:44:25 fabiankeil Exp $ +# $Id: privoxy-log-parser.pl,v 1.89 2010/08/28 13:20:23 fabiankeil Exp $ # # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting @@ -23,8 +23,9 @@ # - Handle incomplete input without Perl warning about undefined variables. # - Use generic highlighting function that takes a regex and the # hash key as input. +# - Add --compress and --decompress options. # -# Copyright (c) 2007-2009 Fabian Keil +# Copyright (c) 2007-2010 Fabian Keil # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -56,6 +57,7 @@ use constant { CLI_OPTION_NO_EMBEDDED_CSS => 0, CLI_OPTION_NO_MSECS => 0, CLI_OPTION_NO_SYNTAX_HIGHLIGHTING => 0, + CLI_OPTION_SHORTEN_THREAD_IDS => 0, CLI_OPTION_SHOW_INEFFECTIVE_FILTERS => 0, CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0, CLI_OPTION_STATISTICS => 0, @@ -105,6 +107,7 @@ my $header_highlight_regex = ''; my $html_output_mode; my $no_msecs_mode; # XXX: should probably be removed +my $shorten_thread_ids; my $line_end; sub prepare_our_stuff () { @@ -132,6 +135,7 @@ sub prepare_our_stuff () { 'Gif-Deanimate' => 'blue', Force => 'red', Writing => 'light_green', + Received => 'yellow', # ---------------------- URL => 'yellow', path => 'brown', @@ -801,15 +805,10 @@ sub handle_loglevel_header ($) { update_header_highlight_regex($header); } - } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) { + } elsif ($c =~ m/^(scan: )(\w+ .+ HTTP\/\d\.\d)/) { - # Client request line - # Save for statistics (XXX: Not implemented yet) - $req{$t}{'method'} = $2; - $req{$t}{'destination'} = $3; - $req{$t}{'http-version'} = $4; - - $c = highlight_request_line($1); + # scan: HTTP/1.1 200 OK + $c = $1 . highlight_request_line($2); } elsif ($c =~ m/^(scan: )((?:HTTP\/\d\.\d|ICY) (\d+) (.*))/) { @@ -1550,10 +1549,11 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=Received )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=expecting )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Connection from/) { + } elsif ($c =~ m/^(Rejecting c|C)onnection from/) { # Connection from 81.163.28.218 dropped due to ACL - $c =~ s@(?<=^Connection from )((?:\d+\.?){4})@$h{'Number'}$1$h{'Standard'}@; + # Rejecting connection from 178.63.152.227. Maximum number of connections reached. + $c =~ s@(?<=onnection from )((?:\d+\.?){3}\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^(?:Reusing|Closing) server socket \d./ or $c =~ m/^No additional client request/) { @@ -1624,7 +1624,7 @@ sub handle_loglevel_connect ($) { # Waiting for up to 4999 bytes from the client. $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@; - } elsif ($c =~ m/^Looks like we rea/ or + } elsif ($c =~ m/^Looks like we / or $c =~ m/^Unsetting keep-alive flag/ or $c =~ m/^No connections to wait/ or $c =~ m/^Complete client request received/ or @@ -1639,6 +1639,8 @@ sub handle_loglevel_connect ($) { # Looks like we reached the end of the last chunk. We better stop reading. # Looks like we read the end of the last chunk together with the server \ # headers. We better stop reading. + # Looks like we got the last chunk together with the server headers. \ + # We better stop reading. # Unsetting keep-alive flag. # No connections to wait for left. # Client request arrived in time or the client closed the connection. @@ -1925,6 +1927,13 @@ sub gather_loglevel_header_stats ($$) { # A HTTP/1.1 response without Connection header implies keep-alive. # Keeping the server header 'Connection: keep-alive' around. $stats{'server-keep-alive'}++; + + } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) { + + # scan: HTTP/1.1 200 OK + $stats{'method'}{$2}++; + $stats{'ressource'}{$3}++; + $stats{'http-version'}{$4}++; } } @@ -1979,6 +1988,23 @@ sub print_stats () { $stats{'empty-responses-on-reused-connections'} . " (" . get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) . ")\n"; + + if ($stats{method} eq 0) { + print "No response lines parsed yet yet.\n"; + return; + } + print "Method distribution:\n"; + foreach my $method (sort {$stats{'method'}{$b} <=> $stats{'method'}{$a}} keys %{$stats{'method'}}) { + printf "%8d : %-8s\n", $stats{'method'}{$method}, $method; + } + print "Client HTTP versions:\n"; + foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) { + printf "%d : %s\n", $stats{'http-version'}{$http_version}, $http_version; + } + print "Requested ressources:\n"; + foreach my $ressource (sort {$stats{'ressource'}{$b} <=> $stats{'ressource'}{$a}} keys %{$stats{'ressource'}}) { + printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource; + } } @@ -2033,6 +2059,20 @@ sub print_non_clf_message ($) { . $line_end; } +sub shorten_thread_id ($) { + + my $thread_id = shift; + + our %short_thread_ids; + our $max_threadid; + + unless (defined $short_thread_ids{$thread_id}) { + $short_thread_ids{$thread_id} = sprintf "%.3d", $max_threadid++; + } + + return $short_thread_ids{$thread_id} +} + sub parse_loop () { my ($day, $time_stamp, $thread, $log_level, $content, $c, $msecs); @@ -2058,13 +2098,14 @@ sub parse_loop () { 'Error' => \&handle_loglevel_error, 'Fatal error' => \&handle_loglevel_ignore, 'Writing' => \&handle_loglevel_ignore, + 'Received' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore, ); while (<>) { if (m/^(\w{3} \d{2}) (\d\d:\d\d:\d\d)\.?(\d+)? (?:Privoxy\()?([^\)\s]*)[\)]? ([\w -]*): (.*?)\r?$/) { - $thread = $t = $4; + $thread = $t = ($shorten_thread_ids) ? shorten_thread_id($4) : $4; $req{$t}{'day'} = $day = $1; $req{$t}{'time-stamp'} = $time_stamp = $2; $req{$t}{'msecs'} = $msecs = $3 ? $3 : 0; # Only the cool kids have micro second resolution; @@ -2180,7 +2221,7 @@ sub VersionMessage { my $version_message; $version_message .= 'Privoxy-Log-Parser ' . PRIVOXY_LOG_PARSER_VERSION . "\n"; - $version_message .= 'Copyright (C) 2007-2009 Fabian Keil ' . "\n"; + $version_message .= 'Copyright (C) 2007-2010 Fabian Keil ' . "\n"; $version_message .= 'http://www.fabiankeil.de/sourcecode/privoxy-log-parser/' . "\n"; print $version_message; @@ -2194,6 +2235,7 @@ sub get_cli_options () { 'no-syntax-highlighting' => CLI_OPTION_NO_SYNTAX_HIGHLIGHTING, 'no-embedded-css' => CLI_OPTION_NO_EMBEDDED_CSS, 'no-msecs' => CLI_OPTION_NO_MSECS, + 'shorten-thread-ids' => CLI_OPTION_SHORTEN_THREAD_IDS, 'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS, 'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES, 'statistics' => CLI_OPTION_STATISTICS, @@ -2205,6 +2247,7 @@ sub get_cli_options () { 'no-syntax-highlighting' => \$cli_options{'no-syntax-highlighting'}, 'no-embedded-css' => \$cli_options{'no-embedded-css'}, 'no-msecs' => \$cli_options{'no-msecs'}, + 'shorten-thread-ids' => \$cli_options{'shorten-thread-ids'}, 'show-ineffective-filters' => \$cli_options{'show-ineffective-filters'}, 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'}, 'statistics' => \$cli_options{'statistics'}, @@ -2214,6 +2257,7 @@ sub get_cli_options () { $html_output_mode = cli_option_is_set('html-output'); $no_msecs_mode = cli_option_is_set('no-msecs'); + $shorten_thread_ids = cli_option_is_set('shorten-thread-ids'); $line_end = get_line_end(); } @@ -2231,6 +2275,7 @@ Options and their default values if they have any: [--no-embedded-css] [--no-msecs] [--no-syntax-highlighting] + [--shorten-thread-ids] [--show-ineffective-filters] [--statistics] [--title $cli_options{'title'}] @@ -2270,8 +2315,8 @@ B - A parser and syntax-highlighter for Privoxy log messages =head1 SYNOPSIS B [B<--accept-unknown-messages>] [B<--html-output>] -[B<--no-msecs>] [B<--no-syntax-higlighting>] [B<--show-ineffective-filters>] -[B<--version>] +[B<--no-msecs>] [B<--no-syntax-higlighting>] [B<--statistics>] +[B<--shorten-thread-ids>] [B<--show-ineffective-filters>] [B<--version>] =head1 DESCRIPTION @@ -2310,6 +2355,10 @@ the filtered output is piped into less in which case the ANSI control codes don't work, or if the terminal itself doesn't support the control codes. +[B<--shorten-thread-ids>] Shorten the thread ids to a three-digit decimal number. +Note that the mapping from thread ids to shortened ids is created at run-time +and thus varies with the input. + [B<--show-ineffective-filters>] Don't suppress log lines for filters that didn't modify the content.