From 62a13c8ffb2c0038804805ad1346232296969db9 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 8 Nov 2010 17:52:36 +0000 Subject: [PATCH] Add an option to also gather statistics about how many requests were made per host. --- tools/privoxy-log-parser.pl | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index 62d19bd3..bcced797 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,7 +8,7 @@ # # http://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.98 2010/11/06 13:27:45 fabiankeil Exp $ +# $Id: privoxy-log-parser.pl,v 1.99 2010/11/08 17:51:54 fabiankeil Exp $ # # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting @@ -62,6 +62,7 @@ use constant { CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES => 0, CLI_OPTION_STATISTICS => 0, CLI_OPTION_URL_STATISTICS_THRESHOLD => 0, + CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0, SUPPRESS_SUCCEEDED_FILTER_ADDITIONS => 1, SHOW_SCAN_INTRO => 0, @@ -1944,6 +1945,11 @@ sub gather_loglevel_header_stats ($$) { $stats{'method'}{$2}++; $stats{'ressource'}{$3}++; $stats{'http-version'}{$4}++; + + } elsif ($c =~ m/^scan: Host: ([^\s]+)/) { + + # scan: Host: p.p + $stats{'hosts'}{$1}++; } } @@ -2035,6 +2041,19 @@ sub print_stats () { printf "%d : %s\n", $stats{'ressource'}{$ressource}, $ressource; } } + + if ($cli_options{'host-statistics-threshold'} == 0) { + print "Host statistics are disabled. 
Increase --host-statistics-threshold to enable them.\n"; + } else { + print "Requested Hosts:\n"; + foreach my $host (sort {$stats{'hosts'}{$b} <=> $stats{'hosts'}{$a}} keys %{$stats{'hosts'}}) { + if ($stats{'hosts'}{$host} < $cli_options{'host-statistics-threshold'}) { + print "Skipped statistics for Hosts below the treshold.\n"; + last; + } + printf "%d : %s\n", $stats{'hosts'}{$host}, $host; + } + } } @@ -2270,7 +2289,8 @@ sub get_cli_options () { 'show-ineffective-filters' => CLI_OPTION_SHOW_INEFFECTIVE_FILTERS, 'accept-unknown-messages' => CLI_OPTION_ACCEPT_UNKNOWN_MESSAGES, 'statistics' => CLI_OPTION_STATISTICS, - 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD, + 'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD, + 'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD, ); GetOptions ( @@ -2284,6 +2304,7 @@ sub get_cli_options () { 'accept-unknown-messages' => \$cli_options{'accept-unknown-messages'}, 'statistics' => \$cli_options{'statistics'}, 'url-statistics-threshold=s'=> \$cli_options{'url-statistics-threshold'}, + 'host-statistics-threshold=s'=> \$cli_options{'host-statistics-threshold'}, 'version' => sub { VersionMessage && exit(0) }, 'help' => \&help, ) or exit(1); @@ -2304,6 +2325,7 @@ sub help () { Options and their default values if they have any: [--accept-unknown-messages] + [--host-statistics-threshold $cli_options{'host-statistics-threshold'}] [--html-output] [--no-embedded-css] [--no-msecs] @@ -2378,6 +2400,10 @@ will hide the "filter foo caused 0 hits" message. [B<--accept-unknown-messages>] Don't print warnings in case of unknown messages, just don't highlight them. +[B<--host-statistics-threshold>] Only show the request count for a host +if it's above or equal to the given threshold. If the threshold is 0, host +statistics are disabled. + [B<--html-output>] Use HTML and CSS for the syntax highlighting. 
If this option is omitted, ANSI escape sequences are used unless B<--no-syntax-highlighting> is active. This option is only intended to make embedding log excerpts in web pages easier. -- 2.39.2