X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=5a1ebca8af45e4aa23d41ee3d7d739193d1d1345;hp=e381d79ddf52a23b4f15d1651d707070aac72525;hb=fd57abaf17b528b7ede87bdc2f40ad751de2f968;hpb=be2a51e948694e910ca84f3e076c6dc947f254ff diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index e381d79d..5a1ebca8 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,8 +8,6 @@ # # https://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.166 2017/02/24 12:00:07 fabiankeil Exp $ -# # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting # - create fancy statistics @@ -25,7 +23,7 @@ # hash key as input. # - Add --compress and --decompress options. # -# Copyright (c) 2007-2017 Fabian Keil +# Copyright (c) 2007-2020 Fabian Keil # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -45,7 +43,7 @@ use warnings; use Getopt::Long; use constant { - PRIVOXY_LOG_PARSER_VERSION => '0.8', + PRIVOXY_LOG_PARSER_VERSION => '0.9.1', # Feel free to mess with these ... DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black') DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black') @@ -1982,6 +1980,39 @@ sub handle_loglevel_ignore ($) { return shift; } +sub gather_loglevel_clf_stats ($) { + + my $content = shift; + my ($method, $resource, $http_version, $status_code, $size); + our %stats; + our %cli_options; + + # +0200] "GET https://www.youtube.com/watch?v=JmcA9LIIXWw HTTP/1.1" 200 68004 + $content =~ m/^[+-]\d{4}\] "(\w+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/; + $method = $1; + $resource = $2; + $http_version = $3; + $status_code = $4; + $size = $5; + + unless (defined $method) { + print("Failed to parse: $content\n"); + return; + } + $stats{'method'}{$method}++; + if ($cli_options{'url-statistics-threshold'} != 0) { + $stats{'resource'}{$resource}++; + } + $stats{'http-version'}{$http_version}++; + + if ($cli_options{'host-statistics-threshold'} != 0) { + $resource =~ m@(?:http[s]://)([^/]+)/?@; + $stats{'hosts'}{$1}++; + } + $stats{'content-size-total'} += $size; + $stats{'status-code'}{$status_code}++; +} + sub gather_loglevel_request_stats ($$) { my $c = shift; my $thread = shift; @@ -1995,7 +2026,6 @@ sub gather_loglevel_crunch_stats ($$) { my $thread = shift; our %stats; - $stats{requests}++; $stats{crunches}++; if ($c =~ m/^Redirected:/) { @@ -2082,6 +2112,7 @@ sub gather_loglevel_header_stats ($$) { my ($c, $thread) = @_; our %stats; + our %cli_options; if ($c =~ m/^A HTTP\/1\.1 response without/ or $c =~ m/^Keeping the server header 'Connection: keep-alive' around./) @@ -2089,18 +2120,6 @@ sub gather_loglevel_header_stats ($$) { # A HTTP/1.1 response without Connection header implies keep-alive. # Keeping the server header 'Connection: keep-alive' around. $stats{'server-keep-alive'}++; - - } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) { - - # scan: HTTP/1.1 200 OK - $stats{'method'}{$2}++; - $stats{'resource'}{$3}++; - $stats{'http-version'}{$4}++; - - } elsif ($c =~ m/^scan: Host: ([^\s]+)/) { - - # scan: Host: p.p - $stats{'hosts'}{$1}++; } } @@ -2115,9 +2134,12 @@ sub init_stats () { 'empty-responses-on-reused-connections' => 0, 'fast-redirections' => 0, 'blocked' => 0, + 'connection-failure' => 0, + 'connection-timeout' => 0, 'reused-connections' => 0, 'server-keep-alive' => 0, 'closed-client-connections' => 0, + 'content-size-total' => 0, ); $stats{'client-requests-on-connection'}{1} = 0; } @@ -2182,7 +2204,9 @@ sub print_stats () { get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) . ")\n"; print "Client connections: " . $stats{'closed-client-connections'} . "\n"; - + if ($stats{'content-size-total'}) { + print "Bytes transfered excluding headers: " . $stats{'content-size-total'} . "\n"; + } my $lines_printed = 0; print "Client requests per connection distribution:\n"; foreach my $client_requests (sort { @@ -2209,11 +2233,19 @@ sub print_stats () { printf "%8d : %-8s\n", $stats{'method'}{$method}, $method; } } else { - print "Method distribution unknown. No response headers parsed yet. Is 'debug 8' enabled?\n"; + print "Method distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n"; } print "Client HTTP versions:\n"; foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) { - printf "%d : %s\n", $stats{'http-version'}{$http_version}, $http_version; + printf "%8d : %-8s\n", $stats{'http-version'}{$http_version}, $http_version; + } + if (exists $stats{'status-code'}) { + print "HTTP status codes:\n"; + foreach my $status_code (sort {$stats{'status-code'}{$b} <=> $stats{'status-code'}{$a}} keys %{$stats{'status-code'}}) { + printf "%8d : %-8d\n", $stats{'status-code'}{$status_code}, $status_code; + } + } else { + print "Status code distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n"; } if ($cli_options{'url-statistics-threshold'} == 0) { @@ -2437,10 +2469,14 @@ sub stats_loop () { while (<>) { (undef, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5); - # Skip LOG_LEVEL_CLF - next if (not defined($log_level) or $time_stamp eq "-"); - if (defined($log_level_handlers{$log_level})) { + next if (not defined($log_level)); + + if ($time_stamp eq "-") { + + gather_loglevel_clf_stats($content); + + } elsif (defined($log_level_handlers{$log_level})) { $content = $log_level_handlers{$log_level}($content, $thread);