#
# https://www.fabiankeil.de/sourcecode/privoxy-log-parser/
#
-# $Id: privoxy-log-parser.pl,v 1.169 2017/03/03 17:43:11 fabiankeil Exp $
-#
# TODO:
# - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting
# - create fancy statistics
# hash key as input.
# - Add --compress and --decompress options.
#
-# Copyright (c) 2007-2017 Fabian Keil <fk@fabiankeil.de>
+# Copyright (c) 2007-2020 Fabian Keil <fk@fabiankeil.de>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
use Getopt::Long;
use constant {
- PRIVOXY_LOG_PARSER_VERSION => '0.9',
+ PRIVOXY_LOG_PARSER_VERSION => '0.9.1',
# Feel free to mess with these ...
DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black')
DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black')
return shift;
}
+# Gathers statistics from the content part of a Common Log Format
+# (CLF) message, i.e. everything from the time zone offset onwards:
+#
+#   +0200] "GET https://www.youtube.com/watch?v=JmcA9LIIXWw HTTP/1.1" 200 68004
+#
+# Parameter: the CLF message content as a string.
+#
+# Updates the following entries in the package-level %stats hash:
+#   method, http-version, status-code - one counter per distinct value
+#   resource           - only if 'url-statistics-threshold' is non-zero
+#   hosts              - only if 'host-statistics-threshold' is non-zero
+#                        and the resource is an absolute http(s) URL
+#   content-size-total - sum of the logged sizes
+#
+# If the content can't be parsed, a "Failed to parse" message is printed
+# and %stats is left untouched.
+sub gather_loglevel_clf_stats ($) {
+
+    my $content = shift;
+    our %stats;
+    our %cli_options;
+
+    # Use the list returned by the match instead of $1..$5. The capture
+    # variables keep their previous values when a match fails, so
+    # copying them unconditionally could pick up stale data from an
+    # earlier successful match and hide the parse failure.
+    my ($method, $resource, $http_version, $status_code, $size) =
+        $content =~ m/^[+-]\d{4}\] "(\w+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/;
+
+    unless (defined $method) {
+        print("Failed to parse: $content\n");
+        return;
+    }
+    $stats{'method'}{$method}++;
+    if ($cli_options{'url-statistics-threshold'} != 0) {
+        $stats{'resource'}{$resource}++;
+    }
+    $stats{'http-version'}{$http_version}++;
+
+    if ($cli_options{'host-statistics-threshold'} != 0) {
+        # The scheme's "s" is optional so plain http:// URLs are counted
+        # as well. The pattern is anchored so a URL embedded in the path
+        # or query string isn't mistaken for the host. The host is only
+        # counted if the match succeeded; otherwise $1 would still
+        # contain the request method captured above.
+        if ($resource =~ m@^https?://([^/]+)@) {
+            $stats{'hosts'}{$1}++;
+        }
+    }
+    $stats{'content-size-total'} += $size;
+    $stats{'status-code'}{$status_code}++;
+}
+
+
sub gather_loglevel_request_stats ($$) {
my $c = shift;
my $thread = shift;
my $thread = shift;
our %stats;
- $stats{requests}++;
$stats{crunches}++;
if ($c =~ m/^Redirected:/) {
# A HTTP/1.1 response without Connection header implies keep-alive.
# Keeping the server header 'Connection: keep-alive' around.
$stats{'server-keep-alive'}++;
-
- } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) {
-
- # scan: HTTP/1.1 200 OK
- $stats{'method'}{$2}++;
- if ($cli_options{'url-statistics-threshold'} != 0) {
- $stats{'resource'}{$3}++;
- }
- $stats{'http-version'}{$4}++;
-
- } elsif ($c =~ m/^scan: Host: ([^\s]+)/ and
- $cli_options{'host-statistics-threshold'} != 0) {
-
- # scan: Host: p.p
- $stats{'hosts'}{$1}++;
}
}
'reused-connections' => 0,
'server-keep-alive' => 0,
'closed-client-connections' => 0,
+ 'content-size-total' => 0,
);
$stats{'client-requests-on-connection'}{1} = 0;
}
get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) .
")\n";
print "Client connections: " . $stats{'closed-client-connections'} . "\n";
-
+ if ($stats{'content-size-total'}) {
+ print "Bytes transfered excluding headers: " . $stats{'content-size-total'} . "\n";
+ }
my $lines_printed = 0;
print "Client requests per connection distribution:\n";
foreach my $client_requests (sort {
printf "%8d : %-8s\n", $stats{'method'}{$method}, $method;
}
} else {
- print "Method distribution unknown. No response headers parsed yet. Is 'debug 8' enabled?\n";
+ print "Method distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n";
}
print "Client HTTP versions:\n";
foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) {
- printf "%d : %s\n", $stats{'http-version'}{$http_version}, $http_version;
+ printf "%8d : %-8s\n", $stats{'http-version'}{$http_version}, $http_version;
+ }
+ if (exists $stats{'status-code'}) {
+ print "HTTP status codes:\n";
+ foreach my $status_code (sort {$stats{'status-code'}{$b} <=> $stats{'status-code'}{$a}} keys %{$stats{'status-code'}}) {
+ printf "%8d : %-8d\n", $stats{'status-code'}{$status_code}, $status_code;
+ }
+ } else {
+ print "Status code distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n";
}
if ($cli_options{'url-statistics-threshold'} == 0) {
while (<>) {
(undef, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5);
- # Skip LOG_LEVEL_CLF
- next if (not defined($log_level) or $time_stamp eq "-");
- if (defined($log_level_handlers{$log_level})) {
+ next if (not defined($log_level));
+
+ if ($time_stamp eq "-") {
+
+ gather_loglevel_clf_stats($content);
+
+ } elsif (defined($log_level_handlers{$log_level})) {
$content = $log_level_handlers{$log_level}($content, $thread);