+# Gathers statistics from the content of a single LOG_LEVEL_CLF message.
+#
+# $content is expected to start with the timezone offset, as in the
+# examples inside the function. Updates the package variable %stats and
+# consults %cli_options to decide whether URL and host statistics are
+# collected. Messages that can't be parsed are still counted in
+# $stats{requests_clf} but contribute to no other counter.
+sub gather_loglevel_clf_stats($) {
+
+    my $content = shift;
+    our %stats;
+    our %cli_options;
+
+    # Every CLF message counts as a request, even if parsing fails below.
+    $stats{requests_clf}++;
+
+    # +0200] "GET https://www.youtube.com/watch?v=JmcA9LIIXWw HTTP/1.1" 200 68004
+    # +0200] "VERSION-CONTROL http://p.p/ HTTP/1.1" 200 2787
+    #
+    # The captures are taken from the list returned by the match and not
+    # from $1..$5. After a failed match $1..$5 still contain whatever the
+    # last successful match left in them (possibly one made by the caller),
+    # which would let an unparsable message pass the check below.
+    my ($method, $resource, $http_version, $status_code, $size) =
+        $content =~ m/^[+-]\d{4}\] "([^ ]+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/;
+
+    unless (defined $method) {
+        # +0200] "Invalid request" 400 0
+        return if ($content =~ m/^[+-]\d{4}\] "Invalid request"/);
+        # +0100] "GET https://securepubads.g.doubleclick.net/gampad/ads?gd[...]... [too long, truncated]
+        if ($content =~ m/\[too long, truncated\]$/) {
+            print("Skipped LOG_LEVEL_CLF message that got truncated by Privoxy. Statistics will be inprecise.\n");
+        } else {
+            print("Failed to parse: $content\n");
+        }
+        return;
+    }
+
+    $stats{'method'}{$method}++;
+    if ($cli_options{'url-statistics-threshold'} != 0) {
+        $stats{'resource'}{$resource}++;
+    }
+    $stats{'http-version'}{$http_version}++;
+
+    if ($cli_options{'host-statistics-threshold'} != 0) {
+        # Only count a host if one could be extracted. The pattern needs
+        # at least one character that isn't a slash, so a resource like
+        # "/" doesn't match and must not be accounted with a stale capture.
+        if ($resource =~ m@(?:https?://)?([^/]+)/?@) {
+            $stats{'hosts'}{$1}++;
+        }
+    }
+    $stats{'content-size-total'} += $size;
+    $stats{'status-code'}{$status_code}++;
+}
+
+# Accounts for one request message by bumping $stats{requests}.
+#
+# Both arguments (the message content and the thread the message belongs
+# to) are accepted but not inspected.
+sub gather_loglevel_request_stats($$) {
+    my ($message, $thread_id) = @_;
+    our %stats;
+
+    $stats{requests}++;
+}
+
+# Accounts for one crunch message.
+#
+# Always bumps $stats{crunches}. If the message content starts with one
+# of the known prefixes the matching detail counter is bumped as well.
+# The second argument (the thread) is accepted but not used.
+sub gather_loglevel_crunch_stats($$) {
+    my ($message, $thread_id) = @_;
+    our %stats;
+
+    # Message prefix => counter to bump. The first matching rule wins.
+    my @crunch_rules = (
+        # Redirected: http://www.example.org/http://p.p/
+        [qr/^Redirected:/, 'fast-redirections'],
+        # Blocked: blogger.googleusercontent.com:443
+        [qr/^Blocked:/, 'blocked'],
+        # Connection timeout: http://c.tile.openstreetmap.org/18/136116/87842.png
+        [qr/^Connection timeout:/, 'connection-timeout'],
+        # Connection failure: http://127.0.0.1:8080/
+        [qr/^Connection failure:/, 'connection-failure'],
+    );
+
+    $stats{crunches}++;
+
+    foreach my $rule (@crunch_rules) {
+        my ($pattern, $counter) = @{$rule};
+        next unless ($message =~ $pattern);
+        $stats{$counter}++;
+        last;
+    }
+}
+
+
+# Accounts for one error message.
+#
+# The first argument is the message content, the second one the key
+# under which per-thread state is kept in %thread_data. Only messages
+# about empty server or forwarder responses affect the statistics.
+sub gather_loglevel_error_stats($$) {
+
+    my ($message, $thread_id) = @_;
+    our %thread_data;
+    our %stats;
+
+    # Empty server or forwarder response received on socket 4.
+    if ($message =~ m/^Empty server or forwarder response received on socket \d+./) {
+
+        # Attribute the empty response to a new or a reused connection.
+        # Anything but a true 'new_connection' flag counts as reused.
+        my $detail_counter = $thread_data{$thread_id}{'new_connection'}
+            ? 'empty-responses-on-new-connections'
+            : 'empty-responses-on-reused-connections';
+
+        $stats{'empty-responses'}++;
+        $stats{$detail_counter}++;
+    }
+}
+
+# Accounts for one connect message.
+#
+# The first argument is the message content, the second one the key
+# under which per-thread state is kept in %thread_data. Depending on the
+# message either the per-thread state, the global %stats or both are
+# updated. Messages that match none of the patterns are ignored.
+sub gather_loglevel_connect_stats($$) {
+
+    my $message = shift;
+    my $thread_id = shift;
+    our %stats;
+    our %thread_data;
+
+    if ($message =~ m/^via ([^\s]+) to: [^\s]+/) {
+
+        # Connect: via 10.0.0.1:8123 to: www.example.org.noconnect
+        my $forwarder = $1;
+        $thread_data{$thread_id}{'forwarder'} = $forwarder; # XXX: is this misuse?
+
+    } elsif ($message =~ m/^to ([^\s]*)$/) {
+
+        # Connect: to lists.sourceforge.net:443
+        $thread_data{$thread_id}{'forwarder'} = 'direct connection';
+
+    } elsif ($message =~ m/^Created new connection to/) {
+
+        # Created new connection to www.privoxy.org:80 on socket 11.
+        $thread_data{$thread_id}{'new_connection'} = 1;
+
+    } elsif ($message =~ m/^Reusing server socket \d./
+             || $message =~ m/^Found reusable socket/) {
+
+        # Reusing server socket 4. Opened for 10.0.0.1.
+        # Found reusable socket 9 for www.privoxy.org:80 in slot 0.
+        $stats{'reused-connections'}++;
+        $thread_data{$thread_id}{'new_connection'} = 0;
+
+    } elsif ($message =~ m/^Closing client socket \d+. .* Requests received: (\d+)\.$/) {
+
+        # Closing client socket 12. Keep-alive: 1. Socket alive: 1. Data available: 0. \
+        # Configuration file change detected: 0. Requests received: 14.
+        my $requests_received = $1;
+        $stats{'closed-client-connections'}++;
+        $stats{'client-requests-on-connection'}{$requests_received}++;
+    }
+}
+
+# Accounts for one header message.
+#
+# Bumps $stats{'server-keep-alive'} if the message content (the first
+# argument) matches one of the two patterns below. The second argument
+# (the thread) is accepted but not used.
+sub gather_loglevel_header_stats($$) {
+
+    my $message = shift;
+    my $thread_id = shift;
+    our %stats;
+
+    # A HTTP/1.1 response without Connection header implies keep-alive.
+    my $implicit_keep_alive = ($message =~ m/^A HTTP\/1\.1 response without/);
+
+    # Keeping the server header 'Connection: keep-alive' around.
+    if ($implicit_keep_alive ||
+        $message =~ m/^Keeping the server header 'Connection: keep-alive' around./)
+    {
+        $stats{'server-keep-alive'}++;
+    }
+}
+
+# (Re)initializes the package variable %stats.
+#
+# Any previous content is discarded. Afterwards all the plain counters
+# are 0 and the requests-per-connection distribution contains a single
+# entry for one request with a count of 0.
+sub init_stats() {
+    my @counters = qw(
+        requests
+        requests_clf
+        crunches
+        server-keep-alive
+        reused-connections
+        empty-responses
+        empty-responses-on-new-connections
+        empty-responses-on-reused-connections
+        fast-redirections
+        blocked
+        connection-failure
+        connection-timeout
+        closed-client-connections
+        content-size-total
+    );
+
+    our %stats = map { ($_ => 0) } @counters;
+    $stats{'client-requests-on-connection'}{1} = 0;
+}
+
+# Returns the second argument as a percentage of the first one.
+#
+# The result is a string with two decimals and a trailing percent sign,
+# for example get_percentage(200, 50) returns "25.00%". If the second
+# argument is zero the result is "0.00%", otherwise "NaN" is returned
+# if the first argument is zero.
+sub get_percentage($$) {
+    my $big = shift;
+    my $small = shift;
+
+    # If small is 0 the percentage is always 0%.
+    # Make sure it works even if big is 0 as well.
+    # The comparisons are numeric so that zero values that don't
+    # stringify to "0" (for example "0.0") are recognized as well.
+    return "0.00%" if ($small == 0);
+
+    # Prevent division by zero.
+    # XXX: Is this still supposed to be reachable?
+    return "NaN" if ($big == 0);
+
+    return sprintf("%.2f%%", $small / $big * 100);
+}
+
+# Prints a summary of the gathered statistics to the currently selected
+# output handle.
+#
+# Reads the package variables %stats and %cli_options and uses
+# get_percentage() for the percentage columns. If no requests have been
+# counted yet only a short notice is printed.
+#
+# All distributions are sorted by descending count. Entries with the
+# same count are ordered by their key so that the output doesn't depend
+# on the hash order.
+sub print_stats() {
+
+    our %stats;
+    our %cli_options;
+    my $client_requests_checksum = 0;
+    my $requests_total;
+
+    if ($stats{requests_clf} && $stats{requests}
+        && $stats{requests_clf} != $stats{requests}) {
+        print "Inconsistent request counts: " . $stats{requests} . "/" . $stats{requests_clf} . "\n";
+    }
+
+    # To get the total number of requests we can use either the number
+    # of Common-Log-Format lines or the number of "Request:" messages.
+    # We prefer the number of CLF lines if available because using
+    # it works when analysing old log files from Privoxy versions before 3.0.29.
+    # In Privoxy 3.0.28 and earlier "Request:" messages excluded
+    # crunched messages.
+    $requests_total = $stats{requests_clf} ? $stats{requests_clf} : $stats{requests};
+
+    # A truth check covers 0 as well as counters that haven't been
+    # initialized, which a string comparison with 0 would let through.
+    unless ($requests_total) {
+        print "No requests yet.\n";
+        return;
+    }
+
+    my $new_connections = $stats{requests} - $stats{crunches} - $stats{'reused-connections'};
+
+    print "Client requests total: " . $requests_total . "\n";
+    if ($stats{crunches}) {
+        my $outgoing_requests = $requests_total - $stats{crunches};
+        print "Crunches: " . $stats{crunches} . " (" .
+            get_percentage($requests_total, $stats{crunches}) . ")\n";
+        print "Blocks: " . $stats{'blocked'} . " (" .
+            get_percentage($requests_total, $stats{'blocked'}) . ")\n";
+        print "Fast redirections: " . $stats{'fast-redirections'} . " (" .
+            get_percentage($requests_total, $stats{'fast-redirections'}) . ")\n";
+        print "Connection timeouts: " . $stats{'connection-timeout'} . " (" .
+            get_percentage($requests_total, $stats{'connection-timeout'}) . ")\n";
+        print "Connection failures: " . $stats{'connection-failure'} . " (" .
+            get_percentage($requests_total, $stats{'connection-failure'}) . ")\n";
+        print "Outgoing requests: " . $outgoing_requests . " (" .
+            get_percentage($requests_total, $outgoing_requests) . ")\n";
+    } else {
+        print "No crunches detected. Is 'debug 1024' enabled?\n";
+    }
+
+    print "Server keep-alive offers: " . $stats{'server-keep-alive'} . " (" .
+        get_percentage($requests_total, $stats{'server-keep-alive'}) . ")\n";
+    print "New outgoing connections: " . $new_connections . " (" .
+        get_percentage($requests_total, $new_connections) . ")\n";
+    print "Reused connections: " . $stats{'reused-connections'} . " (" .
+        get_percentage($requests_total, $stats{'reused-connections'}) .
+        "; server offers accepted: " .
+        get_percentage($stats{'server-keep-alive'}, $stats{'reused-connections'}) . ")\n";
+    print "Empty responses: " . $stats{'empty-responses'} . " (" .
+        get_percentage($requests_total, $stats{'empty-responses'}) . ")\n";
+    print "Empty responses on new connections: "
+        . $stats{'empty-responses-on-new-connections'} . " (" .
+        get_percentage($requests_total, $stats{'empty-responses-on-new-connections'})
+        . ")\n";
+    print "Empty responses on reused connections: " .
+        $stats{'empty-responses-on-reused-connections'} . " (" .
+        get_percentage($requests_total, $stats{'empty-responses-on-reused-connections'}) .
+        ")\n";
+    print "Client connections: " . $stats{'closed-client-connections'} . "\n";
+    if ($stats{'content-size-total'}) {
+        print "Bytes of content transfered to the client: " . $stats{'content-size-total'} . "\n";
+    }
+
+    # The keys are the number of requests on a connection, the values
+    # the number of connections with that many requests. The checksum
+    # has to include all entries, only the printing is limited.
+    my $lines_printed = 0;
+    print "Client requests per connection distribution:\n";
+    foreach my $client_requests (sort {
+        $stats{'client-requests-on-connection'}{$b} <=> $stats{'client-requests-on-connection'}{$a}
+            or $a <=> $b}
+        keys %{$stats{'client-requests-on-connection'}
+        })
+    {
+        my $count = $stats{'client-requests-on-connection'}{$client_requests};
+        $client_requests_checksum += $count * $client_requests;
+        if ($cli_options{'show-complete-request-distribution'} or ($lines_printed < 10)) {
+            printf "%8d: %d\n", $count, $client_requests;
+            $lines_printed++;
+        }
+    }
+    unless ($cli_options{'show-complete-request-distribution'}) {
+        printf "Enable --show-complete-request-distribution to get less common numbers as well.\n";
+    }
+    # Due to log rotation we may not have a complete picture for all the requests
+    printf "Improperly accounted requests: ~%d\n", abs($requests_total - $client_requests_checksum);
+
+    if (exists $stats{method}) {
+        print "Method distribution:\n";
+        foreach my $method (sort {$stats{'method'}{$b} <=> $stats{'method'}{$a} or $a cmp $b} keys %{$stats{'method'}}) {
+            printf "%8d : %-8s\n", $stats{'method'}{$method}, $method;
+        }
+    } else {
+        print "Method distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n";
+    }
+    if (exists $stats{'http-version'}) {
+        print "Client HTTP versions:\n";
+        foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a} or $a cmp $b} keys %{$stats{'http-version'}}) {
+            printf "%8d : %-8s\n", $stats{'http-version'}{$http_version}, $http_version;
+        }
+    } else {
+        print "HTTP version distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n";
+    }
+    if (exists $stats{'status-code'}) {
+        print "HTTP status codes:\n";
+        foreach my $status_code (sort {$stats{'status-code'}{$b} <=> $stats{'status-code'}{$a} or $a <=> $b} keys %{$stats{'status-code'}}) {
+            printf "%8d : %-8d\n", $stats{'status-code'}{$status_code}, $status_code;
+        }
+    } else {
+        print "Status code distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n";
+    }
+
+    # For URLs and hosts the loop stops at the first entry below the
+    # threshold, which relies on the descending order of the counts.
+    if ($cli_options{'url-statistics-threshold'} == 0) {
+        print "URL statistics are disabled. Increase --url-statistics-threshold to enable them.\n";
+    } else {
+        print "Requested URLs:\n";
+        foreach my $resource (sort {$stats{'resource'}{$b} <=> $stats{'resource'}{$a} or $a cmp $b} keys %{$stats{'resource'}}) {
+            if ($stats{'resource'}{$resource} < $cli_options{'url-statistics-threshold'}) {
+                print "Skipped statistics for URLs below the treshold.\n";
+                last;
+            }
+            printf "%d : %s\n", $stats{'resource'}{$resource}, $resource;
+        }
+    }
+
+    if ($cli_options{'host-statistics-threshold'} == 0) {
+        print "Host statistics are disabled. Increase --host-statistics-threshold to enable them.\n";
+    } else {
+        print "Requested Hosts:\n";
+        foreach my $host (sort {$stats{'hosts'}{$b} <=> $stats{'hosts'}{$a} or $a cmp $b} keys %{$stats{'hosts'}}) {
+            if ($stats{'hosts'}{$host} < $cli_options{'host-statistics-threshold'}) {
+                print "Skipped statistics for Hosts below the treshold.\n";
+                last;
+            }
+            printf "%d : %s\n", $stats{'hosts'}{$host}, $host;
+        }
+    }
+}
+
+