X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=54ffb2ccedd81c39bbe795a930129e20d865306f;hp=d088710a056ca79fbc0086e5296cc4467d5ea97f;hb=ee191563e8abe63a4cafad72cf04411b0a28accf;hpb=51bd109fafe6658af115b239819cf45e48f7a62f diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index d088710a..54ffb2cc 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -8,8 +8,6 @@ # # https://www.fabiankeil.de/sourcecode/privoxy-log-parser/ # -# $Id: privoxy-log-parser.pl,v 1.167 2017/02/24 12:00:16 fabiankeil Exp $ -# # TODO: # - LOG_LEVEL_CGI, LOG_LEVEL_ERROR, LOG_LEVEL_WRITE content highlighting # - create fancy statistics @@ -25,7 +23,7 @@ # hash key as input. # - Add --compress and --decompress options. # -# Copyright (c) 2007-2017 Fabian Keil +# Copyright (c) 2007-2021 Fabian Keil # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -45,7 +43,7 @@ use warnings; use Getopt::Long; use constant { - PRIVOXY_LOG_PARSER_VERSION => '0.9', + PRIVOXY_LOG_PARSER_VERSION => '0.9.2', # Feel free to mess with these ... DEFAULT_BACKGROUND => 'black', # Choose registered colour (like 'black') DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black') @@ -54,6 +52,7 @@ use constant { CLI_OPTION_DEFAULT_TO_HTML_OUTPUT => 0, CLI_OPTION_TITLE => 'Privoxy-Log-Parser in da house', + CLI_OPTION_KEEP_DATE => 0, CLI_OPTION_NO_EMBEDDED_CSS => 0, CLI_OPTION_NO_MSECS => 0, CLI_OPTION_NO_SYNTAX_HIGHLIGHTING => 0, @@ -109,11 +108,12 @@ my %h_colours; my $header_highlight_regex = ''; my $html_output_mode; +my $keep_date_mode; my $no_msecs_mode; # XXX: should probably be removed my $shorten_thread_ids; my $line_end; -sub prepare_our_stuff () { +sub prepare_our_stuff() { # Syntax Higlight hash @all_colours = ( @@ -130,6 +130,7 @@ sub prepare_our_stuff () { 'Re-Filter' => 'purple', Connect => 'brown', Request => 'light_cyan', + Tagging => 'purple', CGI => 'light_green', Redirect => 'cyan', Error => 'light_red', @@ -178,6 +179,7 @@ sub prepare_our_stuff () { 'pcrs-delimiter' => 'light_red', 'ignored' => 'light_red', 'action-bits-update' => 'light_red', + 'http-downgrade' => 'light_red', 'configuration-line' => 'red', 'content-type' => 'yellow', 'HOST' => HEADER_DEFAULT_COLOUR, @@ -221,7 +223,7 @@ sub prepare_our_stuff () { init_stats(); } -sub paint_it ($) { +sub paint_it($) { ############################################################### # Takes a colour string and returns an ANSI escape sequence # (unless --no-syntax-highlighting is used). @@ -298,7 +300,7 @@ sub paint_it ($) { return $colour_code; } -sub get_semantic_html_markup ($) { +sub get_semantic_html_markup($) { ############################################################### # Takes a string and returns a span element ############################################################### @@ -316,7 +318,7 @@ sub get_semantic_html_markup ($) { return $code; } -sub cli_option_is_set ($) { +sub cli_option_is_set($) { our %cli_options; my $cli_option = shift; @@ -326,7 +328,7 @@ sub cli_option_is_set ($) { return $cli_options{$cli_option}; } -sub get_html_title () { +sub get_html_title() { our %cli_options; return $cli_options{'title'}; @@ -356,7 +358,7 @@ sub init_css_colours() { ); } -sub get_css_colour ($) { +sub get_css_colour($) { our %css_colours; my $colour = shift; @@ -366,7 +368,7 @@ sub get_css_colour ($) { return '#' . $css_colours{$colour}; } -sub get_css_line ($) { +sub get_css_line($) { my $class = shift; my $css_line; @@ -380,7 +382,7 @@ sub get_css_line ($) { return $css_line; } -sub get_css_line_for_colour ($) { +sub get_css_line_for_colour($) { my $colour = shift; my $css_line; @@ -394,7 +396,7 @@ sub get_css_line_for_colour ($) { } # XXX: Wrong solution -sub get_missing_css_lines () { +sub get_missing_css_lines() { my $css_line; @@ -406,7 +408,7 @@ sub get_missing_css_lines () { return $css_line; } -sub get_css () { +sub get_css() { our %css_colours; #XXX: Wrong solution @@ -435,7 +437,7 @@ sub get_css () { return $css; } -sub print_intro () { +sub print_intro() { my $intro = ''; @@ -453,7 +455,7 @@ sub print_intro () { } } -sub print_outro () { +sub print_outro() { my $outro = ''; @@ -465,11 +467,11 @@ sub print_outro () { } } -sub get_line_end () { +sub get_line_end() { return cli_option_is_set('html-output') ? "
\n" : "\n"; } -sub get_colour_html_markup ($) { +sub get_colour_html_markup($) { ############################################################### # Takes a colour string a span element. XXX: WHAT? # XXX: This function shouldn't be necessary, the @@ -488,21 +490,21 @@ sub get_colour_html_markup ($) { return $code; } -sub default_colours () { +sub default_colours() { # XXX: Properly our $bg_code; return reset_colours(); } -sub show_colours () { +sub show_colours() { # XXX: Implement } -sub reset_colours () { +sub reset_colours() { return ESCAPE . "0m"; } -sub set_background ($){ +sub set_background($) { my $colour = shift; our $bg_code; @@ -525,11 +527,11 @@ sub set_background ($){ } } -sub get_background (){ +sub get_background() { return our $bg_code; } -sub prepare_highlight_hash ($) { +sub prepare_highlight_hash($) { my $ref = shift; foreach my $key (keys %$ref) { @@ -539,7 +541,7 @@ sub prepare_highlight_hash ($) { } } -sub prepare_colour_array ($) { +sub prepare_colour_array($) { my $ref = shift; foreach my $i (0 ... @$ref - 1) { @@ -549,7 +551,7 @@ sub prepare_colour_array ($) { } } -sub found_unknown_content ($) { +sub found_unknown_content($) { my $unknown = shift; my $message; @@ -568,7 +570,7 @@ sub found_unknown_content ($) { die "Unworthy content parser" if PUNISH_MISSING_LOG_KNOWLEDGE_WITH_DEATH; } -sub log_parse_error ($) { +sub log_parse_error($) { my $message = shift; @@ -579,7 +581,7 @@ sub log_parse_error ($) { } } -sub debug_message (@) { +sub debug_message(@) { my @message = @_; print $h{'debug'} . "@message" . $h{'Standard'} . "\n"; @@ -589,7 +591,7 @@ sub debug_message (@) { # highlighter functions that aren't loglevel-specific ################################################################################ -sub h ($) { +sub h($) { # Get highlight marker my $highlight = shift; # XXX: Stupid name; @@ -611,7 +613,7 @@ sub h ($) { return $result; } -sub highlight_known_headers ($) { +sub highlight_known_headers($) { my $content = shift; @@ -626,7 +628,7 @@ sub highlight_known_headers ($) { return $content; } -sub highlight_matched_request_line ($$) { +sub highlight_matched_request_line($$) { my $result = shift; # XXX: Stupid name; my $regex = shift; @@ -636,7 +638,7 @@ sub highlight_matched_request_line ($$) { return $result; } -sub highlight_request_line ($) { +sub highlight_request_line($) { my $rl = shift; my ($method, $url, $http_version); @@ -677,7 +679,7 @@ sub highlight_request_line ($) { return $rl; } -sub highlight_response_line ($) { +sub highlight_response_line($) { my $rl = shift; my ($http_version, $status_code, $status_message); @@ -704,7 +706,7 @@ sub highlight_response_line ($) { return $rl; } -sub highlight_matched_url ($$) { +sub highlight_matched_url($$) { my $result = shift; # XXX: Stupid name; my $regex = shift; @@ -719,7 +721,7 @@ sub highlight_matched_url ($$) { return $result; } -sub highlight_matched_host ($$) { +sub highlight_matched_host($$) { my ($result, $regex) = @_; # XXX: result ist stupid name; @@ -730,7 +732,7 @@ sub highlight_matched_host ($$) { return $result; } -sub highlight_matched_pattern ($$$) { +sub highlight_matched_pattern($$$) { my $result = shift; # XXX: Stupid name; my $key = shift; @@ -745,7 +747,7 @@ sub highlight_matched_pattern ($$$) { return $result; } -sub highlight_matched_path ($$) { +sub highlight_matched_path($$) { my $result = shift; # XXX: Stupid name; my $regex = shift; @@ -757,7 +759,7 @@ sub highlight_matched_path ($$) { return $result; } -sub highlight_url ($) { +sub highlight_url($) { my $url = shift; @@ -774,7 +776,7 @@ sub highlight_url ($) { return $url; } -sub update_header_highlight_regex ($) { +sub update_header_highlight_regex($) { my $header = shift; my $headers = join ('|', keys %header_colours); @@ -787,7 +789,7 @@ sub update_header_highlight_regex ($) { # loglevel-specific highlighter functions ################################################################################ -sub handle_loglevel_header ($) { +sub handle_loglevel_header($) { my $c = shift; @@ -1039,7 +1041,7 @@ sub handle_loglevel_header ($) { return $c; } -sub handle_loglevel_re_filter ($) { +sub handle_loglevel_re_filter($) { my $content = shift; my $c = $content; @@ -1063,7 +1065,7 @@ sub handle_loglevel_re_filter ($) { return ''; } - $c =~ s@(?<=\(size )(\d+)\)(?= with)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=\(size )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=\(new size )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=produced )(\d+)(?= hits)@$h{'Number'}$1$h{'Standard'}@; @@ -1076,6 +1078,17 @@ sub handle_loglevel_re_filter ($) { $c .= $req{$t}{'content_size_change'} . $h{'Standard'} . ")"; $content = $c; + } elsif ($c =~ m/^filtering request body from client /) { + + # filtering request body from client 127.0.0.1 (size 958) with 'null-filter' produced 0 hits (new size 958). + + $c =~ s@(?<=from client )([^\s]+)@$h{'ip-address'}$1$h{'Standard'}@; + $c =~ s@(?<=\(size )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@([^\s]+?)(\'? produced)@$h{'filter'}$1$h{'Standard'}$2@; + $c =~ s@(?<=\(new size )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=produced )(\d+)(?= hits)@$h{'Number'}$1$h{'Standard'}@; + $content = $c; + } elsif ($c =~ /\.{3}$/ and $c =~ m/^(?:re_)?filtering \'?(.*?)\'? \(size (\d*)\) with (?:filter )?\'?([^\s]*?)\'? ?\.{3}$/) { @@ -1212,6 +1225,12 @@ sub handle_loglevel_re_filter ($) { return '' unless SHOW_FILTER_READIN_IN; + } elsif ($c =~ m/^Decompression didn't result/) { + + # Decompression didn't result in any content. + + # Nothing to highlight. + } else { found_unknown_content($content); @@ -1221,7 +1240,42 @@ sub handle_loglevel_re_filter ($) { return $content; } -sub handle_loglevel_redirect ($) { +sub handle_loglevel_tagging($) { + + my $c = shift; + + if ($c =~ /^Tagger \'([^\']*)\' added tag \'([^\']*)\'/ or + $c =~ m/^Adding tag \'([^\']*)\' created by header tagger \'([^\']*)\'/) { + + # Adding tag 'GET request' created by header tagger 'method-man' (XXX: no longer used) + # Tagger 'revalidation' added tag 'REVALIDATION-REQUEST'. No action bit update necessary. + # Tagger 'revalidation' added tag 'REVALIDATION-REQUEST'. Action bits updated accordingly. + + # XXX: Save tag and tagger + + $c =~ s@(?<=^Tagger \')([^\']*)@$h{'tagger'}$1$h{'Standard'}@; + $c =~ s@(?<=added tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@; + $c =~ s@(?<=Action bits )(updated)@$h{'action-bits-update'}$1$h{'Standard'}@; + + } elsif ($c =~ /^Enlisting tag/) { + + # Enlisting tag 'forward-directly' for client 127.0.0.1. + + $c =~ s@(?<=tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@; + $c = highlight_matched_host($c, '[^\s]+(?=\.$)'); + + } elsif ($c =~ /^Client tag/) { + + # Client tag 'forward-directly' matches + + $c =~ s@(?<=tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@; + + } + + return $c; +} + +sub handle_loglevel_redirect($) { my $c = shift; @@ -1278,6 +1332,17 @@ sub handle_loglevel_redirect ($) { # Percent-encoding redirect URL: http://www.example.org/\x02 $c = highlight_matched_url($c, '(?<=redirect URL: ).*'); + } elsif ($c =~ m/^Rewrite detected:/) { + + # Rewrite detected: GET http://10.0.0.2:88/blah.txt HTTP/1.1 + # Rewrite detected: GET https://www.electrobsd.org/CommonJS/ajax/libs/jquery/3.4.1/jquery.min.js HTTP/1.1 + $c = highlight_matched_request_line($c, '(?<=^Rewrite detected: ).*'); + + } elsif ($c =~ m/^Rewritten request line results in downgrade to http/) { + + # Rewritten request line results in downgrade to http + $c =~ s@(downgrade)@$h{'http-downgrade'}$1$h{'Standard'}@; + } else { found_unknown_content($c); @@ -1287,7 +1352,7 @@ sub handle_loglevel_redirect ($) { return $c; } -sub handle_loglevel_gif_deanimate ($) { +sub handle_loglevel_gif_deanimate($) { my $content = shift; @@ -1333,7 +1398,7 @@ sub handle_loglevel_gif_deanimate ($) { return $content; } -sub handle_loglevel_request ($) { +sub handle_loglevel_request($) { my $content = shift; @@ -1368,7 +1433,7 @@ sub handle_loglevel_request ($) { return $content; } -sub handle_loglevel_crunch ($) { +sub handle_loglevel_crunch($) { my $content = shift; @@ -1383,6 +1448,11 @@ sub handle_loglevel_crunch ($) { # [...]&filter... [too long, truncated] $content = highlight_matched_pattern($content, 'request_', '^.*(?=\.\.\. \[too long, truncated\]$)'); + } elsif ($content =~ m/Certificate error:/) { + + # Certificate error: ASN date error, current date after: https://expired.badssl.com/ + $content = highlight_matched_pattern($content, 'request_', 'https://.*'); + } else { # Blocked: http://ads.example.org/ @@ -1392,7 +1462,7 @@ sub handle_loglevel_crunch ($) { return $content; } -sub handle_loglevel_connect ($) { +sub handle_loglevel_connect($) { my $c = shift; @@ -1612,7 +1682,10 @@ sub handle_loglevel_connect ($) { # Connection from 81.163.28.218 dropped due to ACL # Rejecting connection from 178.63.152.227. Maximum number of connections reached. - $c =~ s@(?<=onnection from )((?:\d+\.?){3}\d+)@$h{'Number'}$1$h{'Standard'}@; + # Connection from 192.168.2.1 on 127.0.1.1:8118 (socket 3) dropped due to ACL + $c = highlight_matched_host($c, '(?<=onnection from )[\d.:]+'); + $c = highlight_matched_host($c, '(?<=on )[\d.:]+'); + $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^(?:Reusing|Closing) server socket / or $c =~ m/^No additional client request/) { @@ -1626,6 +1699,7 @@ sub handle_loglevel_connect ($) { # Reusing server socket 7 connected to www.privoxy.org. Total requests: 2. # Closing server socket 6 connected to d.asset.soup.io. Keep-alive: 0.\ # Tainted: 1. Socket alive: 1. Timeout: 60. Configuration file change detected: 0. + # Reusing server socket 35 connected to nl.wikipedia.org. Requests already sent: 5. $c =~ s@(?<= socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c = highlight_matched_host($c, '(?<=for )[^\s]+(?=\.)'); @@ -1633,6 +1707,7 @@ sub handle_loglevel_connect ($) { for my $number_pattern ('requests', 'Keep-alive', 'Tainted', ' alive', 'Timeout', 'detected') { $c = highlight_matched_pattern($c, 'Number', '(?<='. $number_pattern . ': )\d+'); } + $c =~ s@(?<=already sent: )(\d+)@$h{'Number'}$1$h{'Standard'}@; } elsif ($c =~ m/^Connected to /) { @@ -1737,6 +1812,59 @@ sub handle_loglevel_connect ($) { $c =~ s@(?<=Shifting )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=by )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Flushed (\d+) bytes of request body while expecting (\d+)/) { + + # Flushed 30 bytes of request body while expecting 30 + $c =~ s@(?<=Flushed )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=expecting )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Performing the TLS\/SSL handshake with client. Hash of host:/) { + + # Performing the TLS/SSL handshake with client. Hash of host: bab5296b25e256c7b06b92b17b56bcae + $c = highlight_matched_host($c, '(?<=Hash of host: ).+'); + + } elsif ($c =~ m/^Forwarding \d+ bytes of encrypted POST data/) { + + # Forwarding 1954 bytes of encrypted POST data + $c =~ s@(?<=Forwarding )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Forwarded the last \d+ bytes/) { + + # Forwarded the last 1954 bytes + $c =~ s@(?<=the last )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Waiting for the next client connection. Currently active threads:/) { + + # Waiting for the next client connection. Currently active threads: 30 + $c =~ s@(?<=threads: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Data arrived in time on client socket/) { + + # Data arrived in time on client socket 6. Requests so far: 3 + $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=Requests so far: )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Dropping the client connection on socket/) { + + # Dropping the client connection on socket 71. The server connection has not been established yet. + $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^The client socket \d+ has become unusable while the server/) { + + # The client socket 16 has become unusable while the server socket 24 is still open. + $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=server socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^The last \d+ bytes of the request body have been read/) { + + # The last 12078 bytes of the request body have been read + $c =~ s@(?<=The last )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Flushed \d+ bytes of request body/) { + + # Flushed 3153 bytes of request body + $c =~ s@(?<=Flushed )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Looks like we / or $c =~ m/^Unsetting keep-alive flag/ or $c =~ m/^No connections to wait/ or @@ -1778,7 +1906,7 @@ sub handle_loglevel_connect ($) { } -sub handle_loglevel_info ($) { +sub handle_loglevel_info($) { my $c = shift; @@ -1904,7 +2032,7 @@ sub handle_loglevel_info ($) { return $c; } -sub handle_loglevel_cgi ($) { +sub handle_loglevel_cgi($) { my $c = shift; @@ -1926,7 +2054,7 @@ sub handle_loglevel_cgi ($) { return $c; } -sub handle_loglevel_force ($) { +sub handle_loglevel_force($) { my $c = shift; @@ -1950,7 +2078,7 @@ sub handle_loglevel_force ($) { return $c; } -sub handle_loglevel_error ($) { +sub handle_loglevel_error($) { my $c = shift; @@ -1970,6 +2098,19 @@ sub handle_loglevel_error ($) { # Didn't receive data in time: a.fsdn.com:443 $c =~ s@(?<=in time: )(.*)@$h{'destination'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Sending data on socket \d+ over TLS/) { + + # Sending data on socket 33 over TLS/SSL failed: no TLS/SSL errors detected + $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Chunk size \d+ exceeds buffered data left/) { + + # Chunk size 291 exceeds buffered data left. Already digested 69894 of 69957 buffered bytes. + $c =~ s@(?<=size )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=digested )(\d+)@$h{'Number'}$1$h{'Standard'}@; + $c =~ s@(?<=of )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } # XXX: There are probably more messages that deserve highlighting. @@ -1978,11 +2119,54 @@ sub handle_loglevel_error ($) { } -sub handle_loglevel_ignore ($) { +sub handle_loglevel_ignore($) { return shift; } -sub gather_loglevel_request_stats ($$) { +sub gather_loglevel_clf_stats($) { + + my $content = shift; + my ($method, $resource, $http_version, $status_code, $size); + our %stats; + our %cli_options; + + # +0200] "GET https://www.youtube.com/watch?v=JmcA9LIIXWw HTTP/1.1" 200 68004 + # +0200] "VERSION-CONTROL http://p.p/ HTTP/1.1" 200 2787 + $content =~ m/^[+-]\d{4}\] "([^ ]+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/; + $method = $1; + $resource = $2; + $http_version = $3; + $status_code = $4; + $size = $5; + + $stats{requests_clf}++; + + unless (defined $method) { + # +0200] "Invalid request" 400 0 + return if ($content =~ m/^[+-]\d{4}\] "Invalid request"/); + # +0100] "GET https://securepubads.g.doubleclick.net/gampad/ads?gd[...]... [too long, truncated] + if ($content =~ m/\[too long, truncated\]$/) { + print("Skipped LOG_LEVEL_CLF message that got truncated by Privoxy. Statistics will be inprecise.\n"); + } else { + print("Failed to parse: $content\n"); + } + return; + } + $stats{'method'}{$method}++; + if ($cli_options{'url-statistics-threshold'} != 0) { + $stats{'resource'}{$resource}++; + } + $stats{'http-version'}{$http_version}++; + + if ($cli_options{'host-statistics-threshold'} != 0) { + $resource =~ m@(?:https?://)?([^/]+)/?@; + $stats{'hosts'}{$1}++; + } + $stats{'content-size-total'} += $size; + $stats{'status-code'}{$status_code}++; +} + +sub gather_loglevel_request_stats($$) { my $c = shift; my $thread = shift; our %stats; @@ -1990,12 +2174,11 @@ sub gather_loglevel_request_stats ($$) { $stats{requests}++; } -sub gather_loglevel_crunch_stats ($$) { +sub gather_loglevel_crunch_stats($$) { my $c = shift; my $thread = shift; our %stats; - $stats{requests}++; $stats{crunches}++; if ($c =~ m/^Redirected:/) { @@ -2017,7 +2200,7 @@ sub gather_loglevel_crunch_stats ($$) { } -sub gather_loglevel_error_stats ($$) { +sub gather_loglevel_error_stats($$) { my $c = shift; my $thread = shift; @@ -2036,7 +2219,7 @@ sub gather_loglevel_error_stats ($$) { } } -sub gather_loglevel_connect_stats ($$) { +sub gather_loglevel_connect_stats($$) { my ($c, $thread) = @_; our %thread_data; @@ -2078,10 +2261,11 @@ sub gather_loglevel_connect_stats ($$) { } } -sub gather_loglevel_header_stats ($$) { +sub gather_loglevel_header_stats($$) { my ($c, $thread) = @_; our %stats; + our %cli_options; if ($c =~ m/^A HTTP\/1\.1 response without/ or $c =~ m/^Keeping the server header 'Connection: keep-alive' around./) @@ -2089,24 +2273,13 @@ sub gather_loglevel_header_stats ($$) { # A HTTP/1.1 response without Connection header implies keep-alive. # Keeping the server header 'Connection: keep-alive' around. $stats{'server-keep-alive'}++; - - } elsif ($c =~ m/^scan: ((\w+) (.+) (HTTP\/\d\.\d))/) { - - # scan: HTTP/1.1 200 OK - $stats{'method'}{$2}++; - $stats{'resource'}{$3}++; - $stats{'http-version'}{$4}++; - - } elsif ($c =~ m/^scan: Host: ([^\s]+)/) { - - # scan: Host: p.p - $stats{'hosts'}{$1}++; } } -sub init_stats () { +sub init_stats() { our %stats = ( requests => 0, + requests_clf => 0, crunches => 0, 'server-keep-alive' => 0, 'reused-connections' => 0, @@ -2115,14 +2288,17 @@ sub init_stats () { 'empty-responses-on-reused-connections' => 0, 'fast-redirections' => 0, 'blocked' => 0, + 'connection-failure' => 0, + 'connection-timeout' => 0, 'reused-connections' => 0, 'server-keep-alive' => 0, 'closed-client-connections' => 0, + 'content-size-total' => 0, ); $stats{'client-requests-on-connection'}{1} = 0; } -sub get_percentage ($$) { +sub get_percentage($$) { my $big = shift; my $small = shift; @@ -2137,52 +2313,73 @@ sub get_percentage ($$) { return sprintf("%.2f%%", $small / $big * 100); } -sub print_stats () { +sub print_stats() { our %stats; our %cli_options; my $new_connections = $stats{requests} - $stats{crunches} - $stats{'reused-connections'}; - my $outgoing_requests = $stats{requests} - $stats{crunches}; my $client_requests_checksum = 0; + my $requests_total; + + if ($stats{requests_clf} && $stats{requests} + && $stats{requests_clf} != $stats{requests}) { + print "Inconsistent request counts: " . $stats{requests} . "/" . $stats{requests_clf} . "\n"; + } + + # To get the total number of requests we can use either the number + # of Common-Log-Format lines or the number of "Request:" messages. + # We prefer the number of CLF lines if available because using + # it works when analysing old log files from Privoxy versions before 3.0.29. + # In Privoxy 3.0.28 and earlier "Request:" messages excluded + # crunched messages. + $requests_total = $stats{requests_clf} ? $stats{requests_clf} : $stats{requests}; - if ($stats{requests} eq 0) { + if ($requests_total eq 0) { print "No requests yet.\n"; return; } - print "Client requests total: " . $stats{requests} . "\n"; - print "Crunches: " . $stats{crunches} . " (" . - get_percentage($stats{requests}, $stats{crunches}) . ")\n"; - print "Blocks: " . $stats{'blocked'} . " (" . - get_percentage($stats{requests}, $stats{'blocked'}) . ")\n"; - print "Fast redirections: " . $stats{'fast-redirections'} . " (" . - get_percentage($stats{requests}, $stats{'fast-redirections'}) . ")\n"; - print "Connection timeouts: " . $stats{'connection-timeout'} . " (" . - get_percentage($stats{requests}, $stats{'connection-timeout'}) . ")\n"; - print "Connection failures: " . $stats{'connection-failure'} . " (" . - get_percentage($stats{requests}, $stats{'connection-failure'}) . ")\n"; - print "Outgoing requests: " . $outgoing_requests . " (" . - get_percentage($stats{requests}, $outgoing_requests) . ")\n"; + print "Client requests total: " . $requests_total . "\n"; + if ($stats{crunches}) { + my $outgoing_requests = $requests_total - $stats{crunches}; + print "Crunches: " . $stats{crunches} . " (" . + get_percentage($requests_total, $stats{crunches}) . ")\n"; + print "Blocks: " . $stats{'blocked'} . " (" . + get_percentage($requests_total, $stats{'blocked'}) . ")\n"; + print "Fast redirections: " . $stats{'fast-redirections'} . " (" . + get_percentage($requests_total, $stats{'fast-redirections'}) . ")\n"; + print "Connection timeouts: " . $stats{'connection-timeout'} . " (" . + get_percentage($requests_total, $stats{'connection-timeout'}) . ")\n"; + print "Connection failures: " . $stats{'connection-failure'} . " (" . + get_percentage($requests_total, $stats{'connection-failure'}) . ")\n"; + print "Outgoing requests: " . $outgoing_requests . " (" . + get_percentage($requests_total, $outgoing_requests) . ")\n"; + } else { + print "No crunches detected. Is 'debug 1024' enabled?\n"; + } + print "Server keep-alive offers: " . $stats{'server-keep-alive'} . " (" . - get_percentage($stats{requests}, $stats{'server-keep-alive'}) . ")\n"; + get_percentage($requests_total, $stats{'server-keep-alive'}) . ")\n"; print "New outgoing connections: " . $new_connections . " (" . - get_percentage($stats{requests}, $new_connections) . ")\n"; + get_percentage($requests_total, $new_connections) . ")\n"; print "Reused connections: " . $stats{'reused-connections'} . " (" . - get_percentage($stats{requests}, $stats{'reused-connections'}) . + get_percentage($requests_total, $stats{'reused-connections'}) . "; server offers accepted: " . get_percentage($stats{'server-keep-alive'}, $stats{'reused-connections'}) . ")\n"; print "Empty responses: " . $stats{'empty-responses'} . " (" . - get_percentage($stats{requests}, $stats{'empty-responses'}) . ")\n"; + get_percentage($requests_total, $stats{'empty-responses'}) . ")\n"; print "Empty responses on new connections: " . $stats{'empty-responses-on-new-connections'} . " (" . - get_percentage($stats{requests}, $stats{'empty-responses-on-new-connections'}) + get_percentage($requests_total, $stats{'empty-responses-on-new-connections'}) . ")\n"; print "Empty responses on reused connections: " . $stats{'empty-responses-on-reused-connections'} . " (" . - get_percentage($stats{requests}, $stats{'empty-responses-on-reused-connections'}) . + get_percentage($requests_total, $stats{'empty-responses-on-reused-connections'}) . ")\n"; print "Client connections: " . $stats{'closed-client-connections'} . "\n"; - + if ($stats{'content-size-total'}) { + print "Bytes of content transfered to the client: " . $stats{'content-size-total'} . "\n"; + } my $lines_printed = 0; print "Client requests per connection distribution:\n"; foreach my $client_requests (sort { @@ -2201,7 +2398,7 @@ sub print_stats () { printf "Enable --show-complete-request-distribution to get less common numbers as well.\n"; } # Due to log rotation we may not have a complete picture for all the requests - printf "Improperly accounted requests: ~%d\n", abs($stats{requests} - $client_requests_checksum); + printf "Improperly accounted requests: ~%d\n", abs($requests_total - $client_requests_checksum); if (exists $stats{method}) { print "Method distribution:\n"; @@ -2209,11 +2406,23 @@ sub print_stats () { printf "%8d : %-8s\n", $stats{'method'}{$method}, $method; } } else { - print "Method distribution unknown. No response headers parsed yet. Is 'debug 8' enabled?\n"; + print "Method distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n"; + } + if (exists $stats{'http-version'}) { + print "Client HTTP versions:\n"; + foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) { + printf "%8d : %-8s\n", $stats{'http-version'}{$http_version}, $http_version; + } + } else { + print "HTTP version distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n"; } - print "Client HTTP versions:\n"; - foreach my $http_version (sort {$stats{'http-version'}{$b} <=> $stats{'http-version'}{$a}} keys %{$stats{'http-version'}}) { - printf "%d : %s\n", $stats{'http-version'}{$http_version}, $http_version; + if (exists $stats{'status-code'}) { + print "HTTP status codes:\n"; + foreach my $status_code (sort {$stats{'status-code'}{$b} <=> $stats{'status-code'}{$a}} keys %{$stats{'status-code'}}) { + printf "%8d : %-8d\n", $stats{'status-code'}{$status_code}, $status_code; + } + } else { + print "Status code distribution unknown. No CLF message parsed yet. Is 'debug 512' enabled?\n"; } if ($cli_options{'url-statistics-threshold'} == 0) { @@ -2248,7 +2457,7 @@ sub print_stats () { # Functions that actually print stuff ################################################################################ -sub print_clf_message () { +sub print_clf_message() { our ($ip, $timestamp, $request_line, $status_code, $size); my $output = ''; @@ -2270,15 +2479,17 @@ sub print_clf_message () { print $output; } -sub print_non_clf_message ($) { +sub print_non_clf_message($) { my $content = shift; + my $date_string = $keep_date_mode ? $req{$t}{'day'} . ' ' : ''; my $msec_string = $no_msecs_mode ? '' : '.' . $req{$t}{'msecs'}; my $line_start = $html_output_mode ? '' : $h{"Standard"}; return if DEBUG_SUPPRESS_LOG_MESSAGES; print $line_start + . $date_string . $time_colours[$time_colour_index % 2] . $req{$t}{'time-stamp'} . $msec_string @@ -2295,7 +2506,7 @@ sub print_non_clf_message ($) { . $line_end; } -sub shorten_thread_id ($) { +sub shorten_thread_id($) { my $thread_id = shift; @@ -2309,7 +2520,7 @@ sub shorten_thread_id ($) { return $short_thread_ids{$thread_id} } -sub parse_loop () { +sub parse_loop() { my ($day, $time_stamp, $thread, $log_level, $content, $c, $msecs); my $last_msecs = 0; @@ -2335,6 +2546,7 @@ sub parse_loop () { 'Fatal error' => \&handle_loglevel_ignore, 'Writing' => \&handle_loglevel_ignore, 'Received' => \&handle_loglevel_ignore, + 'Tagging' => \&handle_loglevel_tagging, 'Actions' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore, ); @@ -2409,7 +2621,7 @@ sub parse_loop () { } } -sub stats_loop () { +sub stats_loop() { my ($day, $time_stamp, $msecs, $thread, $log_level, $content); my $strict_checks = cli_option_is_set('strict-checks'); @@ -2432,15 +2644,20 @@ sub stats_loop () { 'Redirect:' => \&handle_loglevel_ignore, 'Unknown log level:' => \&handle_loglevel_ignore, 'Writing:' => \&handle_loglevel_ignore, + 'Tagging:' => \&handle_loglevel_ignore, ); while (<>) { (undef, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5); - # Skip LOG_LEVEL_CLF - next if (not defined($log_level) or $time_stamp eq "-"); - if (defined($log_level_handlers{$log_level})) { + next if (not defined($log_level)); + + if ($time_stamp eq "-") { + + gather_loglevel_clf_stats($content); + + } elsif (defined($log_level_handlers{$log_level})) { $content = $log_level_handlers{$log_level}($content, $thread); @@ -2487,11 +2704,12 @@ sub VersionMessage { print $version_message; } -sub get_cli_options () { +sub get_cli_options() { our %cli_options = ( 'html-output' => CLI_OPTION_DEFAULT_TO_HTML_OUTPUT, 'title' => CLI_OPTION_TITLE, + 'keep-date' => CLI_OPTION_KEEP_DATE, 'no-syntax-highlighting' => CLI_OPTION_NO_SYNTAX_HIGHLIGHTING, 'no-embedded-css' => CLI_OPTION_NO_EMBEDDED_CSS, 'no-msecs' => CLI_OPTION_NO_MSECS, @@ -2508,6 +2726,7 @@ sub get_cli_options () { GetOptions ( 'html-output' => \$cli_options{'html-output'}, 'title' => \$cli_options{'title'}, + 'keep-date' => \$cli_options{'keep-date'}, 'no-syntax-highlighting' => \$cli_options{'no-syntax-highlighting'}, 'no-embedded-css' => \$cli_options{'no-embedded-css'}, 'no-msecs' => \$cli_options{'no-msecs'}, @@ -2525,11 +2744,12 @@ sub get_cli_options () { $html_output_mode = cli_option_is_set('html-output'); $no_msecs_mode = cli_option_is_set('no-msecs'); + $keep_date_mode = cli_option_is_set('keep-date'); $shorten_thread_ids = cli_option_is_set('shorten-thread-ids'); $line_end = get_line_end(); } -sub help () { +sub help() { our %cli_options; @@ -2560,7 +2780,7 @@ see "perldoc $0" for more information ################################################################################ # main ################################################################################ -sub main () { +sub main() { get_cli_options(); set_background(DEFAULT_BACKGROUND); @@ -2624,7 +2844,10 @@ omitted, ANSI escape sequences are used unless B<--no-syntax-highlighting> is ac This option is only intended to make embedding log excerpts in web pages easier. It does not escape any input! -[B<--no-msecs>] Don't expect milisecond resolution +[B<--keep-date>] Don't remove the date when printing highlighted log messages. +Useful when parsing multiple log files at once. + +[B<--no-msecs>] Don't expect millisecond resolution [B<--no-syntax-highlighting>] Disable syntax-highlighting. Useful when the filtered output is piped into less in which case the ANSI control @@ -2713,7 +2936,7 @@ Many settings can't be controlled through command line options yet. =head1 SEE ALSO -privoxy(1) +privoxy(8) =head1 AUTHOR