wolfSSL: Use LIBWOLFSSL_VERSION_HEX to decide whether or not to use WOLFSSL_X509_V_OK
[privoxy.git] / tools / privoxy-log-parser.pl
index accc39f..ae18c57 100755 (executable)
@@ -23,7 +23,7 @@
 #         hash key as input.
 #       - Add --compress and --decompress options.
 #
-# Copyright (c) 2007-2021 Fabian Keil <fk@fabiankeil.de>
+# Copyright (c) 2007-2024 Fabian Keil <fk@fabiankeil.de>
 #
 # Permission to use, copy, modify, and distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
@@ -43,7 +43,7 @@ use warnings;
 use Getopt::Long;
 
 use constant {
-    PRIVOXY_LOG_PARSER_VERSION => '0.9.4',
+    PRIVOXY_LOG_PARSER_VERSION => '0.9.5',
     # Feel free to mess with these ...
     DEFAULT_BACKGROUND => 'black',  # Choose registered colour (like 'black')
     DEFAULT_TEXT_COLOUR => 'white', # Choose registered colour (like 'black')
@@ -64,6 +64,7 @@ use constant {
     CLI_OPTION_STRICT_CHECKS => 0,
     CLI_OPTION_UNBREAK_LINES_ONLY => 0,
     CLI_OPTION_URL_STATISTICS_THRESHOLD => 0,
+    CLI_OPTION_PASSED_REQUEST_STATISTICS_THRESHOLD => 0,
     CLI_OPTION_HOST_STATISTICS_THRESHOLD => 0,
     CLI_OPTION_SHOW_COMPLETE_REQUEST_DISTRIBUTION => 0,
 
@@ -185,6 +186,8 @@ sub prepare_our_stuff() {
         'configuration-line' => 'red',
         'content-type'       => 'yellow',
         'HOST'               => HEADER_DEFAULT_COLOUR,
+        'tls-version'        => 'pink',
+        'cipher-suite'       => 'light_cyan',
     );
 
     %h_colours = %h;
@@ -1457,7 +1460,8 @@ sub handle_loglevel_crunch($) {
 
     # Highlight crunch reason
     foreach my $reason (keys %reason_colours) {
-        $content =~ s@($reason)@$reason_colours{$reason}$1$h{'Standard'}@g;
+        # Crunch: Blocked: https://capture.condenastdigital.com/track?_o=cne&[...]&dim2=%7B%22adBlocked%[...]
+        $content =~ s@($reason)@$reason_colours{$reason}$1$h{'Standard'}@;
     }
 
     if ($content =~ m/\[too long, truncated\]$/) {
@@ -1570,6 +1574,11 @@ sub handle_loglevel_connect($) {
 
         $c =~ s@(?<=accept failed: )(.*)@$h{'Error'}$1$h{'Standard'}@;
 
+    } elsif ($c =~ m/^Failed to accept\(\) incoming connection:/) {
+
+        # Failed to accept() incoming connection: Software caused connection abort
+        $c =~ s@(?<=connection: )(.*)@$h{'Error'}$1$h{'Standard'}@;
+
     } elsif ($c =~ m/^Overriding forwarding settings/) {
 
         # Overriding forwarding settings based on 'forward 10.0.0.1:8123'
@@ -1624,9 +1633,10 @@ sub handle_loglevel_connect($) {
         $c = highlight_matched_host($c, '(?<=for )[^\s]+');
         $c =~ s@(?<=in slot )(\d+)@$h{'Number'}$1$h{'Standard'}@;
 
-    } elsif ($c =~ m/^Socket/) {
+    } elsif ($c =~ m/^Socket \d+ (already|closed)/) {
 
         # Socket 16 already forgotten or never remembered.
+        # Socket 9 closed while waiting for client headers
         $c =~ s@(?<=Socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
 
     } elsif ($c =~ m/^The connection to/) {
@@ -1643,7 +1653,7 @@ sub handle_loglevel_connect($) {
         $c =~ s@(?<=Assumed latency: )(\d+)@$h{'Number'}$1$h{'Standard'}@;
 
     } elsif ($c =~ m/^Stopped waiting for the request line/ or
-             $c =~ m/^No request line on socket \d received in time/ or
+             $c =~ m/^No request line on socket \d+ received in time/ or
              $c =~ m/^The client side of the connection on socket \d/) {
 
         # Stopped waiting for the request line. Timeout: 121.
@@ -1800,8 +1810,13 @@ sub handle_loglevel_connect($) {
     } elsif ($c =~ m/^Optimistically sending /) {
 
         # Optimistically sending 318 bytes of client headers intended for www.privoxy.org
+        # Optimistically sending 318 bytes of client headers intended for www.privoxy.org.
         $c =~ s@(?<=sending )(\d+)@$h{'Number'}$1$h{'Standard'}@;
-        $c = highlight_matched_host($c, '(?<=for )[^\s]+');
+        if ($c =~ /\.$/) {
+            $c = highlight_matched_host($c, '[^\s]+(?=\.)');
+        } else {
+            $c = highlight_matched_host($c, '(?<=for )[^\s]+');
+        }
 
     } elsif ($c =~ m/^Stopping to watch the client socket/) {
 
@@ -1902,6 +1917,70 @@ sub handle_loglevel_connect($) {
         # Client socket 7 is no longer usable. The server socket has been closed.
         $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
 
+    } elsif ($c =~ m/^Socket timeout \d+ reached/) {
+
+        # Socket timeout 3 reached: http://127.0.0.1:20000/no-filter/chunked-content/36
+        $c =~ s@(?<=timeout )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+        $c = highlight_matched_url($c, "(?<=reached: ).*")
+
+    } elsif ($c =~ m/^Prepared to read up to /) {
+
+        # Prepared to read up to 157 bytes of encrypted request body from the client.
+        $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^Forwarding \d+ bytes /) {
+
+        # Forwarding 157 bytes of encrypted request body.
+        $c =~ s@(?<=Forwarding )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^Buffering encrypted client body/) {
+
+        # Buffering encrypted client body. Prepared to read up to 2236 bytes.
+        $c =~ s@(?<=up to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^The last \d+ bytes of the encrypted request body have been read/) {
+
+        # The last 6945 bytes of the encrypted request body have been read.
+        $c =~ s@(?<=The last )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^Reducing the chunk offset from/) {
+
+        # Reducing the chunk offset from 1096654 to 32704 after discarding 1063950 bytes to make room in the buffer.
+        # Reducing the chunk offset from 16219 to 128 after flushing 16091 bytes.
+        $c =~ s@(?<=\d to )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+        $c =~ s@(?<=offset from )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+        $c =~ s@(?<=after discarding )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+        $c =~ s@(?<=after flushing )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^Client socket \d+ is no longer usable/) {
+
+        # Client socket 21 is no longer usable. The server socket has been closed.
+        $c =~ s@(?<=Client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^(Client|Server) successfully connected over/) {
+
+        # Server successfully connected over TLSv1.3 (TLS_AES_256_GCM_SHA384).
+        # Client successfully connected over TLSv1.3 (TLS_AES_128_GCM_SHA256).
+        $c =~ s@(?<=connected over )(TLSv\d\.\d)@$h{'tls-version'}$1$h{'Standard'}@;
+        $c =~ s@(?<=\()([^)]+)@$h{'cipher-suite'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^Couldn't deliver the error message for/) {
+
+        # Couldn't deliver the error message for https://m.media-amazon.com/[...] through client socket 18 using TLS/SSL
+        $c = highlight_matched_url($c, "(?<=error message for )[^ ]*");
+        $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^Keeping chunk offset at/) {
+
+        # Keeping chunk offset at 0 despite flushing 31 bytes.
+        $c =~ s@(?<=offset at )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+        $c =~ s@(?<=flushing )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^Not shutting down client connection on/) {
+
+        # Not shutting down client connection on socket 8. The socket is no longer alive.
+        $c =~ s@(?<=socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
     } elsif ($c =~ m/^Looks like we / or
              $c =~ m/^Unsetting keep-alive flag/ or
              $c =~ m/^No connections to wait/ or
@@ -2148,6 +2227,10 @@ sub handle_loglevel_error($) {
         $c =~ s@(?<=digested )(\d+)@$h{'Number'}$1$h{'Standard'}@;
         $c =~ s@(?<=of )(\d+)@$h{'Number'}$1$h{'Standard'}@;
 
+    } elsif ($c =~ m/^The socks connection timed out after/) {
+
+        # The socks connection timed out after 60 seconds.
+        $c =~ s@(?<=after )(\d+)@$h{'Number'}$1$h{'Standard'}@;
     }
 
     # XXX: There are probably more messages that deserve highlighting.
@@ -2155,6 +2238,41 @@ sub handle_loglevel_error($) {
     return $c;
 }
 
+sub handle_loglevel_received($) {
+
+    my $c = shift;
+
+    if ($c =~ m/^TLS from socket/) {
+        # TLS from socket 3: \x16\xda\xe2\xa2;\x0d\x0a
+
+        $c =~ s@(?<=TLS from socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^from socket/) {
+        # from socket 3: HEAD http://p.p/ HTTP/1.1\x0d\x0aHost: p.p\x0d\x0aUser-Agent: curl/7.85.0\x0d\x0aAccept: */*\x0d\x0aProxy-Connection: Keep-Alive\x0d\x0a\x0d\x0a
+
+        $c =~ s@(?<=from socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+    }
+
+    return $c;
+}
+
+sub handle_loglevel_writing($) {
+
+    my $c = shift;
+
+    if ($c =~ m/^to socket/) {
+        # to socket 11: HTTP/1.1 200 Connection established\x0d\x0a\x0d\x0a
+
+        $c =~ s@(?<=to socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+
+    } elsif ($c =~ m/^TLS on socket /) {
+        # TLS on socket 9: o~\xfcS[\xfa\x8f\xd6\x96\xe6_\xc7$\x1b[...]
+
+        $c =~ s@(?<=TLS on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@;
+    }
+
+    return $c;
+}
 
 sub handle_loglevel_ignore($) {
     return shift;
@@ -2206,17 +2324,25 @@ sub gather_loglevel_clf_stats($) {
 }
 
 sub gather_loglevel_request_stats($$) {
-    my $c = shift;
+    my $request_url = shift;
     my $thread = shift;
     our %stats;
+    our %cli_options;
 
     $stats{requests}++;
+    if ($cli_options{'passed-request-statistics-threshold'} != 0) {
+        # If the request get blocked we'll decrement
+        # in gather_loglevel_crunch_stats()
+        chomp $request_url;
+        $stats{'passed-request-url'}{$request_url}++;
+    }
 }
 
 sub gather_loglevel_crunch_stats($$) {
     my $c = shift;
     my $thread = shift;
     our %stats;
+    our %cli_options;
 
     $stats{crunches}++;
 
@@ -2236,6 +2362,15 @@ sub gather_loglevel_crunch_stats($$) {
         # Connection failure: http://127.0.0.1:8080/
         $stats{'connection-failure'}++;
     }
+    if ($cli_options{'passed-request-statistics-threshold'} != 0) {
+        $c =~ m/^[^:]+: (.*)/;
+        if ($stats{'passed-request-url'}{$1}) {
+            $stats{'passed-request-url'}{$1}-- ;
+            if ($stats{'passed-request-url'}{$1} == 0) {
+                delete($stats{'passed-request-url'}{$1});
+            }
+        }
+    }
 }
 
 
@@ -2477,6 +2612,19 @@ sub print_stats() {
         }
     }
 
+    if ($cli_options{'passed-request-statistics-threshold'} == 0) {
+        print "Passed request statistics are disabled. Increase --passed-request-statistics-threshold to enable them.\n";
+    } else {
+        print "Requested requests that were passed:\n";
+        foreach my $passed_url (sort {$stats{'passed-request-url'}{$b} <=> $stats{'passed-request-url'}{$a}}
+                                keys %{$stats{'passed-request-url'}}) {
+            if ($stats{'passed-request-url'}{$passed_url} < $cli_options{'passed-request-statistics-threshold'}) {
+                print "Skipped statistics for passed URLs below the treshold.\n";
+                last;
+            }
+            printf "%d : %s\n", $stats{'passed-request-url'}{$passed_url}, $passed_url;
+        }
+    }
     if ($cli_options{'host-statistics-threshold'} == 0) {
         print "Host statistics are disabled. Increase --host-statistics-threshold to enable them.\n";
     } else {
@@ -2583,8 +2731,8 @@ sub parse_loop() {
         'Force'             => \&handle_loglevel_force,
         'Error'             => \&handle_loglevel_error,
         'Fatal error'       => \&handle_loglevel_ignore,
-        'Writing'           => \&handle_loglevel_ignore,
-        'Received'          => \&handle_loglevel_ignore,
+        'Writing'           => \&handle_loglevel_writing,
+        'Received'          => \&handle_loglevel_received,
         'Tagging'           => \&handle_loglevel_tagging,
         'Actions'           => \&handle_loglevel_ignore,
         'Unknown log level' => \&handle_loglevel_ignore,
@@ -2710,7 +2858,7 @@ sub stats_loop() {
 
 }
 
-# Convert a timestamp like 18:07:28.733 into miliseconds
+# Convert a timestamp like 18:07:28.733 into milliseconds
 sub time_stamp_to_msecs($) {
     my $time_stamp = shift;
 
@@ -2729,24 +2877,27 @@ sub time_stamp_to_msecs($) {
 sub inactivity_detection_loop() {
 
     our %cli_options;
-    my ($time_stamp, $thread, $log_level, $content);
+    my ($date, $time_stamp, $thread, $log_level, $content);
     my ($msecs, $previous_msecs, $inactivity);
     my $inactivity_threshold = $cli_options{'inactivity-threshold'};
-    my $previous_message;
-    my $log_message_out_of_order = 0;
+    my $previous_date;
+    my $log_messages_out_of_order = 0;
 
     while (<>) {
-        (undef, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5);
+        ($date, $time_stamp, $thread, $log_level, $content) = split(/ /, $_, 5);
 
         next if (not defined($log_level));
         next if ($time_stamp eq "-");
         $msecs = time_stamp_to_msecs($time_stamp);
         unless (defined $msecs) {
-            print "Failed to convert $time_stamp into miliseconds\n";
+            print "Failed to convert $time_stamp into milliseconds\n";
+            print "$_";
             next;
         }
         unless (defined $previous_msecs) {
             $previous_msecs = $msecs;
+            $previous_date = $date;
+            print "$_";
             next;
         }
         $inactivity = $msecs - $previous_msecs;
@@ -2755,18 +2906,23 @@ sub inactivity_detection_loop() {
             # a Privoxy thread may be moved off schedule between
             # getting the timestamp for the log message and actually
             # writing it.
-            $log_message_out_of_order++;
+            $log_messages_out_of_order++;
         }
         if ($inactivity > $inactivity_threshold) {
-            #print "$previous_message";
-            print "Detected inactivity: $inactivity msecs\n";
+            if ($previous_date eq $date) {
+                print "Detected inactivity: $inactivity msecs\n";
+            } else {
+                # While we could include the date in the timestamp
+                # we currently don't.
+                print "Detected date change. Timestamp difference ignored.\n";
+            }
         }
         print "$_";
         $previous_msecs = $msecs;
-        $previous_message = $_;
+        $previous_date = $date;
     }
-    if ($log_message_out_of_order) {
-        print "At least $log_message_out_of_order messages were written out of the chronological order.\n";
+    if ($log_messages_out_of_order) {
+        print "At least $log_messages_out_of_order messages were written out of the chronological order.\n";
         print "This can result in false positives. Consider sorting the log first.\n";
     }
 }
@@ -2822,6 +2978,7 @@ sub get_cli_options() {
         'url-statistics-threshold' => CLI_OPTION_URL_STATISTICS_THRESHOLD,
         'unbreak-lines-only'       => CLI_OPTION_UNBREAK_LINES_ONLY,
         'host-statistics-threshold'=> CLI_OPTION_HOST_STATISTICS_THRESHOLD,
+        'passed-request-statistics-threshold' => CLI_OPTION_PASSED_REQUEST_STATISTICS_THRESHOLD,
         'show-complete-request-distribution' => CLI_OPTION_SHOW_COMPLETE_REQUEST_DISTRIBUTION,
     );
 
@@ -2841,6 +2998,7 @@ sub get_cli_options() {
         'unbreak-lines-only'       => \$cli_options{'unbreak-lines-only'},
         'url-statistics-threshold=i'=> \$cli_options{'url-statistics-threshold'},
         'host-statistics-threshold=i'=> \$cli_options{'host-statistics-threshold'},
+        'passed-request-statistics-threshold=i' => \$cli_options{'passed-request-statistics-threshold'},
         'show-complete-request-distribution' => \$cli_options{'show-complete-request-distribution'},
         'version'                  => sub { VersionMessage && exit(0) },
         'help'                     => \&help,
@@ -2875,6 +3033,7 @@ Options and their default values if they have any:
     [--statistics]
     [--unbreak-lines-only]
     [--url-statistics-threshold $cli_options{'url-statistics-threshold'}]
+    [--passed-request-statistics-threshold $cli_options{'passed-request-statistics-threshold'}]
     [--title $cli_options{'title'}]
     [--version]
 see "perldoc $0" for more information
@@ -2942,8 +3101,9 @@ will hide the "filter foo caused 0 hits" message.
 
 =head1 OPTIONS
 
-[B<--detect-inactivity>] Instead of syntax highlighting, detect inactivities
-of more than B<inactivity-threshold> miliseconds. Mainly useful for debugging.
+[B<--detect-inactivity>] Instead of syntax highlighting, detect periods
+of log inactivity of more than the amount of milliseconds specified with
+the B<--inactivity-threshold> option. Mainly useful for debugging.
 
 [B<--host-statistics-threshold>] Only show the request count for a host
 if it's above or equal to the given threshold. If the threshold is 0, host
@@ -2954,8 +3114,9 @@ omitted, ANSI escape sequences are used unless B<--no-syntax-highlighting> is ac
 This option is only intended to make embedding log excerpts in web pages easier.
 It does not escape any input!
 
-[B<--inactivity-threshold>] Specifies the number of miliseconds between log
-messages to consider inactivity when running in [B<--detect-inactivity>] mode.
+[B<--inactivity-threshold msecs>] Specifies the number of milliseconds between
+log messages to consider inactivity when running in [B<--detect-inactivity>]
+mode.
 
 [B<--keep-date>] Don't remove the date when printing highlighted log messages.
 Useful when parsing multiple log files at once.
@@ -2967,6 +3128,10 @@ the filtered output is piped into less in which case the ANSI control
 codes don't work, or if the terminal itself doesn't support the control
 codes.
 
+[B<--passed-request-statistics-threshold>] Only show the request count for
+a passed requests if it's above or equal to the given threshold. If the
+threshold is 0, passed request statistics are disabled.
+
 [B<--shorten-thread-ids>] Shorten the thread ids to a three-digit decimal number.
 Note that the mapping from thread ids to shortened ids is created at run-time
 and thus varies with the input.