X-Git-Url: http://www.privoxy.org/gitweb/?p=privoxy.git;a=blobdiff_plain;f=tools%2Fprivoxy-log-parser.pl;h=c0b7790557d07ca047b7a2dbd42357796fe53f5d;hp=d9ffb580556ae123fe1966c571ff763a301032b2;hb=a5b4d31ab5ad2ed24cdb53ffa92679411b4176b0;hpb=6e7644a6d52dc8a9b86f4bd398366f990baa3753 diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl index d9ffb580..c0b77905 100755 --- a/tools/privoxy-log-parser.pl +++ b/tools/privoxy-log-parser.pl @@ -23,7 +23,7 @@ # hash key as input. # - Add --compress and --decompress options. # -# Copyright (c) 2007-2020 Fabian Keil +# Copyright (c) 2007-2021 Fabian Keil # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -179,6 +179,7 @@ sub prepare_our_stuff() { 'pcrs-delimiter' => 'light_red', 'ignored' => 'light_red', 'action-bits-update' => 'light_red', + 'http-downgrade' => 'light_red', 'configuration-line' => 'red', 'content-type' => 'yellow', 'HOST' => HEADER_DEFAULT_COLOUR, @@ -1228,6 +1229,26 @@ sub handle_loglevel_re_filter($) { return $content; } +sub handle_loglevel_tagging($) { + + my $c = shift; + + if ($c =~ /^Tagger \'([^\']*)\' added tag \'([^\']*)\'/ or + $c =~ m/^Adding tag \'([^\']*)\' created by header tagger \'([^\']*)\'/) { + + # Adding tag 'GET request' created by header tagger 'method-man' (XXX: no longer used) + # Tagger 'revalidation' added tag 'REVALIDATION-REQUEST'. No action bit update necessary. + # Tagger 'revalidation' added tag 'REVALIDATION-REQUEST'. Action bits updated accordingly. + + # XXX: Save tag and tagger + + $c =~ s@(?<=^Tagger \')([^\']*)@$h{'tagger'}$1$h{'Standard'}@; + $c =~ s@(?<=added tag \')([^\']*)@$h{'tag'}$1$h{'Standard'}@; + $c =~ s@(?<=Action bits )(updated)@$h{'action-bits-update'}$1$h{'Standard'}@; + } + return $c; +} + sub handle_loglevel_redirect($) { my $c = shift; @@ -1285,6 +1306,17 @@ sub handle_loglevel_redirect($) { # Percent-encoding redirect URL: http://www.example.org/\x02 $c = highlight_matched_url($c, '(?<=redirect URL: ).*'); + } elsif ($c =~ m/^Rewrite detected:/) { + + # Rewrite detected: GET http://10.0.0.2:88/blah.txt HTTP/1.1 + # Rewrite detected: GET https://www.electrobsd.org/CommonJS/ajax/libs/jquery/3.4.1/jquery.min.js HTTP/1.1 + $c = highlight_matched_request_line($c, '(?<=^Rewrite detected: ).*'); + + } elsif ($c =~ m/^Rewritten request line results in downgrade to http/) { + + # Rewritten request line results in downgrade to http + $c =~ s@(downgrade)@$h{'http-downgrade'}$1$h{'Standard'}@; + } else { found_unknown_content($c); @@ -1390,6 +1422,11 @@ sub handle_loglevel_crunch($) { # [...]&filter... [too long, truncated] $content = highlight_matched_pattern($content, 'request_', '^.*(?=\.\.\. \[too long, truncated\]$)'); + } elsif ($content =~ m/Certificate error:/) { + + # Certificate error: ASN date error, current date after: https://expired.badssl.com/ + $content = highlight_matched_pattern($content, 'request_', 'https://.*'); + } else { # Blocked: http://ads.example.org/ @@ -1792,6 +1829,16 @@ sub handle_loglevel_connect($) { $c =~ s@(?<=client socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; $c =~ s@(?<=server socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^The last \d+ bytes of the request body have been read/) { + + # The last 12078 bytes of the request body have been read + $c =~ s@(?<=The last )(\d+)@$h{'Number'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Flushed \d+ bytes of request body/) { + + # Flushed 3153 bytes of request body + $c =~ s@(?<=Flushed )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } elsif ($c =~ m/^Looks like we / or $c =~ m/^Unsetting keep-alive flag/ or $c =~ m/^No connections to wait/ or @@ -2025,6 +2072,12 @@ sub handle_loglevel_error($) { # Didn't receive data in time: a.fsdn.com:443 $c =~ s@(?<=in time: )(.*)@$h{'destination'}$1$h{'Standard'}@; + + } elsif ($c =~ m/^Sending data on socket \d+ over TLS/) { + + # Sending data on socket 33 over TLS/SSL failed: no TLS/SSL errors detected + $c =~ s@(?<=on socket )(\d+)@$h{'Number'}$1$h{'Standard'}@; + } # XXX: There are probably more messages that deserve highlighting. @@ -2058,7 +2111,12 @@ sub gather_loglevel_clf_stats($) { unless (defined $method) { # +0200] "Invalid request" 400 0 return if ($content =~ m/^[+-]\d{4}\] "Invalid request"/); - print("Failed to parse: $content\n"); + # +0100] "GET https://securepubads.g.doubleclick.net/gampad/ads?gd[...]... [too long, truncated] + if ($content =~ m/\[too long, truncated\]$/) { + print("Skipped LOG_LEVEL_CLF message that got truncated by Privoxy. Statistics will be inprecise.\n"); + } else { + print("Failed to parse: $content\n"); + } return; } $stats{'method'}{$method}++; @@ -2455,7 +2513,7 @@ sub parse_loop() { 'Fatal error' => \&handle_loglevel_ignore, 'Writing' => \&handle_loglevel_ignore, 'Received' => \&handle_loglevel_ignore, - 'Tagging' => \&handle_loglevel_ignore, + 'Tagging' => \&handle_loglevel_tagging, 'Actions' => \&handle_loglevel_ignore, 'Unknown log level' => \&handle_loglevel_ignore, );