From 974846c1439708803fe1aa35b18d3efad5b81a3f Mon Sep 17 00:00:00 2001
From: Fabian Keil <fk@fabiankeil.de>
Date: Tue, 9 Jun 2020 09:26:38 +0200
Subject: [PATCH] privoxy-log-parser.pl: Make gather_loglevel_clf_stats() more
 tolerant

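Accept request methods that contain non-word characters, such as
"VERSION-CONTROL", and no longer report '"Invalid request" 400 0'
lines as parse failures.

While at it, count all CLF messages as requests,
even if the request is invalid.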
---
 tools/privoxy-log-parser.pl | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/privoxy-log-parser.pl b/tools/privoxy-log-parser.pl
index bc1ab04b..fb220980 100755
--- a/tools/privoxy-log-parser.pl
+++ b/tools/privoxy-log-parser.pl
@@ -2017,14 +2017,19 @@ sub gather_loglevel_clf_stats($) {
     our %cli_options;
 
     # +0200] "GET https://www.youtube.com/watch?v=JmcA9LIIXWw HTTP/1.1" 200 68004
-    $content =~ m/^[+-]\d{4}\] "(\w+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/;
+    # +0200] "VERSION-CONTROL http://p.p/ HTTP/1.1" 200 2787
+    $content =~ m/^[+-]\d{4}\] "([^ ]+) (.+) (HTTP\/\d\.\d)" (\d+) (\d+)/;
     $method       = $1;
     $resource     = $2;
     $http_version = $3;
     $status_code  = $4;
     $size         = $5;
 
+    $stats{requests_clf}++;
+
     unless (defined $method) {
+        # +0200] "Invalid request" 400 0
+        return if ($content =~ m/^[+-]\d{4}\] "Invalid request"/);
         print("Failed to parse: $content\n");
         return;
     }
@@ -2040,7 +2045,6 @@ sub gather_loglevel_clf_stats($) {
     }
     $stats{'content-size-total'} += $size;
     $stats{'status-code'}{$status_code}++;
-    $stats{requests_clf}++;
 }
 
 sub gather_loglevel_request_stats($$) {
-- 
2.49.0