#
# File : $Source: /cvsroot/ijbswa/current/default.filter,v $
#
-# $Id: default.filter,v 1.34 2006/12/12 17:32:23 fabiankeil Exp $
+# $Id: default.filter,v 1.43 2007/06/01 14:17:04 fabiankeil Exp $
#
# Purpose : Rules to process the content of web pages
#
-# Copyright : Written by and Copyright (C) 2001 - 2006 the
+# Copyright : Written by and Copyright (C) 2001 - 2007 the
# Privoxy team. http://www.privoxy.org/
#
# We value your feedback. However, to provide you with the best support,
#
# Syntax:
#
-# Filters start with a line "FILTER: name description". They are then referrable
-# from the actionsfile with +filter{name}
+# Generally filters start with a line like "FILTER: name description".
+# They are then referrable from the actionsfile with +filter{name}
+#
+# FILTER marks a filter as a content filter; the other filter
+# types are CLIENT-HEADER-FILTER, CLIENT-HEADER-TAGGER,
+# SERVER-HEADER-FILTER and SERVER-HEADER-TAGGER.
#
# Inside the filters, write one Perl-Style substitution (job) per line.
# Jobs that precede the first FILTER: line are ignored.
# For Details see the pcrs manpage contained in this distribution.
# (and the perlre, perlop and pcre manpages)
#
-# Note that you are free to choose the delimter as you see fit.
+# Note that you are free to choose the delimiter as you see fit.
#
# Note2: In addition to the Perl options gimsx, the following nonstandard
# options are supported:
#
# 'U' turns the default to ungreedy matching. Add ? to quantifiers to
# switch back to greedy.
+#
# 'T' (trivial) prevents parsing for backreferences in the substitute.
# Use if you want to include text like '$&' in your substitute without
# quoting.
+#
+# 'D' (Dynamic) allows the use of variables. Supported variables are:
+# $host, $origin (the IP address the request came from), $path and $url.
+#
+# Note that '$' is a bad choice as delimiter for dynamic filters as you
+# might end up with unintended variables if you use a variable name
+# directly after the delimiter. Variables will be resolved without
+# escaping anything, therefore you also have to be careful not to choose
+# delimiters that appear in the replacement text. For example '<' should
+# be safe, while '?' will sooner or later cause conflicts with $url.
#
#################################################################################
s|(?:\w+\.)+referrer|"Not Your Business!"|gisU
# The status bar is for displaying link targets, not pointless blahblah
-#
-s/(\W\s*)((this|window)\.(default)?status)\s*=\s*((['"]).*?(?<!\\)\6)/$1if(typeof(this.href) != 'undefined') $2 = $5 + ' URL: ' + this.href;else return false/ig
+#
+s@([\W]\s*)((?:this|window)\.(?:default)?status)\s*=\s*((['"]).*?\4)@$1$2 =\
+ (typeof(this.href) != 'undefined')?($3 + ' URL: ' + this.href):($2)@ig
+
s/(?:(?:this|window)\.(?:default)?status)\s*=\s*\w*\s*;//ig
# Kill OnUnload popups. Yummy.
FILTER: unsolicited-popups Disable only unsolicited pop-up windows
# Job 1: directly after the first opening <head ...> tag, inject a script that
# defines PrivoxyWindowOpen() as a stub which only returns null. The [^'"]
# guard and the lookahead presumably keep the job away from tags that sit
# inside quoted JavaScript strings.
s+([^'"]\s*<head.*>)(?=\s*[^'"])+$1<script>function PrivoxyWindowOpen(){return(null);}</script>+isU
# Job 2: rewrite every open( call, optionally qualified with window, this or
# parent, into a call of PrivoxyWindowOpen(.
-s+([^\w\s.]\s*)((window|this|parent)\.)?open\s*\(+$1PrivoxyWindowOpen(+ig
+s@([^\w\s.]\s*)((?:map)?(window|this|parent)\.?)?open\s*\(@$1PrivoxyWindowOpen(@ig
# Job 3: directly after </html>, inject a second script that redefines
# PrivoxyWindowOpen(a, b, c) to forward its arguments to the real window.open().
s+([^'"]\s*</html>)(?!\s*(\\n|'|"))+$1<script>function PrivoxyWindowOpen(a, b, c){return(window.open(a, b, c));}</script>+iU
#################################################################################
FILTER: all-popups Kill all popups in JavaScript and HTML
# Two jobs:
#  - JavaScript: open( calls qualified with window, this or parent are renamed
#    to concat(, so the call no longer opens a window.
#  - HTML: target attributes with the value blank or new (optionally quoted
#    and optionally prefixed with "_") are removed. The previous variant, which
#    substituted a "notarget" attribute instead, is kept commented out.
-s/((\W\s*)(window|this|parent)\.)open\s*\\?\(/$1concat(/ig # JavaScript
-s/\starget\s*=\s*(['"]?)_?(blank|new)\1?/ notarget/ig # HTML
-
+s@((\W\s*)(?:map)?(window|this|parent)\.?)open\s*\\?\(@$1concat(@ig # JavaScript
+#s/\starget\s*=\s*(['"]?)_?(blank|new)\1?/ notarget/ig # HTML
+s/\starget\s*=\s*(['"]?)_?(blank|new)\1?/ /ig # (X)HTML
##################################################################################
#
FILTER: banners-by-size Kill banners by size
# Every job below matches an <img> tag whose width and height attributes
# (in that order) carry one of the listed banner dimensions, and rewrites the
# tag so that it loads Privoxy's own replacement image instead.
#   $2      original src value; only captured when src is the first attribute
#           of the tag. It is reused in the replacement's title.
#   $3, $5  the matched width=... and height=... attributes, which are kept.
# The (?=/?>) lookahead ends the match in front of the closing ">" or "/>",
# so the tag keeps whichever terminator it originally had.
# 88*31
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)88\4)[^>]*?(height=(['"]?)31\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)88\4)[^>]*?(height=(['"]?)31\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 120*60, 120*90, 120*240, 120*600
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)120\4)[^>]*?(height=(['"]?)(?:600?|90|240)\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)120\4)[^>]*?(height=(['"]?)(?:600?|90|240)\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 125*125
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)125\4)[^>]*?(height=(['"]?)125\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)125\4)[^>]*?(height=(['"]?)125\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 160*600
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)160\4)[^>]*?(height=(['"]?)600\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)160\4)[^>]*?(height=(['"]?)600\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 180*150
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)180\4)[^>]*?(height=(['"]?)150\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)180\4)[^>]*?(height=(['"]?)150\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 234*60, 468*60 (Most Banners!)
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)(?:234|468)\4)[^>]*?(height=(['"]?)60\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)(?:234|468)\4)[^>]*?(height=(['"]?)60\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 240*400
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)240\4)[^>]*?(height=(['"]?)400\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)240\4)[^>]*?(height=(['"]?)400\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 250*250, 300*250
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)(?:250|300)\4)[^>]*?(height=(['"]?)250\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)(?:250|300)\4)[^>]*?(height=(['"]?)250\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# 336*280
-s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)336\4)[^>]*?(height=(['"]?)280\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)336\4)[^>]*?(height=(['"]?)280\6)[^>]*?(?=/?>)@\
+ <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
# Note: 200*50 was also proposed, but it probably causes too much collateral damage:
#
-#s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)200\4)[^>]*?(height=(['"]?)50\6)[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed-$2-by-size$1 $3 $5>@sig
+#s@<img\s+(?:src\s*=\s*(['"]?)([^>\\\1\s]+)\1)?[^>]*?(width=(['"]?)200\4)[^>]*?(height=(['"]?)50\6)[^>]*?(?=/?>)@\
+# <img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed-$2-by-size" $3 $5@sig
#################################################################################
#
s@<a\s+href\s*=\s*(['"]?)([^>\1\s]*?(?:\
adclick # See www.dn.se \
+| advert # see dict.leo.org \
| atwola\.com/(?:link|redir) # see www.cnn.com \
| /jump/ # redirs for doublecklick.net ads \
| tracker | counter # common \
| adlog\.pl # see sf.net \
-)[^>\1\s]*)\1[^>]*>\s*<img\s+(?:src\s*=\s*(['"]?)([^>\\\3\s]+)\3)?[^>]*((?:width|height)\s*=\s*(['"]?)\d+?\6)[^>]*((?:width|height)\s*=\s*(['"]?)\d+?\8)[^>]*>\
-@<img $5 $7 src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed $4 by link to $2$1>@sigx
+)[^>\1\s]*)\1[^>]*>\s*<img\s+(?:src\s*=\s*(['"]?)([^>\\\3\s]+)\3)?[^>]*((?:width|height)\s*=\s*(['"]?)\d+?\6)[^>]*((?:width|height)\s*=\s*(['"]?)\d+?\8)[^>]*?(?=/?>)\
+@<img $5 $7 src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed $4 by link to $2"@sigx
# Rare case w/o explicit dimensions:
#
-s@<a\s+href\s*=\s*(['"]?)([^>\1\s]*?(?:adclick|atwola\.com/(?:link|redir)|doubleclick\.net/jump/|tracker|counter|adlog\.pl)[^>\1\s]*)\1[^>]*>\s*<img\s+(?:src\s*=\s*(['"]?)([^>\\\3\s]+)\3)?[^>]*>@<img src=$1http://config.privoxy.org/send-banner?type=auto$1 border="0" title=$1Killed $4 by link to $2$1>@sig
+s@<a\s+href\s*=\s*(['"]?)([^>\1\s]*?(?:ad(?:click|vert)|atwola\.com/(?:link|redir)|doubleclick\.net/jump/|tracker|counter|adlog\.pl)[^>\1\s]*)\1[^>]*>\s*<img\s+(?:src\s*=\s*(['"]?)([^>\\\3\s]+)\3)?[^>]*?(?=/?>)@<img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed $4 by link to $2"@sig
################################################################################
#################################################################################
#
-# crude-parental: Crude parental filtering? (Use along with a suitable blocklist).
-# Shows how to deny access to whole page based on a keyword.
+# crude-parental: Crude parental filtering. Use with a suitable blocklist.
+# Pages are "blocked" based on keyword matching.
#
#################################################################################
-FILTER: crude-parental Crude parental filtering (demo only)
+FILTER: crude-parental Crude parental filtering
# (Note: Middlesex, Sussex and Essex are counties in the UK, not rude words)
# (Note #2: Is 'sex' a rude word?!)
# Every job in this filter has the shape ^.*<keywords>.*$ with the s and i
# flags and no g or m flag: as soon as the keywords occur anywhere, the entire
# text the job is applied to is replaced by the block message.
s%^.*(?<!middle)(?<!sus)(?<!es)sex.*$%<html><head><title>Blocked</title></head><body><h3>Blocked due to possible adult content. Please see <a href="http://dmoz.org/Kids_and_Teens/">this site</a>.</h3></body></html>%is
+
s+^.*warez.*$+<html><head><title>No Warez</title></head><body><h3>You're not searching for illegal stuff, are you?</h3></body></html>+is
+# Remove by description
# NOTE(review): the first (verb) group is optional and preceded by ^.*, so it
# does not restrict what matches; the second and third group alone decide.
# "tounge" looks like a misspelling of "tongue".
+s/^.*\
+(suck |lick |tounge |rub |fuck |fingering |finger |chicks? )?\
+(her |your |my |hard |with |big |wet |tight |pink |hot |moist |young |teen )+\
+(dicks?|penis|cocks?|balls?|tits?|pussy|cunt|clit|ass|mouth).*$\
+/This page has been blocked by Privoxy's crude-parental content filter\
+/is
+
+#Remove by link text
# NOTE(review): as above, the first group is optional; only the space that
# follows it is mandatory. "extreem" looks like a misspelling of "extreme".
+s/^.*\
+(download|broadband|view|watch|free|get|extreem)? \
+(sex|xxx|porn|cumshot|fuck(ing|s)?|anal|ass|asian|adult|Amateur|org(y|ies)|close ups?|hand ? job|nail(ed)?)+ \
+(movies?|pics?|videos?|dvds?|dvd's|links?).*$\
+/This page has been blocked by Privoxy's crude-parental content filter\
+/is
+
+#Remove by age disclaimer
+s/^.*\
+(models?|chicks?|girls?|women|persons) \
+(who|are|were)+ (over|at least) (16|18|21) years (old|of age).*$\
+/This page has been blocked by Privoxy's crude-parental content filter\
+/is
+
+#Remove by regulations
# NOTE(review): the dots in "U.?S.?C.?" are not escaped, so each one matches
# any single character, not only a literal ".".
+s/^.*(Section 2257|18 U.?S.?C.? 2257).*$\
+/This page has been blocked by Privoxy's crude-parental content filter\
+/is
+
#################################################################################
#
s@</head>@\n<style type="text/css">\n\
/* Style sheet inserted by Privoxy's yahoo filter. */\n\
\#symadbn, \#ymadbn, .yschbox, \#yschsec, .yschhd, \#yschanswr, .yschftad,\
- .yschspn, .yschspns, \#ygrp-sponsored-links {display: none !important;}\n\
+ .yschspn, .yschspns, \#ygrp-sponsored-links, \#ks-ypn-ads {display: none !important;}\n\
\#yschpri, \#yschweb {width: 100% !important; max-width: 100% !important;}\n\
\#yschqcon, \#yschtg {width: auto !important; /* No useless horizontal scrollbar please */}\n\
</style>\n$0@
#################################################################################
#
-# x-httpd-php-to-html: Header filter to change the Content-Type from
+# x-httpd-php-to-html: Changes the Content-Type header from
# x-httpd-php to html. "Content-Type: x-httpd-php"
# is set by clueless PHP users and causes many
# browsers to open a download menu instead of
# rendering the page.
#
#################################################################################
-FILTER: x-httpd-php-to-html Header filter to change the Content-Type from x-httpd-php to html.
-s@^(Content-Type:) application/x-httpd-php@$1 text/html@
+SERVER-HEADER-FILTER: x-httpd-php-to-html Changes the Content-Type header from x-httpd-php to html.
# $1 keeps the header name as it was sent. \s* accepts any amount of whitespace
# after the colon and the i flag makes the match case-insensitive. The pattern
# is not anchored at the end, so whatever follows the media type is preserved.
+s@^(Content-Type:)\s*application/x-httpd-php@$1 text/html@i
#################################################################################
#
-# html-to-xml: Header filter to change the Content-Type from html to xml.
+# html-to-xml: Changes the Content-Type header from html to xml.
#
#################################################################################
-FILTER: html-to-xml Header filter to change the Content-Type from html to xml.
-s@^(Content-Type:) text/html(;.*)?$@$1 application/xhtml+xml$2@
+SERVER-HEADER-FILTER: html-to-xml Changes the Content-Type header from html to xml.
# $1 is the header name, $2 the optional ";..." parameter part, which is
# appended unchanged to the new media type. Case-insensitive; any amount of
# whitespace after the colon is accepted.
+s@^(Content-Type:)\s*text/html(;.*)?$@$1 application/xhtml+xml$2@i
#################################################################################
#
-# xml-to-html: Header filter to change the Content-Type from xml to html.
+# xml-to-html: Changes the Content-Type header from xml to html.
#
#################################################################################
-FILTER: xml-to-html Header filter to change the Content-Type from xml to html.
-s@^(Content-Type:) (?:application|text)/(?:xhtml\+)?xml(;.*)?$@$1 text/html$2@
+SERVER-HEADER-FILTER: xml-to-html Changes the Content-Type header from xml to html.
# Matches application/xml, text/xml, application/xhtml+xml and text/xhtml+xml.
# $1 is the header name, $2 the optional ";..." parameter part, which is
# appended unchanged to text/html.
+s@^(Content-Type:)\s*(?:application|text)/(?:xhtml\+)?xml(;.*)?$@$1 text/html$2@i
#################################################################################
#
-# hide-tor-exit-notation: Header filter to remove the Tor exit node notation
-# in Host and Referer headers.
+# hide-tor-exit-notation: Remove the Tor exit node notation in Host and Referer headers.
#
# Note: If Privoxy and Tor are chained and Privoxy is configured to
# use socks4a, one can use http://www.example.org.foobar.exit/
# coming from.
#
#################################################################################
-FILTER: hide-tor-exit-notation Header filter to remove the Tor exit node notation in Host and Referer headers.
+CLIENT-HEADER-FILTER: hide-tor-exit-notation Removes the Tor exit node notation in Host and Referer headers.
# Applies to Referer and Host headers only. $1 captures the header name, an
# optional http:// or https:// scheme and the host up to the part to drop;
# the ".<label>.exit" that follows (before the first "/") is removed.
s@^((?:Referer|Host):\s*(?:https?://)?[^/]*)\.[^\./]*?\.exit@$1@i
+#################################################################################
+#
+# less-download-windows: Prevents annoying download windows for content types
+# the browser can handle itself.
+#
+#################################################################################
+SERVER-HEADER-FILTER: less-download-windows Prevent annoying download windows for content types the browser can handle itself
# Job 1: delete the whole Content-Disposition header when its filename ends in
# one of the listed extensions. \1 re-matches the optional opening quote.
# NOTE(review): for unquoted filenames \1 is empty, so the trailing .* also
# lets longer extensions that merely start with a listed one match (for
# example ".csv" via "c") -- confirm whether that is intended.
+s@^Content-Disposition:.*filename=(["']?).*\.(png|gif|jpe?g|diff?|d?patch|c|h|pl|shar)\1.*$@@i
# Job 2: rewrite the listed message/*, text/x-* and application/x-sh media
# types to text/plain; anything after the media type (parameters) is kept.
# The (?=[\s;]|$) lookahead requires "application/x-sh" to be the complete
# media type. Without it, longer types that only start with that string, such
# as application/x-shockwave-flash, were mangled into "text/plainockwave-flash".
+s@^(Content-Type:)\s*(?:message/(?:news|rfc822)|text/x-.*|application/x-sh(?=[\s;]|$))\s*@$1 text/plain@i
+
+#################################################################################
+#
+# image-requests: Tags detected image requests as "IMAGE-REQUEST". Whether
+# or not the detection actually works depends on the browser.
+#
+#################################################################################
+CLIENT-HEADER-TAGGER: image-requests Tags detected image requests as "IMAGE-REQUEST".
# Turns an Accept header whose first listed media type is image/* into the tag
# text IMAGE-REQUEST. The pattern is anchored with ^ like the other header
# jobs in this file, so only a header that is actually named "Accept" matches
# and the resulting tag never carries a leftover prefix of the header name.
+s@^Accept:\s*image/.*@IMAGE-REQUEST@i
+
+#################################################################################
+#
+# css-requests: Tags detected CSS requests as "CSS-REQUEST". Whether
+# or not the detection actually works depends on the browser.
+#
+#################################################################################
+CLIENT-HEADER-TAGGER: css-requests Tags detected CSS requests as "CSS-REQUEST".
# The tagger is named css-requests, matching its section heading and the
# revision log entry that turned "text-requests" back into "css-requests".
# Turns an Accept header whose first listed media type is text/css into the
# tag text CSS-REQUEST. Anchored with ^ so only a header that is actually
# named "Accept" matches.
+s@^Accept:\s*text/css.*@CSS-REQUEST@i
##############################################################################
#
# Revisions :
# $Log: default.filter,v $
+# Revision 1.43 2007/06/01 14:17:04 fabiankeil
+# Mention possible delimiter conflicts with variables in dynamic pcrs commands.
+#
+# Revision 1.42 2007/05/17 15:55:36 fabiankeil
+# Undo an improperly tested last-minute change
+# and turn "text-requests" back into "css-requests".
+#
+# Revision 1.41 2007/05/17 15:45:41 fabiankeil
+# - Mention new filter types and the 'D' option.
+# - Header filters are now case-insensitive and accept a
+# varying amount of whitespace after the colon.
+# - Add another selector for yahoo ads.
+# - New server-header filter: less-download-windows
+# - New client-header taggers: text-requests and image-requests.
+#
+# Revision 1.40 2007/03/20 15:40:00 fabiankeil
+# Adjust to new world order with dedicated header-filter actions.
+#
+# Revision 1.39 2007/02/21 14:10:23 fabiankeil
+# - Fix a js-annoyances pcrs command that broke
+# evaluated code. (BR #1124071, thanks to Bor Gergely)
+# - Have unsolicited-popups and all-popups catch the
+# wheather.com popup reported in in AF #1640173.
+#
+# Revision 1.38 2007/02/19 11:22:48 hal9
+# Adding back the orginal filter content to offset problems found by Fabian.
+#
+# Revision 1.37 2007/02/17 13:29:44 hal9
+# Updates to the crude parental filter per Feature Requests item #1648657.
+#
+# Revision 1.36 2007/02/05 16:47:31 fabiankeil
+# - Let banners-by-link look for "advert".
+# - Fix XML systax problems with banners-by-link
+# and banners-by-size (AF#1651570).
+#
+# Revision 1.35 2006/12/21 12:28:12 fabiankeil
+# Escaping special characters in filter descriptions is no
+# longer necessary, it's done by Privoxy now.
+#
# Revision 1.34 2006/12/12 17:32:23 fabiankeil
# Added id mbEnd to google filter, it's now and then
# used for the sponsored links.