#
# File : $Source: /cvsroot/ijbswa/current/default.filter,v $
#
-# $Id: default.filter,v 1.40 2007/03/20 15:40:00 fabiankeil Exp $
+# $Id: default.filter,v 1.47 2007/10/06 15:45:25 fabiankeil Exp $
#
# Purpose : Rules to process the content of web pages
#
#
# 'U' turns the default to ungreedy matching. Add ? to quantifiers to
# switch back to greedy.
+#
# 'T' (trivial) prevents parsing for backreferences in the substitute.
# Use if you want to include text like '$&' in your substitute without
# quoting.
+#
# 'D' (Dynamic) allows the use of variables. Supported variables are:
# $host, $origin (the IP address the request came from), $path and $url.
+#
# Note that '$' is a bad choice as delimiter for dynamic filters as you
# might end up with unintended variables if you use a variable name
-# directly after the delimiter.
+# directly after the delimiter. Variables will be resolved without
+# escaping anything, therefore you also have to be careful not to choose
+# delimiters that appear in the replacement text. For example '<' should
+# be safe, while '?' will sooner or later cause conflicts with $url.
#
#################################################################################
FILTER: all-popups Kill all popups in JavaScript and HTML
s@((\W\s*)(?:map)?(window|this|parent)\.?)open\s*\\?\(@$1concat(@ig # JavaScript
-s/\starget\s*=\s*(['"]?)_?(blank|new)\1?/ notarget/ig # HTML
-
+#s/\starget\s*=\s*(['"]?)_?(blank|new)\1?/ notarget/ig # HTML
+s/\starget\s*=\s*(['"]?)_?(blank|new)\1?/ /ig # (X)HTML
##################################################################################
#
| advert # see dict.leo.org \
| atwola\.com/(?:link|redir) # see www.cnn.com \
| /jump/ # redirs for doubleclick.net ads \
-| tracker | counter # common \
+| counter # common \
+| (?<!&type=)tracker # (&type=tracker is used in sf's project statistics) \
| adlog\.pl # see sf.net \
)[^>\1\s]*)\1[^>]*>\s*<img\s+(?:src\s*=\s*(['"]?)([^>\\\3\s]+)\3)?[^>]*((?:width|height)\s*=\s*(['"]?)\d+?\6)[^>]*((?:width|height)\s*=\s*(['"]?)\d+?\8)[^>]*?(?=/?>)\
@<img $5 $7 src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed $4 by link to $2"@sigx
# Rare case w/o explicit dimensions:
#
-s@<a\s+href\s*=\s*(['"]?)([^>\1\s]*?(?:ad(?:click|vert)|atwola\.com/(?:link|redir)|doubleclick\.net/jump/|tracker|counter|adlog\.pl)[^>\1\s]*)\1[^>]*>\s*<img\s+(?:src\s*=\s*(['"]?)([^>\\\3\s]+)\3)?[^>]*?(?=/?>)@<img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed $4 by link to $2"@sig
+s@<a\s+href\s*=\s*(['"]?)([^>\1\s]*?(?:ad(?:click|vert)|atwola\.com/(?:link|redir)|doubleclick\.net/jump/|(?<!&type=)tracker|counter|adlog\.pl)[^>\1\s]*)\1[^>]*>\s*<img\s+(?:src\s*=\s*(['"]?)([^>\\\3\s]+)\3)?[^>]*?(?=/?>)@<img src="http://config.privoxy.org/send-banner?type=auto" border="0" title="Killed $4 by link to $2"@sig
################################################################################
# Pages are "blocked" based on keyword matching.
#
#################################################################################
-FILTER: crude-parental Crude parental filtering
+FILTER: crude-parental Crude parental filtering. Note that this filter doesn't work reliably.
# (Note: Middlesex, Sussex and Essex are counties in the UK, not rude words)
# (Note #2: Is 'sex' a rude word?!)
-s%^.*(?<!middle)(?<!sus)(?<!es)sex.*$%<html><head><title>Blocked</title></head><body><h3>Blocked due to possible adult content. Please see <a href="http://dmoz.org/Kids_and_Teens/">this site</a>.</h3></body></html>%is
+s%^.*(?<!middle)(?<!sus)(?<!es)sex.*$%<html><head><title>Blocked</title></head><body>\
+<h3>Blocked by Privoxy's crude-parental filter due to possible adult content.</h3></body></html>%is
s+^.*warez.*$+<html><head><title>No Warez</title></head><body><h3>You're not searching for illegal stuff, are you?</h3></body></html>+is
# Remove by description
s/^.*\
-(suck |lick |tounge |rub |fuck |fingering |finger |chicks? )?\
-(her |your |my |hard |with |big |wet |tight |pink |hot |moist |young |teen )+\
+(?:(suck|lick|tounge|rub|fuck|fingering|finger|chicks?)\s*)?\
+(?:(her|your|my|hard|with|big|wet|tight|pink|hot|moist|young|teen)\s*)+\
(dicks?|penis|cocks?|balls?|tits?|pussy|cunt|clit|ass|mouth).*$\
/This page has been blocked by Privoxy's crude-parental content filter\
-/is
+/is
#Remove by link text
+# Shape: optional verb, one or more keywords, then a media noun. The \s*
+# between the groups tolerates any amount of white-space. "hand\s*job"
+# matches both "handjob" and "hand job"; "hand?job" would make the "d"
+# optional instead and miss the spaced spelling.
s/^.*\
-(download|broadband|view|watch|free|get|extreem)? \
-(sex|xxx|porn|cumshot|fuck(ing|s)?|anal|ass|asian|adult|Amateur|org(y|ies)|close ups?|hand ? job|nail(ed)?)+ \
+(download|broadband|view|watch|free|get|extreem)?\s*\
+(sex|xxx|porn|cumshot|fuck(ing|s)?|anal|ass|asian|adult|Amateur|org(y|ies)|close ups?|hand\s*job|nail(ed)?)+\s*\
(movies?|pics?|videos?|dvds?|dvd's|links?).*$\
/This page has been blocked by Privoxy's crude-parental content filter\
-/is
+/is
#Remove by age disclaimer
s/^.*\
-(models?|chicks?|girls?|women|persons) \
+(models?|chicks?|girls?|women|persons)\s*\
(who|are|were)+ (over|at least) (16|18|21) years (old|of age).*$\
/This page has been blocked by Privoxy's crude-parental content filter\
-/is
+/is
#Remove by regulations
s/^.*(Section 2257|18 U.?S.?C.? 2257).*$\
s@(<div[^>]*) id=(["']?)ads_[^\2]*\2@$1 class="msn_ads"@Uig
s@(<a[^>]*href=\")http://g.msn.com/.*\?(http://.*)(&&DI=.*)(\")@$1$2$4@Ug
s@(<a[^>]*)gping=\".*\"@$1 title="URL cleaned up by Privoxy's msn filter"@Ug
+s@<div id=\"ar\">(<h2>Sponsored sites</h2>)@<div class="msn_ads">$1@
+s@(</script><div) id=\"at\"@$1 class="msn_ads"@
#################################################################################
#
#################################################################################
#
-# text-requests: Tags detected CSS requests as "TEXT-REQUEST". Whether
+# css-requests: Tags detected CSS requests as "CSS-REQUEST". Whether
# or not the detection actually works depends on the browser.
#
#################################################################################
-CLIENT-HEADER-TAGGER: text-requests Tags detected requests for text documents as "TEXT-REQUEST".
-s@Accept:\s*text/.*@TEXT-REQUEST@i
+CLIENT-HEADER-TAGGER: css-requests Tags detected CSS requests as "CSS-REQUEST".
+s@Accept:\s*text/css.*@CSS-REQUEST@i
+
+#################################################################################
+#
+# privoxy-control: The taggers create tags with the content of X-Privoxy-Control
+# headers, the filters remove said headers.
+#
+#################################################################################
+CLIENT-HEADER-TAGGER: privoxy-control Creates tags with the content\
+ of X-Privoxy-Control headers
+s@^X-Privoxy-Control:\s*@@i
+
+CLIENT-HEADER-FILTER: privoxy-control Removes X-Privoxy-Control headers
+s@^X-Privoxy-Control:.*@@i
+
+SERVER-HEADER-TAGGER: privoxy-control Creates tags with the content\
+ of X-Privoxy-Control headers
+s@^X-Privoxy-Control:\s*@@i
+
+SERVER-HEADER-FILTER: privoxy-control Removes X-Privoxy-Control headers
+s@^X-Privoxy-Control:.*@@i
+
##############################################################################
#
# Revisions :
# $Log: default.filter,v $
+# Revision 1.47 2007/10/06 15:45:25 fabiankeil
+# Let msn hide sponsored links in #at divs.
+#
+# Revision 1.46 2007/10/06 09:54:13 fabiankeil
+# - Let msn hide sponsored links in #ar divs.
+# - Teach banners-by-link not to block the graphs for sf's tracker statistics.
+#
+# Revision 1.45 2007/08/11 16:54:12 fabiankeil
+# - Complete the changes from r1.42.
+# - Make crude-parental less sensitive to the amount of white-space,
+# add the note that it doesn't work too well again and replace the
+# DMOZ link with a less confusing explanation.
+#
+# Revision 1.44 2007/07/18 11:06:56 hal9
+# Replace notarget with '' in all popups filter to keep from breaking XHTML per
+# report from Siegfried Gipp.
+#
+# Revision 1.43 2007/06/01 14:17:04 fabiankeil
+# Mention possible delimiter conflicts with variables in dynamic pcrs commands.
+#
+# Revision 1.42 2007/05/17 15:55:36 fabiankeil
+# Undo an improperly tested last-minute change
+# and turn "text-requests" back into "css-requests".
+#
+# Revision 1.41 2007/05/17 15:45:41 fabiankeil
+# - Mention new filter types and the 'D' option.
+# - Header filters are now case-insensitive and accept a
+# varying amount of whitespace after the colon.
+# - Add another selector for yahoo ads.
+# - New server-header filter: less-download-windows
+# - New client-header taggers: text-requests and image-requests.
+#
# Revision 1.40 2007/03/20 15:40:00 fabiankeil
# Adjust to new world order with dedicated header-filter actions.
#