#
# File : $Source: /cvsroot/ijbswa/current/default.filter,v $
#
-# $Id: default.filter,v 1.24 2006/10/06 11:25:31 fabiankeil Exp $
+# $Id: default.filter,v 1.32 2006/11/10 18:04:04 fabiankeil Exp $
#
# Purpose : Rules to process the content of web pages
#
# The status bar is for displaying link targets, not pointless blahblah
#
-s/(\W\s*)((this|window)\.(default)?status)\s*=\s*((['"]).*?\6)/$1if(typeof(this.href) != 'undefined') $2 = $5 + ' URL: ' + this.href;else return false/ig
+s/(\W\s*)((this|window)\.(default)?status)\s*=\s*((['"]).*?(?<!\\)\6)/$1if(typeof(this.href) != 'undefined') $2 = $5 + ' URL: ' + this.href;else return false/ig
# Kill OnUnload popups. Yummy.
# Test: http://www.zdnet.com/zdsubs/yahoo/tree/yfs.html
# This makes banners-by-size more effective and allows both banners-by-size
# and banners-by-link to preserve the original image URL in the title attribute.
-s|<img\s+?([^>]*) src\s*=\s*(['"])([^>\\\2]+)\2|<img src=$2$3$2$1|siUg
+s|<img\s+?([^>]*) src\s*=\s*(['"])([^>\\\2]+)\2|<img src=$2$3$2 $1|siUg
s|<img\s+?([^>]*) src\s*=\s*([^'">\\\s]+)|<img src=$2 $1|sig
-s|<img (src=(?:(['"])[^>\\\\2]*\2\|[^'">\\\s]+?))([^>]*)\s*width\s*=\s*(["']?)(\d+?)\4|<img $1 width=$4$5$4$3|siUg
+s|<img (src=(?:(['"])[^>\\\\2]*\2\|[^'">\\\s]+?))([^>]*)\s+width\s*=\s*(["']?)(\d+?)\4|<img $1 width=$4$5$4$3|siUg
#################################################################################
#################################################################################
FILTER: webbugs Squish WebBugs (1x1 invisible GIFs used for user tracking)
-s/<img\s+[^>]*(?:width|height)\s*=\s*['"]?[01](?=\D)[^>]*(?:width|height)\s*=\s*['"]?[01](?=\D)[^>]*?>//siUg
+# Matches an <img> tag that carries two width/height attributes, each preceded
+# by whitespace and each set to a single digit 0 or 1 (optionally quoted, and
+# not followed by another digit). The whole tag is replaced with nothing.
+s@<img[^>]*\s(?:width|height)\s*=\s*['"]?[01](?=\D)[^>]*\s(?:width|height)\s*=\s*['"]?[01](?=\D)[^>]*?>@@siUg
#################################################################################
#################################################################################
FILTER: jumping-windows Prevent windows from resizing and moving themselves
-s/(?:window|this|self)\.(?:move|resize)(?:to|by)\(/''.concat(/ig
-
+# Rewrites the start of calls such as window.moveTo( or self.resizeBy( into
+# ''.concat( so the statement stays syntactically valid but no longer moves or
+# resizes anything. The lookbehind only lets the job match when a non-word
+# character precedes window/this/self.
+s/(?<=[\W])(?:window|this|self)\.(?:move|resize)(?:to|by)\(/''.concat(/ig
#################################################################################
#
# SCNR
#
-s/microsoft(?!.com)/MicroSuck/ig
+# The dot is escaped on purpose: only a literal "." followed by a
+# non-whitespace character (as in host names like microsoft.com) suppresses
+# the replacement. An unescaped dot would also skip "Microsoft Windows".
+s/microsoft(?!\.[^\s])/MicroSuck/ig
# Buzzword Bingo (example for extended regex syntax)
#
s|(<INPUT name="\d{2,4}" type="RADIO" value="0") CHECKED |$1|g
s|<INPUT name="\d{2,4}" type="RADIO" value="3" |$0 checked|g
+#################################################################################
+#
+# no-ping: Removes non-standard ping attributes in <a> and <area> tags.
+#
+#################################################################################
+FILTER: no-ping Removes non-standard ping attributes in <a> and <area> tags.
+# $1 is the tag up to the ping attribute, $3 the ping target and $4 the
+# character (if any) that followed the attribute. The attribute is dropped and
+# a red "PING!" marker, whose title names the removed target, is put in front
+# of the tag.
+s@(<a(?:rea)?[^>]*?)\sping=(['"]?)([^"'>]+)\2([>\s]?)@\
+<strong style="color:white; background-color:red;" title="Privoxy removed ping target '$3'">PING!</strong>\n$1$4@ig
+
#################################################################################
#
# google: CSS-based block for Google text ads. Also removes
s@</head>@<style type="text/css">\n\
/* Style sheet inserted by Privoxy's google filter. */\n\
\#fbc, \#fbl, \#ra, .rhh {visibility: hidden !important;}\n\
- \#tpa1,\#tpa2,\#tpa3,\#tpa4,\#tpa5,\#tpa5, \#spl, .ch, \#ads, \#toolbar, \#google_ads_frame{display: none !important;}\n\
+ \#tpa1,\#tpa2,\#tpa3,\#tpa4,\#tpa5,\#tpa5, \#spl, .ch, \#ads,\
+ \#toolbar, \#google_ads_frame{display: none !important;}\n\
.main_body, .j {width: 100%}\n</style>\n$0@
s@<div style=\"padding-top:11px;min-width:500px\">@<div id="main_body">@
s@(<table cellspacing=0 cellpadding=0 width=25% align=right bgcolor=\#ffffff border=0\
|</font></td></tr></tbody></table><table align=\"right\" bgcolor=\"\#ffffff\"\
-|<table cellspacing=0 cellpadding=0 align=right bgcolor=\#ffffff border=0)@$0 id="ads"@
+|<table cellspacing=0 cellpadding=0 align=right bgcolor=\#ffffff border=0\
+|<table style=\"clear:both\" align=right width=25% cellspacing=\"0\" cellpadding=\"0\"\
+ border=\"0\" bgcolor=\"\#ffffff\")@$0 id="ads"@
s@(<br clear=all><table)( border=0 cellpadding=9><tr><td)@$1 id="toolbar"$2@
#################################################################################
#
#################################################################################
FILTER: yahoo CSS-based block for Yahoo text ads. Also removes a width limitation.
+# The job below inserts a <style> element in front of </head>. Its rules hide
+# the listed ad containers and override the width of the result columns.
-s@</head>@<style type="text/css">\n\
+s@</head>@\n<style type="text/css">\n\
/* Style sheet inserted by Privoxy's yahoo filter. */\n\
- \#symadbn, \#ymadbn, \#yschsec, \#yschanswr, .yschftad,\
- .yschspn, .yschspns {display: none !important;}\n\
- \#yschpri {width: 100% }\n</style>\n$0@
+ \#symadbn, \#ymadbn, .yschbox, \#yschsec, .yschhd, \#yschanswr, .yschftad,\
+ .yschspn, .yschspns, \#ygrp-sponsored-links {display: none !important;}\n\
+ \#yschpri, \#yschweb {width: 100% !important; max-width: 100% !important;}\n\
+ \#yschqcon, \#yschtg {width: auto !important; /* No useless horizontal scrollbar please */}\n\
+</style>\n$0@
#################################################################################
#
s@</head>@<style type="text/css">\n\
/* Style sheet inserted by Privoxy's msn filter. */\n\
.msn_ads {display: none !important;}\n\
- \#results, .flank, .SearchSection .not\
- {width: 100% !important; min-width: 100% !important;\
- max-width: 100% !important;}\n</style>\n$0@
+ \#results, .flank, .results_area_flank, .results_area_stroke, .SearchSection .not\
+ {width: 99% /*!important*/; min-width: 99% !important;\
+ max-width: 100% !important; /* width:100% sometimes causes horizontal scrollbars */}\n\
+ /* Make continue links harder to miss */\n\
+ \#pagination_bottom a {padding: .3em .5em .3em .5em; border: 1px solid \#e6e6e6;}\n\
+ \#pagination_bottom li, \#pagination_bottom li .selected, li .nextPage \
+ {margin: 0 !important; cursor: auto; border: none; padding:.1em;}\n\
+ \#pagination_bottom li .prevPage {padding-right: 1.5em !important;}\n\
+ \#pagination_bottom li .selected {border: none;}\n\
+ .selected a {background-color: \#d2eaf6; border: 1px solid \#b7d8ee;}\n\
+ /* Remove "suggestions". They are next to worthless but partly overlap with the search results */\n\
+ .suggestion, \#nys_right {clear: both; display:none;}\n\
+ </style>\n$0@
s@(<div[^>]*) id=(["']?)ads_[^\2]*\2@$1 class="msn_ads"@Uig
s@(<a[^>]*href=\")http://g.msn.com/.*\?(http://.*)(&&DI=.*)(\")@$1$2$4@Ug
s@(<a[^>]*)gping=\".*\"@$1 title="URL cleaned up by Privoxy's msn filter"@Ug
\#sidebar {width: 29% }\n\
.post-body {overflow: auto;}\n\
.blogComments {width: 100%; overflow: auto;}\n</style>\n$0@
-s@<body.*(<div id="(content|wrap4|wrapper))@<body><!-- Privoxy's\
- blogspot filter ditched some garbage here -->$1@Us
+s@<body.*(?:<div id="space-for-ie"></div>|(<div id="(?:content|wrap4|wrapper)))@<body>\
+ <!-- Privoxy's blogspot filter ditched some garbage here -->$1@Us
s@(<div style=\"[^\"]*width:)30em@$1 100%@
s@background:url\(\"http://www.blogblog.com/rounders[^\"]*\"\).*;@/*$0*/@Ug
s@(background:\#[a-f\d]{3})( url\(\"http://www.blogblog.com/rounders[^\"]*\"\).*;)@$1 ;/*$2*/@Ug
+#################################################################################
+#
+# x-httpd-php-to-html: Header filter to change the Content-Type from
+# x-httpd-php to html. "Content-Type: x-httpd-php"
+# is set by clueless PHP users and causes many
+# browsers to open a download menu instead of
+# rendering the page.
+#
+#################################################################################
+FILTER: x-httpd-php-to-html Header filter to change the Content-Type from x-httpd-php to html.
+# Matched case-insensitively and with any amount of whitespace after the
+# colon, because HTTP header names and media types are not case-sensitive and
+# the space after the colon is optional. $1 keeps the header name as sent.
+s@^(Content-Type:)\s*application/x-httpd-php@$1 text/html@i
+
#################################################################################
#
# html-to-xml: Header filter to change the Content-Type from html to xml.
FILTER: xml-to-html Header filter to change the Content-Type from xml to html.
s@^(Content-Type:) (?:application|text)/(?:xhtml\+)?xml(;.*)?$@$1 text/html$2@
+#################################################################################
+#
+# hide-tor-exit-notation: Header filter to remove the Tor exit node notation
+# in Host and Referer headers.
+#
+# Note: If Privoxy and Tor are chained and Privoxy is configured to
+# use socks4a, one can use http://www.example.org.foobar.exit/
+# to access the host www.example.org through Tor exit node foobar.
+#
+# As the HTTP client isn't aware of this notation, it treats the
+# whole string "www.example.org.foobar.exit" as host and uses it
+# for the "Host" and "Referer" headers. From the server's point of
+# view the resulting headers are invalid and can cause problems.
+#
+# An invalid "Referer" header can trigger "hot-linking" protections,
+# an invalid "Host" header will make it impossible for the server to
+# find the right vhost (several domains hosted on the same IP address).
+#
+# This filter removes the "foo.exit" part in those headers
+# to prevent the mentioned problems. Note that it only modifies
+# the HTTP headers, it doesn't make it impossible for the server
+# to detect your Tor exit node based on the IP address the request is
+# coming from.
+#
+#################################################################################
+FILTER: hide-tor-exit-notation Header filter to remove the Tor exit node notation in Host and Referer headers.
+# $1 captures the header name, an optional http:// or https:// scheme and the
+# host up to the last ".<name>.exit" found before the first slash; that
+# suffix is dropped. Matching is case-insensitive.
+s@^((?:Referer|Host):\s*(?:https?://)?[^/]*)\.[^\./]*?\.exit@$1@i
+
+
##############################################################################
#
# Revisions :
# $Log: default.filter,v $
+# Revision 1.32 2006/11/10 18:04:04 fabiankeil
+# Have no-ping print the ping warning in red.
+#
+# Modified yahoo to keep in sync with recent
+# CSS changes and to suppress a useless horizontal
+# scrollbar.
+#
+# msn now makes sure that the continue-link boxes
+# act as links (the original CSS just changes the cursor).
+#
+# Changed fun filter regex to leave microsoft links alone.
+# Fixes BR 1019996.
+#
+# Revision 1.31 2006/10/21 13:12:28 fabiankeil
+# Added no-ping and hide-tor-exit-notation.
+#
+# Adjusted jumping-windows to break less.
+# Fixes BR 1146134.
+#
+# Revision 1.30 2006/10/18 12:36:50 fabiankeil
+# google filter now cleans Google groups as well.
+#
+# Revision 1.29 2006/10/11 14:03:17 fabiankeil
+# Changed img-reorder regex to only move width
+# attributes if they are following at least one
+# whitespace. Fixes BR 1328455.
+#
+# Revision 1.28 2006/10/11 13:31:13 fabiankeil
+# Added Anduin Withers' js-annoyances fix
+# for not messing up escaped quotes. Fixes BR 999765.
+#
+# Improved blogspot filter to make it less likely that
+# the blogspot banner at the top of the page is missed.
+#
+# Revision 1.27 2006/10/08 17:00:51 fabiankeil
+# Modified webbugs filter to create a comment around the offending
+# image instead of removing it entirely.
+#
+# Adjusted regex to only match if there's at least one whitespace
+# before the width and height attributes. Makes it more likely that
+# they are indeed attributes, and not part of the value of another attribute.
+# Solves BR 1035587.
+#
+# Thanks to Martin Thomas for diagnosing the cause of the problem.
+#
+# Revision 1.26 2006/10/06 18:06:16 fabiankeil
+# Added header filter x-httpd-php-to-html
+# and reverted another img-reorder whitespace
+# problem.
+#
+# Revision 1.25 2006/10/06 15:26:09 fabiankeil
+# Bumped copyright year.
+#
+# Reverted parts of the last img-reorder change
+# which were intended to remove superfluous whitespace
+# but had the side effect to mess up some tags.
+#
+# Modified banners-by-size and banners-by-link to
+# use border value "0" instead of "\0". Fixes BR 1100065.
+#
# Revision 1.24 2006/10/06 11:25:31 fabiankeil
# Taught img-reorder not to break img tags
# with empty src attributes. Fixes BR 1089474.