From fd681ed3880a0a1d067f2c32a81e8657fd1194cd Mon Sep 17 00:00:00 2001 From: Dan Helfman Date: Fri, 31 Oct 2008 11:57:45 -0700 Subject: [PATCH] Changes to html cleaner to make all external links nofollow to discourage forum spammers. --- controller/Html_cleaner.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/controller/Html_cleaner.py b/controller/Html_cleaner.py index 5eb7049..4d0e2f7 100644 --- a/controller/Html_cleaner.py +++ b/controller/Html_cleaner.py @@ -144,7 +144,7 @@ class Html_cleaner(HTMLParser): # "on" tags, like "onhover," would not be smart. Also be very careful # of "background" and "style." self.allowed_attributes = { - 'a': [ 'href', 'target' ], + 'a': [ 'href', 'target', 'rel' ], 'p': [ 'align' ], 'img': [ 'src', 'alt', 'border', 'title', "class" ], 'table': [ 'cellpadding', 'cellspacing', 'border', 'width', 'height' ], @@ -197,9 +197,13 @@ class Html_cleaner(HTMLParser): else: bt += ' %s=%s' % \ (xssescape(attribute), quoteattr(attrs[attribute])) - if self.require_link_target and tag == "a" and not attrs.get( 'target' ) and \ + if tag == "a" and \ ( not attrs.get( 'href' ) or not self.NOTE_LINK_URL_PATTERN.search( attrs.get( 'href' ) ) ): - bt += ' target="_new"' + if self.require_link_target and not attrs.get( 'target' ): + bt += ' target="_new"' + rel = attrs.get( 'rel' ) + if not rel or rel != "nofollow": + bt += ' rel="nofollow"' if bt == "