witten
/
luminotes
Archived
1
0
Fork 0

Html_cleaner now strips out all unknown HTML tags instead of just escaping them.

This commit is contained in:
Dan Helfman 2009-02-25 19:12:14 -08:00
parent ee36e49332
commit 970bc77def
3 changed files with 119 additions and 74 deletions

3
NEWS
View File

@ -2,6 +2,9 @@
* Changed the order of exported HTML and CSV notebooks so that after all the * Changed the order of exported HTML and CSV notebooks so that after all the
"startup" notes are included, the remaining notes are included in "startup" notes are included, the remaining notes are included in
alphabetical order (instead of reverse chronological order). alphabetical order (instead of reverse chronological order).
* Instead of converting unsupported HTML tags to plain text when a note is
saved, Luminotes now simply strips out all unsupported tags. This further
improves copying and pasting text from programs like MS Word.
* Fixed a compatibility problem with newer versions of SQLite. (Luminotes * Fixed a compatibility problem with newer versions of SQLite. (Luminotes
was using a reserved keyword as an identifier.) This only affected those was using a reserved keyword as an identifier.) This only affected those
people who installed Luminotes Server themselves. people who installed Luminotes Server themselves.

View File

@ -112,31 +112,6 @@ class Html_cleaner(HTMLParser):
'colgroup', 'colgroup',
] ]
# A list of tags that are forcibly removed from the input. Tags that
# are not in permitted_tags and not in stripped_tags are simply
# escaped.
self.stripped_tags = [
'span',
'blink',
'marquee',
'bgsound',
'meta',
'object',
'iframe',
'script',
'noscript',
'applet',
'embed',
'style',
'link',
'html',
'title',
'head',
'body',
'o',
'm',
]
# A list of tags that require no closing tag. # A list of tags that require no closing tag.
self.requires_no_close = [ 'img', 'br' ] self.requires_no_close = [ 'img', 'br' ]
@ -168,58 +143,53 @@ class Html_cleaner(HTMLParser):
def handle_charref(self, ref): def handle_charref(self, ref):
if len(ref) < 7 and ref.isdigit(): if len(ref) < 7 and ref.isdigit():
self.result.append( '&#%s;' % ref ) self.result.append( '&#%s;' % ref )
else:
self.result.append( xssescape('&#%s' % ref) )
def handle_entityref(self, ref): def handle_entityref(self, ref):
if ref in entitydefs: if ref in entitydefs:
self.result.append( '&%s;' % ref ) self.result.append( '&%s;' % ref )
else:
self.result.append( xssescape('&%s' % ref) )
def handle_comment(self, comment): def handle_comment(self, comment):
pass # strip comments pass # strip comments
def handle_starttag(self, tag, method, attrs): def handle_starttag(self, tag, method, attrs):
if tag not in self.permitted_tags: if tag not in self.permitted_tags:
if tag not in self.stripped_tags: return
self.result.append( xssescape("<%s>" % tag) )
else: bt = "<" + tag
bt = "<" + tag if tag in self.allowed_attributes:
if tag in self.allowed_attributes: attrs = dict(attrs)
attrs = dict(attrs) self.allowed_attributes_here = \
self.allowed_attributes_here = \ [x for x in self.allowed_attributes[tag] if x in attrs \
[x for x in self.allowed_attributes[tag] if x in attrs \ and len(attrs[x]) > 0]
and len(attrs[x]) > 0] for attribute in self.allowed_attributes_here:
for attribute in self.allowed_attributes_here: if attribute in ['href', 'src', 'background']:
if attribute in ['href', 'src', 'background']: if self.url_is_acceptable(attrs[attribute]):
if self.url_is_acceptable(attrs[attribute]): bt += ' %s="%s"' % (attribute, attrs[attribute])
bt += ' %s="%s"' % (attribute, attrs[attribute]) else:
else: bt += ' %s=%s' % \
bt += ' %s=%s' % \ (xssescape(attribute), quoteattr(attrs[attribute]))
(xssescape(attribute), quoteattr(attrs[attribute])) if tag == "a" and \
if tag == "a" and \ ( not attrs.get( 'href' ) or not self.NOTE_LINK_URL_PATTERN.search( attrs.get( 'href' ) ) ):
( not attrs.get( 'href' ) or not self.NOTE_LINK_URL_PATTERN.search( attrs.get( 'href' ) ) ): if self.require_link_target and not attrs.get( 'target' ):
if self.require_link_target and not attrs.get( 'target' ): bt += ' target="_new"'
bt += ' target="_new"' rel = attrs.get( 'rel' )
rel = attrs.get( 'rel' ) if not rel or rel != "nofollow":
if not rel or rel != "nofollow": bt += ' rel="nofollow"'
bt += ' rel="nofollow"' if bt == "<a" or bt == "<img":
if bt == "<a" or bt == "<img": return
return if tag in self.requires_no_close:
if tag in self.requires_no_close: bt += " /"
bt += " /" bt += ">"
bt += ">" self.result.append( bt )
self.result.append( bt ) self.open_tags.insert(0, tag)
self.open_tags.insert(0, tag)
def handle_endtag(self, tag, attrs): def handle_endtag(self, tag, attrs):
tag = tag.split( ":" )[ 0 ] tag = tag.split( ":" )[ 0 ]
bracketed = "</%s>" % tag bracketed = "</%s>" % tag
if tag not in self.permitted_tags: if tag not in self.permitted_tags:
if tag not in self.stripped_tags: return
self.result.append( xssescape(bracketed) )
elif tag in self.open_tags: if tag in self.open_tags:
self.result.append( bracketed ) self.result.append( bracketed )
self.open_tags.remove(tag) self.open_tags.remove(tag)
@ -248,15 +218,3 @@ class Html_cleaner(HTMLParser):
if endtag not in self.requires_no_close: if endtag not in self.requires_no_close:
self.result.append( "</%s>" % endtag ) self.result.append( "</%s>" % endtag )
return "".join( self.result ) return "".join( self.result )
def xtags(self):
    """Return a printable, escaped summary of the tags that are allowed.

    Sorts self.permitted_tags in place, then renders every permitted tag as
    an opening tag. Each attribute listed for that tag in
    self.allowed_attributes is shown with an empty value. The rendered tags
    are separated by single spaces and the whole string is passed through
    xssescape before being returned.
    """
    self.permitted_tags.sort()
    rendered = []
    for name in self.permitted_tags:
        opening = "<" + name
        if name in self.allowed_attributes:
            # Show each allowed attribute as name="" after the tag name.
            opening += "".join(
                [' %s=""' % attr for attr in self.allowed_attributes[name]]
            )
        rendered.append(opening + "> ")
    # Each piece ends with a space; strip() drops the trailing one.
    return xssescape("".join(rendered).strip())

View File

@ -3154,6 +3154,48 @@ class Test_notebooks( Test_controller ):
# before_position should be ignored for such notebooks # before_position should be ignored for such notebooks
self.test_save_new_note_in_notebook_with_read_write_for_own_notes( after_note_id, before_note_id ) self.test_save_new_note_in_notebook_with_read_write_for_own_notes( after_note_id, before_note_id )
def test_save_new_note_with_allowed_tags( self ):
  """
  Save a brand new note whose contents are an <h3> title followed by a <p>
  body containing <b>, then check that the save result is sane and that the
  note loads back by title with contents equal to what was submitted.
  """
  self.login()

  # build a completely new note from the title markup plus the body markup
  heading = u"<h3>my funny title</h3>"
  paragraph = u"<p>this is a <b>note</b></p>"
  new_note = Note.create( "55", heading + paragraph )
  original_revision = new_note.revision

  save_args = dict(
    notebook_id = self.notebook.object_id,
    note_id = new_note.object_id,
    contents = new_note.contents,
    startup = False,
    previous_revision = None,
  )
  result = self.http_post(
    "/notebooks/save_note/",
    save_args,
    session_id = self.session_id,
  )

  # the save must yield a fresh revision attributed to the logged-in user
  assert result[ "new_revision" ]
  assert result[ "new_revision" ] != original_revision
  assert result[ "new_revision" ].user_id == self.user.object_id
  assert result[ "new_revision" ].username == self.username
  assert result[ "previous_revision" ] == None

  # the user's storage usage must be non-zero and match what the save reported
  user = self.database.load( User, self.user.object_id )
  assert user.storage_bytes > 0
  assert result[ "storage_bytes" ] == user.storage_bytes
  assert result[ "rank" ] == 0.0

  # make sure the new title is now loadable
  load_args = dict(
    notebook_id = self.notebook.object_id,
    note_title = new_note.title,
  )
  result = self.http_post(
    "/notebooks/load_note_by_title/",
    load_args,
    session_id = self.session_id,
  )
  note = result[ "note" ]

  # the loaded contents are expected to equal the submitted markup
  expected_contents = heading + paragraph

  assert note.object_id == new_note.object_id
  assert note.title == new_note.title
  assert note.contents == expected_contents
  assert note.user_id == self.user.object_id
def test_save_new_note_with_disallowed_tags( self ): def test_save_new_note_with_disallowed_tags( self ):
self.login() self.login()
@ -3240,6 +3282,48 @@ class Test_notebooks( Test_controller ):
assert note.contents == expected_contents assert note.contents == expected_contents
assert note.user_id == self.user.object_id assert note.user_id == self.user.object_id
def test_save_new_note_with_unknown_tags( self ):
  """
  Save a brand new note whose contents include a <whee> element after the
  <h3> title, then check that the save result is sane and that the note
  loads back by title with the <whee> tags removed but their text kept.
  """
  self.login()

  # build a completely new note from the title markup plus text wrapped in <whee>
  heading = u"<h3>my funny title</h3>"
  unknown_markup = u"foo<whee>blah</whee>bar"
  new_note = Note.create( "55", heading + unknown_markup )
  original_revision = new_note.revision

  save_args = dict(
    notebook_id = self.notebook.object_id,
    note_id = new_note.object_id,
    contents = new_note.contents,
    startup = False,
    previous_revision = None,
  )
  result = self.http_post(
    "/notebooks/save_note/",
    save_args,
    session_id = self.session_id,
  )

  # the save must yield a fresh revision attributed to the logged-in user
  assert result[ "new_revision" ]
  assert result[ "new_revision" ] != original_revision
  assert result[ "new_revision" ].user_id == self.user.object_id
  assert result[ "new_revision" ].username == self.username
  assert result[ "previous_revision" ] == None

  # the user's storage usage must be non-zero and match what the save reported
  user = self.database.load( User, self.user.object_id )
  assert user.storage_bytes > 0
  assert result[ "storage_bytes" ] == user.storage_bytes
  assert result[ "rank" ] == 0.0

  # make sure the new title is now loadable
  load_args = dict(
    notebook_id = self.notebook.object_id,
    note_title = new_note.title,
  )
  result = self.http_post(
    "/notebooks/load_note_by_title/",
    load_args,
    session_id = self.session_id,
  )
  note = result[ "note" ]

  # only the <whee> and </whee> tags should be gone; the text remains
  expected_contents = heading + u"fooblahbar"

  assert note.object_id == new_note.object_id
  assert note.title == new_note.title
  assert note.contents == expected_contents
  assert note.user_id == self.user.object_id
def test_save_new_note_with_bad_characters( self ): def test_save_new_note_with_bad_characters( self ):
self.login() self.login()