Html_cleaner now strips out all unknown HTML tags instead of just escaping them.
This commit is contained in:
parent
ee36e49332
commit
970bc77def
3
NEWS
3
NEWS
|
@ -2,6 +2,9 @@
|
|||
* Changed the order of exported HTML and CSV notebooks so that after all the
|
||||
"startup" notes are included, the remaining notes are included in
|
||||
alphabetical order (instead of reverse chronological order).
|
||||
* Instead of converting unsupported HTML tags to plain text when a note is
|
||||
saved, Luminotes now simply strips out all unsupported tags. This further
|
||||
improves copying and pasting text from programs like MS Word.
|
||||
* Fixed a compatibility problem with newer versions of SQLite. (Luminotes
|
||||
was using a reserved keyword as an identifier.) This only affected those
|
||||
people who installed Luminotes Server themselves.
|
||||
|
|
|
@ -112,31 +112,6 @@ class Html_cleaner(HTMLParser):
|
|||
'colgroup',
|
||||
]
|
||||
|
||||
# A list of tags that are forcibly removed from the input. Tags that
|
||||
# are not in permitted_tags and not in stripped_tags are simply
|
||||
# escaped.
|
||||
self.stripped_tags = [
|
||||
'span',
|
||||
'blink',
|
||||
'marquee',
|
||||
'bgsound',
|
||||
'meta',
|
||||
'object',
|
||||
'iframe',
|
||||
'script',
|
||||
'noscript',
|
||||
'applet',
|
||||
'embed',
|
||||
'style',
|
||||
'link',
|
||||
'html',
|
||||
'title',
|
||||
'head',
|
||||
'body',
|
||||
'o',
|
||||
'm',
|
||||
]
|
||||
|
||||
# A list of tags that require no closing tag.
|
||||
self.requires_no_close = [ 'img', 'br' ]
|
||||
|
||||
|
@ -168,58 +143,53 @@ class Html_cleaner(HTMLParser):
|
|||
def handle_charref(self, ref):
|
||||
if len(ref) < 7 and ref.isdigit():
|
||||
self.result.append( '&#%s;' % ref )
|
||||
else:
|
||||
self.result.append( xssescape('&#%s' % ref) )
|
||||
|
||||
def handle_entityref(self, ref):
|
||||
if ref in entitydefs:
|
||||
self.result.append( '&%s;' % ref )
|
||||
else:
|
||||
self.result.append( xssescape('&%s' % ref) )
|
||||
|
||||
def handle_comment(self, comment):
|
||||
pass # strip comments
|
||||
|
||||
def handle_starttag(self, tag, method, attrs):
|
||||
if tag not in self.permitted_tags:
|
||||
if tag not in self.stripped_tags:
|
||||
self.result.append( xssescape("<%s>" % tag) )
|
||||
else:
|
||||
bt = "<" + tag
|
||||
if tag in self.allowed_attributes:
|
||||
attrs = dict(attrs)
|
||||
self.allowed_attributes_here = \
|
||||
[x for x in self.allowed_attributes[tag] if x in attrs \
|
||||
and len(attrs[x]) > 0]
|
||||
for attribute in self.allowed_attributes_here:
|
||||
if attribute in ['href', 'src', 'background']:
|
||||
if self.url_is_acceptable(attrs[attribute]):
|
||||
bt += ' %s="%s"' % (attribute, attrs[attribute])
|
||||
else:
|
||||
bt += ' %s=%s' % \
|
||||
(xssescape(attribute), quoteattr(attrs[attribute]))
|
||||
if tag == "a" and \
|
||||
( not attrs.get( 'href' ) or not self.NOTE_LINK_URL_PATTERN.search( attrs.get( 'href' ) ) ):
|
||||
if self.require_link_target and not attrs.get( 'target' ):
|
||||
bt += ' target="_new"'
|
||||
rel = attrs.get( 'rel' )
|
||||
if not rel or rel != "nofollow":
|
||||
bt += ' rel="nofollow"'
|
||||
if bt == "<a" or bt == "<img":
|
||||
return
|
||||
if tag in self.requires_no_close:
|
||||
bt += " /"
|
||||
bt += ">"
|
||||
self.result.append( bt )
|
||||
self.open_tags.insert(0, tag)
|
||||
return
|
||||
|
||||
bt = "<" + tag
|
||||
if tag in self.allowed_attributes:
|
||||
attrs = dict(attrs)
|
||||
self.allowed_attributes_here = \
|
||||
[x for x in self.allowed_attributes[tag] if x in attrs \
|
||||
and len(attrs[x]) > 0]
|
||||
for attribute in self.allowed_attributes_here:
|
||||
if attribute in ['href', 'src', 'background']:
|
||||
if self.url_is_acceptable(attrs[attribute]):
|
||||
bt += ' %s="%s"' % (attribute, attrs[attribute])
|
||||
else:
|
||||
bt += ' %s=%s' % \
|
||||
(xssescape(attribute), quoteattr(attrs[attribute]))
|
||||
if tag == "a" and \
|
||||
( not attrs.get( 'href' ) or not self.NOTE_LINK_URL_PATTERN.search( attrs.get( 'href' ) ) ):
|
||||
if self.require_link_target and not attrs.get( 'target' ):
|
||||
bt += ' target="_new"'
|
||||
rel = attrs.get( 'rel' )
|
||||
if not rel or rel != "nofollow":
|
||||
bt += ' rel="nofollow"'
|
||||
if bt == "<a" or bt == "<img":
|
||||
return
|
||||
if tag in self.requires_no_close:
|
||||
bt += " /"
|
||||
bt += ">"
|
||||
self.result.append( bt )
|
||||
self.open_tags.insert(0, tag)
|
||||
|
||||
def handle_endtag(self, tag, attrs):
|
||||
tag = tag.split( ":" )[ 0 ]
|
||||
bracketed = "</%s>" % tag
|
||||
if tag not in self.permitted_tags:
|
||||
if tag not in self.stripped_tags:
|
||||
self.result.append( xssescape(bracketed) )
|
||||
elif tag in self.open_tags:
|
||||
return
|
||||
|
||||
if tag in self.open_tags:
|
||||
self.result.append( bracketed )
|
||||
self.open_tags.remove(tag)
|
||||
|
||||
|
@ -248,15 +218,3 @@ class Html_cleaner(HTMLParser):
|
|||
if endtag not in self.requires_no_close:
|
||||
self.result.append( "</%s>" % endtag )
|
||||
return "".join( self.result )
|
||||
|
||||
def xtags(self):
|
||||
"""Returns a printable string informing the user which tags are allowed"""
|
||||
self.permitted_tags.sort()
|
||||
tg = ""
|
||||
for x in self.permitted_tags:
|
||||
tg += "<" + x
|
||||
if x in self.allowed_attributes:
|
||||
for y in self.allowed_attributes[x]:
|
||||
tg += ' %s=""' % y
|
||||
tg += "> "
|
||||
return xssescape(tg.strip())
|
||||
|
|
|
@ -3154,6 +3154,48 @@ class Test_notebooks( Test_controller ):
|
|||
# before_position should be ignored for such notebooks
|
||||
self.test_save_new_note_in_notebook_with_read_write_for_own_notes( after_note_id, before_note_id )
|
||||
|
||||
def test_save_new_note_with_allowed_tags( self ):
|
||||
self.login()
|
||||
|
||||
# save a completely new note
|
||||
title_with_tags = u"<h3>my funny title</h3>"
|
||||
body = u"<p>this is a <b>note</b></p>"
|
||||
new_note = Note.create( "55", title_with_tags + body )
|
||||
previous_revision = new_note.revision
|
||||
|
||||
result = self.http_post( "/notebooks/save_note/", dict(
|
||||
notebook_id = self.notebook.object_id,
|
||||
note_id = new_note.object_id,
|
||||
contents = new_note.contents,
|
||||
startup = False,
|
||||
previous_revision = None,
|
||||
), session_id = self.session_id )
|
||||
|
||||
assert result[ "new_revision" ]
|
||||
assert result[ "new_revision" ] != previous_revision
|
||||
assert result[ "new_revision" ].user_id == self.user.object_id
|
||||
assert result[ "new_revision" ].username == self.username
|
||||
assert result[ "previous_revision" ] == None
|
||||
user = self.database.load( User, self.user.object_id )
|
||||
assert user.storage_bytes > 0
|
||||
assert result[ "storage_bytes" ] == user.storage_bytes
|
||||
assert result[ "rank" ] == 0.0
|
||||
|
||||
# make sure the new title is now loadable
|
||||
result = self.http_post( "/notebooks/load_note_by_title/", dict(
|
||||
notebook_id = self.notebook.object_id,
|
||||
note_title = new_note.title,
|
||||
), session_id = self.session_id )
|
||||
|
||||
note = result[ "note" ]
|
||||
|
||||
expected_contents = title_with_tags + body
|
||||
|
||||
assert note.object_id == new_note.object_id
|
||||
assert note.title == new_note.title
|
||||
assert note.contents == expected_contents
|
||||
assert note.user_id == self.user.object_id
|
||||
|
||||
def test_save_new_note_with_disallowed_tags( self ):
|
||||
self.login()
|
||||
|
||||
|
@ -3240,6 +3282,48 @@ class Test_notebooks( Test_controller ):
|
|||
assert note.contents == expected_contents
|
||||
assert note.user_id == self.user.object_id
|
||||
|
||||
def test_save_new_note_with_unknown_tags( self ):
|
||||
self.login()
|
||||
|
||||
# save a completely new note
|
||||
title_with_tags = u"<h3>my funny title</h3>"
|
||||
junk = u"foo<whee>blah</whee>bar"
|
||||
new_note = Note.create( "55", title_with_tags + junk )
|
||||
previous_revision = new_note.revision
|
||||
|
||||
result = self.http_post( "/notebooks/save_note/", dict(
|
||||
notebook_id = self.notebook.object_id,
|
||||
note_id = new_note.object_id,
|
||||
contents = new_note.contents,
|
||||
startup = False,
|
||||
previous_revision = None,
|
||||
), session_id = self.session_id )
|
||||
|
||||
assert result[ "new_revision" ]
|
||||
assert result[ "new_revision" ] != previous_revision
|
||||
assert result[ "new_revision" ].user_id == self.user.object_id
|
||||
assert result[ "new_revision" ].username == self.username
|
||||
assert result[ "previous_revision" ] == None
|
||||
user = self.database.load( User, self.user.object_id )
|
||||
assert user.storage_bytes > 0
|
||||
assert result[ "storage_bytes" ] == user.storage_bytes
|
||||
assert result[ "rank" ] == 0.0
|
||||
|
||||
# make sure the new title is now loadable
|
||||
result = self.http_post( "/notebooks/load_note_by_title/", dict(
|
||||
notebook_id = self.notebook.object_id,
|
||||
note_title = new_note.title,
|
||||
), session_id = self.session_id )
|
||||
|
||||
note = result[ "note" ]
|
||||
|
||||
expected_contents = title_with_tags + u"fooblahbar"
|
||||
|
||||
assert note.object_id == new_note.object_id
|
||||
assert note.title == new_note.title
|
||||
assert note.contents == expected_contents
|
||||
assert note.user_id == self.user.object_id
|
||||
|
||||
def test_save_new_note_with_bad_characters( self ):
|
||||
self.login()
|
||||
|
||||
|
|
Reference in New Issue