Html_nuker now converts some character/entity refs to their ASCII equivalents,
which allows searching for text that contains non-alphanumeric characters encoded as char/entity refs.
This commit is contained in:
parent
6bb233f8da
commit
24a7205d3a
|
@ -4,7 +4,7 @@ from formatter import AbstractFormatter, NullWriter
|
|||
|
||||
class Html_nuker( HTMLParser ):
|
||||
"""
|
||||
Nukes HTML of all tags.
|
||||
Nukes HTML of all tags, and optionally all entity/characters references.
|
||||
"""
|
||||
def __init__( self, allow_refs = False ):
|
||||
HTMLParser.__init__( self, AbstractFormatter( NullWriter() ) )
|
||||
|
@ -16,12 +16,23 @@ class Html_nuker( HTMLParser ):
|
|||
self.result.append( data )
|
||||
|
||||
def handle_charref( self, ref ):
    """
    Handle a numeric character reference such as "&#58;".

    When self.allow_refs is set, the reference is re-emitted into self.result in decimal form
    ("&#58;"). Otherwise a reference to a code point in the range 32-127 is replaced by the
    character itself, and any other reference is dropped.

    :param ref: body of the reference without the leading "&#" and trailing ";", either decimal
                ("58") or hexadecimal with an "x"/"X" prefix ("x3a")
    """
    try:
        code = int( ref )
    except ValueError:
        # not plain decimal: accept the hexadecimal form, otherwise treat the reference as malformed
        try:
            code = int( ref[ 1: ], 16 ) if ref[ :1 ] in ( "x", "X" ) else None
        except ValueError:
            code = None

    # a reference that cannot be parsed has no sensible output in either mode, so it is dropped
    if code is None:
        return

    if self.allow_refs:
        self.result.append( "&#%s;" % code )
    # convert ascii references to their character equivalents
    elif 32 <= code < 128:
        self.result.append( chr( code ) )
|
||||
|
||||
def handle_entityref( self, ref ):
    """
    Handle a named entity reference such as "&amp;".

    When self.allow_refs is set, the reference is re-emitted unchanged into self.result.
    Otherwise the entities amp, lt, gt and quot are replaced by their ascii characters and
    every other entity is dropped.

    :param ref: entity name without the leading "&" and trailing ";", e.g. "amp"
    """
    if self.allow_refs:
        self.result.append( "&%s;" % ref )
        return

    ascii_entities = {
        "amp": "&",
        "lt": "<",
        "gt": ">",
        "quot": '"',
    }
    replacement = ascii_entities.get( ref )

    # entities outside the table have no ascii equivalent; skip them so that None is never
    # stored among the collected text pieces
    if replacement is not None:
        self.result.append( replacement )
|
||||
|
||||
def handle_comment( self, comment ):
    """
    Ignore an HTML comment: its text is not added to the output.

    :param comment: text of the comment (unused)
    """
    return None
|
||||
|
|
|
@ -635,6 +635,24 @@ class Test_notebooks( Test_controller ):
|
|||
|
||||
assert len( notes ) == 0
|
||||
|
||||
def test_search_character_refs( self ):
    """
    Searching for text with a literal colon should find a note whose stored contents encode
    that colon as a numeric character reference.
    """
    self.login()

    # the colon is stored as "&#58;" so that the search has to match across a character reference
    note3 = Note( "55", u"<h3>foo&#58; bar</h3>baz" )
    self.notebook.add_note( note3 )

    search_text = "oo: b"

    result = self.http_post( "/notebooks/search/", dict(
        notebook_id = self.notebook.object_id,
        search_text = search_text,
    ), session_id = self.session_id )

    notes = result.get( "notes" )

    assert len( notes ) == 1
    assert notes[ 0 ].object_id == note3.object_id
|
||||
|
||||
def test_recent_notes( self ):
|
||||
self.login()
|
||||
|
||||
|
|
Reference in New Issue