Html_nuker now converts some character/entity references to their ASCII equivalents,
which allows searching for text that contains non-alphanumeric characters encoded as character/entity references.
This commit is contained in:
parent
6bb233f8da
commit
24a7205d3a
|
@ -4,7 +4,7 @@ from formatter import AbstractFormatter, NullWriter
|
||||||
|
|
||||||
class Html_nuker( HTMLParser ):
|
class Html_nuker( HTMLParser ):
|
||||||
"""
|
"""
|
||||||
Nukes HTML of all tags.
|
Nukes HTML of all tags, and optionally all entity/character references.
|
||||||
"""
|
"""
|
||||||
def __init__( self, allow_refs = False ):
|
def __init__( self, allow_refs = False ):
|
||||||
HTMLParser.__init__( self, AbstractFormatter( NullWriter() ) )
|
HTMLParser.__init__( self, AbstractFormatter( NullWriter() ) )
|
||||||
|
@ -16,12 +16,23 @@ class Html_nuker( HTMLParser ):
|
||||||
self.result.append( data )
|
self.result.append( data )
|
||||||
|
|
||||||
def handle_charref( self, ref ):
|
def handle_charref( self, ref ):
|
||||||
|
ref = int( ref )
|
||||||
if self.allow_refs:
|
if self.allow_refs:
|
||||||
self.result.append( "&#%s;" % ref )
|
self.result.append( "&#%s;" % ref )
|
||||||
|
# convert ascii references to their character equivalents
|
||||||
|
elif ref >= 32 and ref < 128:
|
||||||
|
self.result.append( chr( ref ) )
|
||||||
|
|
||||||
def handle_entityref( self, ref ):
|
def handle_entityref( self, ref ):
|
||||||
if self.allow_refs:
|
if self.allow_refs:
|
||||||
self.result.append( "&%s;" % ref )
|
self.result.append( "&%s;" % ref )
|
||||||
|
else:
|
||||||
|
self.result.append( {
|
||||||
|
"amp": "&",
|
||||||
|
"lt": "<",
|
||||||
|
"gt": ">",
|
||||||
|
"quot": '"',
|
||||||
|
}.get ( ref ) )
|
||||||
|
|
||||||
def handle_comment( self, comment ):
|
def handle_comment( self, comment ):
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -635,6 +635,24 @@ class Test_notebooks( Test_controller ):
|
||||||
|
|
||||||
assert len( notes ) == 0
|
assert len( notes ) == 0
|
||||||
|
|
||||||
|
def test_search_character_refs( self ):
|
||||||
|
self.login()
|
||||||
|
|
||||||
|
note3 = Note( "55", u"<h3>foo: bar</h3>baz" )
|
||||||
|
self.notebook.add_note( note3 )
|
||||||
|
|
||||||
|
search_text = "oo: b"
|
||||||
|
|
||||||
|
result = self.http_post( "/notebooks/search/", dict(
|
||||||
|
notebook_id = self.notebook.object_id,
|
||||||
|
search_text = search_text,
|
||||||
|
), session_id = self.session_id )
|
||||||
|
|
||||||
|
notes = result.get( "notes" )
|
||||||
|
|
||||||
|
assert len( notes ) == 1
|
||||||
|
assert notes[ 0 ].object_id == note3.object_id
|
||||||
|
|
||||||
def test_recent_notes( self ):
|
def test_recent_notes( self ):
|
||||||
self.login()
|
self.login()
|
||||||
|
|
||||||
|
|
Reference in New Issue