witten
/
luminotes
Archived
1
0
Fork 0

Html_nuker now converts some character/entity refs to their ascii equivalents,

which allows searching for things that contain non-alphanumeric characters
encoded as char/entity refs.
This commit is contained in:
Dan Helfman 2007-07-19 19:03:40 +00:00
parent 6bb233f8da
commit 24a7205d3a
2 changed files with 30 additions and 1 deletions

View File

@ -4,7 +4,7 @@ from formatter import AbstractFormatter, NullWriter
class Html_nuker( HTMLParser ):
"""
Nukes HTML of all tags.
Nukes HTML of all tags, and optionally all entity/character references.
"""
def __init__( self, allow_refs = False ):
HTMLParser.__init__( self, AbstractFormatter( NullWriter() ) )
@ -16,12 +16,23 @@ class Html_nuker( HTMLParser ):
self.result.append( data )
def handle_charref( self, ref ):
ref = int( ref )
if self.allow_refs:
self.result.append( "&#%s;" % ref )
# convert ascii references to their character equivalents
elif ref >= 32 and ref < 128:
self.result.append( chr( ref ) )
def handle_entityref( self, ref ):
if self.allow_refs:
self.result.append( "&%s;" % ref )
else:
self.result.append( {
"amp": "&",
"lt": "<",
"gt": ">",
"quot": '"',
}.get ( ref ) )
def handle_comment( self, comment ):
pass

View File

@ -635,6 +635,24 @@ class Test_notebooks( Test_controller ):
assert len( notes ) == 0
def test_search_character_refs( self ):
    """
    Search for text containing a literal colon and expect it to find a note whose
    contents encode that colon as a numeric character reference.
    """
    self.login()

    # the colon is written as the character reference &#58; rather than a literal ":".
    # with a literal colon the note would contain no reference at all, and the test
    # would pass without exercising any reference handling
    note3 = Note( "55", u"<h3>foo&#58; bar</h3>baz" )
    self.notebook.add_note( note3 )

    # the search text uses the plain ascii character that the reference stands for
    search_text = "oo: b"

    result = self.http_post( "/notebooks/search/", dict(
        notebook_id = self.notebook.object_id,
        search_text = search_text,
    ), session_id = self.session_id )

    notes = result.get( "notes" )

    # exactly the one note containing the encoded colon should be found
    assert len( notes ) == 1
    assert notes[ 0 ].object_id == note3.object_id
def test_recent_notes( self ):
self.login()