Revamped searching to use PostgreSQL's tsearch2 full-text searching.
This commit is contained in:
parent
4ca87af17c
commit
37e886f27c
3
INSTALL
3
INSTALL
|
@ -14,7 +14,8 @@ First, install the prerequisites:
|
||||||
In Debian GNU/Linux, you can issue the following command to install these
|
In Debian GNU/Linux, you can issue the following command to install these
|
||||||
packages:
|
packages:
|
||||||
|
|
||||||
apt-get install python2.4 python-cherrypy postgresql-8.1 python-psycopg2 python-simplejson python-tz
|
apt-get install python2.4 python-cherrypy postgresql-8.1 \
|
||||||
|
postgresql-contrib-8.1 python-psycopg2 python-simplejson python-tz
|
||||||
|
|
||||||
|
|
||||||
development mode
|
development mode
|
||||||
|
|
|
@ -576,14 +576,15 @@ class Notebooks( object ):
|
||||||
@grab_user_id
|
@grab_user_id
|
||||||
@validate(
|
@validate(
|
||||||
notebook_id = Valid_id(),
|
notebook_id = Valid_id(),
|
||||||
search_text = Valid_string( min = 0, max = 100 ),
|
search_text = unicode,
|
||||||
user_id = Valid_id( none_okay = True ),
|
user_id = Valid_id( none_okay = True ),
|
||||||
)
|
)
|
||||||
def search( self, notebook_id, search_text, user_id ):
|
def search( self, notebook_id, search_text, user_id ):
|
||||||
"""
|
"""
|
||||||
Search the notes within a particular notebook for the given search text. Note that the search
|
Search the notes within a particular notebook for the given search text. Note that the search
|
||||||
is case-insensitive, and all HTML tags are ignored. The matching notes are returned with title
|
is case-insensitive, and all HTML tags are ignored. Notes with title matches are generally
|
||||||
matches first, followed by all other matches.
|
ranked higher than matches that are only in the note contents. The returned notes have their
|
||||||
|
normal contents replaced with summary contents with the search terms highlighted.
|
||||||
|
|
||||||
@type notebook_id: unicode
|
@type notebook_id: unicode
|
||||||
@param notebook_id: id of notebook to search
|
@param notebook_id: id of notebook to search
|
||||||
|
@ -595,6 +596,7 @@ class Notebooks( object ):
|
||||||
@return: { 'notes': [ matching notes ] }
|
@return: { 'notes': [ matching notes ] }
|
||||||
@raise Access_error: the current user doesn't have access to the given notebook
|
@raise Access_error: the current user doesn't have access to the given notebook
|
||||||
@raise Validation_error: one of the arguments is invalid
|
@raise Validation_error: one of the arguments is invalid
|
||||||
|
@raise Search_error: the provided search_text is invalid
|
||||||
"""
|
"""
|
||||||
if not self.__users.check_access( user_id, notebook_id ):
|
if not self.__users.check_access( user_id, notebook_id ):
|
||||||
raise Access_error()
|
raise Access_error()
|
||||||
|
@ -604,26 +606,17 @@ class Notebooks( object ):
|
||||||
if not notebook:
|
if not notebook:
|
||||||
raise Access_error()
|
raise Access_error()
|
||||||
|
|
||||||
search_text = search_text.lower()
|
MAX_SEARCH_TEXT_LENGTH = 256
|
||||||
if len( search_text ) == 0:
|
if len( search_text ) > MAX_SEARCH_TEXT_LENGTH:
|
||||||
return dict( notes = [] )
|
raise Validation_error( u"search_text", None, unicode, message = u"is too long" )
|
||||||
|
|
||||||
title_matches = []
|
if len( search_text ) == 0:
|
||||||
content_matches = []
|
raise Validation_error( u"search_text", None, unicode, message = u"is missing" )
|
||||||
nuker = Html_nuker()
|
|
||||||
|
|
||||||
notes = self.__database.select_many( Note, notebook.sql_search_notes( search_text ) )
|
notes = self.__database.select_many( Note, notebook.sql_search_notes( search_text ) )
|
||||||
|
|
||||||
# further narrow the search results by making sure notes still match after all HTML tags are
|
|
||||||
# stripped out
|
|
||||||
for note in notes:
|
|
||||||
if search_text in nuker.nuke( note.title ).lower():
|
|
||||||
title_matches.append( note )
|
|
||||||
elif search_text in nuker.nuke( note.contents ).lower():
|
|
||||||
content_matches.append( note )
|
|
||||||
|
|
||||||
return dict(
|
return dict(
|
||||||
notes = title_matches + content_matches,
|
notes = notes,
|
||||||
)
|
)
|
||||||
|
|
||||||
@expose( view = Json )
|
@expose( view = Json )
|
||||||
|
|
|
@ -1475,7 +1475,23 @@ class Test_notebooks( Test_controller ):
|
||||||
|
|
||||||
notes = result.get( "notes" )
|
notes = result.get( "notes" )
|
||||||
|
|
||||||
assert len( notes ) == 0
|
assert result[ "error" ]
|
||||||
|
assert u"missing" in result[ "error" ]
|
||||||
|
|
||||||
|
def test_long_search( self ):
|
||||||
|
self.login()
|
||||||
|
|
||||||
|
search_text = "w" * 257
|
||||||
|
|
||||||
|
result = self.http_post( "/notebooks/search/", dict(
|
||||||
|
notebook_id = self.notebook.object_id,
|
||||||
|
search_text = search_text,
|
||||||
|
), session_id = self.session_id )
|
||||||
|
|
||||||
|
notes = result.get( "notes" )
|
||||||
|
|
||||||
|
assert result[ "error" ]
|
||||||
|
assert u"too long" in result[ "error" ]
|
||||||
|
|
||||||
def test_search_with_no_results( self ):
|
def test_search_with_no_results( self ):
|
||||||
self.login()
|
self.login()
|
||||||
|
@ -1512,20 +1528,6 @@ class Test_notebooks( Test_controller ):
|
||||||
assert notes[ 0 ].object_id == note3.object_id
|
assert notes[ 0 ].object_id == note3.object_id
|
||||||
assert notes[ 1 ].object_id == self.note.object_id
|
assert notes[ 1 ].object_id == self.note.object_id
|
||||||
|
|
||||||
def test_search_html_tags( self ):
|
|
||||||
self.login()
|
|
||||||
|
|
||||||
search_text = "h3"
|
|
||||||
|
|
||||||
result = self.http_post( "/notebooks/search/", dict(
|
|
||||||
notebook_id = self.notebook.object_id,
|
|
||||||
search_text = search_text,
|
|
||||||
), session_id = self.session_id )
|
|
||||||
|
|
||||||
notes = result.get( "notes" )
|
|
||||||
|
|
||||||
assert len( notes ) == 0
|
|
||||||
|
|
||||||
def test_search_character_refs( self ):
|
def test_search_character_refs( self ):
|
||||||
self.login()
|
self.login()
|
||||||
|
|
||||||
|
|
|
@ -105,9 +105,9 @@ class Note( Persistent ):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sql_load( object_id, revision = None ):
|
def sql_load( object_id, revision = None ):
|
||||||
if revision:
|
if revision:
|
||||||
return "select * from note where id = %s and revision = %s;" % ( quote( object_id ), quote( revision ) )
|
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note where id = %s and revision = %s;" % ( quote( object_id ), quote( revision ) )
|
||||||
|
|
||||||
return "select * from note_current where id = %s;" % quote( object_id )
|
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where id = %s;" % quote( object_id )
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sql_id_exists( object_id, revision = None ):
|
def sql_id_exists( object_id, revision = None ):
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import re
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from Note import Note
|
from Note import Note
|
||||||
from Persistent import Persistent, quote
|
from Persistent import Persistent, quote
|
||||||
|
@ -7,6 +8,10 @@ class Notebook( Persistent ):
|
||||||
"""
|
"""
|
||||||
A collection of wiki notes.
|
A collection of wiki notes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
WHITESPACE_PATTERN = re.compile( r"\s+" )
|
||||||
|
SEARCH_OPERATORS = re.compile( r"[&|!()]" )
|
||||||
|
|
||||||
def __init__( self, object_id, revision = None, name = None, trash_id = None, read_write = True ):
|
def __init__( self, object_id, revision = None, name = None, trash_id = None, read_write = True ):
|
||||||
"""
|
"""
|
||||||
Create a new notebook with the given id and name.
|
Create a new notebook with the given id and name.
|
||||||
|
@ -78,19 +83,19 @@ class Notebook( Persistent ):
|
||||||
"""
|
"""
|
||||||
Return a SQL string to load a list of all the notes within this notebook.
|
Return a SQL string to load a list of all the notes within this notebook.
|
||||||
"""
|
"""
|
||||||
return "select * from note_current where notebook_id = %s order by revision desc;" % quote( self.object_id )
|
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s order by revision desc;" % quote( self.object_id )
|
||||||
|
|
||||||
def sql_load_non_startup_notes( self ):
|
def sql_load_non_startup_notes( self ):
|
||||||
"""
|
"""
|
||||||
Return a SQL string to load a list of the non-startup notes within this notebook.
|
Return a SQL string to load a list of the non-startup notes within this notebook.
|
||||||
"""
|
"""
|
||||||
return "select * from note_current where notebook_id = %s and startup = 'f' order by revision desc;" % quote( self.object_id )
|
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and startup = 'f' order by revision desc;" % quote( self.object_id )
|
||||||
|
|
||||||
def sql_load_startup_notes( self ):
|
def sql_load_startup_notes( self ):
|
||||||
"""
|
"""
|
||||||
Return a SQL string to load a list of the startup notes within this notebook.
|
Return a SQL string to load a list of the startup notes within this notebook.
|
||||||
"""
|
"""
|
||||||
return "select * from note_current where notebook_id = %s and startup = 't' order by rank;" % quote( self.object_id )
|
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and startup = 't' order by rank;" % quote( self.object_id )
|
||||||
|
|
||||||
def sql_load_recent_notes( self, start = 0, count = 10 ):
|
def sql_load_recent_notes( self, start = 0, count = 10 ):
|
||||||
"""
|
"""
|
||||||
|
@ -104,7 +109,9 @@ class Notebook( Persistent ):
|
||||||
return \
|
return \
|
||||||
"""
|
"""
|
||||||
select
|
select
|
||||||
note_current.*, note_creation.revision as creation
|
note_current.id, note_current.revision, note_current.title, note_current.contents,
|
||||||
|
note_current.notebook_id, note_current.startup, note_current.deleted_from_id,
|
||||||
|
note_current.rank, note_creation.revision as creation
|
||||||
from
|
from
|
||||||
note_current,
|
note_current,
|
||||||
( select id, min( revision ) as revision from note where notebook_id = %s group by id ) as note_creation
|
( select id, min( revision ) as revision from note where notebook_id = %s group by id ) as note_creation
|
||||||
|
@ -120,7 +127,7 @@ class Notebook( Persistent ):
|
||||||
@type note_id: unicode
|
@type note_id: unicode
|
||||||
@param note_id: id of note to load
|
@param note_id: id of note to load
|
||||||
"""
|
"""
|
||||||
return "select * from note_current where notebook_id = %s and id = %s;" % ( quote( self.object_id ), quote( note_id ) )
|
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and id = %s;" % ( quote( self.object_id ), quote( note_id ) )
|
||||||
|
|
||||||
def sql_load_note_by_title( self, title ):
|
def sql_load_note_by_title( self, title ):
|
||||||
"""
|
"""
|
||||||
|
@ -129,19 +136,34 @@ class Notebook( Persistent ):
|
||||||
@type note_id: unicode
|
@type note_id: unicode
|
||||||
@param note_id: title of note to load
|
@param note_id: title of note to load
|
||||||
"""
|
"""
|
||||||
return "select * from note_current where notebook_id = %s and title = %s;" % ( quote( self.object_id ), quote( title ) )
|
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and title = %s;" % ( quote( self.object_id ), quote( title ) )
|
||||||
|
|
||||||
def sql_search_notes( self, search_text ):
|
def sql_search_notes( self, search_text ):
|
||||||
"""
|
"""
|
||||||
Return a SQL string to search for notes whose contents contain the given search_text. This
|
Return a SQL string to perform a full-text search for notes whose contents contain the given
|
||||||
is a case-insensitive search.
|
search_text. This is a case-insensitive search.
|
||||||
|
|
||||||
@type search_text: unicode
|
@type search_text: unicode
|
||||||
@param search_text: text to search for within the notes
|
@param search_text: text to search for within the notes
|
||||||
"""
|
"""
|
||||||
|
# strip out all search operators
|
||||||
|
search_text = self.SEARCH_OPERATORS.sub( u"", search_text ).strip()
|
||||||
|
|
||||||
|
# join all words with boolean "and" operator
|
||||||
|
search_text = u"&".join( self.WHITESPACE_PATTERN.split( search_text ) )
|
||||||
|
|
||||||
|
print search_text
|
||||||
return \
|
return \
|
||||||
"select * from note_current where notebook_id = %s and contents ilike %s;" % \
|
"""
|
||||||
( quote( self.object_id ), quote( "%" + search_text + "%" ) )
|
select id, revision, title, headline( drop_html_tags( contents ), query ), notebook_id, startup, deleted_from_id from (
|
||||||
|
select
|
||||||
|
id, revision, title, contents, notebook_id, startup, deleted_from_id, query, rank_cd( search, query ) as rank
|
||||||
|
from
|
||||||
|
note_current, to_tsquery( 'default', %s ) query
|
||||||
|
where
|
||||||
|
notebook_id = %s and query @@ search order by rank desc limit 20
|
||||||
|
) as sub;
|
||||||
|
""" % ( quote( search_text ), quote( self.object_id ) )
|
||||||
|
|
||||||
def sql_highest_rank( self ):
|
def sql_highest_rank( self ):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -989,25 +989,6 @@ Wiki.prototype.display_search_results = function ( result ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: highlight the search term within the search results, idealy showing
|
|
||||||
// a section of the note contents including the search term
|
|
||||||
|
|
||||||
// if there's only one search result, automatically feel lucky^Wfortunate
|
|
||||||
if ( result.notes.length == 1 ) {
|
|
||||||
var note = result.notes[ 0 ]
|
|
||||||
|
|
||||||
// if the editor is already open, highlight it and bail
|
|
||||||
var iframe = getElement( "note_" + note.object_id );
|
|
||||||
if ( iframe ) {
|
|
||||||
iframe.editor.highlight();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// otherwise, create an editor for the one note
|
|
||||||
this.create_editor( note.object_id, note.contents, note.deleted_from_id, note.revision, undefined, this.notebook.read_write, true, true );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// otherwise, there are multiple search results, so create a "magic" search results note. but
|
// otherwise, there are multiple search results, so create a "magic" search results note. but
|
||||||
// first close any open search results notes
|
// first close any open search results notes
|
||||||
if ( this.search_results_editor )
|
if ( this.search_results_editor )
|
||||||
|
@ -1018,26 +999,24 @@ Wiki.prototype.display_search_results = function ( result ) {
|
||||||
var note = result.notes[ i ]
|
var note = result.notes[ i ]
|
||||||
if ( !note.title ) continue;
|
if ( !note.title ) continue;
|
||||||
|
|
||||||
var contents_node = createDOM( "span", {} );
|
if ( note.contents.length == 0 ) {
|
||||||
contents_node.innerHTML = note.contents;
|
|
||||||
contents = strip( scrapeText( contents_node ) );
|
|
||||||
|
|
||||||
// remove the title from the scraped contents text
|
|
||||||
if ( contents.indexOf( note.title ) == 0 )
|
|
||||||
contents = contents.substr( note.title.length );
|
|
||||||
|
|
||||||
if ( contents.length == 0 ) {
|
|
||||||
var preview = "empty note";
|
var preview = "empty note";
|
||||||
} else {
|
} else {
|
||||||
var max_preview_length = 160;
|
var preview = note.contents;
|
||||||
var preview = contents.substr( 0, max_preview_length ) + ( ( contents.length > max_preview_length ) ? "..." : "" );
|
|
||||||
|
// if the preview appears not to end with a complete sentence, add "..."
|
||||||
|
if ( !/[?!.]\s*$/.test( preview ) )
|
||||||
|
preview = preview + " <b>...</b>";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var preview_span = createDOM( "span" );
|
||||||
|
preview_span.innerHTML = preview;
|
||||||
|
|
||||||
appendChildNodes( list,
|
appendChildNodes( list,
|
||||||
createDOM( "p", {},
|
createDOM( "p", {},
|
||||||
createDOM( "a", { "href": "/notebooks/" + this.notebook_id + "?note_id=" + note.object_id }, note.title ),
|
createDOM( "a", { "href": "/notebooks/" + this.notebook_id + "?note_id=" + note.object_id }, note.title ),
|
||||||
createDOM( "br" ),
|
createDOM( "br" ),
|
||||||
createDOM( "span", {}, preview )
|
preview_span
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,6 @@ class Search_form( Form ):
|
||||||
Form.__init__(
|
Form.__init__(
|
||||||
self,
|
self,
|
||||||
Strong( u"search: " ),
|
Strong( u"search: " ),
|
||||||
Input( type = u"text", name = u"search_text", id = u"search_text", size = 30, maxlength = 100 ),
|
Input( type = u"text", name = u"search_text", id = u"search_text", size = 30, maxlength = 512 ),
|
||||||
id = u"search_form",
|
id = u"search_form",
|
||||||
)
|
)
|
||||||
|
|
Reference in New Issue