Revamped searching to use PostgreSQL's tsearch2 full-text searching.
This commit is contained in:
parent
4ca87af17c
commit
37e886f27c
3
INSTALL
3
INSTALL
|
@ -14,7 +14,8 @@ First, install the prerequisites:
|
|||
In Debian GNU/Linux, you can issue the following command to install these
|
||||
packages:
|
||||
|
||||
apt-get install python2.4 python-cherrypy postgresql-8.1 python-psycopg2 python-simplejson python-tz
|
||||
apt-get install python2.4 python-cherrypy postgresql-8.1 \
|
||||
postgresql-contrib-8.1 python-psycopg2 python-simplejson python-tz
|
||||
|
||||
|
||||
development mode
|
||||
|
|
|
@ -576,14 +576,15 @@ class Notebooks( object ):
|
|||
@grab_user_id
|
||||
@validate(
|
||||
notebook_id = Valid_id(),
|
||||
search_text = Valid_string( min = 0, max = 100 ),
|
||||
search_text = unicode,
|
||||
user_id = Valid_id( none_okay = True ),
|
||||
)
|
||||
def search( self, notebook_id, search_text, user_id ):
|
||||
"""
|
||||
Search the notes within a particular notebook for the given search text. Note that the search
|
||||
is case-insensitive, and all HTML tags are ignored. The matching notes are returned with title
|
||||
matches first, followed by all other matches.
|
||||
is case-insensitive, and all HTML tags are ignored. Notes with title matches are generally
|
||||
ranked higher than matches that are only in the note contents. The returned notes have their
|
||||
normal contents replaced with summary contents with the search terms highlighted.
|
||||
|
||||
@type notebook_id: unicode
|
||||
@param notebook_id: id of notebook to search
|
||||
|
@ -595,6 +596,7 @@ class Notebooks( object ):
|
|||
@return: { 'notes': [ matching notes ] }
|
||||
@raise Access_error: the current user doesn't have access to the given notebook
|
||||
@raise Validation_error: one of the arguments is invalid
|
||||
@raise Search_error: the provided search_text is invalid
|
||||
"""
|
||||
if not self.__users.check_access( user_id, notebook_id ):
|
||||
raise Access_error()
|
||||
|
@ -604,26 +606,17 @@ class Notebooks( object ):
|
|||
if not notebook:
|
||||
raise Access_error()
|
||||
|
||||
search_text = search_text.lower()
|
||||
if len( search_text ) == 0:
|
||||
return dict( notes = [] )
|
||||
MAX_SEARCH_TEXT_LENGTH = 256
|
||||
if len( search_text ) > MAX_SEARCH_TEXT_LENGTH:
|
||||
raise Validation_error( u"search_text", None, unicode, message = u"is too long" )
|
||||
|
||||
title_matches = []
|
||||
content_matches = []
|
||||
nuker = Html_nuker()
|
||||
if len( search_text ) == 0:
|
||||
raise Validation_error( u"search_text", None, unicode, message = u"is missing" )
|
||||
|
||||
notes = self.__database.select_many( Note, notebook.sql_search_notes( search_text ) )
|
||||
|
||||
# further narrow the search results by making sure notes still match after all HTML tags are
|
||||
# stripped out
|
||||
for note in notes:
|
||||
if search_text in nuker.nuke( note.title ).lower():
|
||||
title_matches.append( note )
|
||||
elif search_text in nuker.nuke( note.contents ).lower():
|
||||
content_matches.append( note )
|
||||
|
||||
return dict(
|
||||
notes = title_matches + content_matches,
|
||||
notes = notes,
|
||||
)
|
||||
|
||||
@expose( view = Json )
|
||||
|
|
|
@ -1475,7 +1475,23 @@ class Test_notebooks( Test_controller ):
|
|||
|
||||
notes = result.get( "notes" )
|
||||
|
||||
assert len( notes ) == 0
|
||||
assert result[ "error" ]
|
||||
assert u"missing" in result[ "error" ]
|
||||
|
||||
def test_long_search( self ):
|
||||
self.login()
|
||||
|
||||
search_text = "w" * 257
|
||||
|
||||
result = self.http_post( "/notebooks/search/", dict(
|
||||
notebook_id = self.notebook.object_id,
|
||||
search_text = search_text,
|
||||
), session_id = self.session_id )
|
||||
|
||||
notes = result.get( "notes" )
|
||||
|
||||
assert result[ "error" ]
|
||||
assert u"too long" in result[ "error" ]
|
||||
|
||||
def test_search_with_no_results( self ):
|
||||
self.login()
|
||||
|
@ -1512,20 +1528,6 @@ class Test_notebooks( Test_controller ):
|
|||
assert notes[ 0 ].object_id == note3.object_id
|
||||
assert notes[ 1 ].object_id == self.note.object_id
|
||||
|
||||
def test_search_html_tags( self ):
|
||||
self.login()
|
||||
|
||||
search_text = "h3"
|
||||
|
||||
result = self.http_post( "/notebooks/search/", dict(
|
||||
notebook_id = self.notebook.object_id,
|
||||
search_text = search_text,
|
||||
), session_id = self.session_id )
|
||||
|
||||
notes = result.get( "notes" )
|
||||
|
||||
assert len( notes ) == 0
|
||||
|
||||
def test_search_character_refs( self ):
|
||||
self.login()
|
||||
|
||||
|
|
|
@ -105,9 +105,9 @@ class Note( Persistent ):
|
|||
@staticmethod
|
||||
def sql_load( object_id, revision = None ):
|
||||
if revision:
|
||||
return "select * from note where id = %s and revision = %s;" % ( quote( object_id ), quote( revision ) )
|
||||
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note where id = %s and revision = %s;" % ( quote( object_id ), quote( revision ) )
|
||||
|
||||
return "select * from note_current where id = %s;" % quote( object_id )
|
||||
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where id = %s;" % quote( object_id )
|
||||
|
||||
@staticmethod
|
||||
def sql_id_exists( object_id, revision = None ):
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import re
|
||||
from copy import copy
|
||||
from Note import Note
|
||||
from Persistent import Persistent, quote
|
||||
|
@ -7,6 +8,10 @@ class Notebook( Persistent ):
|
|||
"""
|
||||
A collection of wiki notes.
|
||||
"""
|
||||
|
||||
WHITESPACE_PATTERN = re.compile( r"\s+" )
|
||||
SEARCH_OPERATORS = re.compile( r"[&|!()]" )
|
||||
|
||||
def __init__( self, object_id, revision = None, name = None, trash_id = None, read_write = True ):
|
||||
"""
|
||||
Create a new notebook with the given id and name.
|
||||
|
@ -78,19 +83,19 @@ class Notebook( Persistent ):
|
|||
"""
|
||||
Return a SQL string to load a list of all the notes within this notebook.
|
||||
"""
|
||||
return "select * from note_current where notebook_id = %s order by revision desc;" % quote( self.object_id )
|
||||
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s order by revision desc;" % quote( self.object_id )
|
||||
|
||||
def sql_load_non_startup_notes( self ):
|
||||
"""
|
||||
Return a SQL string to load a list of the non-startup notes within this notebook.
|
||||
"""
|
||||
return "select * from note_current where notebook_id = %s and startup = 'f' order by revision desc;" % quote( self.object_id )
|
||||
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and startup = 'f' order by revision desc;" % quote( self.object_id )
|
||||
|
||||
def sql_load_startup_notes( self ):
|
||||
"""
|
||||
Return a SQL string to load a list of the startup notes within this notebook.
|
||||
"""
|
||||
return "select * from note_current where notebook_id = %s and startup = 't' order by rank;" % quote( self.object_id )
|
||||
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and startup = 't' order by rank;" % quote( self.object_id )
|
||||
|
||||
def sql_load_recent_notes( self, start = 0, count = 10 ):
|
||||
"""
|
||||
|
@ -104,7 +109,9 @@ class Notebook( Persistent ):
|
|||
return \
|
||||
"""
|
||||
select
|
||||
note_current.*, note_creation.revision as creation
|
||||
note_current.id, note_current.revision, note_current.title, note_current.contents,
|
||||
note_current.notebook_id, note_current.startup, note_current.deleted_from_id,
|
||||
note_current.rank, note_creation.revision as creation
|
||||
from
|
||||
note_current,
|
||||
( select id, min( revision ) as revision from note where notebook_id = %s group by id ) as note_creation
|
||||
|
@ -120,7 +127,7 @@ class Notebook( Persistent ):
|
|||
@type note_id: unicode
|
||||
@param note_id: id of note to load
|
||||
"""
|
||||
return "select * from note_current where notebook_id = %s and id = %s;" % ( quote( self.object_id ), quote( note_id ) )
|
||||
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and id = %s;" % ( quote( self.object_id ), quote( note_id ) )
|
||||
|
||||
def sql_load_note_by_title( self, title ):
|
||||
"""
|
||||
|
@ -129,19 +136,34 @@ class Notebook( Persistent ):
|
|||
@type note_id: unicode
|
||||
@param note_id: title of note to load
|
||||
"""
|
||||
return "select * from note_current where notebook_id = %s and title = %s;" % ( quote( self.object_id ), quote( title ) )
|
||||
return "select id, revision, title, contents, notebook_id, startup, deleted_from_id, rank from note_current where notebook_id = %s and title = %s;" % ( quote( self.object_id ), quote( title ) )
|
||||
|
||||
def sql_search_notes( self, search_text ):
|
||||
"""
|
||||
Return a SQL string to search for notes whose contents contain the given search_text. This
|
||||
is a case-insensitive search.
|
||||
Return a SQL string to perform a full-text search for notes whose contents contain the given
|
||||
search_text. This is a case-insensitive search.
|
||||
|
||||
@type search_text: unicode
|
||||
@param search_text: text to search for within the notes
|
||||
"""
|
||||
# strip out all search operators
|
||||
search_text = self.SEARCH_OPERATORS.sub( u"", search_text ).strip()
|
||||
|
||||
# join all words with boolean "and" operator
|
||||
search_text = u"&".join( self.WHITESPACE_PATTERN.split( search_text ) )
|
||||
|
||||
print search_text
|
||||
return \
|
||||
"select * from note_current where notebook_id = %s and contents ilike %s;" % \
|
||||
( quote( self.object_id ), quote( "%" + search_text + "%" ) )
|
||||
"""
|
||||
select id, revision, title, headline( drop_html_tags( contents ), query ), notebook_id, startup, deleted_from_id from (
|
||||
select
|
||||
id, revision, title, contents, notebook_id, startup, deleted_from_id, query, rank_cd( search, query ) as rank
|
||||
from
|
||||
note_current, to_tsquery( 'default', %s ) query
|
||||
where
|
||||
notebook_id = %s and query @@ search order by rank desc limit 20
|
||||
) as sub;
|
||||
""" % ( quote( search_text ), quote( self.object_id ) )
|
||||
|
||||
def sql_highest_rank( self ):
|
||||
"""
|
||||
|
|
|
@ -989,25 +989,6 @@ Wiki.prototype.display_search_results = function ( result ) {
|
|||
return;
|
||||
}
|
||||
|
||||
// TODO: highlight the search term within the search results, idealy showing
|
||||
// a section of the note contents including the search term
|
||||
|
||||
// if there's only one search result, automatically feel lucky^Wfortunate
|
||||
if ( result.notes.length == 1 ) {
|
||||
var note = result.notes[ 0 ]
|
||||
|
||||
// if the editor is already open, highlight it and bail
|
||||
var iframe = getElement( "note_" + note.object_id );
|
||||
if ( iframe ) {
|
||||
iframe.editor.highlight();
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise, create an editor for the one note
|
||||
this.create_editor( note.object_id, note.contents, note.deleted_from_id, note.revision, undefined, this.notebook.read_write, true, true );
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise, there are multiple search results, so create a "magic" search results note. but
|
||||
// first close any open search results notes
|
||||
if ( this.search_results_editor )
|
||||
|
@ -1018,26 +999,24 @@ Wiki.prototype.display_search_results = function ( result ) {
|
|||
var note = result.notes[ i ]
|
||||
if ( !note.title ) continue;
|
||||
|
||||
var contents_node = createDOM( "span", {} );
|
||||
contents_node.innerHTML = note.contents;
|
||||
contents = strip( scrapeText( contents_node ) );
|
||||
|
||||
// remove the title from the scraped contents text
|
||||
if ( contents.indexOf( note.title ) == 0 )
|
||||
contents = contents.substr( note.title.length );
|
||||
|
||||
if ( contents.length == 0 ) {
|
||||
if ( note.contents.length == 0 ) {
|
||||
var preview = "empty note";
|
||||
} else {
|
||||
var max_preview_length = 160;
|
||||
var preview = contents.substr( 0, max_preview_length ) + ( ( contents.length > max_preview_length ) ? "..." : "" );
|
||||
var preview = note.contents;
|
||||
|
||||
// if the preview appears not to end with a complete sentence, add "..."
|
||||
if ( !/[?!.]\s*$/.test( preview ) )
|
||||
preview = preview + " <b>...</b>";
|
||||
}
|
||||
|
||||
var preview_span = createDOM( "span" );
|
||||
preview_span.innerHTML = preview;
|
||||
|
||||
appendChildNodes( list,
|
||||
createDOM( "p", {},
|
||||
createDOM( "a", { "href": "/notebooks/" + this.notebook_id + "?note_id=" + note.object_id }, note.title ),
|
||||
createDOM( "br" ),
|
||||
createDOM( "span", {}, preview )
|
||||
preview_span
|
||||
)
|
||||
);
|
||||
}
|
||||
|
|
|
@ -8,6 +8,6 @@ class Search_form( Form ):
|
|||
Form.__init__(
|
||||
self,
|
||||
Strong( u"search: " ),
|
||||
Input( type = u"text", name = u"search_text", id = u"search_text", size = 30, maxlength = 100 ),
|
||||
Input( type = u"text", name = u"search_text", id = u"search_text", size = 30, maxlength = 512 ),
|
||||
id = u"search_form",
|
||||
)
|
||||
|
|
Reference in New Issue