diff --git a/model/Notebook.py b/model/Notebook.py
index 58b5a18..5b3839b 100644
--- a/model/Notebook.py
+++ b/model/Notebook.py
@@ -360,11 +360,15 @@ class Notebook( Persistent ):
     self.__name = name
     self.update_revision()
 
+  HTML_REFERENCE_PATTERN = re.compile( "&[a-zA-Z]+;|&#\d+;" )
   FRIENDLY_ID_STRIP_PATTERN = re.compile( "[^a-zA-Z0-9\-]+" )
 
   def __friendly_id( self ):
-    friendly_id = self.WHITESPACE_PATTERN.sub( u"-", self.__name.lower() )
-    return self.FRIENDLY_ID_STRIP_PATTERN.sub( u"", friendly_id )
+    # convert to lowercase, remove HTML character/entity refs, collapse whitespace to dashes, strip
+    # other punctuation. strip leading/trailing dashes
+    friendly_id = self.HTML_REFERENCE_PATTERN.sub( u" ", self.__name.lower() )
+    friendly_id = self.WHITESPACE_PATTERN.sub( u"-", friendly_id )
+    return self.FRIENDLY_ID_STRIP_PATTERN.sub( u"", friendly_id ).strip( "-" )
 
   def __set_read_write( self, read_write ):
     # The read_write member isn't actually saved to the database, so setting it doesn't need to
diff --git a/model/delta/1.5.8.sql b/model/delta/1.5.8.sql
new file mode 100644
index 0000000..a0de041
--- /dev/null
+++ b/model/delta/1.5.8.sql
@@ -0,0 +1,15 @@
+CREATE OR REPLACE FUNCTION friendly_id(text) RETURNS text
+    AS $_$select trim( both '-' from
+      regexp_replace(
+        regexp_replace(
+          regexp_replace(
+            lower( $1 ),
+            '&[a-zA-Z]+;|&#\\d+;', ' ', 'g'
+          ),
+          '\\s+', '-', 'g'
+        ),
+        '[^a-zA-Z0-9\\-]', '', 'g'
+      )
+    );$_$
+    LANGUAGE sql IMMUTABLE;
+reindex index notebook_friendly_id_index;
diff --git a/model/schema.sql b/model/schema.sql
index 2e7543a..8072ab9 100644
--- a/model/schema.sql
+++ b/model/schema.sql
@@ -26,7 +26,18 @@ create function log_note_revision() returns trigger as $_$
 $_$ language plpgsql;
 ALTER FUNCTION public.log_note_revision() OWNER TO luminotes;
 CREATE FUNCTION friendly_id(text) RETURNS text
-    AS $_$select regexp_replace( regexp_replace( lower( $1 ), '\\s+', '-', 'g' ), '[^a-zA-Z0-9\\-]', '', 'g' );$_$
+    AS $_$select trim( both '-' from
+      regexp_replace(
+        regexp_replace(
+          regexp_replace(
+            lower( $1 ),
+            '&[a-zA-Z]+;|&#\\d+;', ' ', 'g'
+          ),
+          '\\s+', '-', 'g'
+        ),
+        '[^a-zA-Z0-9\\-]', '', 'g'
+      )
+    );$_$
     LANGUAGE sql IMMUTABLE;
 ALTER FUNCTION public.friendly_id(text) OWNER TO luminotes;
 CREATE TABLE file (
diff --git a/model/test/Test_notebook.py b/model/test/Test_notebook.py
index 994160a..1cbb076 100644
--- a/model/test/Test_notebook.py
+++ b/model/test/Test_notebook.py
@@ -177,6 +177,14 @@ class Test_notebook( object ):
     self.notebook.name = u"This is Bob's notebook!"
     assert self.notebook.friendly_id == u"this-is-bobs-notebook"
 
+  def test_friendly_id_with_html_entity_reference( self ):
+    self.notebook.name = u"This is Bob's &quot;notebook&quot;!"
+    assert self.notebook.friendly_id == u"this-is-bobs-notebook"
+
+  def test_friendly_id_with_html_character_reference( self ):
+    self.notebook.name = u"This is Bob's &#165; notebook!"
+    assert self.notebook.friendly_id == u"this-is-bobs-notebook"
+
   def test_set_read_write( self ):
     original_revision = self.notebook.revision
     self.notebook.read_write = Notebook.READ_WRITE_FOR_OWN_NOTES