Browse Source

HTML entity and character references like "&quot;" are now stripped from notebook names before turning them into friendly ids.

This means that blog posts now have better URLs if the post titles contain HTML entity or character references.
Dan Helfman 9 years ago
parent
commit
fc3849b8be
4 changed files with 41 additions and 3 deletions
  1. 6
    2
      model/Notebook.py
  2. 15
    0
      model/delta/1.5.8.sql
  3. 12
    1
      model/schema.sql
  4. 8
    0
      model/test/Test_notebook.py

+ 6
- 2
model/Notebook.py View File

@@ -360,11 +360,15 @@ class Notebook( Persistent ):
360 360
     self.__name = name
361 361
     self.update_revision()
362 362
 
363
+  HTML_REFERENCE_PATTERN = re.compile( "&[a-zA-Z]+;|&#\d+;" )
363 364
   FRIENDLY_ID_STRIP_PATTERN = re.compile( "[^a-zA-Z0-9\-]+" )
364 365
 
365 366
   def __friendly_id( self ):
366
-    friendly_id = self.WHITESPACE_PATTERN.sub( u"-", self.__name.lower() )
367
-    return self.FRIENDLY_ID_STRIP_PATTERN.sub( u"", friendly_id )
367
+    # convert to lowercase, remove HTML character/entity refs, collapse whitespace to dashes, strip
368
+    # other punctuation. strip leading/trailing dashes
369
+    friendly_id = self.HTML_REFERENCE_PATTERN.sub( u" ", self.__name.lower() )
370
+    friendly_id = self.WHITESPACE_PATTERN.sub( u"-", friendly_id )
371
+    return self.FRIENDLY_ID_STRIP_PATTERN.sub( u"", friendly_id ).strip( "-" )
368 372
 
369 373
   def __set_read_write( self, read_write ):
370 374
     # The read_write member isn't actually saved to the database, so setting it doesn't need to

+ 15
- 0
model/delta/1.5.8.sql View File

@@ -0,0 +1,15 @@
1
+CREATE OR REPLACE FUNCTION friendly_id(text) RETURNS text
2
+    AS $_$select trim( both '-' from
3
+      regexp_replace(
4
+        regexp_replace(
5
+          regexp_replace(
6
+            lower( $1 ),
7
+            '&[a-zA-Z]+;|&#\\d+;', ' ', 'g'
8
+          ),
9
+          '\\s+', '-', 'g'
10
+        ),
11
+        '[^a-zA-Z0-9\\-]', '', 'g'
12
+      )
13
+    );$_$
14
+    LANGUAGE sql IMMUTABLE;
15
+reindex index notebook_friendly_id_index;

+ 12
- 1
model/schema.sql View File

@@ -26,7 +26,18 @@ create function log_note_revision() returns trigger as $_$
26 26
   $_$ language plpgsql;
27 27
 ALTER FUNCTION public.log_note_revision() OWNER TO luminotes;
28 28
 CREATE FUNCTION friendly_id(text) RETURNS text
29
-    AS $_$select regexp_replace( regexp_replace( lower( $1 ), '\\s+', '-', 'g' ), '[^a-zA-Z0-9\\-]', '', 'g' );$_$
29
+    AS $_$select trim( both '-' from
30
+      regexp_replace(
31
+        regexp_replace(
32
+          regexp_replace(
33
+            lower( $1 ),
34
+            '&[a-zA-Z]+;|&#\\d+;', ' ', 'g'
35
+          ),
36
+          '\\s+', '-', 'g'
37
+        ),
38
+        '[^a-zA-Z0-9\\-]', '', 'g'
39
+      )
40
+    );$_$
30 41
     LANGUAGE sql IMMUTABLE;
31 42
 ALTER FUNCTION public.friendly_id(text) OWNER TO luminotes;
32 43
 CREATE TABLE file (

+ 8
- 0
model/test/Test_notebook.py View File

@@ -177,6 +177,14 @@ class Test_notebook( object ):
177 177
     self.notebook.name = u"This is Bob's  notebook!"
178 178
     assert self.notebook.friendly_id == u"this-is-bobs-notebook"
179 179
 
180
+  def test_friendly_id_with_html_entity_reference( self ):
181
+    self.notebook.name = u"This is Bob's &quot;notebook&quot;!"
182
+    assert self.notebook.friendly_id == u"this-is-bobs-notebook"
183
+
184
+  def test_friendly_id_with_html_character_reference( self ):
185
+    self.notebook.name = u"This is Bob's &#165; notebook!"
186
+    assert self.notebook.friendly_id == u"this-is-bobs-notebook"
187
+
180 188
   def test_set_read_write( self ):
181 189
     original_revision = self.notebook.revision
182 190
     self.notebook.read_write = Notebook.READ_WRITE_FOR_OWN_NOTES

Loading…
Cancel
Save