Browse Source

Fix for a problem with the HTML differ that would hang the entire server when fed certain lengthy HTML strings. Unit tests included.

Dan Helfman 9 years ago
parent
commit
df8ffd538a
2 changed files with 16 additions and 3 deletions
  1. 3
    3
      controller/Html_differ.py
  2. 13
    0
      controller/test/Test_html_differ.py

+ 3
- 3
controller/Html_differ.py View File

@@ -82,9 +82,9 @@ class Html_differ( HTMLParser ):
82 82
     ( a, b ) = self.prepare_lists( a, b )
83 83
     return self.diff_lists( a, b )
84 84
 
85
-  SINGLE_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*\s*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
86
-  START_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*>" )      # e.g. '<i>' or '<a href="foo">'
87
-  END_TAG_PATTERN = re.compile( "</(\w+)>" )                  # e.g. '</i>' or '</a>'
85
+  SINGLE_TAG_PATTERN = re.compile( "<(\w+).*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
86
+  START_TAG_PATTERN = re.compile( "<(\w+).*>" )   # e.g. '<i>' or '<a href="foo">'
87
+  END_TAG_PATTERN = re.compile( "</(\w+)>" )      # e.g. '</i>' or '</a>'
88 88
 
89 89
   @staticmethod
90 90
   def track_open_tags( item, open_tags ):

+ 13
- 0
controller/test/Test_html_differ.py View File

@@ -1,3 +1,4 @@
1
+import time
1 2
 from controller.Html_differ import Html_differ
2 3
 
3 4
 
@@ -228,6 +229,18 @@ class Test_html_differ( object ):
228 229
     assert new_a == a
229 230
     assert new_b == b
230 231
 
232
+  def test_prepare_lists_with_style_and_timing( self ):
233
+    # An older version of the code took a really long time to parse certain
234
+    # lengthy style strings due to a backtracking regular expression, so check
235
+    # for that regression.
236
+    start_time = time.time()
237
+    a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
238
+    b = [ 'foo ', 'bar ', 'baz ', '<span style="' + 'a: b' * 20 + '">' , 'quux' ]
239
+
240
+    result = self.differ.prepare_lists( a, b )
241
+
242
+    assert time.time() - start_time < 0.5
243
+
231 244
   def test_diff_lists_with_insert( self ):
232 245
     a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
233 246
     b = [ 'foo ', 'bar ', 'whee ', 'baz ', 'quux' ]

Loading…
Cancel
Save