Browse Source

Fix a problem in the HTML differ that could hang the entire server when fed certain lengthy HTML strings. Unit tests included.

master
Dan Helfman 10 years ago
parent
commit
df8ffd538a
2 changed files with 16 additions and 3 deletions
  1. +3
    -3
      controller/Html_differ.py
  2. +13
    -0
      controller/test/Test_html_differ.py

+ 3
- 3
controller/Html_differ.py View File

@@ -82,9 +82,9 @@ class Html_differ( HTMLParser ):
( a, b ) = self.prepare_lists( a, b )
return self.diff_lists( a, b )

# Old versions of the tag patterns (the three lines removed by this hunk).
# The attribute group `(\s+[^>]*)*` nests one quantifier inside another; this
# is presumably the backtracking expression that made long attribute strings
# so slow to match.
SINGLE_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*\s*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
START_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*>" ) # e.g. '<i>' or '<a href="foo">'
END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'
# New versions of the tag patterns (the three lines added by this hunk).
# Group 1 still captures the tag name; everything after it is now consumed by
# a single `.*` instead of the nested attribute group.
# NOTE(review): `.*` is greedy, so on a string holding several tags it runs to
# the last '>' (or '/>') -- confirm each item passed in is a single tag.
SINGLE_TAG_PATTERN = re.compile( "<(\w+).*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
START_TAG_PATTERN = re.compile( "<(\w+).*>" ) # e.g. '<i>' or '<a href="foo">'
END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'

@staticmethod
def track_open_tags( item, open_tags ):


+ 13
- 0
controller/test/Test_html_differ.py View File

@@ -1,3 +1,4 @@
import time
from controller.Html_differ import Html_differ


@@ -228,6 +229,18 @@ class Test_html_differ( object ):
assert new_a == a
assert new_b == b

def test_prepare_lists_with_style_and_timing( self ):
  """
  Regression test: prepare_lists() must return promptly when one of the items
  is a start tag carrying a lengthy style attribute.
  """
  # An older version of the code took a really long time to parse certain
  # lengthy style strings due to a backtracking regular expression, so check
  # for that regression.
  a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
  b = [ 'foo ', 'bar ', 'baz ', '<span style="' + 'a: b' * 20 + '">' , 'quux' ]

  # Start the clock only once the inputs exist, so that the measured window
  # covers just the call under test. The return value is deliberately
  # ignored: this test is about elapsed time only.
  # self.differ is presumably an Html_differ created by the test setup.
  start_time = time.time()
  self.differ.prepare_lists( a, b )
  elapsed = time.time() - start_time

  assert elapsed < 0.5

def test_diff_lists_with_insert( self ):
a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
b = [ 'foo ', 'bar ', 'whee ', 'baz ', 'quux' ]


Loading…
Cancel
Save