Fix for problem with HTML differ that would actually hang the entire server when fed certain lengthy HTML strings. Unit tests included.
This commit is contained in:
parent
c75b4d930d
commit
df8ffd538a
|
@ -82,9 +82,9 @@ class Html_differ( HTMLParser ):
|
|||
( a, b ) = self.prepare_lists( a, b )
|
||||
return self.diff_lists( a, b )
|
||||
|
||||
# Earlier definitions of the three tag patterns. The attribute part is a repeated group
# that itself contains an unbounded repeat, "(\s+[^>]*)*", so a failed match on a long
# attribute string can be retried in a great many ways (the regression test's comment
# attributes the slow parsing to a backtracking regular expression). The same three names
# are assigned again further down with simpler patterns.
SINGLE_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*\s*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
|
||||
START_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*>" ) # e.g. '<i>' or '<a href="foo">'
|
||||
END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'
|
||||
# Patterns used to recognize a single HTML tag string; group 1 is the tag name.
#
# Raw string literals are used so that regex escapes such as \w reach the re module
# unchanged instead of relying on Python passing unknown string escapes through (which
# newer interpreters warn about). The compiled patterns are identical to the non-raw form.
#
# The attribute part is a single ".*" rather than a repeated group containing another
# repeat, so a failed match backtracks one character at a time instead of exploring a
# combinatorial number of ways to split a long attribute string.
#
# NOTE(review): START_TAG_PATTERN also matches self-closing tags such as '<br/>', so
# callers presumably need to test SINGLE_TAG_PATTERN first -- confirm against the call sites.
SINGLE_TAG_PATTERN = re.compile( r"<(\w+).*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
START_TAG_PATTERN = re.compile( r"<(\w+).*>" ) # e.g. '<i>' or '<a href="foo">'
END_TAG_PATTERN = re.compile( r"</(\w+)>" ) # e.g. '</i>' or '</a>'
|
||||
|
||||
@staticmethod
|
||||
def track_open_tags( item, open_tags ):
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import time
|
||||
from controller.Html_differ import Html_differ
|
||||
|
||||
|
||||
|
@ -228,6 +229,18 @@ class Test_html_differ( object ):
|
|||
assert new_a == a
|
||||
assert new_b == b
|
||||
|
||||
def test_prepare_lists_with_style_and_timing( self ):
    """
    Regression check: prepare_lists() must return promptly when one of the
    lists contains a tag with a lengthy style attribute.

    An older version of the code reportedly took a really long time on such
    strings because of a backtracking regular expression. The clock is
    started before the inputs are built, and only the elapsed wall-clock time
    is asserted; the value returned by prepare_lists() is not inspected.
    """
    began = time.time()

    long_style_tag = '<span style="' + 'a: b' * 20 + '">'
    plain_words = [ 'foo ', 'bar ', 'baz ', 'quux' ]
    words_with_tag = [ 'foo ', 'bar ', 'baz ', long_style_tag, 'quux' ]

    self.differ.prepare_lists( plain_words, words_with_tag )

    elapsed = time.time() - began
    assert elapsed < 0.5
|
||||
|
||||
def test_diff_lists_with_insert( self ):
|
||||
a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
|
||||
b = [ 'foo ', 'bar ', 'whee ', 'baz ', 'quux' ]
|
||||
|
|
Reference in New Issue