Fix for a problem in the HTML differ where a backtracking regular expression could hang the entire server when fed certain lengthy HTML strings. Unit tests included.
This commit is contained in:
parent
c75b4d930d
commit
df8ffd538a
|
@ -82,9 +82,9 @@ class Html_differ( HTMLParser ):
|
||||||
( a, b ) = self.prepare_lists( a, b )
|
( a, b ) = self.prepare_lists( a, b )
|
||||||
return self.diff_lists( a, b )
|
return self.diff_lists( a, b )
|
||||||
|
|
||||||
SINGLE_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*\s*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
|
SINGLE_TAG_PATTERN = re.compile( "<(\w+).*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
|
||||||
START_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*>" ) # e.g. '<i>' or '<a href="foo">'
|
START_TAG_PATTERN = re.compile( "<(\w+).*>" ) # e.g. '<i>' or '<a href="foo">'
|
||||||
END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'
|
END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def track_open_tags( item, open_tags ):
|
def track_open_tags( item, open_tags ):
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import time
|
||||||
from controller.Html_differ import Html_differ
|
from controller.Html_differ import Html_differ
|
||||||
|
|
||||||
|
|
||||||
|
@ -228,6 +229,18 @@ class Test_html_differ( object ):
|
||||||
assert new_a == a
|
assert new_a == a
|
||||||
assert new_b == b
|
assert new_b == b
|
||||||
|
|
||||||
|
def test_prepare_lists_with_style_and_timing( self ):
|
||||||
|
# An older version of the code took a really long time to parse certain
|
||||||
|
# lengthy style strings due to a backtracking regular expression, so check
|
||||||
|
# for that regression.
|
||||||
|
start_time = time.time()
|
||||||
|
a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
|
||||||
|
b = [ 'foo ', 'bar ', 'baz ', '<span style="' + 'a: b' * 20 + '">' , 'quux' ]
|
||||||
|
|
||||||
|
result = self.differ.prepare_lists( a, b )
|
||||||
|
|
||||||
|
assert time.time() - start_time < 0.5
|
||||||
|
|
||||||
def test_diff_lists_with_insert( self ):
|
def test_diff_lists_with_insert( self ):
|
||||||
a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
|
a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
|
||||||
b = [ 'foo ', 'bar ', 'whee ', 'baz ', 'quux' ]
|
b = [ 'foo ', 'bar ', 'whee ', 'baz ', 'quux' ]
|
||||||
|
|
Reference in New Issue