witten
/
luminotes
Archived
1
0
Fork 0

Fix for problem with HTML differ that would actually hang the entire server when fed certain lengthy HTML strings. Unit tests included.

This commit is contained in:
Dan Helfman 2009-10-20 20:49:06 -07:00
parent c75b4d930d
commit df8ffd538a
2 changed files with 16 additions and 3 deletions

View File

@@ -82,9 +82,9 @@ class Html_differ( HTMLParser ):
( a, b ) = self.prepare_lists( a, b )
return self.diff_lists( a, b )
# Tag-matching patterns as they appear first in this hunk. NOTE(review): these
# look like the pre-fix versions; the nested quantifier "(\s+[^>]*)*" lets the
# regex engine split a long attribute string in very many ways before giving
# up, which matches the "backtracking regular expression" slowdown described by
# the accompanying timing test -- confirm against the full diff.
SINGLE_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*\s*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
START_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*>" ) # e.g. '<i>' or '<a href="foo">'
END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'
# Second set of the same three names. NOTE(review): presumably the replacement
# lines from the commit. The nested group is gone: each opening-tag pattern now
# captures the tag name in group 1 and uses a single ".*" up to the closing
# "/>" or ">". END_TAG_PATTERN is textually identical to the one above.
SINGLE_TAG_PATTERN = re.compile( "<(\w+).*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
START_TAG_PATTERN = re.compile( "<(\w+).*>" ) # e.g. '<i>' or '<a href="foo">'
END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'
@staticmethod
def track_open_tags( item, open_tags ):

View File

@@ -1,3 +1,4 @@
import time
from controller.Html_differ import Html_differ
@@ -228,6 +229,18 @@ class Test_html_differ( object ):
assert new_a == a
assert new_b == b
def test_prepare_lists_with_style_and_timing( self ):
    """
    Regression check: prepare_lists() must return promptly when one of the
    items is a tag carrying a long style attribute.
    """
    # An older version of the code took a really long time to parse certain
    # lengthy style strings due to a backtracking regular expression, so check
    # for that regression.
    style_tag = '<span style="' + 'a: b' * 20 + '">'
    a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
    b = [ 'foo ', 'bar ', 'baz ', style_tag, 'quux' ]

    # Start the clock only after the inputs exist, so the measurement covers
    # the prepare_lists() call alone. The return value is deliberately not
    # inspected here; this test asserts only on elapsed time.
    start_time = time.time()
    self.differ.prepare_lists( a, b )
    elapsed = time.time() - start_time

    # Report the measured duration so a failure is diagnosable from the log.
    assert elapsed < 0.5, "prepare_lists() took %.3f seconds" % elapsed
def test_diff_lists_with_insert( self ):
a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
b = [ 'foo ', 'bar ', 'whee ', 'baz ', 'quux' ]