From df8ffd538aca3e3f6d897b46f5b4b9765da5c1c1 Mon Sep 17 00:00:00 2001 From: Dan Helfman Date: Tue, 20 Oct 2009 20:49:06 -0700 Subject: [PATCH] Fix for problem with HTML differ that would actually hang the entire server when fed certain lengthy HTML strings. Unit tests included. --- controller/Html_differ.py | 6 +++--- controller/test/Test_html_differ.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/controller/Html_differ.py b/controller/Html_differ.py index b735598..330c951 100644 --- a/controller/Html_differ.py +++ b/controller/Html_differ.py @@ -82,9 +82,9 @@ class Html_differ( HTMLParser ): ( a, b ) = self.prepare_lists( a, b ) return self.diff_lists( a, b ) - SINGLE_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*\s*/>" ) # e.g. '
' or '
' or '' - START_TAG_PATTERN = re.compile( "<(\w+)(\s+[^>]*)*>" ) # e.g. '' or '' - END_TAG_PATTERN = re.compile( "" ) # e.g. '' or '' + SINGLE_TAG_PATTERN = re.compile( "<(\w+).*/>" ) # e.g. '
' or '
' or '' + START_TAG_PATTERN = re.compile( "<(\w+).*>" ) # e.g. '' or '' + END_TAG_PATTERN = re.compile( "" ) # e.g. '' or '' @staticmethod def track_open_tags( item, open_tags ): diff --git a/controller/test/Test_html_differ.py b/controller/test/Test_html_differ.py index cb7ca01..34b9529 100644 --- a/controller/test/Test_html_differ.py +++ b/controller/test/Test_html_differ.py @@ -1,3 +1,4 @@ +import time from controller.Html_differ import Html_differ @@ -228,6 +229,18 @@ class Test_html_differ( object ): assert new_a == a assert new_b == b + def test_prepare_lists_with_style_and_timing( self ): + # An older version of the code took a really long time to parse certain + # lengthy style strings due to a backtracking regular expression, so check + # for that regression. + start_time = time.time() + a = [ 'foo ', 'bar ', 'baz ', 'quux' ] + b = [ 'foo ', 'bar ', 'baz ', '' , 'quux' ] + + result = self.differ.prepare_lists( a, b ) + + assert time.time() - start_time < 0.5 + def test_diff_lists_with_insert( self ): a = [ 'foo ', 'bar ', 'baz ', 'quux' ] b = [ 'foo ', 'bar ', 'whee ', 'baz ', 'quux' ]