luminotes/controller/Html_differ.py

import re
from difflib import SequenceMatcher
from htmllib import HTMLParser
from formatter import AbstractFormatter, NullWriter
from xml.sax.saxutils import quoteattr


class Html_differ( HTMLParser ):
  """
  Generates an HTML diff for two HTML strings. It is assumed that the input HTML is already cleaned.

  The diff is produced in three stages:
    1. convert_html_to_list() tokenizes each HTML string into a flat list of tags, entity
       references, and words (each word keeping its trailing whitespace).
    2. prepare_lists() merges the tokens enclosed by added/removed tags into single list items, so
       that a phrase whose markup changed is diffed as one unit.
    3. diff_lists() diffs the prepared lists and wraps the differences in <del> and <ins> tags.
  """

  # Tags for which handle_endtag() never emits a closing tag. track_open_tags() must never treat
  # them as "open", because nothing would ever close them again.
  VOID_TAGS = ( 'img', 'br' )

  def __init__( self ):
    HTMLParser.__init__( self, AbstractFormatter( NullWriter() ) )
    self.result = []  # tokens collected by the handle_*() callbacks during feed()
    self.requires_no_close = list( self.VOID_TAGS )

  # a (possibly empty) run of non-whitespace followed by a (possibly empty) run of whitespace
  WORD_AND_WHITESPACE_PATTERN = re.compile( r"\S*\s*" )

  def handle_data( self, data ):
    """
    Split a run of text into words and append them to the result list.

    This turns "foo bar baz" into [ "foo ", "bar ", "baz" ]. The pattern can also match the empty
    string, so empty tokens may be appended here; convert_html_to_list() filters them out.

    @type data: unicode
    @param data: text found between tags
    """
    self.result.extend( self.WORD_AND_WHITESPACE_PATTERN.findall( data ) )

  def handle_charref( self, ref ):
    """
    Append a numeric character reference (e.g. "&#160;") to the result list as a single token.

    @type ref: unicode
    @param ref: the reference without its leading "&#" and trailing ";"
    """
    self.result.append( '&#%s;' % ref )

  def handle_entityref( self, ref ):
    """
    Append a named entity reference (e.g. "&amp;") to the result list as a single token.

    @type ref: unicode
    @param ref: the entity name without its leading "&" and trailing ";"
    """
    self.result.append( '&%s;' % ref )

  def handle_comment( self, comment ):
    """
    Drop HTML comments; they take no part in the diff.
    """
    pass # ignore comments

  def handle_starttag( self, tag, method, attrs ):
    """
    Append the text of the start tag currently being parsed to the result list as one token.

    None of the arguments are used: the token comes from get_starttag_text(), so it includes the
    tag's attributes as returned by the parser.
    """
    self.result.append( self.get_starttag_text() )

  def handle_endtag( self, tag, attrs ):
    """
    Append a closing tag token for the given tag, unless the tag is listed in requires_no_close.

    @type tag: unicode
    @param tag: name of the tag being closed
    @param attrs: unused. NOTE(review): despite its name, the base parser presumably passes its
                  handler method in this position -- confirm against the parser's documentation
    """
    if tag not in self.requires_no_close:
      self.result.append( "</%s>" % tag )

  def unknown_starttag( self, tag, attr ):
    """
    Handle a start tag with no dedicated handler exactly like any other start tag.
    """
    self.handle_starttag( tag, None, attr )

  def unknown_endtag( self, tag ):
    """
    Handle an end tag with no dedicated handler exactly like any other end tag.
    """
    self.handle_endtag( tag, None )

  # used to replace, for instance, "<br/>" with "<br />"
  INVALID_TAG_PATTERN = re.compile( r"(\S)/>" )
  INVALID_TAG_FIX = r"\1 />"

  def convert_html_to_list( self, html ):
    """
    Given an HTML string, produce a list of its constituent elements (tags and text).

    @type html: unicode
    @param html: HTML string to parse
    @rtype: [ unicode, ... ]
    @return: parsed list of HTML elements, with empty strings removed
    """
    # start from a clean parser state so that the same instance can parse several strings
    self.reset()
    self.result = []
    normalized = self.INVALID_TAG_PATTERN.sub( self.INVALID_TAG_FIX, html )
    self.feed( normalized )
    return [ token for token in self.result if token != "" ]

  def diff( self, html_a, html_b ):
    """
    Return a composite HTML diff of the given HTML input strings. The returned string contains the
    entirety of the input strings, but with deleted/modified text from html_a wrapped in <del> tags,
    and inserted/modified text from html_b wrapped in <ins> tags.

    @type html_a: unicode
    @param html_a: original HTML string
    @type html_b: unicode
    @param html_b: modified HTML string
    @rtype: unicode
    @return: composite HTML diff
    """
    # parse the two html strings into lists
    a = self.convert_html_to_list( html_a )
    b = self.convert_html_to_list( html_b )

    # prepare the two lists for diffing, and then diff 'em
    ( a, b ) = self.prepare_lists( a, b )
    return self.diff_lists( a, b )

  SINGLE_TAG_PATTERN = re.compile( r"<(\w+).*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'
  START_TAG_PATTERN = re.compile( r"<(\w+).*>" )   # e.g. '<i>' or '<a href="foo">'
  END_TAG_PATTERN = re.compile( r"</(\w+)>" )      # e.g. '</i>' or '</a>'

  @staticmethod
  def track_open_tags( item, open_tags ):
    """
    Add or remove from the open_tags list based on whether the given item contains a start or end
    tag. If item does not contain any tag, then open_tags remains unchanged.

    Self-closing tags ('<br />') and tags listed in VOID_TAGS ('<br>', '<img src="foo">') never
    open anything. Tag names are lowercased before being stored or looked up, so that '<I>' is
    closed by '</i>'.

    @type item: unicode
    @param item: chunk of HTML, containing either an HTML tag or just text
    @type open_tags: [ unicode, ... ]
    @param open_tags: list of open tags, modified in place
    """
    # must be checked first: a self-closing tag also matches START_TAG_PATTERN
    if Html_differ.SINGLE_TAG_PATTERN.search( item ):
      return

    match = Html_differ.START_TAG_PATTERN.search( item )
    if match:
      tag = match.group( 1 ).lower()
      # a void tag is never followed by a closing tag, so recording it as open would leave it in
      # the list forever and prevent prepare_lists() from ever seeing "all tags closed" again
      if tag not in Html_differ.VOID_TAGS:
        open_tags.append( tag )
      return

    match = Html_differ.END_TAG_PATTERN.search( item )
    if not match:
      return

    # an end tag whose start tag was never recorded (e.g. it sits in unchanged text) is ignored
    tag = match.group( 1 ).lower()
    if tag in open_tags:
      open_tags.remove( tag )

  def prepare_lists( self, a, b ):
    """
    Prepare the two lists for diffing by merging together adjacent elements that occur within
    modified start and end HTML tags.

    For instance, if:
      a = [ 'foo ', 'bar ', 'baz ', 'quux' ]
      b = [ 'foo ', '<i>', 'bar ', 'baz', '</i> ', 'quux' ]
    then the returned lists are as follows:
      a = [ 'foo ', 'bar baz ', 'quux' ]
      b = [ 'foo ', '<i>bar baz</i> ', 'quux' ]

    Merging these elements together ensures that they're diffed as a single unit. Failing to perform
    this step would mean that when a phrase in list a becomes italicized in list b, then it wouldn't
    show up as modified in the resulting diff.

    @type a: [ unicode, ... ]
    @type b: [ unicode, ... ]
    @rtype: ( [ unicode, ... ], [ unicode, ... ] )
    @return: tuple of resulting list a and list b
    """
    matcher = SequenceMatcher( None, a, b )
    result_a = []
    result_b = []
    open_tags = []      # modified start tags
    open_del_items = [] # deleted items within modified start and end tags
    open_ins_items = [] # inserted items within modified start and end tags

    for ( change_type, i1, i2, j1, j2 ) in matcher.get_opcodes():
      if change_type == "equal":
        equal_items = b[ j1:j2 ]
        if not open_tags:
          result_a.extend( equal_items )
          result_b.extend( equal_items )
        else:
          # unchanged text inside a modified tag still belongs to the unit being merged
          open_del_items.extend( equal_items )
          open_ins_items.extend( equal_items )
        continue

      # go through the altered items looking for start and end tags
      orig_len_open_tags = len( open_tags )
      for item in a[ i1:i2 ]:
        Html_differ.track_open_tags( item, open_tags )
      for item in b[ j1:j2 ]:
        Html_differ.track_open_tags( item, open_tags )

      # true only if a tag opened by an earlier change was closed by this one
      all_tags_got_closed = ( orig_len_open_tags > 0 and not open_tags )

      if change_type in ( "replace", "delete" ):
        open_del_items.extend( a[ i1:i2 ] )
      if change_type in ( "replace", "insert" ):
        open_ins_items.extend( b[ j1:j2 ] )

      if all_tags_got_closed:
        # if all tags were just closed, then merge the items that were in those tags
        if open_del_items:
          result_a.append( ''.join( open_del_items ) )
        if open_ins_items:
          result_b.append( ''.join( open_ins_items ) )
        open_del_items = []
        open_ins_items = []
      elif not open_tags:
        # no tag involved before or after this change: pass the items through unmerged
        result_a.extend( open_del_items )
        result_b.extend( open_ins_items )
        open_del_items = []
        open_ins_items = []

    # tags that were still open at the end of the input: flush their items unmerged
    if open_del_items:
      result_a.extend( open_del_items )
    if open_ins_items:
      result_b.extend( open_ins_items )

    return ( result_a, result_b )

  def diff_lists( self, a, b ):
    """
    Diff two prepared lists and return the result as an HTML string.

    Replaced items are emitted as a <del class="diff modified"> immediately followed by an
    <ins class="diff modified">; pure deletions and insertions use class "diff" only; unchanged
    items are emitted as they are.

    @type a: [ unicode, ... ]
    @type b: [ unicode, ... ]
    @rtype: unicode
    @return: composite HTML diff
    """
    matcher = SequenceMatcher( None, a, b )
    result = []

    # inspired by http://www.aaronsw.com/2002/diff/
    for ( change_type, i1, i2, j1, j2 ) in matcher.get_opcodes():
      if change_type == "replace":
        result.append(
          '<del class="diff modified">' + ''.join( a[ i1:i2 ] ) + '</del>' + \
          '<ins class="diff modified">' + ''.join( b[ j1:j2 ] ) + '</ins>'
        )
      elif change_type == "delete":
        result.append( '<del class="diff">' + ''.join( a[ i1:i2 ] ) + '</del>' )
      elif change_type == "insert":
        result.append( '<ins class="diff">' + ''.join( b[ j1:j2 ] ) + '</ins>' )
      elif change_type == "equal":
        result.append( ''.join( b[ j1:j2 ] ) )

    return "".join( result )
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`import re`
			`from difflib import SequenceMatcher`
			`from htmllib import HTMLParser`
			`from formatter import AbstractFormatter, NullWriter`
			`from xml.sax.saxutils import quoteattr`


			`class Html_differ( HTMLParser ):`
			`"""`
			`Generates an HTML diff for two HTML strings. It assumed that the input HTML is already cleaned.`
			`"""`
			`def __init__( self ):`
			`HTMLParser.__init__( self, AbstractFormatter( NullWriter() ) )`
			`self.result = []`
			`self.requires_no_close = [ 'img', 'br' ]`

			`WORD_AND_WHITESPACE_PATTERN = re.compile( "\S*\s*" )`

			`def handle_data( self, data ):`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`# this turns "foo bar baz" into [ "foo ", "bar ", "baz" ] and extends the result with it`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`self.result.extend( self.WORD_AND_WHITESPACE_PATTERN.findall( data ) )`

			`def handle_charref( self, ref ):`
			`self.result.append( '&#%s;' % ref )`

			`def handle_entityref( self, ref ):`
			`self.result.append( '&%s;' % ref )`

			`def handle_comment( self, comment ):`
			`pass # ignore comments`

			`def handle_starttag( self, tag, method, attrs ):`
			`self.result.append( self.get_starttag_text() )`

			`def handle_endtag( self, tag, attrs ):`
			`if tag not in self.requires_no_close:`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`self.result.append( "</%s>" % tag )`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00
			`def unknown_starttag( self, tag, attr ):`
			`self.handle_starttag( tag, None, attr )`

			`def unknown_endtag( self, tag ):`
			`self.handle_endtag( tag, None )`

			`# used to replace, for instance, "<br/>" with "<br />"`
			`INVALID_TAG_PATTERN = re.compile( "(\S)/>" )`
			`INVALID_TAG_FIX = "\\1 />"`

Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`def convert_html_to_list( self, html ):`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`"""`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`Given an HTML string, produce a list of its constituent elements (tags and text).`

			`@type html: unicode`
			`@param html: HTML string to parse`
			`@rtype: [ unicode, ... ]`
			`@return: parsed list of HTML elements`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`"""`
			`self.reset()`
			`self.result = []`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`html = self.INVALID_TAG_PATTERN.sub( self.INVALID_TAG_FIX, html )`
			`self.feed( html )`
			`return [ x for x in self.result if x != "" ]`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`def diff( self, html_a, html_b ):`
			`"""`
			`Return a composite HTML diff of the given HTML input strings. The returned string contains the`
			`entirety of the input strings, but with deleted/modified text from html_a wrapped in <del> tags,`
			`and inserted/modified text from html_b wrapped in <ins> tags.`

			`@type html_a: unicode`
			`@param html_a: original HTML string`
			`@type html_b: unicode`
			`@param html-b: modified HTML string`
			`@rtype: unicode`
			`@return: composite HTML diff`
			`"""`
			`# parse the two html strings into lists`
			`a = self.convert_html_to_list( html_a )`
			`b = self.convert_html_to_list( html_b )`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`# prepare the two lists for diffing, and then diff 'em`
			`( a, b ) = self.prepare_lists( a, b )`
			`return self.diff_lists( a, b )`

Fix for problem with HTML differ that would actually hang the entire server when fed certain lengthy HTML strings. Unit tests included. 2009-10-21 03:49:06 +00:00			`SINGLE_TAG_PATTERN = re.compile( "<(\w+).*/>" ) # e.g. '<br/>' or '<br />' or '<img src="foo" />'`
			`START_TAG_PATTERN = re.compile( "<(\w+).*>" ) # e.g. '<i>' or '<a href="foo">'`
			`END_TAG_PATTERN = re.compile( "</(\w+)>" ) # e.g. '</i>' or '</a>'`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`@staticmethod`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`def track_open_tags( item, open_tags ):`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`"""`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`Add or remove from the open_tags list based on whether the given item contains a start or end`
			`tag. If item does not contain any tag, then open_tags remains unchanged.`

			`@type item: unicode`
			`@param item: chunk of HTML, containing either an HTML tag or just text`
			`@type open_tags: [ unicode, ... ]`
			`@param open_tags: list of open tags`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`"""`
More diff fixes. This time for "<br/>" tags. 2008-05-03 06:10:09 +00:00			`match = Html_differ.SINGLE_TAG_PATTERN.search( item )`
			`if match: return`

Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`match = Html_differ.START_TAG_PATTERN.search( item )`
			`if match:`
			`open_tags.append( match.group( 1 ) )`
			`return`

			`match = Html_differ.END_TAG_PATTERN.search( item )`
			`if not match: return`

			`tag = match.group( 1 )`
			`if match and tag in open_tags:`
			`open_tags.remove( tag )`

Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`def prepare_lists( self, a, b ):`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`"""`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`Prepare the two lists for diffing by merging together adjacent elements that occur within`
			`modified start and end HTML tags.`

			`For instance, if:`
			`a = [ 'foo ', 'bar ', 'baz ', 'quux' ]`
			`b = [ 'foo ', '<i>', 'bar ', 'baz', '</i> ', 'quux' ]`
			`then the returned lists are as follows:`
			`a = [ 'foo ', 'bar baz ', 'quux' ]`
			`b = [ 'foo ', '<i>bar baz</i> ', 'quux' ]`

			`Merging these elements together ensures that they're diffed as a single unit. Failing to perform`
			`this step would mean that when a phrase in list a becomes italicized in list b, then it wouldn't`
			`show up as modified in the resulting diff.`

			`@type a: [ unicode, ... ]`
			`@type b: [ unicode, ... ]`
			`@rtype: ( [ unicode, ... ], [ unicode, ... ] )`
			`@return: tuple of resulting list a and list b`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`"""`
			`matcher = SequenceMatcher( None, a, b )`
			`result_a = []`
			`result_b = []`
			`open_tags = [] # modified start tags`
			`open_del_items = [] # deleted items within modified start and end tags`
			`open_ins_items = [] # inserted items within modified start and end tags`

			`for ( change_type, i1, i2, j1, j2 ) in matcher.get_opcodes():`
			`if change_type == "equal":`
			`equal_items = b[ j1:j2 ]`
			`if len( open_tags ) == 0:`
			`result_a.extend( equal_items )`
			`result_b.extend( equal_items )`
			`else:`
			`open_del_items.extend( equal_items )`
			`open_ins_items.extend( equal_items )`
			`continue`

			`# go through the altered items looking for start and end tags`
More diff fixes. This time for "<br/>" tags. 2008-05-03 06:10:09 +00:00			`orig_len_open_tags = len( open_tags )`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`for i in range( i1, i2 ):`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`Html_differ.track_open_tags( a[ i ], open_tags )`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`for j in range( j1, j2 ):`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`Html_differ.track_open_tags( b[ j ], open_tags )`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00
More diff fixes. This time for "<br/>" tags. 2008-05-03 06:10:09 +00:00			`all_tags_got_closed = ( orig_len_open_tags > 0 and len( open_tags ) == 0 )`

Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`if change_type == "replace":`
			`open_del_items.extend( a[ i1:i2 ] )`
			`open_ins_items.extend( b[ j1:j2 ] )`
			`elif change_type == "delete":`
			`open_del_items.extend( a[ i1:i2 ] )`
			`elif change_type == "insert":`
			`open_ins_items.extend( b[ j1:j2 ] )`

More diff fixes. This time for "<br/>" tags. 2008-05-03 06:10:09 +00:00			`if all_tags_got_closed:`
			`# if all tags were just closed, then merge the items that were in those tags`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`if len( open_del_items ) > 0:`
			`result_a.append( ''.join( open_del_items ) )`
			`if len( open_ins_items ) > 0:`
			`result_b.append( ''.join( open_ins_items ) )`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`open_del_items = []`
			`open_ins_items = []`
More diff fixes. This time for "<br/>" tags. 2008-05-03 06:10:09 +00:00			`elif len( open_tags ) == 0:`
			`result_a.extend( open_del_items )`
			`result_b.extend( open_ins_items )`
			`open_del_items = []`
			`open_ins_items = []`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00
Fixed yet another diff-breaking edge case. This one had to do with inserting italicized text right before some existing italicized text. 2008-05-03 07:40:46 +00:00			`if len( open_del_items ):`
			`result_a.extend( open_del_items )`
			`if len( open_ins_items ):`
			`result_b.extend( open_ins_items )`

Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`return ( result_a, result_b )`

Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00			`def diff_lists( self, a, b ):`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`"""`
			`Diff two prepared lists and return the result as an HTML string.`
Completed Html_differ unit tests. 2008-05-02 23:59:03 +00:00
			`@type a: [ unicode, ... ]`
			`@type b: [ unicode, ... ]`
			`@rtype: unicode`
			`@return: composite HTML diff`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`"""`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`matcher = SequenceMatcher( None, a, b )`
			`result = []`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`open_tags = []`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00
			`# inspired by http://www.aaronsw.com/2002/diff/`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`for ( change_type, i1, i2, j1, j2 ) in matcher.get_opcodes():`
			`if change_type == "replace":`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`result.append(`
			`'<del class="diff modified">' + ''.join( a[ i1:i2 ] ) + '</del>' + \`
			`'<ins class="diff modified">' + ''.join( b[ j1:j2 ] ) + '</ins>'`
			`)`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`elif change_type == "delete":`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`result.append( '<del class="diff">' + ''.join( a[ i1:i2 ] ) + '</del>' )`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`elif change_type == "insert":`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`result.append( '<ins class="diff">' + ''.join( b[ j1:j2 ] ) + '</ins>' )`
Updated to handle the following case: * If a phrase becomes italicized, it should show the non-italic version in red and immediately after, the italic version in green. This required doing a pass on the HTML to merge certain elements together before doing the actual diff. 2008-05-02 21:03:46 +00:00			`elif change_type == "equal":`
Implemented new HTML diffing class to produce an HTMLized diff of two HTML strings. Not perfect yet. For instance, if string a contains "foo bar baz" and string b contains "foo <i>bar</i> baz", the resulting output does not show "bar" in strikeout and "<i>bar</i>" in green, as one might expect. 2008-05-02 18:58:36 +00:00			`result.append( ''.join( b[ j1:j2 ] ) )`

			`return "".join( result )`