witten
/
luminotes
Archived
1
0
Fork 0
This repository has been archived on 2023-12-16. You can view files and clone it, but cannot push or open issues or pull requests.
luminotes/controller/Html_nuker.py

53 lines
1.1 KiB
Python

from htmllib import HTMLParser
from formatter import AbstractFormatter, NullWriter
class Html_nuker( HTMLParser ):
"""
Nukes HTML of all tags.
"""
def __init__( self, allow_refs = False ):
HTMLParser.__init__( self, AbstractFormatter( NullWriter() ) )
self.result = []
self.allow_refs = allow_refs
def handle_data( self, data ):
if data and "<" not in data and ">" not in data:
self.result.append( data )
def handle_charref( self, ref ):
if self.allow_refs:
self.result.append( ref )
def handle_entityref( self, ref ):
if self.allow_refs:
self.result.append( ref )
def handle_comment( self, comment ):
pass
def handle_starttag( self, tag, method, attrs ):
pass
def handle_endtag( self, tag, attrs ):
pass
def unknown_starttag( self, tag, attributes ):
pass
def unknown_endtag( self, tag ):
pass
def nuke( self, rawstring ):
"""
Nukes the given string of all HTML tags.
"""
if rawstring is None:
return u""
self.reset()
self.result = []
self.feed( rawstring )
return u"".join( self.result )