diff --git a/novel_compiler/novel_compiler.py b/novel_compiler/novel_compiler.py index f2ac649..e96dbbc 100644 --- a/novel_compiler/novel_compiler.py +++ b/novel_compiler/novel_compiler.py @@ -5,6 +5,7 @@ from novel_stats.novel_stats import count_words import tempfile import MarkdownPP import json +from lxml import etree TITLE_MARKER = '# ' AUTHOR_MARKER = '### ' @@ -18,42 +19,41 @@ class Chapter: self.heading = heading self.paragraphs = [] -def md_re_parser(md_paragraph): - # Correct xml tags - pre = '' - post = '' - it_pre = '' - bf_pre = '' - bfit_pre = '' +class ParseTarget: + TAGS = {'em':'italic', 'strong':'bold'} + def __init__(self): + self.cur = {key: False for key in self.TAGS} + self.par = RichText() + def start(self, tag, attrib): + if tag in self.TAGS: + self.cur[tag] = True + def end(self, tag): + if tag in self.TAGS: + self.cur[tag] = False + def data(self, data): + tags = {self.TAGS[tag]:self.cur[tag] for tag in self.TAGS} + self.par.add(data, **tags) + def close(self): + return self.par - # Tag replacement +def md_re_parser(md_paragraph, break_mark): + if md_paragraph == break_mark: + return None html = markdown.markdown(md_paragraph) - html = html.replace('

', pre) - html = html.replace('

', post) - html = html.replace('', post+bfit_pre) - html = html.replace('', post+pre) - html = html.replace('', post+it_pre) - html = html.replace('', post+pre) - html = html.replace('', post+bf_pre) - html = html.replace('', post+pre) - - # xml cleanup - while pre+post in html: - html = html.replace(pre+post,'') - - # convert to a rich text paragraph - par = RichText() - par.xml = html - if len(html) == 0: - print(md_paragraph) + target = ParseTarget() + parser = etree.XMLParser(target=target) + par = etree.XML(html, parser) return par + def novel_parser(source_file, context = None): if not context: context = {'author_address': 'Street\nTown, State ZIP\nCountry', 'author_email': 'name@email.com', 'author_phone': 'PhoneNumber(s)', - 'author_website': 'https://www.author.com'} + 'author_website': 'https://www.author.com', + 'md_break_mark': '-*-', + 'docx_break_mark': '#'} context['chapters'] = [] wc = 0 @@ -79,7 +79,7 @@ def novel_parser(source_file, context = None): stripped = line.strip() if stripped: wc += count_words(stripped) - chapter.paragraphs.append(md_re_parser(stripped)) + chapter.paragraphs.append(md_re_parser(stripped, context['md_break_mark'])) context['chapters'].append(chapter) source_file.close() diff --git a/novel_compiler/template.docx b/novel_compiler/template.docx index 801e5e5..14f1037 100644 Binary files a/novel_compiler/template.docx and b/novel_compiler/template.docx differ diff --git a/setup.py b/setup.py index 9ffa33e..8b8c791 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( ], packages=find_packages(), entry_points={"console_scripts": ["novel-compiler = novel_compiler.novel_compiler:main",]}, - install_requires=('docxtpl','markdown','novel_stats','MarkdownPP'), + install_requires=('docxtpl','markdown','novel_stats','MarkdownPP', 'lxml'), include_package_data=True, python_requires='>3.7.0', )