updated markdown parsing and added scene breaks

2022-01-21 14:50:12 +01:00 · 2022-01-21 14:50:12 +01:00 · e4f187e292
parent 314341ff9e
commit e4f187e292
3 changed files with 29 additions and 29 deletions
--- a/novel_compiler/novel_compiler.py
+++ b/novel_compiler/novel_compiler.py
@@ -5,6 +5,7 @@ from novel_stats.novel_stats import count_words
 import tempfile
 import MarkdownPP
 import json
 from lxml import etree
 TITLE_MARKER = '# '
 AUTHOR_MARKER = '### '
@@ -18,42 +19,41 @@ class Chapter:
        self.heading = heading
        self.paragraphs = []
-def md_re_parser(md_paragraph):
+class ParseTarget:
-    # Correct xml tags
+    TAGS = {'em':'italic', 'strong':'bold'}
-    pre = '<w:r><w:t xml:space="preserve">'
+    def __init__(self):
-    post = '</w:t></w:r>'
+        self.cur = {key: False for key in self.TAGS}
-    it_pre = '<w:r><w:rPr><w:i/></w:rPr><w:t xml:space="preserve">'
+        self.par = RichText()
-    bf_pre = '<w:r><w:rPr><w:b/></w:rPr><w:t xml:space="preserve">'
+    def start(self, tag, attrib):
-    bfit_pre = '<w:r><w:rPr><w:b/><w:i/></w:rPr><w:t xml:space="preserve">'
+        if tag in self.TAGS:
            self.cur[tag] = True
    def end(self, tag):
        if tag in self.TAGS:
            self.cur[tag] = False
    def data(self, data):
        tags = {self.TAGS[tag]:self.cur[tag] for tag in self.TAGS}
        self.par.add(data, **tags)
    def close(self):
        return self.par
-    # Tag replacement
+def md_re_parser(md_paragraph, break_mark):
    if md_paragraph == break_mark:
        return None
    html = markdown.markdown(md_paragraph)
-    html = html.replace('<p>', pre)
+    target = ParseTarget()
-    html = html.replace('</p>', post)
+    parser = etree.XMLParser(target=target)
-    html = html.replace('<strong><em>', post+bfit_pre)
+    par = etree.XML(html, parser)
    html = html.replace('</strong></em>', post+pre)
    html = html.replace('<em>', post+it_pre)
    html = html.replace('</em>', post+pre)
    html = html.replace('<strong>', post+bf_pre)
    html = html.replace('</strong>', post+pre)
    # xml cleanup
    while pre+post in html:
        html = html.replace(pre+post,'')
    # convert to a rich text paragraph
    par = RichText()
    par.xml = html
    if len(html) == 0:
        print(md_paragraph)
    return par
 def novel_parser(source_file, context = None):
    if not context:
        context = {'author_address': 'Street\nTown, State ZIP\nCountry',
                    'author_email': 'name@email.com',
                    'author_phone': 'PhoneNumber(s)',
-                    'author_website': 'https://www.author.com'}
+                    'author_website': 'https://www.author.com',
                    'md_break_mark': '-*-',
                    'docx_break_mark': '#'}
    context['chapters'] = []
    wc = 0
@@ -79,7 +79,7 @@ def novel_parser(source_file, context = None):
            stripped = line.strip()
            if stripped:
                wc += count_words(stripped)
-                chapter.paragraphs.append(md_re_parser(stripped))
+                chapter.paragraphs.append(md_re_parser(stripped, context['md_break_mark']))
    context['chapters'].append(chapter)
    source_file.close()
--- a/novel_compiler/template.docx
+++ b/novel_compiler/template.docx
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@ setup(
    ],
    packages=find_packages(),
    entry_points={"console_scripts": ["novel-compiler = novel_compiler.novel_compiler:main",]},
-    install_requires=('docxtpl','markdown','novel_stats','MarkdownPP'),
+    install_requires=('docxtpl','markdown','novel_stats','MarkdownPP', 'lxml'),
    include_package_data=True,
    python_requires='>3.7.0',
 )