updated markdown parsing and added scene breaks

This commit is contained in:
Dmytro Yeroshkin 2022-01-21 14:50:12 +01:00
parent 314341ff9e
commit e4f187e292
3 changed files with 29 additions and 29 deletions

View File

@ -5,6 +5,7 @@ from novel_stats.novel_stats import count_words
import tempfile import tempfile
import MarkdownPP import MarkdownPP
import json import json
from lxml import etree
TITLE_MARKER = '# ' TITLE_MARKER = '# '
AUTHOR_MARKER = '### ' AUTHOR_MARKER = '### '
@ -18,42 +19,41 @@ class Chapter:
self.heading = heading self.heading = heading
self.paragraphs = [] self.paragraphs = []
def md_re_parser(md_paragraph): class ParseTarget:
# Correct xml tags TAGS = {'em':'italic', 'strong':'bold'}
pre = '<w:r><w:t xml:space="preserve">' def __init__(self):
post = '</w:t></w:r>' self.cur = {key: False for key in self.TAGS}
it_pre = '<w:r><w:rPr><w:i/></w:rPr><w:t xml:space="preserve">' self.par = RichText()
bf_pre = '<w:r><w:rPr><w:b/></w:rPr><w:t xml:space="preserve">' def start(self, tag, attrib):
bfit_pre = '<w:r><w:rPr><w:b/><w:i/></w:rPr><w:t xml:space="preserve">' if tag in self.TAGS:
self.cur[tag] = True
def end(self, tag):
if tag in self.TAGS:
self.cur[tag] = False
def data(self, data):
tags = {self.TAGS[tag]:self.cur[tag] for tag in self.TAGS}
self.par.add(data, **tags)
def close(self):
return self.par
# Tag replacement def md_re_parser(md_paragraph, break_mark):
if md_paragraph == break_mark:
return None
html = markdown.markdown(md_paragraph) html = markdown.markdown(md_paragraph)
html = html.replace('<p>', pre) target = ParseTarget()
html = html.replace('</p>', post) parser = etree.XMLParser(target=target)
html = html.replace('<strong><em>', post+bfit_pre) par = etree.XML(html, parser)
html = html.replace('</strong></em>', post+pre)
html = html.replace('<em>', post+it_pre)
html = html.replace('</em>', post+pre)
html = html.replace('<strong>', post+bf_pre)
html = html.replace('</strong>', post+pre)
# xml cleanup
while pre+post in html:
html = html.replace(pre+post,'')
# convert to a rich text paragraph
par = RichText()
par.xml = html
if len(html) == 0:
print(md_paragraph)
return par return par
def novel_parser(source_file, context = None): def novel_parser(source_file, context = None):
if not context: if not context:
context = {'author_address': 'Street\nTown, State ZIP\nCountry', context = {'author_address': 'Street\nTown, State ZIP\nCountry',
'author_email': 'name@email.com', 'author_email': 'name@email.com',
'author_phone': 'PhoneNumber(s)', 'author_phone': 'PhoneNumber(s)',
'author_website': 'https://www.author.com'} 'author_website': 'https://www.author.com',
'md_break_mark': '-*-',
'docx_break_mark': '#'}
context['chapters'] = [] context['chapters'] = []
wc = 0 wc = 0
@ -79,7 +79,7 @@ def novel_parser(source_file, context = None):
stripped = line.strip() stripped = line.strip()
if stripped: if stripped:
wc += count_words(stripped) wc += count_words(stripped)
chapter.paragraphs.append(md_re_parser(stripped)) chapter.paragraphs.append(md_re_parser(stripped, context['md_break_mark']))
context['chapters'].append(chapter) context['chapters'].append(chapter)
source_file.close() source_file.close()

Binary file not shown.

View File

@ -20,7 +20,7 @@ setup(
], ],
packages=find_packages(), packages=find_packages(),
entry_points={"console_scripts": ["novel-compiler = novel_compiler.novel_compiler:main",]}, entry_points={"console_scripts": ["novel-compiler = novel_compiler.novel_compiler:main",]},
install_requires=('docxtpl','markdown','novel_stats','MarkdownPP'), install_requires=('docxtpl','markdown','novel_stats','MarkdownPP', 'lxml'),
include_package_data=True, include_package_data=True,
python_requires='>3.7.0', python_requires='>3.7.0',
) )