From e4f187e292fdda88980322b30c40121127db3c42 Mon Sep 17 00:00:00 2001 From: Dmytro Yeroshkin Date: Fri, 21 Jan 2022 14:50:12 +0100 Subject: [PATCH] updated markdown parsing and added scene breaks --- novel_compiler/novel_compiler.py | 56 +++++++++++++++---------------- novel_compiler/template.docx | Bin 21940 -> 22039 bytes setup.py | 2 +- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/novel_compiler/novel_compiler.py b/novel_compiler/novel_compiler.py index f2ac649..e96dbbc 100644 --- a/novel_compiler/novel_compiler.py +++ b/novel_compiler/novel_compiler.py @@ -5,6 +5,7 @@ from novel_stats.novel_stats import count_words import tempfile import MarkdownPP import json +from lxml import etree TITLE_MARKER = '# ' AUTHOR_MARKER = '### ' @@ -18,42 +19,41 @@ class Chapter: self.heading = heading self.paragraphs = [] -def md_re_parser(md_paragraph): - # Correct xml tags - pre = '' - post = '' - it_pre = '' - bf_pre = '' - bfit_pre = '' +class ParseTarget: + TAGS = {'em':'italic', 'strong':'bold'} + def __init__(self): + self.cur = {key: False for key in self.TAGS} + self.par = RichText() + def start(self, tag, attrib): + if tag in self.TAGS: + self.cur[tag] = True + def end(self, tag): + if tag in self.TAGS: + self.cur[tag] = False + def data(self, data): + tags = {self.TAGS[tag]:self.cur[tag] for tag in self.TAGS} + self.par.add(data, **tags) + def close(self): + return self.par - # Tag replacement +def md_re_parser(md_paragraph, break_mark): + if md_paragraph == break_mark: + return None html = markdown.markdown(md_paragraph) - html = html.replace('

', pre) - html = html.replace('

', post) - html = html.replace('', post+bfit_pre) - html = html.replace('', post+pre) - html = html.replace('', post+it_pre) - html = html.replace('', post+pre) - html = html.replace('', post+bf_pre) - html = html.replace('', post+pre) - - # xml cleanup - while pre+post in html: - html = html.replace(pre+post,'') - - # convert to a rich text paragraph - par = RichText() - par.xml = html - if len(html) == 0: - print(md_paragraph) + target = ParseTarget() + parser = etree.XMLParser(target=target) + par = etree.XML(html, parser) return par + def novel_parser(source_file, context = None): if not context: context = {'author_address': 'Street\nTown, State ZIP\nCountry', 'author_email': 'name@email.com', 'author_phone': 'PhoneNumber(s)', - 'author_website': 'https://www.author.com'} + 'author_website': 'https://www.author.com', + 'md_break_mark': '-*-', + 'docx_break_mark': '#'} context['chapters'] = [] wc = 0 @@ -79,7 +79,7 @@ def novel_parser(source_file, context = None): stripped = line.strip() if stripped: wc += count_words(stripped) - chapter.paragraphs.append(md_re_parser(stripped)) + chapter.paragraphs.append(md_re_parser(stripped, context['md_break_mark'])) context['chapters'].append(chapter) source_file.close() diff --git a/novel_compiler/template.docx b/novel_compiler/template.docx index 801e5e50f6a24f9a95f47b2c9b0240361704f2b2..14f103730c9b87f6943c9459cc313793789c9483 100644 GIT binary patch delta 4546 zcmZA5WmFVQ_W*ONz_wSJ9VN`q)1|%H2O+7 zPC>6OmM*85Nrj1nY>Mp2H$({qLhk#gQzoW8%1Kzw^bs1`hKZ7H`#`FB3h$Nc3#O#; zM_fApL<;bTFXv4}Xj?uK6mNwl-jd7ie$8!#I<|OXlDUswhTQ;# zLpD&8qy|M}I_z~rcbCjT$~%r-dW90OV!+hC0tIVyaMg8U7zo3@N4x{j32mhg8bwoh zQPoN?H-<_7-em*vAIwDM;Tpm~nh1V%ucSJF^m8q0IrhtVH0R!TB>rCC=r+T5j-GVp z7i)U{*2UAmEIFHPpVF$MiR?!lUvI?;P=r>ew#p+;CfKe6(712bvbCZ?8GU{ZCw{}3 zIo6-`r7zY~Ki0$bzdG^|1h{THDIBKs-#v_s;*3>CPLCn*udo743{uSADz;MLjDx)R z09_rcIJ@{E(H9?ekxMPNgg|ny3G>68ZW#{~QTN4EC|QCC1x$2)4-siM|A;U*U`r~- z@MfZulp1=qWFXgY^5fe|Ty;)wNQJYpk@z0Pvj?>c2Rk6+Xa(wDZ)kq%I&>O^I1<@a z4!KYpogTR6H*g--+&AV2Ik@562!l9ZR@IvIcPbnv)4n6bA=3kTXNm$2-+@8$y&Y}$ zi{6*A2?l%<;K{WO)z}$`x(&K*!9OjGyM-X}+6<7uTh$sMpSL$|<9j_xq?sHnF8-rW z%U^zFxcxk?pWE7~K(FR#BX<_Uh}CFdA5nU6A9H!Hvf-R_`LO+9emulbol#=MBqvP0 zW`%m&(|6vD5}A#TigrMoojkpxD~VMNgNrd5233d6TpfJj=oYBAUu>GE8cYt3PdWBt zA>EfSYY$)%Ka<-pygTH9H-27}=R$A7(kBq`AUhyn!2GW8y{HZoVgkA!IcmdlP_AW{ zIr{3(2Ep~;e;NeS<*t7LSYKzy<*ZfH57F6D3(a?N5+EV{eorDMJ?s1;WsaLWtCgk% zNk^R|l{Oi4b@3Ns@N-G=MT~5=Y7^4mRy{(C#zu2x7%eq~JqaS0bsW=?O?+AGRRPIc zu*npU|0dkE0?Yhu^>h_w!k^UWl;bdn z`O+=QVC3S2sic0Mnfp(=IWrQmRUZ34oH1M_{+48~;{24?!`)*G5wgA^J#}HciKf=@ z-c0Ip;Gtg9HqB(#(iANg?%-A1-e!Habuc#dRyp$7k;l*zy!p-^S~xW8&+2j;osF(b z=%;)l!!>E2?F}CG!9h2i%Yjja2EX_!xQbWvu35H4`PcIHcL#sA5!n8OYFTFZqsOlv zrXgPB+F71Ws~R0-lkm|>>T6T}96qdz4arD9HA;EsQFZsKu3rD`f!s7HtEj84io;rw z;w49^-RMKO$|ZYU4P^x$0i}sUUnR*i93_e`{@)Fq(9@0yZUqK*XYYu%xU2K znaQC|r()rJ8e^^vItBw-6NcvO&k~yDP@RmMdRyLomwn%JOD0w6C=9| zR;38SHl9!CCbK#he~MbBQ+Ni}G2j(Rn8DKsl@gFuJ|w&L7{FTypQfTS1Tay8 zkTE%q39mvMJOCv8Ve#gm2^Z$Ac)xKO6A2#C(v)?L2@mGS$q$}oE!jD_@ip0*l#LWE z07=$tIo$|F3izsd_|G0?IZU~5I3r=SB2w=XRX4V#U-A~*>-%%hOYjAbA+;|dR%ZuVE(YF|^If%Pb5dUoWX_q+sE)GompMfi?f13Z>W zu?*KZAgVh@}GYV&P?9J*%0E)u(X?c=5(J#pC+>-L%2Kuf{41se$S2_&D}nRJYTna#y6y!boSSg_^MFNeLS+XXjo9FfdIW zvyX8-mDg}vq7XSRaKax50-qwxf}}ktD$JFPWk{eE+bM1Ryu{zYla_ATNLBZ`y#_QESoD>YbOOr4d67)x?mdU=g++-h3i} zHR8|RZ_#_{uMHT!p6Qi0mGq}~bjXn0>Alu}zamhYwKd_#;p67gm8~E9Ncc@+arR(+ z7^{l!vTmK5J?BN(+ClYkeYyH9FgnYzKi@5Vjhz+&`xtAbSq^sTUVJW>VT_9WhAmR} zqej{xoj{KPYLpT>K~6m|@Nf>$*z$%gVTN`O=D`hHMk}Nf`*YYQ0#)9-day$L`~R>t zEt~tx_BEuAA+Ur|k|z3wtrulSjQ?_ZI-@(opG{YNYDwQ?OXF&8GQGV zVrv=F!lPp%)xB=aR(sr2`AKv@5an;f6ZgDdW8OPnaz{}{6Ao#i13^z19`<7I#oQ~e zp&V`TXIjr`b3#zs=oLS?C@X=6DZRE(ED3R@1(?Z5j0CaxCL+?SrI86~ zT}Y?J$74Mg0Ik=vPg}+2mKdlE$Lp>a4#=;I{;rsk<{nTFtEG`%)KBdox~GOW92oDO zgZTxXbt;0_OK?@uiY0{jF8U?=l)|a3vL#_~a5E{+{kz(j{I=Eo)$RMgEVlQvr*gGk z{gf>Y@-25eZke$9eg0Ao)WIhKPVqxR8yqQ@Jih=7vF7O-EUW-F8@5cddI^qAfB(2Y z0joIKW(bMvKq+k|bLOq<+&aWOnkaF82f}3LIr>xdF}KQOaB|#bu&kA=#89r=>vS;4y7<}gX!9V!Zc-Exz8HmxNsNkEpcDD4u=fefH^+YLHGQ5b4TuNr>LTz1m zmVywEf_p=$-K@uCP`={bOjsJb_C zx~(iPP*zjF?2OAkPuDsmaqrv0?3d{2_o|vClnk)qlJju}7B61L5eSP0a;wy5dIxTK zzAg2}e_us2#E7kQvbRu1*LbWXA*>+A z3zmlvy}eI4OT>At^kAYy^DwMQadBgGqiz_*XwN^o6!G3oZAOHf80=gu^_rl?*8Xij z5Ai_4YJ+SIhbX?U2^&HvQuyW3Q(Pq|iA0^|J|A7T=m4ltSb%&PC_`DNn`LE(w5`5_ zor0CEET_Oi*R78uJygZ+_|6fV$}Ih~yv_ALp%VR%|~jMV5$W z>pD<;)mYc*U|3Nm&qP^Cj(iT^KeS?aKq&{ubkht4P9P%wzP7X=dHnW zYSU`zhe-qMptFlr{exE@jA4qWp^`~h`2L@tQ_Zwwt~poJ+w=gy^)(29YNVvVKNo{3 z*U=cTlt2N;vO>XfY=!J-b=Fc}V}AG{f?QF}rRm^Ou{KUw zRTG5aAkLJl&>(yA8MRYj`B?3-PWG362hQ=JC1i(JLX={)WAM{~%!lxiAM{LT2|Q%2E@}s z_X2-;wx4e>$xMc;7H4M0jEVF$hXi5!N~tQwNiZ!3<>TZ0i!S>T3StNFt_|LBhgzab z8YFohRPgl8D{<`jB|APlkST7P%6v1m;(c3e><0^m#>0?Vq1UQ=i$EVrq0xX+!{fl- zBWjs5E1ut|k~U~xU^+*jS5Kl4^ICGkLvE%Ta^j*2a+`^~Q^3YUKTnI<)mvhuL{dQ< zFSfj!IhE=UCWGzzkvXNn@#>FTv00KO2B6*gt;VPu<_r-e*l?jbn*ZI delta 4473 zcmY+IWmFVew8v+Np@!~8a%fZ-8YBgV8cIN=n?XuaU@k)|J<`p9pn#;Llu}9~C_}e| zq;$>G`__8vzWrf;ID752&syhq_WqxfQ3-6Q0M^hFk*1MlG@^(BfE<~6cA_GDq{U-{ zRT;WlVK|=^(X96`0#U7&y~KvLcQ%V~tvM)0GLB8nmI@c@AyBIA0<*P@wp{|!Tumz# zjPA;Z|F8%SlfwhZ4K#dCsg+rFa~b*p1|evyf39o|nUgPf`H0+;yR!E-N@%$>e&n4> zZ!GxQnNXhn4tSFE#3w9oF#OyS`5I%-mregRH_PE81Jd1MEdIBX07qI>d8xWlLE`P; zMkVytk!Alw5HBeg9XBa-cjT>!@`}RBD?ZG*8jl z#^o}rP`V>dgC}mk-Q^d$HVzEEW9y<()akaV#Ez`UXihtho)wQiUefR(0Rkqd`ij?F z+at)!=>sWH+eo}Ny$A!9CA`Uc6?KWa@875X!9$t2T44+6_oi*8jQVMVrcwu!`|~YV z>LlT~Ao5cVQErTlkq=K+ZyJG{L+?@T!r?3tSEJ)YFGIfXDIbTS7op@{A9P-sDmV^) z(Q6_viqRt@=U(z_noj_TM~=CAy4hH>1rJb|Mx1I2*dc#W-w!s952v+T#FoOb-{J`5 z_311lssS+yDXkoY9A;_T3Hp&r;r!1*Ou!@Vwa6Q>@&feiHa@r3#~)<}+p2Tx2<8A} ztxVblK_jrSW8s6qc28jWDdt6=MUGqN5X){@tc+sy^|J=f$3^(%PJ&M#*4FG=^ei|v z?lQQ=$RQbmBTU-E2USpaL8L>zwPUSkEU`CPXCP|LqOJ1|Y)q`eqfiyY-eAI%=xK8L z>iOQ69j-y}RY&sh>2m|cUZE1`OZZ0_hs*Ih0nEP+HZOJf=%^xDL87zCf=Lxv!;e`z z#zQ|>-5*1C`x3a9)XgL|3&Y*zp}mjQ-jZyMy+)=5I9BG}UosnuF~6kq_L7dQyU+ML~*ZL`~D|uXFX7oHdGd zeDTrqHFgdMXk8EyE_(vh#lO`MhZ@0avyc#k)qXx5@!u+&{S0;XG}3ZDd;gK2ImaN7 z8<}KzD%pN67R7Kc2y<%PeE$N09riVPLu@qxYH<$>K7&q%`}e+;VB6 zkLfYyB%;s_QZi7lliFQmc$6Ut_@+yIzl0{!#Y6hz!;l!sa_>Op_;iSgc`d#sRhbrZ z-9A%yZ?gM2zSw1<7r{U<6DV}@*lrRmR}oz3LN{PJ#q%_YC(r8{G?DI9`?q(AYe56k z!|hVq9f9WL$k{ImhG6Ea^2X#lC;5YMgfNFq*5Ptl^Eb-w60t|fGinA4Q<%-SHM>u2 z_fnD(*^+d%1T8DxRRs;f&cRQ%c3>Dowd-nL_i$g$W55*IV35LJd9TVILm`q7ZJhUVbxf?LyG!X5o5}N!zHX|OI!R8m zYMBP8RI4d&FLv!7=KbTjaQ0rUXPsumov#NRA7ZRAihdqdUe-*EHQOjTF`pYLdn;pJ zk0yJWuo<7CvByrLb?eJm@Yz>dFB}v;SNf6f%4aU`!_QzO*ql9-B}fB7F~KdFqIHLd zu0~BvML?y+(9)Z(twlhdX$aNU`#FUCEBtEZ?z87sxZKIGGCpzG($&sDXVTm!Hy1r9 zWK08TpFfS3&_{FJ6|ps!n8^DD5X> zG4Oc4hF9Ken(7Bm@w!}-GCbfdVa-|^q$OgQo&o%FH{A{oR%FC<8E!qV%_lKFvv>B- zIfv`m(t$0TsA+kF9Z@{jNbWL;1<6ZXm5vEh?Q+mhlpw-UkK6sUZ)i|b(DDiG*obsUipt1nyHkQ^q zR{w5aWak+|^bfaJ2?9|YyvXM60nh~ACm>g!gTrS}=D}#7w{=mX+QW3s!M6sz-Y%Te zHV&LuE<+>_%J_%HiX%Oq6n*24xL4Aq9-!`%1^>amH=u9F8(=LQS)fB#S2kWfo`2bB z)=C*qX0E4_$Ix<{eYcu9wQ=@tuEK7!;ihO41NP_u5&dF{q#2n5*+Ux5r>aTqjm-Ee zf-0}Xd8&1X;Fc(U8bOChJpY09pCaYowJQ>6q|Y3PZep(bH9~&ln_y1c?dLX&FJ#}| z=)kOhsUm4FD97Kh&LoQp`P|P9dLM0vyY0XpShT!KCw}a?Tb$dP8I!V4x3^sCC5HJ# z4EWJhgPv^to)oMzMFx3?>s$9-u2ejzJVBtozQIXSIX}JRh1|4T z;edast#@ZCU?gePx-SGJ1OT4a(@I?7A)9VW#MfbKxSQ#e)o2*)%CuP~iwEG#@2*%G zNJ0ot>P4cnx+ue)!!ps^aem_5s5GXOA+jg-nh~W< z<}4VBlvPmQuSlQ|zCFTrMfBdPYYa>{KK>_dREOun)~CiNv+*MEm)x|eA|CShkTWG> zIzILk6Qu=?n!LX7HP(7`klA(?wL?*txo4IqpEk0ffGd1mz7IWU;vbKw8CS4mqW(dW zp{d-mFFVluO@X*_D6>fcJQsC(f1hg;=vVp1{iMTB_#|0(dxX39B->b_#q{^t6W7X% zP}LluH7%3RfNb4NHIWx|2LNA1zWZ8KO8yT;RU%BtGVs?)Yin#dUBNjs zYA*e(3YKkAXHw;_2Xh(895uVqW}vrz%c1wod)B`yto=iE8U6fTNtG@yG9>NbQO>Qo zXC8;I%+yJyK(G zL|!4}&$S*yjpW}>EiCJ5N{njvk4i#_i$AA-^LiErlG=!9QEDNxe|l(O2lw?-DUy5C zAFP;OGs)r&J ze1VYA$lFr3U@2{6<*T1G_?6Y9Rcx^m^aYVJYt`VDoYB18d6UO}EAI816Ywu-K~k!; zgtw(VtfBQ?TKqtyp23_1RrG$^Ic3VeMZZP4;cIdlYdp9m!DISn<`?GR!@akjqj@+mKgjudW_`TuJ(}`B#iE`Ik)A>jUt^Rtg}S!NFN@(N8Aw?BAXUR9O5kI% z8!8+n9C_lkT7{oPs!Efl3yFN3%&7jzkfn#rg~KUDqV^n#Xy0Hm?krAxCEbCS?Tih0 z2W(jza23dGg{M^Vl``?ow^hFY3PL6eyKM>)eVgeT2Xemz&VU-)W0 z;HPwM>fA$$y%$5CF1?w?-hw>r+ren>0Ozv zMbhmPkhO^dgyOX9C2M&1n=R|ATfDy-3m;$zLtj9^1Q})Cdi8#=Kc_MHY*vEmBjctr z@yGs#ydD?wY?bB0fg%sEx%P<2hq^c;UX^F@&DM*hbQRBYjGt$RdikH;@%}x5-3Huc z-Zw))v+785PZPY`=wFm~T7}q|s;;v*wnxy*tJ+B)trFG7DLqiff6GmpVTMRzO3xK& zkI^qBiDK8Vektt0@)eGqPDxJ;5K=U?RbGH7hK=&}OZ!?>IK+ z#6Q%At1(upCMzGo2h{nS;i2+%9norFTJHp_%u81 zFLihl46U33Y+s4rF~`tPc&!IuFa{mH+Z~Udte>GW1W~Qis4!$AD9J}C(DkKfA3Rh@ z0zktsCHAhyz$bkwA${7-(N^zf&UgW|Jkp&J^n!uO^7>njKRs_;Fv{Dgc1` zzjXkm=mldXASs&NTKx*`o>0J<`;ordj056qNod5s; diff --git a/setup.py b/setup.py index 9ffa33e..8b8c791 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( ], packages=find_packages(), entry_points={"console_scripts": ["novel-compiler = novel_compiler.novel_compiler:main",]}, - install_requires=('docxtpl','markdown','novel_stats','MarkdownPP'), + install_requires=('docxtpl','markdown','novel_stats','MarkdownPP', 'lxml'), include_package_data=True, python_requires='>3.7.0', )