# format-novel/format_novel/format_novel.py
#
# 129 lines
# 3.6 KiB
# Python
# Executable File

#!/usr/bin/python
import argparse
import sys
import docx
def filter_chapters(document, chapter_numbers, chapter_style='Heading 2'):
    """Remove every paragraph that is not part of a requested chapter.

    Chapters are counted in document order: each paragraph whose style name
    equals ``chapter_style`` starts the next chapter (1, 2, 3, ...).
    Paragraphs that come before the first such heading count as chapter 0, so
    they are removed as well unless 0 is one of the requested numbers.

    Args:
        document: Document-like object exposing ``paragraphs``; each paragraph
            needs ``style.name`` and an ``_element`` whose ``getparent()``
            supports ``remove(element)``.
        chapter_numbers: Collection of hashable chapter numbers to keep. When
            it is empty or ``None`` the document is left untouched.
        chapter_style: Style name that marks the first paragraph of a chapter.
    """
    if not chapter_numbers:
        return

    # Build the lookup once so each paragraph costs a single hash probe
    # instead of a scan over the requested numbers.
    wanted = frozenset(chapter_numbers)
    chapter_number = 0

    for paragraph in document.paragraphs:
        if paragraph.style.name == chapter_style:
            chapter_number += 1

        if chapter_number not in wanted:
            # Detach the paragraph's XML element from its parent.
            element = paragraph._element
            element.getparent().remove(element)
def set_default_font(document, font_name='Times New Roman'):
    """Set the typeface of the document's 'Normal' style.

    Args:
        document: Document-like object whose ``styles`` mapping has a
            'Normal' entry with a ``font`` attribute.
        font_name: Typeface name to assign. Defaults to 'Times New Roman',
            the value this function previously hard-coded.
    """
    document.styles['Normal'].font.name = font_name
def add_header_text(document, text):
    """Replace the text of the first header paragraph of the first section.

    Args:
        document: Document-like object exposing ``sections``.
        text: New text for that header paragraph.
    """
    first_section = document.sections[0]
    header_paragraphs = first_section.header.paragraphs
    header_paragraphs[0].text = text
def create_element(name):
    """Return a new python-docx XML element for the prefixed tag ``name``.

    Args:
        name: Namespace-prefixed tag name, e.g. 'w:fldChar'.
    """
    new_element = docx.oxml.OxmlElement(name)
    return new_element
def create_attribute(element, name, value):
    """Set an attribute on ``element`` using a namespace-prefixed name.

    Args:
        element: XML element exposing ``set(name, value)``.
        name: Prefixed attribute name, e.g. 'w:fldCharType'; it is passed
            through ``docx.oxml.ns.qn`` before being handed to ``set``.
        value: Attribute value to store.
    """
    qualified_name = docx.oxml.ns.qn(name)
    element.set(qualified_name, value)
def add_header_page_number(document):
    """Append a PAGE field to the first header paragraph of the first section.

    The field is written into one new run as three children, in order: a
    'begin' field character, the instruction text "PAGE", and an 'end' field
    character.

    Args:
        document: Document-like object exposing ``sections``.
    """
    run_element = document.sections[0].header.paragraphs[0].add_run()._r

    field_begin = create_element('w:fldChar')
    create_attribute(field_begin, 'w:fldCharType', 'begin')

    field_code = create_element('w:instrText')
    create_attribute(field_code, 'xml:space', 'preserve')
    field_code.text = "PAGE"

    field_end = create_element('w:fldChar')
    create_attribute(field_end, 'w:fldCharType', 'end')

    # Child order matters: begin marker, instruction, end marker.
    for child in (field_begin, field_code, field_end):
        run_element.append(child)
def skip_page_number_on_first_page(document):
    """Give the first section a separate first-page header and start page numbering at 0.

    With numbering starting at 0, the second physical page is numbered 1.
    The first page switches to its own header; presumably that header is
    left empty so no number is shown there (confirm against the callers).

    An existing ``w:pgNumType`` element is reused instead of adding a second
    one, so running the tool again on its own output does not pile up
    duplicates. A new element is inserted at the position the
    WordprocessingML schema prescribes for ``w:sectPr`` children rather than
    appended after later siblings such as ``w:titlePg``.

    Args:
        document: python-docx document; only its first section is changed.
    """
    # Children of w:sectPr that the schema orders after w:pgNumType.
    later_sibling_names = (
        'w:cols',
        'w:formProt',
        'w:vAlign',
        'w:noEndnote',
        'w:titlePg',
        'w:textDirection',
        'w:bidi',
        'w:rtlGutter',
        'w:docGrid',
        'w:printerSettings',
        'w:sectPrChange',
    )

    section = document.sections[0]
    section.different_first_page_header_footer = True

    qn = docx.oxml.ns.qn
    sect_pr = section._sectPr

    page_number_type = sect_pr.find(qn('w:pgNumType'))
    if page_number_type is None:
        page_number_type = docx.oxml.OxmlElement('w:pgNumType')
        later_tags = {qn(name) for name in later_sibling_names}
        # First existing child that must come after w:pgNumType, if any.
        successor = next(
            (child for child in sect_pr if child.tag in later_tags), None
        )
        if successor is None:
            sect_pr.append(page_number_type)
        else:
            successor.addprevious(page_number_type)

    page_number_type.set(qn('w:start'), "0")
def right_align_header(document):
    """Right-align the first header paragraph of the first section.

    Args:
        document: Document-like object exposing ``sections``.
    """
    first_header_paragraph = document.sections[0].header.paragraphs[0]
    layout = first_header_paragraph.paragraph_format
    layout.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.RIGHT
def double_space_and_indent(paragraphs):
    """Double-space every paragraph and indent consecutive body paragraphs.

    Every paragraph gets line spacing 2, zero space before and after, and
    widow control switched off. A paragraph counts as body text when its
    style name does not start with 'Heading'. A body paragraph receives a
    0.5 inch first-line indent only when the paragraph right before it was
    body text too, so headings and the first paragraph after a heading (or
    at the very start) are not given an indent here.

    Args:
        paragraphs: Iterable of paragraphs exposing ``paragraph_format`` and
            ``style.name``.
    """
    previous_was_body = False

    for paragraph in paragraphs:
        layout = paragraph.paragraph_format
        layout.line_spacing = 2
        layout.space_before = docx.shared.Inches(0)
        layout.space_after = docx.shared.Inches(0)

        # Letting paragraphs split across pages keeps the page count down.
        layout.widow_control = False

        is_body = not paragraph.style.name.startswith('Heading')

        # Indent first lines only for body text that follows body text.
        if is_body and previous_was_body:
            layout.first_line_indent = docx.shared.Inches(0.5)

        previous_was_body = is_body
def parse_arguments(unparsed_arguments):
    """Parse command-line arguments for the formatter.

    Args:
        unparsed_arguments: Sequence of argument strings, without the
            program name.

    Returns:
        An ``argparse.Namespace`` with ``document_filename`` (str) and
        ``chapters`` (list of int, or ``None`` when the option is absent).
    """
    parser = argparse.ArgumentParser(add_help=True)

    # Required positional: the document to format.
    parser.add_argument(
        'document_filename',
        metavar='FILENAME',
        help='Novel document to format',
    )

    # Optional list of one or more chapter numbers.
    chapter_option = dict(
        metavar='CHAPTER',
        nargs='+',
        type=int,
        help='Numbers of chapters to include in output',
    )
    parser.add_argument('-c', '--chapters', **chapter_option)

    return parser.parse_args(unparsed_arguments)
def main():
    """Format the document named on the command line and save it in place."""
    options = parse_arguments(sys.argv[1:])
    path = options.document_filename
    selected_chapters = options.chapters

    novel = docx.Document(path)

    filter_chapters(novel, selected_chapters)
    set_default_font(novel)
    # Header text is currently disabled:
    # add_header_text(document, 'Author / Project Title / ')
    add_header_page_number(novel)
    # Only a full document gets the first-page treatment (presumably a
    # chapter excerpt has no title page to skip).
    if not selected_chapters:
        skip_page_number_on_first_page(novel)
    right_align_header(novel)
    double_space_and_indent(novel.paragraphs)

    # The input file is overwritten with the formatted result.
    novel.save(path)
# Run the formatter only when this file is executed as a script.
if __name__ == "__main__":
    main()