format-novel/format_novel/format_novel.py

187 lines
5.7 KiB
Python
Executable File

#!/usr/bin/python
import argparse
import os
import sys
import docx
def expand_chapter_range(chapter_request):
    """Expand a chapter request string into a tuple of chapter numbers.

    Args:
        chapter_request: Either a single chapter number such as ``"3"`` or an
            inclusive range such as ``"2-5"``.

    Returns:
        A tuple of ints: one element for a single chapter, or every chapter
        number from the start to the end of the range (inclusive).

    Raises:
        ValueError: If the request is not a number or a well-formed
            ``START-END`` range with START <= END.
    """
    start_text, separator, end_text = chapter_request.partition('-')
    if not separator:
        return (int(chapter_request),)

    try:
        first_chapter, last_chapter = int(start_text), int(end_text)
    except ValueError:
        # Covers inputs such as "3-", "-5", "a-b" and "1-2-3"; the latter used
        # to be accepted with its trailing part silently ignored.
        raise ValueError(
            f'Invalid chapter range {chapter_request!r}: expected START-END'
        ) from None

    # A reversed range would otherwise expand to nothing and silently select
    # no chapters at all.
    if last_chapter < first_chapter:
        raise ValueError(
            f'Invalid chapter range {chapter_request!r}: start is greater than end'
        )

    return tuple(range(first_chapter, last_chapter + 1))
def filter_chapters(document, chapter_requests):
    """Remove every paragraph that is not part of a requested chapter.

    Chapters are counted by walking the document's paragraphs and bumping a
    counter each time a paragraph whose style name starts with 'Heading' is
    seen. Paragraphs before the first such heading carry the count 0.

    Args:
        document: Document whose paragraphs are filtered in place.
        chapter_requests: Chapter numbers/ranges as strings (see
            expand_chapter_range). When empty or None, nothing is removed.
    """
    if not chapter_requests:
        return

    wanted_chapters = set()
    for request in chapter_requests:
        wanted_chapters.update(expand_chapter_range(request))

    chapter_index = 0
    for paragraph in document.paragraphs:
        if paragraph.style.name.startswith('Heading'):
            chapter_index += 1

        if chapter_index in wanted_chapters:
            continue

        # Detach the paragraph's XML element from its parent to drop it.
        node = paragraph._element
        node.getparent().remove(node)
def set_default_font(document):
    """Set the font name of the document's 'Normal' style to Times New Roman."""
    normal_style = document.styles['Normal']
    normal_style.font.name = 'Times New Roman'
def add_header_text(document, text):
    """Replace the text of the first header paragraph of the first section.

    Args:
        document: Document whose first section's header is modified.
        text: New text for the header's first paragraph.
    """
    first_section = document.sections[0]
    header_paragraph = first_section.header.paragraphs[0]
    header_paragraph.text = text
def create_element(name):
    """Build and return a new XML element for the given prefixed tag name.

    Args:
        name: Namespace-prefixed tag name, e.g. 'w:fldChar'.
    """
    element = docx.oxml.OxmlElement(name)
    return element
def create_attribute(element, name, value):
    """Set an attribute on an XML element using a namespace-prefixed name.

    Args:
        element: Element to modify in place.
        name: Namespace-prefixed attribute name, e.g. 'w:fldCharType'.
        value: Attribute value to store.
    """
    qualified_name = docx.oxml.ns.qn(name)
    element.set(qualified_name, value)
def add_header_page_number(document):
    """Append a PAGE field to the first header paragraph of the first section.

    The field is assembled by hand inside a new run as three children: a
    'begin' field character, the "PAGE" instruction text, and an 'end' field
    character.
    """
    header_paragraph = document.sections[0].header.paragraphs[0]
    run = header_paragraph.add_run()

    field_begin = create_element('w:fldChar')
    create_attribute(field_begin, 'w:fldCharType', 'begin')

    instruction = create_element('w:instrText')
    create_attribute(instruction, 'xml:space', 'preserve')
    instruction.text = "PAGE"

    field_end = create_element('w:fldChar')
    create_attribute(field_end, 'w:fldCharType', 'end')

    # Order matters: begin marker, instruction, end marker.
    for child in (field_begin, instruction, field_end):
        run._r.append(child)
def skip_page_number_on_first_page(document):
    """Give the first page its own header and start page numbering at 0.

    The first section is flagged as having a distinct first-page
    header/footer, and its `w:pgNumType` element gets `w:start="0"`, so the
    page following the first one is numbered 1.

    The function is safe to call on a document that was already processed:
    an existing `w:pgNumType` element is reused rather than duplicated.
    """
    section = document.sections[0]
    section.different_first_page_header_footer = True

    qn = docx.oxml.ns.qn
    section_properties = section._sectPr

    page_number_type = section_properties.find(qn('w:pgNumType'))
    if page_number_type is None:
        page_number_type = docx.oxml.OxmlElement('w:pgNumType')
        # Children of w:sectPr form an ordered sequence in the schema, so the
        # new element is placed before any of its successors instead of being
        # appended at the very end.
        section_properties.insert_element_before(
            page_number_type,
            'w:cols',
            'w:formProt',
            'w:vAlign',
            'w:noEndnote',
            'w:titlePg',
            'w:textDirection',
            'w:bidi',
            'w:rtlGutter',
            'w:docGrid',
            'w:printerSettings',
            'w:sectPrChange',
        )

    page_number_type.set(qn('w:start'), "0")
def right_align_header(document):
    """Right-align the first header paragraph of the document's first section."""
    first_header = document.sections[0].header
    paragraph_format = first_header.paragraphs[0].paragraph_format
    paragraph_format.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.RIGHT
def double_space_and_indent(paragraphs, format_all):
    """Apply manuscript spacing and first-line indents to paragraphs in place.

    A paragraph is "plain" when it is none of: a 'Heading*' style, the
    '0 Block' style, a lone '#' line, or a style with a left/right indent.
    Unless format_all is set, everything before the first non-plain paragraph
    is left untouched.

    Each formatted paragraph gets line spacing 2, zero space before/after and
    widow control turned off. A plain paragraph directly following another
    formatted plain paragraph gets a 0.5 inch first-line indent. When two
    consecutive formatted paragraphs share a style and are both empty, the
    earlier one is removed.

    Args:
        paragraphs: Iterable of document paragraphs to process.
        format_all: When true, also format paragraphs before the first
            non-plain paragraph.
    """
    last_formatted = None
    last_was_plain = None
    past_title_page = False

    for current in paragraphs:
        style = current.style
        is_plain = (
            not style.name.startswith('Heading')
            and style.name != '0 Block'
            and current.text != '#'
            and not style.paragraph_format.left_indent
            and not style.paragraph_format.right_indent
        )

        if not is_plain:
            past_title_page = True

        # Leave the title page alone until the first chapter/other heading.
        if not (past_title_page or format_all):
            continue

        layout = current.paragraph_format
        layout.line_spacing = 2
        layout.space_before = docx.shared.Inches(0)
        layout.space_after = docx.shared.Inches(0)

        # Letting paragraphs split across pages reduces the page count.
        layout.widow_control = False

        # Only plain paragraphs that follow another plain paragraph are
        # indented; headings, styled blocks and opening paragraphs are not.
        if is_plain and last_was_plain:
            layout.first_line_indent = docx.shared.Inches(0.5)

        # Collapse a run of two same-styled empty paragraphs by dropping the
        # earlier one.
        if last_formatted and current.style == last_formatted.style:
            if not current.text and not last_formatted.text:
                stale = last_formatted._element
                stale.getparent().remove(stale)

        last_formatted = current
        last_was_plain = is_plain
def parse_arguments(unparsed_arguments):
    """Parse the command-line arguments for the formatter.

    Args:
        unparsed_arguments: Sequence of argument strings (without the program
            name).

    Returns:
        The argparse namespace with document_filename, chapters, author,
        title and format_all attributes.
    """
    # Each entry is (flag names, add_argument keyword options), in the order
    # the arguments are registered.
    argument_specs = (
        (
            (),
            {
                'metavar': 'FILENAME',
                'dest': 'document_filename',
                'help': 'Novel document to format',
            },
        ),
        (
            ('-c', '--chapters'),
            {
                'metavar': 'CHAPTER',
                'nargs': '+',
                'help': 'Chapter numbers or ranges to include in output, defaults to all',
            },
        ),
        (
            ('-a', '--author'),
            {
                'metavar': 'LAST NAME',
                'help': (
                    'Author last name to show in page headers, defaults to parsing '
                    'the first line of the manuscript for the name'
                ),
            },
        ),
        (
            ('-t', '--title'),
            {
                'help': (
                    'Project title to show in page headers, defaults to the '
                    'manuscript filename without the file extension'
                ),
            },
        ),
        (
            ('-f', '--format-all'),
            {
                'help': 'Format the whole document, even the title page (if any)',
                'action': 'store_true',
                'default': False,
            },
        ),
    )

    argument_parser = argparse.ArgumentParser(add_help=True)
    for flags, options in argument_specs:
        argument_parser.add_argument(*flags, **options)

    return argument_parser.parse_args(unparsed_arguments)
def _default_project_title(document_filename):
    """Return the file's base name with directories and extension stripped."""
    return os.path.splitext(os.path.basename(document_filename))[0]


def _default_author_last_name(document):
    """Return the last word on the first line of the document, or ''."""
    paragraphs = document.paragraphs
    if not paragraphs:
        return ''

    first_line = paragraphs[0].text.split('\n')[0]
    # Whitespace-agnostic split so trailing spaces don't yield an empty name.
    words = first_line.split()
    return words[-1] if words else ''


def main():
    """Format the manuscript named on the command line and save it in place.

    Reads arguments from sys.argv, optionally filters chapters, sets the
    default font, builds a right-aligned "AUTHOR / TITLE / <page>" header,
    applies double spacing and indents, then writes the result back to the
    same file the document was loaded from.

    The author defaults to the last word on the first line of the document and
    the title defaults to the document's file name without its directory or
    extension.
    """
    arguments = parse_arguments(sys.argv[1:])
    document = docx.Document(arguments.document_filename)

    author_last_name = arguments.author or _default_author_last_name(document)
    project_title = arguments.title or _default_project_title(arguments.document_filename)

    filter_chapters(document, arguments.chapters)
    set_default_font(document)
    add_header_text(document, f'{author_last_name} / {project_title} / ')
    add_header_page_number(document)

    # The first-page header is only treated specially when the whole
    # manuscript is kept, i.e. no chapter filter was given.
    if not arguments.chapters:
        skip_page_number_on_first_page(document)

    right_align_header(document)
    double_space_and_indent(document.paragraphs, arguments.format_all)
    document.save(arguments.document_filename)


if __name__ == '__main__':
    main()