#!/usr/bin/python
"""Format a novel manuscript (.docx) in place.

Optionally keeps only the requested chapters, sets the default font, adds a right-aligned
"author / title / page" header, and double-spaces and indents the body paragraphs. The result is
saved over the input file.
"""

import argparse
import os
import sys

import docx


def expand_chapter_range(chapter_request):
    """Expand a chapter request string into a tuple of chapter numbers.

    A plain number such as "3" yields (3,). A range such as "2-4" yields (2, 3, 4); both ends are
    inclusive. Raises ValueError if a part is not an integer.
    """
    if '-' not in chapter_request:
        return (int(chapter_request),)

    chapter_range = chapter_request.split('-')

    return tuple(range(int(chapter_range[0]), int(chapter_range[1]) + 1))


def filter_chapters(document, chapter_requests):
    """Remove from the document every paragraph outside the requested chapters.

    chapter_requests is a sequence of strings accepted by expand_chapter_range(). If it is empty
    or None, the document is left untouched.

    Chapters are counted by paragraphs whose style name starts with "Heading". Paragraphs before
    the first such heading count as chapter 0, so they are removed unless 0 is requested.
    """
    if not chapter_requests:
        return

    chapter_numbers = {
        chapter_number
        for request in chapter_requests
        for chapter_number in expand_chapter_range(request)
    }
    current_chapter_number = 0

    for paragraph in document.paragraphs:
        if paragraph.style.name.startswith('Heading'):
            current_chapter_number += 1

        if current_chapter_number not in chapter_numbers:
            element = paragraph._element
            element.getparent().remove(element)


def set_default_font(document):
    """Set the font of the document's "Normal" style to Times New Roman."""
    document.styles['Normal'].font.name = 'Times New Roman'


def add_header_text(document, text):
    """Replace the text of the first header paragraph of the first section."""
    document.sections[0].header.paragraphs[0].text = text


def create_element(name):
    """Create a bare XML element from a namespace-prefixed tag name such as "w:fldChar"."""
    return docx.oxml.OxmlElement(name)


def create_attribute(element, name, value):
    """Set the namespace-prefixed attribute (e.g. "w:fldCharType") on the element."""
    element.set(docx.oxml.ns.qn(name), value)


def add_header_page_number(document):
    """Append a PAGE field to the first header paragraph of the first section.

    The field is built by hand as a begin marker, the "PAGE" instruction, and an end marker, all
    appended to a new run.
    """
    run = document.sections[0].header.paragraphs[0].add_run()

    fldChar1 = create_element('w:fldChar')
    create_attribute(fldChar1, 'w:fldCharType', 'begin')

    instrText = create_element('w:instrText')
    create_attribute(instrText, 'xml:space', 'preserve')
    instrText.text = "PAGE"

    fldChar2 = create_element('w:fldChar')
    create_attribute(fldChar2, 'w:fldCharType', 'end')

    run._r.append(fldChar1)
    run._r.append(instrText)
    run._r.append(fldChar2)


def skip_page_number_on_first_page(document):
    """Give the first page its own header and start page numbering at 0.

    An existing w:pgNumType element in the section properties is reused rather than duplicated, so
    running the formatter again on an already formatted file does not pile up elements.
    """
    section = document.sections[0]
    section.different_first_page_header_footer = True

    section_properties = section._sectPr
    page_number_type = section_properties.find(docx.oxml.ns.qn('w:pgNumType'))

    if page_number_type is None:
        page_number_type = create_element('w:pgNumType')
        section_properties.append(page_number_type)

    create_attribute(page_number_type, 'w:start', "0")


def right_align_header(document):
    """Right-align the first header paragraph of the first section."""
    header_paragraph = document.sections[0].header.paragraphs[0]
    header_paragraph.paragraph_format.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.RIGHT


def double_space_and_indent(paragraphs, format_all):
    """Double-space the given paragraphs and indent the first lines of body text.

    A paragraph is "plaintext" unless it has a "Heading..." style, has the "0 Block" style, consists
    of just "#", or has a style with a left or right indent. Unless format_all is true, paragraphs
    before the first non-plaintext paragraph are left untouched.

    Each formatted paragraph gets line spacing 2, no space before or after, and widow control off.
    A plaintext paragraph that follows another plaintext paragraph also gets a half-inch first-line
    indent. When two consecutive formatted paragraphs are both empty and share a style, the earlier
    one is removed from the document.
    """
    previous_paragraph = None
    previous_paragraph_plaintext = None
    reached_first_heading = False

    for paragraph in paragraphs:
        this_paragraph_plaintext = not (
            paragraph.style.name.startswith('Heading')
            or paragraph.style.name == '0 Block'
            or paragraph.text == '#'
            or paragraph.style.paragraph_format.left_indent
            or paragraph.style.paragraph_format.right_indent
        )

        if not this_paragraph_plaintext:
            reached_first_heading = True

        # Skip past the title page until we reach the first chapter/other heading.
        if not reached_first_heading and not format_all:
            continue

        paragraph.paragraph_format.line_spacing = 2
        paragraph.paragraph_format.space_before = docx.shared.Inches(0)
        paragraph.paragraph_format.space_after = docx.shared.Inches(0)

        # To reduce page count, allow splitting paragraphs across pages.
        paragraph.paragraph_format.widow_control = False

        # Indent first lines of paragraphs except for styled ones (headings, etc.) and initial
        # paragraphs.
        if this_paragraph_plaintext and previous_paragraph_plaintext:
            paragraph.paragraph_format.first_line_indent = docx.shared.Inches(0.5)

        # Collapse runs of empty paragraphs with the same style down to a single one.
        if (
            previous_paragraph
            and paragraph.style == previous_paragraph.style
            and not paragraph.text
            and not previous_paragraph.text
        ):
            previous_paragraph._element.getparent().remove(previous_paragraph._element)

        previous_paragraph = paragraph
        previous_paragraph_plaintext = this_paragraph_plaintext


def parse_arguments(unparsed_arguments):
    """Parse the command-line arguments (without the program name) into a namespace.

    The namespace has the attributes document_filename, chapters, author, title and format_all.
    """
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        metavar='FILENAME',
        dest='document_filename',
        help='Novel document to format',
    )
    parser.add_argument(
        '-c',
        '--chapters',
        metavar='CHAPTER',
        nargs='+',
        help='Chapter numbers or ranges to include in output, defaults to all',
    )
    parser.add_argument(
        '-a',
        '--author',
        metavar='LAST NAME',
        help='Author last name to show in page headers, defaults to parsing the first line of the manuscript for the name',
    )
    parser.add_argument(
        '-t',
        '--title',
        help='Project title to show in page headers, defaults to the manuscript filename without the file extension',
    )
    parser.add_argument(
        '-f',
        '--format-all',
        help='Format the whole document, even the title page (if any)',
        action='store_true',
        default=False,
    )

    return parser.parse_args(unparsed_arguments)


def _guess_author_last_name(document):
    """Return the last word of the first line of the document, or '' if there is none."""
    paragraphs = document.paragraphs

    if not paragraphs:
        return ''

    # Split on any whitespace so trailing spaces or tabs do not produce an empty name.
    words = paragraphs[0].text.split('\n')[0].split()

    return words[-1] if words else ''


def main():
    """Format the document named on the command line and save it over the original file."""
    arguments = parse_arguments(sys.argv[1:])
    document = docx.Document(arguments.document_filename)

    # The author is read before chapter filtering, which may remove the title page.
    author_last_name = arguments.author or _guess_author_last_name(document)

    # Use only the base filename so a directory in the path does not end up in the header.
    project_title = (
        arguments.title
        or os.path.splitext(os.path.basename(arguments.document_filename))[0]
    )

    filter_chapters(document, arguments.chapters)
    set_default_font(document)
    add_header_text(document, f'{author_last_name} / {project_title} / ')
    add_header_page_number(document)

    if not arguments.chapters:
        skip_page_number_on_first_page(document)

    right_align_header(document)
    double_space_and_indent(document.paragraphs, arguments.format_all)

    document.save(arguments.document_filename)


if __name__ == '__main__':
    main()