#!/usr/bin/python
"""Reformat a novel stored as a .docx file, in place.

The script optionally keeps only selected chapters, sets the default font,
puts a right-aligned page number in the header, and double-spaces and indents
the body paragraphs. The input file is overwritten with the result.
"""

import argparse
import sys

import docx
import docx.enum.text
import docx.oxml
import docx.oxml.ns
import docx.shared

# Children of <w:sectPr> that must come after <w:pgNumType> according to the
# WordprocessingML schema's element order for CT_SectPr.
_PG_NUM_TYPE_SUCCESSORS = (
    'w:cols',
    'w:formProt',
    'w:vAlign',
    'w:noEndnote',
    'w:titlePg',
    'w:textDirection',
    'w:bidi',
    'w:rtlGutter',
    'w:docGrid',
    'w:printerSettings',
    'w:sectPrChange',
)


def filter_chapters(document, chapter_numbers):
    """Remove every paragraph that is not part of a requested chapter.

    A chapter starts at each paragraph styled 'Heading 2'; chapters are
    counted from 1 in document order. Paragraphs before the first such heading
    count as chapter 0, so they are removed unless 0 is requested.

    Does nothing when ``chapter_numbers`` is empty or None.
    """
    if not chapter_numbers:
        return

    wanted_chapters = set(chapter_numbers)
    chapter_number = 0

    for paragraph in document.paragraphs:
        if paragraph.style.name == 'Heading 2':
            chapter_number += 1

        if chapter_number not in wanted_chapters:
            element = paragraph._element
            element.getparent().remove(element)


def set_default_font(document):
    """Set the font of the 'Normal' style to Times New Roman."""
    document.styles['Normal'].font.name = 'Times New Roman'


def add_header_text(document, text):
    """Replace the text of the first header paragraph of the first section."""
    document.sections[0].header.paragraphs[0].text = text


def create_element(name):
    """Return a new XML element for the namespace-prefixed tag ``name``."""
    return docx.oxml.OxmlElement(name)


def create_attribute(element, name, value):
    """Set the namespace-prefixed attribute ``name`` on ``element``."""
    element.set(docx.oxml.ns.qn(name), value)


def add_header_page_number(document):
    """Append a PAGE field to the first header paragraph of the first section."""
    run = document.sections[0].header.paragraphs[0].add_run()

    # A field is encoded as: begin marker, instruction text, end marker.
    fldChar1 = create_element('w:fldChar')
    create_attribute(fldChar1, 'w:fldCharType', 'begin')

    instrText = create_element('w:instrText')
    create_attribute(instrText, 'xml:space', 'preserve')
    instrText.text = "PAGE"

    fldChar2 = create_element('w:fldChar')
    create_attribute(fldChar2, 'w:fldCharType', 'end')

    run._r.append(fldChar1)
    run._r.append(instrText)
    run._r.append(fldChar2)


def skip_page_number_on_first_page(document):
    """Give the first page its own header and start page numbering at 0.

    An existing <w:pgNumType> element is reused rather than duplicated, and a
    new one is inserted at its schema-defined position instead of being
    appended after elements (such as <w:titlePg>) that must follow it.
    """
    section = document.sections[0]
    section.different_first_page_header_footer = True

    sect_pr = section._sectPr
    page_number_type = sect_pr.find(docx.oxml.ns.qn('w:pgNumType'))

    if page_number_type is None:
        page_number_type = create_element('w:pgNumType')
        successor_tags = {docx.oxml.ns.qn(tag) for tag in _PG_NUM_TYPE_SUCCESSORS}
        for child in sect_pr:
            if child.tag in successor_tags:
                child.addprevious(page_number_type)
                break
        else:
            # No later-ordered sibling exists, so the end is the right place.
            sect_pr.append(page_number_type)

    create_attribute(page_number_type, 'w:start', "0")


def right_align_header(document):
    """Right-align the first header paragraph of the first section."""
    header_paragraph = document.sections[0].header.paragraphs[0]
    header_paragraph.paragraph_format.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.RIGHT


def double_space_and_indent(paragraphs):
    """Double-space all paragraphs and indent body paragraphs after the first.

    Every paragraph gets a line spacing of 2, no space before or after, and
    widow control disabled. A paragraph whose style name does not start with
    'Heading' gets a half-inch first-line indent, but only when the paragraph
    before it is also a non-heading paragraph.
    """
    previous_paragraph_plaintext = False

    for paragraph in paragraphs:
        paragraph.paragraph_format.line_spacing = 2
        paragraph.paragraph_format.space_before = docx.shared.Inches(0)
        paragraph.paragraph_format.space_after = docx.shared.Inches(0)

        # To reduce page count, allow splitting paragraphs across pages.
        paragraph.paragraph_format.widow_control = False

        this_paragraph_plaintext = not paragraph.style.name.startswith('Heading')

        # Indent first lines of paragraphs except for styled ones (headings, etc.) and initial
        # paragraphs.
        if this_paragraph_plaintext and previous_paragraph_plaintext:
            paragraph.paragraph_format.first_line_indent = docx.shared.Inches(0.5)

        previous_paragraph_plaintext = this_paragraph_plaintext


def parse_arguments(unparsed_arguments):
    """Parse command-line arguments and return the resulting namespace.

    The namespace has ``document_filename`` (str) and ``chapters`` (a list of
    ints, or None when -c/--chapters is not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        metavar='FILENAME',
        dest='document_filename',
        help='Novel document to format',
    )
    parser.add_argument(
        '-c', '--chapters',
        metavar='CHAPTER',
        nargs='+',
        type=int,
        help='Numbers of chapters to include in output',
    )

    return parser.parse_args(unparsed_arguments)


def main():
    """Format the document named on the command line, overwriting it in place."""
    arguments = parse_arguments(sys.argv[1:])
    document = docx.Document(arguments.document_filename)

    filter_chapters(document, arguments.chapters)
    set_default_font(document)
    # Header text is currently disabled; uncomment to put it before the page number.
    # add_header_text(document, 'Author / Project Title / ')
    add_header_page_number(document)

    # The first-page exception is applied only when no chapter filter is given.
    if not arguments.chapters:
        skip_page_number_on_first_page(document)

    right_align_header(document)
    double_space_and_indent(document.paragraphs)

    document.save(arguments.document_filename)


if __name__ == '__main__':
    main()