#!/usr/bin/python
"""Report word counts for a novel written in Markdown.

The manuscript is scanned line by line.  Lines starting with one of the
marker constants below are treated as structure (chapter heading, status tag,
act tag, comment); every other line is counted as prose.
"""
import argparse
import collections
import tempfile

CHAPTER_MARKER = '## '
STATUS_MARKER = '[status]: # '
ACT_MARKER = '[act]: # '
# Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
COMMENT_MARKER = '[//]: # '

# Bundle of the counters produced by one pass over the manuscript.
_Tally = collections.namedtuple(
    '_Tally',
    ['total', 'by_chapter', 'by_status', 'by_act', 'status_by_chapter'],
)


def count_words(line):
    """Return the number of words in *line*.

    Words are separated by any run of whitespace (spaces, tabs, newlines).
    A lone ``*`` and any token starting with ``#`` are not counted.
    """
    return sum(
        1
        for word in line.split()
        if word != '*' and not word.startswith('#')
    )


def _percent(part, total):
    """Return *part* as a floored integer percentage of *total* (0 if *total* is 0)."""
    # Guard against an empty manuscript: act headings and zero-word status
    # entries can exist even when the overall total is zero.
    return part * 100 // total if total else 0


def _marker_value(line, marker):
    """Return the text following *marker* with surrounding parentheses/newline removed."""
    return line[len(marker):].strip('()\n')


def _open_manuscript(arguments):
    """Return a readable file object holding the text to count.

    Without ``-pp`` this is the file argparse already opened.  With ``-pp``
    the MarkdownPP pre-processor output is written to a temporary file, which
    is rewound and returned; the original input file is closed once
    pre-processing has finished.
    """
    source = arguments.markdown_file
    if not arguments.pp:
        return source

    # -pp flag to allow Markdown Preprocessing primarily to allow multi-file novel formatting
    # this is implemented using a temporary file created using python's built-in tempfile library
    import MarkdownPP  # Optional dependency, only needed when -pp is given.

    merged = tempfile.TemporaryFile(mode='w+')
    try:
        MarkdownPP.MarkdownPP(
            input=source, output=merged, modules=list(MarkdownPP.modules)
        )
    except Exception:
        merged.close()
        raise
    finally:
        source.close()
    merged.seek(0)
    return merged


def _tally_words(lines):
    """Walk *lines* once and return a ``_Tally`` of all word counters.

    A chapter's words are added to the running total, and to whichever act is
    current, when the next chapter heading is reached or the input ends.
    """
    chapter_heading = None
    act_heading = None
    current_status = None
    total_word_count = 0
    word_count_by_chapter = collections.defaultdict(int)
    word_count_by_status = collections.defaultdict(int)
    word_count_by_act = collections.defaultdict(int)
    status_by_chapter = collections.defaultdict(lambda: collections.defaultdict(int))

    for line in lines:
        if line.startswith(CHAPTER_MARKER):
            # Close out the chapter that just ended.
            word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
            total_word_count += word_count_by_chapter[chapter_heading]
            chapter_heading = _marker_value(line, CHAPTER_MARKER)
            # Count the words in chapter heading, because the chapter number
            # and title count as words.
            if chapter_heading:
                word_count_by_chapter[chapter_heading] = count_words(chapter_heading)
            current_status = None
        elif line.startswith(STATUS_MARKER):
            # Multiple statuses are allowed in a single chapter; the first one
            # seen in a chapter is also credited with the heading's words.
            first_in_chapter = current_status is None
            current_status = _marker_value(line, STATUS_MARKER)
            if first_in_chapter and chapter_heading:
                status_by_chapter[chapter_heading][current_status] = count_words(
                    chapter_heading
                )
        elif line.startswith(ACT_MARKER):
            act_heading = _marker_value(line, ACT_MARKER)
            word_count_by_act[act_heading] = count_words(act_heading)
        elif line.startswith(COMMENT_MARKER):
            # Don't count the words in a comment.
            continue
        else:
            line_word_count = count_words(line)
            word_count_by_chapter[chapter_heading] += line_word_count
            if current_status:
                word_count_by_status[current_status] += line_word_count
                status_by_chapter[chapter_heading][current_status] += line_word_count

    # Do some final accounting after the last chapter.
    word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
    total_word_count += word_count_by_chapter[chapter_heading]

    return _Tally(
        total_word_count,
        word_count_by_chapter,
        word_count_by_status,
        word_count_by_act,
        status_by_chapter,
    )


def main(argv=None):
    """Parse the command line, count the manuscript's words and print the report.

    Args:
        argv: Optional list of command-line arguments (without the program
            name).  Defaults to ``None``, in which case argparse reads
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--chapter',
        action='store_true',
        help='output chapter-by-chapter breakdown of word counts, including how many words in each chapter are tagged with which status',
    )
    parser.add_argument(
        '-a',
        '--act',
        action='store_true',
        help='output act-by-act breakdown of word counts (total only)',
    )
    parser.add_argument(
        '-pp',
        action='store_true',
        help='run markdown pre-processor, this allows for a multi-file input (e.g. each chapter in its own file), but requires the MarkdownPP python library',
    )
    parser.add_argument(
        'markdown_file',
        type=argparse.FileType('r'),
        help='The markdown file for the novel, main file if a multi-file novel',
    )
    arguments = parser.parse_args(argv)

    # The context manager guarantees the file is closed even if counting fails.
    with _open_manuscript(arguments) as mdfile:
        tally = _tally_words(mdfile)

    # -c or --chapter to give a chapter-by-chapter word count summary.
    if arguments.chapter:
        for chapter_heading, chapter_word_count in tally.by_chapter.items():
            if chapter_heading is None:
                # Words that appear before the first chapter heading.
                continue
            statuses = tally.status_by_chapter.get(chapter_heading, {})
            if len(statuses) > 1:
                print(f'chapter {chapter_heading}:')
                for chapter_status, status_count in statuses.items():
                    print(f'\t {status_count:,} ({chapter_status})')
                print(f'\t {chapter_word_count:,} words (total)')
            elif len(statuses) == 1:
                chapter_status = next(iter(statuses))
                print(f'chapter {chapter_heading}: {chapter_word_count:,} ({chapter_status})')
            else:
                print(f'chapter {chapter_heading}: {chapter_word_count:,}')
        print()

    # -a or --act to give an act-by-act word count summary.
    if arguments.act:
        for act_heading, act_word_count in tally.by_act.items():
            if act_heading is None:
                # Words counted before any act marker was seen.
                continue
            print(
                f'act {act_heading}: {act_word_count:,} words (~{_percent(act_word_count, tally.total)}%)'
            )
        print()

    for status, status_word_count in tally.by_status.items():
        print(
            f'{status}: {status_word_count:,} words (~{_percent(status_word_count, tally.total)}%)'
        )
    print(f'total: {tally.total:,} words')


if __name__ == '__main__':
    main()