2021-09-11 16:35:38 +00:00
#!/usr/bin/python
2021-09-11 22:39:20 +00:00
import collections
2021-09-11 16:35:38 +00:00
import sys
# Line prefixes recognised by main(). Each one is matched with
# str.startswith() against the raw line, so it must begin at column 0 with
# no leading padding.

# Level-2 Markdown heading: starts a new chapter.
CHAPTER_MARKER = '## '
# Link-reference style annotations; the value follows the marker, and main()
# strips surrounding parentheses from it.
STATUS_MARKER = '[status]: # '
ACT_MARKER = '[act]: # '
# Standard markdown comment marker, supported by pandoc and calibre's ebook-convert
COMMENT_MARKER = '[//]: # '
2021-09-11 16:35:38 +00:00
def count_words(line):
    """Return the number of countable words in *line*.

    The line is split on any run of whitespace (spaces, tabs, newlines), so
    leading/trailing whitespace and repeated separators never produce empty
    tokens. Two kinds of token are ignored:

    * a lone ``*`` (e.g. the pieces of a ``* * *`` scene break), and
    * anything beginning with ``#`` (e.g. Markdown heading markers).

    Args:
        line: A string of text, typically one line of a Markdown file.

    Returns:
        The count of remaining tokens as an int; 0 for an empty or
        whitespace-only line.
    """
    return sum(
        1
        for word in line.split()
        if word != '*' and not word.startswith('#')
    )
2021-09-11 16:57:48 +00:00
def _percent(part, total):
    """Return *part* as a whole-number percentage of *total*, rounded down.

    Returns 0 when *total* is 0 so an empty manuscript does not raise
    ZeroDivisionError in the summary lines.
    """
    return part * 100 // total if total else 0


def _open_markdown(filename, preprocess):
    """Open the manuscript and return a readable text file object.

    When *preprocess* is true, the file is first expanded with MarkdownPP
    (used to allow multi-file novel formatting) into a temporary file created
    with the built-in tempfile library; that temporary file is rewound and
    returned. Otherwise *filename* is opened directly.

    The caller owns the returned file and must close it.
    """
    if not preprocess:
        return open(filename)

    # Imported lazily so MarkdownPP is only required when -pp is requested.
    import tempfile

    import MarkdownPP

    expanded = tempfile.TemporaryFile(mode='w+')
    try:
        # Close the raw input as soon as preprocessing has consumed it.
        with open(filename) as raw:
            MarkdownPP.MarkdownPP(input=raw, output=expanded, modules=list(MarkdownPP.modules))
        expanded.seek(0)
    except BaseException:
        # Do not leak the temporary file if preprocessing fails.
        expanded.close()
        raise
    return expanded


def main():
    """Print word-count statistics for a Markdown manuscript.

    Command line: ``FILE [-pp] [-c | --chapter] [-a | --act]``

    * ``FILE`` (first argument) is the Markdown file to analyse.
    * ``-pp`` runs the file through MarkdownPP before counting.
    * ``-c`` / ``--chapter`` prints a chapter-by-chapter summary.
    * ``-a`` / ``--act`` prints an act-by-act summary.

    A per-status summary and the grand total are always printed.

    Lines are classified by prefix: CHAPTER_MARKER starts a new chapter,
    STATUS_MARKER sets the status that following text is attributed to
    (it may change several times within a chapter), ACT_MARKER sets the
    current act, and COMMENT_MARKER lines are ignored. Every other line is
    counted with count_words().

    Raises:
        SystemExit: if no file name is given.
        OSError: if the file cannot be opened.
    """
    arguments = sys.argv[1:]
    if not arguments:
        sys.exit(f'usage: {sys.argv[0]} FILE [-pp] [-c|--chapter] [-a|--act]')
    filename = arguments[0]

    chapter_heading = None
    act_heading = None
    current_status = None
    total_word_count = 0
    word_count_by_chapter = collections.defaultdict(int)
    word_count_by_status = collections.defaultdict(int)
    word_count_by_act = collections.defaultdict(int)
    # chapter heading -> {status -> words}. A defaultdict so that a status
    # marker appearing before the first chapter heading does not raise KeyError.
    status_by_chapter = collections.defaultdict(lambda: collections.defaultdict(int))

    with _open_markdown(filename, '-pp' in arguments) as mdfile:
        for line in mdfile:
            if line.startswith(CHAPTER_MARKER):
                # Fold the chapter that just ended into its act and the total.
                # NOTE(review): the finished chapter is credited to whichever
                # act is current *now*; if an act marker sits between two
                # chapters, the earlier chapter lands in the later act.
                # Confirm the intended file layout.
                word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
                total_word_count += word_count_by_chapter[chapter_heading]

                chapter_heading = line[len(CHAPTER_MARKER):].strip('() \n')

                # Count the words in chapter heading, because the chapter
                # number and title count as words.
                word_count_by_chapter[chapter_heading] = count_words(chapter_heading)
                status_by_chapter[chapter_heading] = collections.defaultdict(int)
                current_status = None
            elif line.startswith(STATUS_MARKER):
                # Multiple statuses are allowed in a single chapter and may
                # swap back and forth.
                new_status = line[len(STATUS_MARKER):].strip('() \n')
                if current_status is None:
                    # The first status seen in a chapter is also credited
                    # with the heading's words (none if no chapter is open).
                    heading_words = 0 if chapter_heading is None else count_words(chapter_heading)
                    status_by_chapter[chapter_heading][new_status] = heading_words
                current_status = new_status
            elif line.startswith(ACT_MARKER):
                act_heading = line[len(ACT_MARKER):].strip('() \n')
                # The act title's own words seed that act's count.
                word_count_by_act[act_heading] = count_words(act_heading)
            elif line.startswith(COMMENT_MARKER):
                # Don't count the words in a comment.
                continue
            else:
                line_word_count = count_words(line)
                word_count_by_chapter[chapter_heading] += line_word_count

                if current_status:
                    word_count_by_status[current_status] += line_word_count
                    status_by_chapter[chapter_heading][current_status] += line_word_count

    # Do some final accounting after the last chapter.
    word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
    total_word_count += word_count_by_chapter[chapter_heading]

    if '-c' in arguments or '--chapter' in arguments:
        # Chapter-by-chapter word count summary.
        for heading, chapter_word_count in word_count_by_chapter.items():
            if heading is None:
                # Text that precedes the first chapter heading is not listed.
                continue
            statuses = status_by_chapter[heading]
            if len(statuses) > 1:
                print(f'chapter {heading}:')
                for chapter_status, status_count in statuses.items():
                    print(f'\t{status_count:,} ({chapter_status})')
                print(f'\t{chapter_word_count:,} words (total)')
            elif len(statuses) == 1:
                chapter_status = next(iter(statuses))
                print(f'chapter {heading}: {chapter_word_count:,} ({chapter_status})')
            else:
                print(f'chapter {heading}: {chapter_word_count:,}')

        print()

    if '-a' in arguments or '--act' in arguments:
        # Act-by-act word count summary.
        for heading, act_word_count in word_count_by_act.items():
            if heading is None:
                continue
            print(f'act {heading}: {act_word_count:,} words (~{_percent(act_word_count, total_word_count)}%)')

        print()

    for status, status_word_count in word_count_by_status.items():
        print(f'{status}: {status_word_count:,} words (~{_percent(status_word_count, total_word_count)}%)')

    print(f'total: {total_word_count:,} words')
2021-09-11 16:35:38 +00:00
# Run the word counter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()