2021-09-11 16:35:38 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
|
2021-10-23 12:08:12 +00:00
|
|
|
import argparse
|
2021-09-11 22:39:20 +00:00
|
|
|
import collections
|
2021-10-22 20:44:56 +00:00
|
|
|
import tempfile
|
2021-09-11 16:35:38 +00:00
|
|
|
|
|
|
|
# Prefix that identifies a chapter heading line (a level-2 markdown heading).
# main() takes the rest of the line, minus surrounding parentheses and the
# newline, as the chapter title.
CHAPTER_MARKER = '## '
|
2021-09-11 22:39:20 +00:00
|
|
|
# Prefix that identifies a status line. main() takes the rest of the line,
# minus surrounding parentheses and the newline, as the status under which
# the following text is counted.
STATUS_MARKER = '[status]: # '
|
|
|
|
# Prefix that identifies an act line. main() takes the rest of the line,
# minus surrounding parentheses and the newline, as the act to which
# chapter word counts are credited.
ACT_MARKER = '[act]: # '
|
2021-10-22 20:44:56 +00:00
|
|
|
# Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
|
|
|
|
# Lines starting with this prefix are skipped by main() and contribute to no
# word count.
COMMENT_MARKER = '[//]: # '
|
2021-09-11 16:35:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
def count_words(line: str) -> int:
    """Return the number of words in a single line of markdown text.

    Words are runs of non-whitespace characters. Splitting is done on any
    whitespace (spaces, tabs, newlines), so tab-separated words are counted
    individually rather than as one word.

    Two kinds of token are not counted as words:
      * a lone ``*`` (e.g. a list bullet), and
      * any token starting with ``#`` (e.g. the ``##`` of a heading).

    Args:
        line: The text to count; may include a trailing newline.

    Returns:
        The number of countable words, 0 for an empty or blank line.
    """
    # str.split() with no argument never yields empty tokens, so no separate
    # blank-token check is needed.
    return sum(
        1
        for word in line.split()
        if word != '*' and not word.startswith('#')
    )
|
|
|
|
|
|
|
|
|
2021-09-11 16:57:48 +00:00
|
|
|
def main():
    """Parse the command line, count the words of a markdown novel, and print a summary.

    The input is read line by line:
      * a line starting with CHAPTER_MARKER starts a new chapter; the words of
        the heading itself count towards the chapter,
      * a line starting with STATUS_MARKER sets the status under which the
        following lines are counted,
      * a line starting with ACT_MARKER sets the act that chapter totals are
        credited to,
      * a line starting with COMMENT_MARKER is ignored,
      * every other line is counted with count_words().

    Output (to stdout): an optional per-chapter breakdown (-c/--chapter), an
    optional per-act breakdown (-a/--act), then one line per status and the
    grand total. Percentages are integer-floored and reported as 0 when the
    total word count is 0.

    With -pp the input is first run through the third-party MarkdownPP
    pre-processor into a temporary file, which is then counted instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--chapter',
        action='store_true',
        help='output chapter-by-chapter breakdown of word counts, including how many words in each chapter are tagged with which status',
    )
    parser.add_argument(
        '-a',
        '--act',
        action='store_true',
        help='output act-by-act breakdown of word counts (total only)',
    )
    parser.add_argument(
        '-pp',
        action='store_true',
        help='run markdown pre-processor, this allows for a multi-file input (e.g. each chapter in its own file), but requires the MarkdownPP python library',
    )
    parser.add_argument(
        'markdown_file',
        type=argparse.FileType('r'),
        help='The markdown file for the novel, main file if a multi-file novel',
    )
    arguments = parser.parse_args()

    if arguments.pp:
        # -pp flag to allow Markdown Preprocessing, primarily to allow multi-file novel formatting.
        # This is implemented using a temporary file created using python's built-in tempfile library.
        # Imported lazily so the library is only required when -pp is given.
        import MarkdownPP

        mdfile = tempfile.TemporaryFile(mode='w+')
        # The original input is only needed for pre-processing; close it once
        # the pre-processed text has been written to the temporary file.
        with arguments.markdown_file:
            MarkdownPP.MarkdownPP(
                input=arguments.markdown_file, output=mdfile, modules=list(MarkdownPP.modules)
            )
        mdfile.seek(0)
    else:
        mdfile = arguments.markdown_file

    chapter_heading = None
    act_heading = None
    current_status = None
    total_word_count = 0
    word_count_by_chapter = collections.defaultdict(int)
    word_count_by_status = collections.defaultdict(int)
    word_count_by_act = collections.defaultdict(int)
    status_by_chapter = collections.defaultdict(lambda: collections.defaultdict(int))

    # The context manager closes the file even if counting fails part-way.
    with mdfile:
        for line in mdfile:
            if line.startswith(CHAPTER_MARKER):
                # A new chapter begins: credit the finished chapter (or the
                # text before the first chapter, keyed by None) to the
                # current act and to the grand total.
                finished_count = word_count_by_chapter[chapter_heading]
                word_count_by_act[act_heading] += finished_count
                total_word_count += finished_count

                chapter_heading = line[len(CHAPTER_MARKER) :].strip('()\n')

                # Count the words in chapter heading, because the chapter number and title count as words.
                if chapter_heading:
                    word_count_by_chapter[chapter_heading] = count_words(chapter_heading)
                # NOTE(review): the status reset is assumed to apply to every
                # chapter heading, including an empty one - confirm against
                # the intended indentation of the original file.
                current_status = None
            elif line.startswith(STATUS_MARKER):
                # Multiple statuses are allowed in a single chapter; they can swap back and forth.
                first_status_in_chapter = current_status is None
                current_status = line[len(STATUS_MARKER) :].strip('()\n')
                if first_status_in_chapter and chapter_heading:
                    # The heading's own words are attributed to the first
                    # status that appears in the chapter.
                    status_by_chapter[chapter_heading][current_status] = count_words(
                        chapter_heading
                    )
            elif line.startswith(ACT_MARKER):
                act_heading = line[len(ACT_MARKER) :].strip('()\n')
                word_count_by_act[act_heading] = count_words(act_heading)
            elif line.startswith(COMMENT_MARKER):
                # Don't count the words in a comment.
                continue
            else:
                line_word_count = count_words(line)
                word_count_by_chapter[chapter_heading] += line_word_count

                if current_status:
                    word_count_by_status[current_status] += line_word_count
                    status_by_chapter[chapter_heading][current_status] += line_word_count

    # Do some final accounting after the last chapter.
    last_chapter_count = word_count_by_chapter[chapter_heading]
    word_count_by_act[act_heading] += last_chapter_count
    total_word_count += last_chapter_count

    def percent_of_total(part):
        # Integer percentage of the grand total; 0 when nothing was counted,
        # which avoids a ZeroDivisionError on empty input.
        if not total_word_count:
            return 0
        return part * 100 // total_word_count

    # -c or --chapter to give a chapter-by-chapter word count summary.
    if arguments.chapter:
        for heading, chapter_word_count in word_count_by_chapter.items():
            # The None key holds text that precedes the first chapter heading.
            if heading is None:
                continue

            statuses = status_by_chapter[heading]
            if len(statuses) > 1:
                print(f'chapter {heading}:')

                for chapter_status, status_count in statuses.items():
                    print(f'\t {status_count:,} ({chapter_status})')
                print(f'\t {chapter_word_count:,} words (total)')
            elif len(statuses) == 1:
                chapter_status = next(iter(statuses))
                print(f'chapter {heading}: {chapter_word_count:,} ({chapter_status})')
            else:
                print(f'chapter {heading}: {chapter_word_count:,}')

        print()

    # -a or --act to give an act-by-act word count summary.
    if arguments.act:
        for act, act_word_count in word_count_by_act.items():
            # The None key holds chapters that precede the first act marker.
            if act is None:
                continue

            print(f'act {act}: {act_word_count:,} words (~{percent_of_total(act_word_count)}%)')

        print()

    for status, status_word_count in word_count_by_status.items():
        print(f'{status}: {status_word_count:,} words (~{percent_of_total(status_word_count)}%)')

    print(f'total: {total_word_count:,} words')
|
2021-09-11 16:35:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the word counter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|