novel-stats/novel_stats/novel_stats.py

134 lines
4.8 KiB
Python
Raw Normal View History

2021-09-11 16:35:38 +00:00
#!/usr/bin/python
import collections
2021-09-11 16:35:38 +00:00
import sys
2021-10-22 20:44:56 +00:00
import tempfile
2021-09-11 16:35:38 +00:00
# A line starting with this marker begins a new chapter; the remainder of the
# line (minus surrounding parentheses/newline) is used as the chapter heading.
CHAPTER_MARKER = '## '
# A line starting with this marker sets the status applied to the text that
# follows it; the value is the remainder of the line with '()' stripped.
STATUS_MARKER = '[status]: # '
# A line starting with this marker sets the current act; chapter word counts
# are accumulated under whichever act is current.
ACT_MARKER = '[act]: # '
2021-10-22 20:44:56 +00:00
# Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
# Lines starting with this marker are excluded from every word count.
COMMENT_MARKER = '[//]: # '
2021-09-11 16:35:38 +00:00
def count_words(line):
    """Return the number of countable words in a single line of Markdown.

    Words are separated by any run of whitespace (spaces, tabs, newlines),
    so tab-separated text is counted correctly.  Two kinds of token are not
    counted as words:

    * a lone ``*`` (e.g. a Markdown bullet), and
    * any token starting with ``#`` (e.g. the ``##`` of a heading).

    Args:
        line: The text to count words in.

    Returns:
        The number of countable words; 0 for an empty or blank line.
    """
    # str.split() with no argument splits on every whitespace run and never
    # yields empty strings, so no separate blank-token check is needed.
    return sum(
        1
        for word in line.split()
        if word != '*' and not word.startswith('#')
    )
2021-09-11 16:57:48 +00:00
def main():
    """Print word-count statistics for a Markdown manuscript.

    The command line is read from ``sys.argv``.  The first argument is the
    path of the Markdown file; the remaining arguments are optional flags:

    * ``-pp``: run the file through MarkdownPP first (primarily to allow
      multi-file novel formatting).
    * ``-c`` / ``--chapter``: print a chapter-by-chapter summary.
    * ``-a`` / ``--act``: print an act-by-act summary.

    The per-status summary and the grand total are always printed.

    Raises:
        SystemExit: If no file argument was supplied.
    """
    arguments = sys.argv[1:]
    if not arguments:
        sys.exit(f'usage: {sys.argv[0]} FILE [-pp] [-c|--chapter] [-a|--act]')
    filename = arguments[0]
    preprocess = '-pp' in arguments

    chapter_heading = None
    act_heading = None
    current_status = None

    total_word_count = 0
    word_count_by_chapter = collections.defaultdict(int)
    word_count_by_status = collections.defaultdict(int)
    word_count_by_act = collections.defaultdict(int)
    # Maps chapter heading -> {status: word count} for that chapter.
    status_by_chapter = {}

    if preprocess:
        # -pp flag to allow Markdown Preprocessing, primarily to allow
        # multi-file novel formatting.  The preprocessed text is written to a
        # temporary file created with Python's built-in tempfile library.
        # Imported lazily so MarkdownPP is only required when -pp is used.
        import MarkdownPP

        mdfile = tempfile.TemporaryFile(mode='w+')
    else:
        mdfile = open(filename)

    # The with-block guarantees the (temporary) file is closed even if
    # preprocessing or parsing raises.
    with mdfile:
        if preprocess:
            with open(filename) as source:
                MarkdownPP.MarkdownPP(
                    input=source, output=mdfile, modules=list(MarkdownPP.modules)
                )
            # Rewind so the preprocessed output can be read back.
            mdfile.seek(0)

        for line in mdfile:
            if line.startswith(CHAPTER_MARKER):
                # A new chapter starts: settle the accounts of the previous one.
                word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
                total_word_count += word_count_by_chapter[chapter_heading]

                chapter_heading = line[len(CHAPTER_MARKER):].strip('()\n')

                # Count the words in chapter heading, because the chapter
                # number and title count as words.
                word_count_by_chapter[chapter_heading] = count_words(chapter_heading)
                status_by_chapter[chapter_heading] = collections.defaultdict(int)
                current_status = None
            elif line.startswith(STATUS_MARKER):
                # Multiple statuses are allowed in a single chapter and can
                # swap back and forth.
                new_status = line[len(STATUS_MARKER):].strip('()\n')
                # The first status of a chapter is seeded with the heading's
                # words.  A status that appears before any chapter heading has
                # no chapter to attribute them to, so it is only tracked
                # globally.
                if current_status is None and chapter_heading is not None:
                    status_by_chapter[chapter_heading][new_status] = count_words(
                        chapter_heading
                    )
                current_status = new_status
            elif line.startswith(ACT_MARKER):
                act_heading = line[len(ACT_MARKER):].strip('()\n')
                word_count_by_act[act_heading] = count_words(act_heading)
            elif line.startswith(COMMENT_MARKER):
                # Don't count the words in a comment.
                pass
            else:
                line_word_count = count_words(line)
                word_count_by_chapter[chapter_heading] += line_word_count
                if current_status:
                    word_count_by_status[current_status] += line_word_count
                    if chapter_heading is not None:
                        status_by_chapter[chapter_heading][current_status] += line_word_count

    # Do some final accounting after the last chapter.
    word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
    total_word_count += word_count_by_chapter[chapter_heading]

    def percent_of_total(count):
        # An empty manuscript has a total of 0; report 0% instead of dividing by zero.
        return count * 100 // total_word_count if total_word_count else 0

    # -c or --chapter to give a chapter-by-chapter word count summary.
    if '-c' in arguments or '--chapter' in arguments:
        for heading, chapter_word_count in word_count_by_chapter.items():
            if heading is None:
                # Text before the first chapter heading is not a chapter.
                continue
            chapter_statuses = status_by_chapter[heading]
            if len(chapter_statuses) > 1:
                print(f'chapter {heading}:')
                for chapter_status, status_count in chapter_statuses.items():
                    print(f'\t {status_count:,} ({chapter_status})')
                print(f'\t {chapter_word_count:,} words (total)')
            elif len(chapter_statuses) == 1:
                chapter_status = next(iter(chapter_statuses))
                print(f'chapter {heading}: {chapter_word_count:,} ({chapter_status})')
            else:
                print(f'chapter {heading}: {chapter_word_count:,}')
        print()

    # -a or --act to give an act-by-act word count summary.
    if '-a' in arguments or '--act' in arguments:
        for act, act_word_count in word_count_by_act.items():
            if act is None:
                # Chapters that precede any act marker are not reported as an act.
                continue
            print(
                'act {}: {:,} words (~{}%)'.format(
                    act, act_word_count, percent_of_total(act_word_count)
                )
            )
        print()

    for status, status_word_count in word_count_by_status.items():
        print(
            f'{status}: {status_word_count:,} words (~{percent_of_total(status_word_count)}%)'
        )
    print(f'total: {total_word_count:,} words')
2021-09-11 16:35:38 +00:00
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == '__main__':
    main()