novel-stats/novel_stats/novel_stats.py

87 lines
2.5 KiB
Python
Executable File

#!/usr/bin/python
import collections
import os
import string
import sys
# Line prefixes that main() recognises in the manuscript file.
#
# A line starting with CHAPTER_MARKER opens a new chapter; the remainder of
# the line is parsed as the integer chapter number.
CHAPTER_MARKER = '## '
# A line starting with STATUS_MARKER sets the status of the current chapter;
# the remainder of the line, with surrounding parentheses and the newline
# stripped, is used as the status label.
STATUS_MARKER = '[status]: # '
# A line starting with ACT_MARKER opens a new act; the remainder of the line,
# with surrounding parentheses and the newline stripped, is parsed as the
# integer act number.
ACT_MARKER = '[act]: # '
def count_words(line):
    """Return the number of countable words on one line of text.

    Tokens are separated by any run of whitespace (spaces, tabs, newlines).
    Two kinds of token are skipped: a lone ``*`` and any token that starts
    with ``#`` (presumably scene-break / heading markup rather than prose --
    confirm against the manuscript conventions).

    Args:
        line: A single line of text, with or without its trailing newline.

    Returns:
        The number of counted tokens; 0 for an empty or all-whitespace line.
    """
    # split() with no argument splits on every whitespace run and never
    # yields empty tokens, so tab-separated words are counted individually.
    return sum(
        1
        for word in line.split()
        if word != '*' and not word.startswith('#')
    )
def _chapter_summary(chapter_number, word_count, status):
    """Return the one-line report for a chapter.

    The status is appended in parentheses only when the chapter has one.
    """
    suffix = ' ({})'.format(status) if status else ''
    return 'chapter {}: {} words{}'.format(chapter_number, word_count, suffix)


def main():
    """Print word counts per chapter, per act, per status, and in total.

    The file named by the first command-line argument is read line by line:

    * a line starting with CHAPTER_MARKER closes the previous chapter and
      opens a new one, numbered by the integer that follows the marker;
    * a line starting with STATUS_MARKER sets the current chapter's status;
    * a line starting with ACT_MARKER closes the previous act and opens a
      new one, numbered by the integer that follows the marker;
    * every other line is passed to count_words() and added to the current
      chapter (and to its status bucket, if a status is set).

    Exits with a usage message when no filename is given. Raises ValueError
    if a chapter or act marker is not followed by an integer.
    """
    arguments = sys.argv[1:]
    if not arguments:
        sys.exit('usage: {} FILENAME'.format(sys.argv[0]))
    filename = arguments[0]

    chapter_number = None
    chapter_word_count = 0
    chapter_status = None
    act_number = None
    act_word_count = 0
    total_word_count = 0
    word_count_by_status = collections.defaultdict(int)

    # The context manager closes the file even if a marker fails to parse.
    with open(filename) as manuscript:
        for line in manuscript:
            if line.startswith(CHAPTER_MARKER):
                # Compare against None so that a chapter numbered 0 is
                # still reported.
                if chapter_number is not None:
                    print(_chapter_summary(
                        chapter_number, chapter_word_count, chapter_status))
                chapter_number = int(line[len(CHAPTER_MARKER):])
                # Fold the chapter that just ended into the running totals.
                act_word_count += chapter_word_count
                total_word_count += chapter_word_count
                # Start at one, because the chapter number itself counts
                # as a word.
                chapter_word_count = 1
                # The finished chapter's own heading word is credited to
                # its status here, because the status is only known after
                # the heading has been read.
                if chapter_status:
                    word_count_by_status[chapter_status] += 1
                # Clear out the status from the previous chapter.
                chapter_status = None
            elif line.startswith(STATUS_MARKER):
                chapter_status = line[len(STATUS_MARKER):].strip('()\n')
            elif line.startswith(ACT_MARKER):
                # NOTE(review): the chapter in progress is not folded into
                # the act being closed here; this looks correct only if the
                # act marker follows the heading of the act's first
                # chapter -- confirm against the manuscript layout.
                if act_number is not None:
                    print('act {}: {} words'.format(
                        act_number, act_word_count))
                act_number = int(line[len(ACT_MARKER):].strip('()\n'))
                act_word_count = 1
            else:
                line_word_count = count_words(line)
                chapter_word_count += line_word_count
                if chapter_status:
                    word_count_by_status[chapter_status] += line_word_count

    # Close out the final chapter: credit its heading word to its status and
    # report it in the same format as every earlier chapter.
    if chapter_status:
        word_count_by_status[chapter_status] += 1
    if chapter_number is not None:
        print(_chapter_summary(
            chapter_number, chapter_word_count, chapter_status))

    # The last chapter has not been folded into its act yet; do so before
    # reporting the final act, mirroring what happens for the grand total.
    act_word_count += chapter_word_count
    if act_number is not None:
        print('act {}: {} words'.format(act_number, act_word_count))
    print()

    total_word_count += chapter_word_count
    for status, status_word_count in word_count_by_status.items():
        print('{}: {} words'.format(status, status_word_count))
    print('total: {} words'.format(total_word_count))
# Run the report only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()