novel-stats/novel_stats/novel_stats.py

99 lines
3.2 KiB
Python
Executable File

#!/usr/bin/python
import collections
import os
import string
import sys
# Prefix of a chapter heading line; main() parses the rest of the line as the
# integer chapter number.
CHAPTER_MARKER = '## '
# Prefix of a status annotation line; main() takes the rest of the line, with
# surrounding parentheses and the newline stripped, as the chapter's status.
STATUS_MARKER = '[status]: # '
# Prefix of an act annotation line; main() parses the rest of the line, with
# surrounding parentheses and the newline stripped, as the integer act number.
ACT_MARKER = '[act]: # '
def count_words(line):
    """Return the number of countable words in a single line of text.

    Words are separated by any run of whitespace (spaces, tabs, newlines),
    so tab-separated or multiply-spaced words are each counted once.
    A lone ``*`` and any token that starts with ``#`` are not counted.

    Args:
        line: One line of text, with or without its trailing newline.

    Returns:
        The number of tokens in ``line`` that count as words.
    """
    # str.split() with no separator never yields empty tokens, so no explicit
    # blank-token check is needed.
    return sum(
        1
        for word in line.split()
        if word != '*' and not word.startswith('#')
    )
def _percent_of(part, total):
    """Return ``part`` as a floored whole-number percentage of ``total`` (0 if ``total`` is 0)."""
    return part * 100 // total if total else 0


def main():
    """Print word-count statistics for the file named on the command line.

    The file is read line by line. A line starting with CHAPTER_MARKER opens
    a new chapter, a line starting with STATUS_MARKER records the current
    chapter's status, a line starting with ACT_MARKER switches the current
    act, and every other line is counted with count_words(). Per-chapter,
    per-act and per-status totals are printed, followed by the grand total.

    Raises:
        SystemExit: If no filename argument was supplied.
        ValueError: If a chapter or act marker is not followed by an integer.
        OSError: If the file cannot be opened.
    """
    arguments = sys.argv[1:]
    if not arguments:
        sys.exit('usage: {} FILENAME'.format(sys.argv[0]))
    filename = arguments[0]

    chapter_number = None
    act_number = None
    total_word_count = 0
    word_count_by_chapter = collections.defaultdict(int)
    word_count_by_status = collections.defaultdict(int)
    word_count_by_act = collections.defaultdict(int)
    status_by_chapter = {}

    # The context manager closes the file even if parsing raises.
    with open(filename) as manuscript:
        for line in manuscript:
            if line.startswith(CHAPTER_MARKER):
                # Close out the chapter that just ended before opening the
                # next one.
                word_count_by_act[act_number] += word_count_by_chapter[chapter_number]
                total_word_count += word_count_by_chapter[chapter_number]
                if chapter_number in status_by_chapter:
                    # The heading word is credited to the status here, because
                    # the status is not yet known when the heading is read.
                    word_count_by_status[status_by_chapter[chapter_number]] += 1
                chapter_number = int(line[len(CHAPTER_MARKER):])
                # Start at one, because the chapter number itself counts as a word.
                word_count_by_chapter[chapter_number] = 1
                # No status accounting happens when a chapter opens: the
                # heading word is credited to the status exactly once, when
                # the chapter is closed out.
            elif line.startswith(STATUS_MARKER):
                status_by_chapter[chapter_number] = line[len(STATUS_MARKER):].strip('()\n')
            elif line.startswith(ACT_MARKER):
                act_number = int(line[len(ACT_MARKER):].strip('()\n'))
                # NOTE(review): this resets the act's tally to 1 every time its
                # marker is seen - confirm each act marker appears only once.
                word_count_by_act[act_number] = 1
            else:
                line_word_count = count_words(line)
                word_count_by_chapter[chapter_number] += line_word_count
                if chapter_number in status_by_chapter:
                    word_count_by_status[status_by_chapter[chapter_number]] += line_word_count

    # Do some final accounting after the last chapter.
    word_count_by_act[act_number] += word_count_by_chapter[chapter_number]
    total_word_count += word_count_by_chapter[chapter_number]
    if chapter_number in status_by_chapter:
        word_count_by_status[status_by_chapter[chapter_number]] += 1

    # Print out word counts. The None key holds text seen before the first
    # chapter/act marker and is not reported on its own line.
    for number, chapter_word_count in word_count_by_chapter.items():
        if number is None:
            continue
        chapter_status = status_by_chapter.get(number)
        print(
            'chapter {}: {:,} words{}'.format(
                number,
                chapter_word_count,
                ' ({})'.format(chapter_status) if chapter_status else '',
            )
        )
    print()
    for act, act_word_count in word_count_by_act.items():
        if act is None:
            continue
        print('act {}: {:,} words (~{}%)'.format(
            act, act_word_count, _percent_of(act_word_count, total_word_count)))
    for status, status_word_count in word_count_by_status.items():
        print('{}: {:,} words (~{}%)'.format(
            status, status_word_count, _percent_of(status_word_count, total_word_count)))
    print('total: {:,} words'.format(total_word_count))


# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()