Compare commits

...

2 Commits

Author SHA1 Message Date
deroshkin 357eceabad Added emdash 0x2014 as a word separator 2021-11-22 21:25:08 +01:00
Dmytro Yeroshkin 1c53c94b9c Add title to the output 2021-11-01 10:01:31 +01:00
1 changed file with 12 additions and 1 deletion

View File

@@ -4,18 +4,21 @@
import argparse import argparse
import collections import collections
import tempfile import tempfile
import re
CHAPTER_MARKER = '## ' CHAPTER_MARKER = '## '
STATUS_MARKER = '[status]: # ' STATUS_MARKER = '[status]: # '
ACT_MARKER = '[act]: # ' ACT_MARKER = '[act]: # '
# Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert. # Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
COMMENT_MARKER = '[//]: # ' COMMENT_MARKER = '[//]: # '
TITLE_MARKER = '# '
WORD_SEPS = [' ','—']
def count_words(line): def count_words(line):
count = 0 count = 0
for word in line.strip().split(' '): for word in re.split('|'.join(WORD_SEPS), line.strip()):
if not word.strip() or word == '*' or word.startswith('#'): if not word.strip() or word == '*' or word.startswith('#'):
continue continue
@@ -74,6 +77,7 @@ def main():
word_count_by_act = collections.defaultdict(int) word_count_by_act = collections.defaultdict(int)
status_by_chapter = {} status_by_chapter = {}
current_status = None current_status = None
title = None
for line in mdfile.readlines(): for line in mdfile.readlines():
if line.startswith(CHAPTER_MARKER): if line.startswith(CHAPTER_MARKER):
@@ -97,6 +101,10 @@ def main():
elif line.startswith(ACT_MARKER): elif line.startswith(ACT_MARKER):
act_heading = line[len(ACT_MARKER) :].strip('()\n') act_heading = line[len(ACT_MARKER) :].strip('()\n')
word_count_by_act[act_heading] = count_words(act_heading) word_count_by_act[act_heading] = count_words(act_heading)
elif line.startswith(TITLE_MARKER):
title = line[len(TITLE_MARKER):].strip()
line_word_count = count_words(line)
word_count_by_chapter[chapter_heading] += line_word_count
elif line.startswith(COMMENT_MARKER): # Don't count the words in a comment. elif line.startswith(COMMENT_MARKER): # Don't count the words in a comment.
pass pass
else: else:
@@ -113,6 +121,9 @@ def main():
word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading] word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
total_word_count += word_count_by_chapter[chapter_heading] total_word_count += word_count_by_chapter[chapter_heading]
if title:
print(f'Novel Stats for {title.upper()}')
# -c or --chapter to give a chapter-by-chapter word count summary. # -c or --chapter to give a chapter-by-chapter word count summary.
if arguments.chapter: if arguments.chapter:
for chapter_heading, chapter_word_count in word_count_by_chapter.items(): for chapter_heading, chapter_word_count in word_count_by_chapter.items():