Added em dash (U+2014) as a word separator

Add title to the output
2021-11-22 21:25:08 +01:00 · 2021-11-01 10:01:31 +01:00
1 changed file with 12 additions and 1 deletion
--- a/novel_stats/novel_stats.py
+++ b/novel_stats/novel_stats.py
@ -4,18 +4,21 @@
 import argparse
 import collections
 import tempfile
+import re

 CHAPTER_MARKER = '## '
 STATUS_MARKER = '[status]: # '
 ACT_MARKER = '[act]: # '
 # Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
 COMMENT_MARKER = '[//]: # '
+TITLE_MARKER = '# '
+WORD_SEPS = [' ','—']


 def count_words(line):
    count = 0

-    for word in line.strip().split(' '):
+    for word in re.split('|'.join(WORD_SEPS), line.strip()):
        if not word.strip() or word == '*' or word.startswith('#'):
            continue

@ -74,6 +77,7 @@ def main():
    word_count_by_act = collections.defaultdict(int)
    status_by_chapter = {}
    current_status = None
+    title = None

    for line in mdfile.readlines():
        if line.startswith(CHAPTER_MARKER):
@ -97,6 +101,10 @@ def main():
        elif line.startswith(ACT_MARKER):
            act_heading = line[len(ACT_MARKER) :].strip('()\n')
            word_count_by_act[act_heading] = count_words(act_heading)
+        elif line.startswith(TITLE_MARKER):
+            title = line[len(TITLE_MARKER):].strip()
+            line_word_count = count_words(line)
+            word_count_by_chapter[chapter_heading] += line_word_count
        elif line.startswith(COMMENT_MARKER):  # Don't count the words in a comment.
            pass
        else:
@ -113,6 +121,9 @@ def main():
    word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
    total_word_count += word_count_by_chapter[chapter_heading]

+    if title:
+        print(f'Novel Stats for {title.upper()}')
+
    # -c or --chapter to give a chapter-by-chapter word count summary.
    if arguments.chapter:
        for chapter_heading, chapter_word_count in word_count_by_chapter.items():
Author	SHA1	Message	Date
deroshkin	357eceabad	Added emdash 0x2014 as a word separator	2021-11-22 21:25:08 +01:00
Dmytro Yeroshkin	1c53c94b9c	Add title to the output	2021-11-01 10:01:31 +01:00