From 357eceabad7a0c4590ecc1d8bc1fd5794146d30e Mon Sep 17 00:00:00 2001 From: deroshkin Date: Mon, 22 Nov 2021 21:25:08 +0100 Subject: [PATCH] Added emdash 0x2014 as a word separator --- novel_stats/novel_stats.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/novel_stats/novel_stats.py b/novel_stats/novel_stats.py index 7d660ca..189975d 100755 --- a/novel_stats/novel_stats.py +++ b/novel_stats/novel_stats.py @@ -4,6 +4,7 @@ import argparse import collections import tempfile +import re CHAPTER_MARKER = '## ' STATUS_MARKER = '[status]: # ' @@ -11,12 +12,13 @@ ACT_MARKER = '[act]: # ' # Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert. COMMENT_MARKER = '[//]: # ' TITLE_MARKER = '# ' +WORD_SEPS = [' ','—'] def count_words(line): count = 0 - for word in line.strip().split(' '): + for word in re.split('|'.join(WORD_SEPS), line.strip()): if not word.strip() or word == '*' or word.startswith('#'): continue