Added emdash 0x2014 as a word separator

This commit is contained in:
deroshkin 2021-11-22 21:25:08 +01:00
parent 1c53c94b9c
commit 357eceabad
1 changed file with 3 additions and 1 deletion

View File

@@ -4,6 +4,7 @@
import argparse import argparse
import collections import collections
import tempfile import tempfile
import re
CHAPTER_MARKER = '## ' CHAPTER_MARKER = '## '
STATUS_MARKER = '[status]: # ' STATUS_MARKER = '[status]: # '
@@ -11,12 +12,13 @@ ACT_MARKER = '[act]: # '
# Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert. # Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
COMMENT_MARKER = '[//]: # ' COMMENT_MARKER = '[//]: # '
TITLE_MARKER = '# ' TITLE_MARKER = '# '
WORD_SEPS = [' ','—']
def count_words(line): def count_words(line):
count = 0 count = 0
for word in line.strip().split(' '): for word in re.split('|'.join(WORD_SEPS), line.strip()):
if not word.strip() or word == '*' or word.startswith('#'): if not word.strip() or word == '*' or word.startswith('#'):
continue continue