forked from witten/novel-stats
Added em dash (U+2014) as a word separator
This commit is contained in:
parent
1c53c94b9c
commit
357eceabad
|
@ -4,6 +4,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import collections
|
import collections
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import re
|
||||||
|
|
||||||
CHAPTER_MARKER = '## '
|
CHAPTER_MARKER = '## '
|
||||||
STATUS_MARKER = '[status]: # '
|
STATUS_MARKER = '[status]: # '
|
||||||
|
@ -11,12 +12,13 @@ ACT_MARKER = '[act]: # '
|
||||||
# Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
|
# Standard markdown comment marker, supported by Pandoc and Calibre's ebook-convert.
|
||||||
COMMENT_MARKER = '[//]: # '
|
COMMENT_MARKER = '[//]: # '
|
||||||
TITLE_MARKER = '# '
|
TITLE_MARKER = '# '
|
||||||
|
WORD_SEPS = [' ','—']
|
||||||
|
|
||||||
|
|
||||||
def count_words(line):
|
def count_words(line):
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for word in line.strip().split(' '):
|
for word in re.split('|'.join(WORD_SEPS), line.strip()):
|
||||||
if not word.strip() or word == '*' or word.startswith('#'):
|
if not word.strip() or word == '*' or word.startswith('#'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue