novel-stats-fork/novel_stats/novel_stats.py

#!/usr/bin/python


import collections
import sys
import os


# Line prefixes that main() looks for at the very start of each input line.
# For every marker the remainder of the line, with surrounding parentheses
# and the trailing newline stripped, is used as the chapter/status/act name.
CHAPTER_MARKER = '## '  # Starts a new chapter; the heading words are counted.
STATUS_MARKER = '[status]: # '  # Sets the status that following lines are tallied under.
ACT_MARKER = '[act]: # '  # Sets the act that chapter totals are added to.
COMMENT_MARKER = '[//]: # '  # Lines with this prefix are skipped and never counted.


def count_words(line):
    """Return the number of countable words in ``line``.

    The line is trimmed of surrounding whitespace and then split on single
    space characters only (tabs do not separate words). A token is left out
    of the count when it is empty or whitespace-only, is a lone ``*``, or
    begins with ``#``.
    """
    tokens = line.strip().split(' ')

    return sum(
        1
        for token in tokens
        if token.strip() and token != '*' and not token.startswith('#')
    )


def _percent_of(part, total):
    """Return ``part`` as a floored whole-number percentage of ``total``.

    Yields 0 when ``total`` is 0 so that a manuscript without any counted
    words cannot raise ZeroDivisionError while the report is printed.
    """
    if not total:
        return 0

    return part * 100 // total


def main():
    """Print word-count statistics for the manuscript named on the command line.

    The first command-line argument is the path of the file to analyse. If
    ``-pp`` appears anywhere in the arguments, the file is first run through
    MarkdownPP into a temporary file in the current directory, and that
    temporary file is analysed instead (it is removed afterwards, even when
    preprocessing or reading fails).

    The report, written to standard output, lists the word count of every
    chapter (broken down by status when a chapter has more than one), then
    the totals per act and per status with their share of the overall total,
    and finally the overall total.

    Exits with a usage message when no arguments are given. Raises
    ImportError when ``-pp`` is requested but MarkdownPP is not installed,
    and OSError when a file cannot be opened.
    """
    arguments = sys.argv[1:]

    if not arguments:
        # The input path must be the first argument; fail with a readable
        # message instead of an IndexError traceback.
        sys.exit(f'usage: {os.path.basename(sys.argv[0])} FILE [-pp]')

    filename = arguments[0]
    tmpfilename = None

    try:
        if '-pp' in arguments:
            import MarkdownPP
            tmpfilename = f'.tmp-{os.getpid()}.md'

            # Closing both files before reading the result guarantees the
            # preprocessed output has been flushed to disk.
            with open(filename) as source, open(tmpfilename, 'w') as target:
                MarkdownPP.MarkdownPP(input=source, output=target, modules=list(MarkdownPP.modules))

            filename = tmpfilename

        with open(filename) as manuscript:
            lines = manuscript.readlines()
    finally:
        if tmpfilename:
            try:
                os.remove(tmpfilename)
            except FileNotFoundError:
                # The temporary file was never created (e.g. opening the
                # input failed first), so there is nothing to clean up.
                pass

    chapter_heading = None
    act_heading = None
    total_word_count = 0
    word_count_by_chapter = collections.defaultdict(int)
    word_count_by_status = collections.defaultdict(int)
    word_count_by_act = collections.defaultdict(int)
    # Seeded with the "no chapter yet" key so a status marker that appears
    # before the first chapter heading does not raise KeyError.
    status_by_chapter = {None: collections.defaultdict(int)}
    current_status = None

    for line in lines:
        if line.startswith(CHAPTER_MARKER):
            # A new chapter begins: settle the chapter that just ended.
            word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
            total_word_count += word_count_by_chapter[chapter_heading]

            chapter_heading = line[len(CHAPTER_MARKER):].strip('()\n')

            # Count the words in chapter heading, because the chapter number and title count as words.
            word_count_by_chapter[chapter_heading] = count_words(chapter_heading)

            status_by_chapter[chapter_heading] = collections.defaultdict(int)
            current_status = None
        elif line.startswith(STATUS_MARKER):
            first_status_in_chapter = current_status is None
            current_status = line[len(STATUS_MARKER):].strip('()\n')

            if first_status_in_chapter:
                # The first status of a chapter is credited with the words
                # of the chapter heading; before any chapter there is no
                # heading to credit.
                if chapter_heading is None:
                    heading_words = 0
                else:
                    heading_words = count_words(chapter_heading)

                status_by_chapter[chapter_heading][current_status] = heading_words
            else:
                # Make sure the status is listed for this chapter even if no
                # counted line follows it.
                status_by_chapter[chapter_heading][current_status] += 0
        elif line.startswith(ACT_MARKER):
            act_heading = line[len(ACT_MARKER):].strip('()\n')
            word_count_by_act[act_heading] = count_words(act_heading)
        elif line.startswith(COMMENT_MARKER):
            # Comment lines never contribute to any count.
            continue
        else:
            line_word_count = count_words(line)
            word_count_by_chapter[chapter_heading] += line_word_count

            if current_status:
                word_count_by_status[current_status] += line_word_count
                status_by_chapter[chapter_heading][current_status] += line_word_count

    # Do some final accounting after the last chapter.
    word_count_by_act[act_heading] += word_count_by_chapter[chapter_heading]
    total_word_count += word_count_by_chapter[chapter_heading]

    # Print out word counts.
    for chapter_heading, chapter_word_count in word_count_by_chapter.items():
        if chapter_heading is None:
            # Text before the first chapter is part of the total only.
            continue

        chapter_statuses = status_by_chapter[chapter_heading]

        if len(chapter_statuses) > 1:
            print(f'chapter {chapter_heading}:')

            for chapter_status, status_count in chapter_statuses.items():
                print(f'\t {status_count:,} ({chapter_status})')
            print(f'\t {chapter_word_count:,} words (total)')
        elif len(chapter_statuses) == 1:
            chapter_status = next(iter(chapter_statuses))
            print(f'chapter {chapter_heading}: {chapter_word_count:,} ({chapter_status})')
        else:
            print(f'chapter {chapter_heading}: {chapter_word_count:,}')

    print()

    for act_heading, act_word_count in word_count_by_act.items():
        if act_heading is None:
            continue

        print('act {}: {:,} words (~{}%)'.format(act_heading, act_word_count, _percent_of(act_word_count, total_word_count)))

    for status, status_word_count in word_count_by_status.items():
        print('{}: {:,} words (~{}%)'.format(status, status_word_count, _percent_of(status_word_count, total_word_count)))

    print('total: {:,} words'.format(total_word_count))


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()