#!/usr/bin/python
"""Report word counts for a Markdown manuscript.

Usage: pass the manuscript path as the only command-line argument.

The manuscript is scanned line by line and classified by prefix:

* ``## `` starts a new chapter; the rest of the line is the chapter heading.
* ``[status]: # `` records the status of the current chapter.
* ``[act]: # `` starts a new act.
* ``[//]: # `` is a comment and contributes no words.
* Every other line is body text of the current chapter.

Word counts are printed per chapter, per act, per status and in total.
"""

import collections
import sys

CHAPTER_MARKER = '## '
STATUS_MARKER = '[status]: # '
ACT_MARKER = '[act]: # '
COMMENT_MARKER = '[//]: # '


def count_words(line):
    """Return the number of words in *line*.

    Words are separated by any run of whitespace. A lone ``*`` and any
    token starting with ``#`` are skipped rather than counted.
    """
    return sum(
        1
        for word in line.split()
        if word != '*' and not word.startswith('#')
    )


def _marker_payload(line, marker):
    """Return the text following *marker*, minus parentheses and newline."""
    return line[len(marker):].strip('()\n')


def _percent(part, whole):
    """Return *part* as a floored integer percentage of *whole* (0 if empty)."""
    # An empty manuscript has a zero total; report 0% instead of crashing.
    return part * 100 // whole if whole else 0


class _Tally(object):
    """Accumulates word counts while manuscript lines are fed in order."""

    def __init__(self):
        self.chapter = None  # Heading of the chapter being read, if any.
        self.act = None  # Heading of the act being read, if any.
        self.total = 0
        self.by_chapter = collections.defaultdict(int)
        self.by_status = collections.defaultdict(int)
        self.by_act = collections.defaultdict(int)
        self.status_by_chapter = {}

    def close_chapter(self):
        """Fold the current chapter's words into the act and grand totals.

        Must be called once more after the last line so that the final
        chapter is accounted for.
        """
        chapter_words = self.by_chapter[self.chapter]
        # The chapter is credited to whichever act is current *now*, i.e.
        # when the chapter ends.
        # NOTE(review): this assumes an act marker is placed after the
        # heading of the act's first chapter -- confirm against manuscripts.
        self.by_act[self.act] += chapter_words
        self.total += chapter_words
        if self.chapter in self.status_by_chapter:
            # The status is only known after the heading has been read, so
            # the heading's words are added to the status tally here. Using
            # the real heading word count (rather than a flat 1) keeps the
            # status tallies consistent with the chapter and total tallies.
            heading_words = (
                count_words(self.chapter) if self.chapter is not None else 0
            )
            self.by_status[self.status_by_chapter[self.chapter]] += heading_words

    def feed(self, line):
        """Classify one manuscript line and update the running counts."""
        if line.startswith(CHAPTER_MARKER):
            self.close_chapter()
            self.chapter = _marker_payload(line, CHAPTER_MARKER)
            # Count the words in the chapter heading, because the chapter
            # number and title count as words.
            self.by_chapter[self.chapter] = count_words(self.chapter)
        elif line.startswith(STATUS_MARKER):
            self.status_by_chapter[self.chapter] = _marker_payload(
                line, STATUS_MARKER
            )
        elif line.startswith(ACT_MARKER):
            self.act = _marker_payload(line, ACT_MARKER)
            # Act heading words count towards the act, but not the total.
            self.by_act[self.act] = count_words(self.act)
        elif line.startswith(COMMENT_MARKER):
            pass
        else:
            line_word_count = count_words(line)
            self.by_chapter[self.chapter] += line_word_count
            if self.chapter in self.status_by_chapter:
                status = self.status_by_chapter[self.chapter]
                self.by_status[status] += line_word_count


def _print_report(tally):
    """Print the per-chapter, per-act, per-status and total word counts."""
    for chapter_heading, chapter_word_count in tally.by_chapter.items():
        # Text before the first chapter is tallied under None; it is part
        # of the total but is not listed as a chapter.
        if chapter_heading is None:
            continue
        chapter_status = tally.status_by_chapter.get(chapter_heading)
        print(
            'chapter {}: {:,} words{}'.format(
                chapter_heading,
                chapter_word_count,
                ' ({})'.format(chapter_status) if chapter_status else '',
            )
        )
    print()
    for act_heading, act_word_count in tally.by_act.items():
        if act_heading is None:
            continue
        print('act {}: {:,} words (~{}%)'.format(
            act_heading, act_word_count, _percent(act_word_count, tally.total)))
    for status, status_word_count in tally.by_status.items():
        print('{}: {:,} words (~{}%)'.format(
            status, status_word_count, _percent(status_word_count, tally.total)))
    print('total: {:,} words'.format(tally.total))


def main():
    """Read the manuscript named on the command line and print its counts.

    Exits with a usage message if no filename argument was given.
    """
    arguments = sys.argv[1:]
    if not arguments:
        sys.exit('usage: {} MANUSCRIPT'.format(sys.argv[0]))
    filename = arguments[0]

    tally = _Tally()
    # The context manager guarantees the file is closed, even on error.
    with open(filename) as manuscript:
        for line in manuscript:
            tally.feed(line)
    # Do some final accounting after the last chapter.
    tally.close_chapter()

    _print_report(tally)


if __name__ == '__main__':
    main()