"""Word-frequency toolbox for Project Gutenberg texts."""

import operator
import string
from collections import Counter

# Text that appears on the line preceding the book body in the input file.
_START_MARKER = 'START OF THIS PROJECT GUTENBERG EBOOK'


def get_word_list(file_name):
    """Count how often each word occurs in a Project Gutenberg text file.

    Everything up to and including the first line that contains the
    START marker is skipped.  If no such line exists, the whole file is
    treated as book text instead of failing.

    Each whitespace-separated token has leading/trailing punctuation
    stripped and is converted to lower case; tokens that consist only of
    punctuation are ignored.

    NOTE: nothing after the book body is trimmed, so any trailing text in
    the file is counted as well.

    file_name: path of the text file to read
    returns: a list of (word, count) tuples, one per distinct word
    """
    # The context manager guarantees the file is closed, even on error.
    with open(file_name, 'r') as book:
        lines = book.readlines()

    # Locate the first line carrying the start marker; default to the
    # beginning of the file when the marker is absent.
    first_body_line = 0
    for index, line in enumerate(lines):
        if _START_MARKER in line:
            first_body_line = index + 1
            break

    counts = Counter()
    for line in lines[first_body_line:]:
        for token in line.split():
            word = token.strip(string.punctuation).lower()
            # Stripping can leave an empty string (e.g. for "--"); skip it
            # so that it is not reported as a word.
            if word:
                counts[word] += 1

    return list(counts.items())


def get_top_n_words(word_list, n):
    """Return the n most frequent entries of a word-count collection.

    word_list: iterable of (word, count) pairs, as produced by
               get_word_list
    n: the number of entries to return
    returns: a list of at most n (word, count) pairs ordered from most
             frequently to least frequently occurring; entries with equal
             counts keep their relative input order
    """
    # Sort on the count (second element of each pair), highest first.
    ranked = sorted(word_list, key=operator.itemgetter(1), reverse=True)
    return ranked[:n]


if __name__ == "__main__":
    print("Running WordFrequency Toolbox")
    wordlist = get_word_list("pg32325.txt")
    print(get_top_n_words(wordlist, 100))