From 595f5d69914cc08e06c8adaf076b898ccdca3b8e Mon Sep 17 00:00:00 2001 From: Lance Rogers Date: Fri, 29 May 2015 11:31:22 -0400 Subject: [PATCH 1/7] first commit to word_frequency --- word_frequency.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 word_frequency.py diff --git a/word_frequency.py b/word_frequency.py new file mode 100644 index 0000000..61b55fe --- /dev/null +++ b/word_frequency.py @@ -0,0 +1,28 @@ +# accept text from sample.txt +# create a dictionary +# collect top 20 words used +# reverse top 20 words +# print top 20 words +# store results in an outside file +import re + +with open ('/Users/lancerogers/homework/word_frequency/word-frequency/sample.txt','r') as file: + file_str = file.read() + + def sample(file_str): + h_dict = {} + for line in file_str.split(): + for words in line: + words = re.sub(r'[^A-Za-z\s]',"", words.lower()) + # words = re.sub('[" "]', '', words.lower()) + if words in h_dict: + h_dict[words] = h_dict[words] + 1 + else: + h_dict[words] = 1 + + return h_dict + + + +h_dict = sample(file_str) +print(h_dict) From 01ffeb5f22ae0cbf968649b3905f3f2ac470ec1c Mon Sep 17 00:00:00 2001 From: Lance Rogers Date: Fri, 29 May 2015 12:51:23 -0400 Subject: [PATCH 2/7] almost finished with first person --- word_frequency.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/word_frequency.py b/word_frequency.py index 61b55fe..26c8e76 100644 --- a/word_frequency.py +++ b/word_frequency.py @@ -11,9 +11,17 @@ def sample(file_str): h_dict = {} + #print(file_str) for line in file_str.split(): - for words in line: + #print(line) + for words in line.split(): + words = re.sub(r'[^A-Za-z\s]',"", words.lower()) + try: + words != '' + except: + pass + #print(words) # words = re.sub('[" "]', '', words.lower()) if words in h_dict: h_dict[words] = h_dict[words] + 1 From 453afb40ca3174832ea281da2cc878030d5c314f Mon Sep 17 00:00:00 2001 From: Lance Rogers Date: Fri, 29 May 2015 13:05:47 -0400 Subject: [PATCH 3/7] Passes Test --- word_frequency.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/word_frequency.py b/word_frequency.py index 26c8e76..659d65f 100644 --- a/word_frequency.py +++ b/word_frequency.py @@ -8,29 +8,21 @@ with open ('/Users/lancerogers/homework/word_frequency/word-frequency/sample.txt','r') as file: file_str = file.read() - - def sample(file_str): + #print(file_str) + def word_frequency(file_str): h_dict = {} #print(file_str) - for line in file_str.split(): + #for line in file_str.split(): #print(line) - for words in line.split(): + #for words in line.split(): - words = re.sub(r'[^A-Za-z\s]',"", words.lower()) - try: - words != '' - except: - pass - #print(words) - # words = re.sub('[" "]', '', words.lower()) - if words in h_dict: - h_dict[words] = h_dict[words] + 1 + words = re.sub(r'[^A-Za-z\s]',"", file_str).lower().split() + for string in words: + if string in h_dict: + h_dict[string] = h_dict[string] + 1 else: - h_dict[words] = 1 - + h_dict[string] = 1 return h_dict - - -h_dict = sample(file_str) +h_dict = word_frequency(file_str) print(h_dict) From 734e2e3d26a015a0b4794010995c772d9a814980 Mon Sep 17 00:00:00 2001 From: Lance Rogers Date: Fri, 29 May 2015 13:09:49 -0400 Subject: [PATCH 4/7] comment spaces --- word_frequency.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/word_frequency.py b/word_frequency.py index 659d65f..4a4fa9f 100644 --- a/word_frequency.py +++ b/word_frequency.py @@ -8,14 +8,10 @@ with open ('/Users/lancerogers/homework/word_frequency/word-frequency/sample.txt','r') as file: file_str = file.read() - #print(file_str) + # print(file_str) def word_frequency(file_str): h_dict = {} - #print(file_str) - #for line in file_str.split(): - #print(line) - #for words in line.split(): - + # print(file_str) words = re.sub(r'[^A-Za-z\s]',"", file_str).lower().split() for string in words: if string in h_dict: From 0582a1731038b5b6f4120df5343ecfa8b6360f9a Mon Sep 17 00:00:00 2001 From: Lance Rogers Date: Fri, 29 May 2015 17:06:23 -0400 Subject: [PATCH 5/7] word_frequency top 20 --- word_frequency.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/word_frequency.py b/word_frequency.py index 4a4fa9f..e6d0234 100644 --- a/word_frequency.py +++ b/word_frequency.py @@ -4,6 +4,7 @@ # reverse top 20 words # print top 20 words # store results in an outside file +from collections import Counter import re with open ('/Users/lancerogers/homework/word_frequency/word-frequency/sample.txt','r') as file: @@ -21,4 +22,21 @@ def word_frequency(file_str): return h_dict h_dict = word_frequency(file_str) -print(h_dict) +# print(h_dict) + + +# create a function that takes in a dictionary and returns the top 20 +# occurances in that dictionary +headache = sorted(h_dict.items(), key = lambda x: x[1], reverse = True) +top_twenty = headache[:20] +def strip_list(top_twenty): + for value in top_twenty: + if value != None: + tuple_str = value[0] + tuple_int = value[1] + print("{} {}".format(tuple_str, tuple_int)) + continue + else: + break + return +strip_list(top_twenty) From 87f7f061ec738a424e7e5059145d2baaf9d933ac Mon Sep 17 00:00:00 2001 From: Lance Rogers Date: Fri, 29 May 2015 17:08:52 -0400 Subject: [PATCH 6/7] top 20 --- word_frequency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/word_frequency.py b/word_frequency.py index e6d0234..e8a374c 100644 --- a/word_frequency.py +++ b/word_frequency.py @@ -4,7 +4,7 @@ # reverse top 20 words # print top 20 words # store results in an outside file -from collections import Counter + import re with open ('/Users/lancerogers/homework/word_frequency/word-frequency/sample.txt','r') as file: From 5a073b33e03f1becb9e65ddf1022ca15bb0274c1 Mon Sep 17 00:00:00 2001 From: Lance Rogers Date: Sun, 31 May 2015 13:27:35 -0400 Subject: [PATCH 7/7] still working on game --- __pycache__/word_frequency.cpython-34.pyc | Bin 0 -> 1096 bytes .../word_frequency_test.cpython-34-PYTEST.pyc | Bin 0 -> 2635 bytes word_frequency.py | 1 + 3 files changed, 1 insertion(+) create mode 100644 __pycache__/word_frequency.cpython-34.pyc create mode 100644 __pycache__/word_frequency_test.cpython-34-PYTEST.pyc diff --git a/__pycache__/word_frequency.cpython-34.pyc b/__pycache__/word_frequency.cpython-34.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a1237ccb6822e1387cf5a764fe1d4553177f3c0 GIT binary patch literal 1096 zcmbVKO>fgc5S`t`PU1MFs(>n&LM1Law1p#5Re{I_fhtg_iVP((*=^&*iPN>yCTVjj zoVoEIxbtiF%BdHA04LtK&<~DSJL8%0cs}0SpMK4IxDxGrFB1KrYmX262_*drrEmnr zL;;Z&q$H^Vok{v~j{-?rQV%Y^7D?ea^a-{@yZGnzTkVi8(wU&ZC0!!zQm@R($|Y4I zRpw+>A>|=Q1{Kn<(V|M9CDj9uluxRbSFDo8;XD#ZX*CwkF&k~qnj1T*Ht9wj4m;W; z2W)qffj&-5e*;Rj_l-Ur>tSb_yK7f&8V*LWUe6|(6{aJ&9v*;^%QL*w1E`2-E+`}F zVYD28Q=+p1^(3!yvHe`q#G>KBH!?Mp`1RudF}jK1`e!mSl}DTK@t$4f>ZfpGoi{5IUn zPF&1^0H0ZRKQV)lIr6e()Xt9eFq>NED2&HCsAOZ@JLc`L ziC_L**<%Vv?Kn!aRkTSLp(uyf6P~yWu!S&n#2UWgU2QQL>7gz1m9RylLuD!m=R6pX z6Wla0nO4?`GCfFxau+#ar>o5}=W(fPGcc{@5B7sZjbpvVl;vaia!Ej{zWYEdO5ghp Dlb`ka literal 0 HcmV?d00001 diff --git a/__pycache__/word_frequency_test.cpython-34-PYTEST.pyc b/__pycache__/word_frequency_test.cpython-34-PYTEST.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35da02449203a1395c74078bcea29b4596b9b916 GIT binary patch literal 2635 zcmb7G%Z}Vc6fO6AoDL+2&1|3qENH2xvxEgg5fO{FY&~($16EDhX3Ex67NkSAp0Q`?h^1rp3(Sm} zoUp4ic7uhG$$KoFu!cOGj~NS} zsz!5@C@s0DQ*Pu=*1VNM8!mFiX*CnJka&-^YVyMJOleziRdZXEh9|1FWi4yFnKco&w7s^S}2hXP?G zTDrXmTU4tsyi|L>N3>XybuM!6K=7RQ@ub|972gZJzQb5pTgi*8lvT=A<{cswx4#fOU{M=M zpnC>zbPF~Kcfa4A22UmwJS%HaaectCtwbV;eHd0o0+-^zC3#M!0kPD)0^!Cxc0_u7 z;ija-(c+3fw;X8`l~uXtB`dAtiEvXUda6LODO3Y2DW$12Apg4tc}6}3uY}IA)5zM$ zB_)JNs+GONc5!h!4LKN@Xrz)K7ed-!AlXPMCY_#gsH{#`{1tQ&YeEJgEGc%%rI(LL z;SuPK)lHsPPyq3;3=bUlj-Wh5n+c>kRxm_j-YW37ReJ)dxJZ#GjgYMtiSJ7KKw@GEB$A+9TmUJE z$+X%~L{Ms>KpsL(r9iyUs<@U=5mmNv(>vvBpL9k=EE=35Y8)=lT(5E{&%k=)t_WdJ zk5oIUJlJqL@_F&_A$`C_5vj_RiO&S=yJ!r_&*|3jl$MAG`rZMSmn2L%UG^6T#EHB< zQ;?uL>qRG!?>&!8cQak*7-ZjV)&puj>44hDddSdG&`s#s^^}*3N%!y- zu9-LJ5KsFCkp!?nA2K<__X)n{3k=rvmYab69I&4lyTa&EXcb6y6pCO%ZhWgfVC$-%}r047!n#Ro0EDpZ&%(S}*(Shexlm!#t`; z*&6p7o(o*LR3eWsoOD0;dU0%q@2y7EJ67Aadq5A7DK|(ICg0jm;m3?HuwXuT_t`&4 zTfKka-&uSC17Ohx?2g5OV=-XoNE5M0%=)<@=0@+$jWIX5V|#kJK08+GEd{t9yF$IC z%&ljAecTe2i=k&U#NA|D6E`~7_-BBap5Tur)*YjMIKylA&;dgO0~2o@6{G}1yn~A1 zWBk1P{6FY8J$1Plce5y}#YRR^7e>)Wr47i9x_J~)YW<21VOM`h1GzXEC>l+mKEc