From 0401d67178ec9f627e5d43eb18fd3dd623f22e1b Mon Sep 17 00:00:00 2001 From: Bekk Blando Date: Thu, 28 May 2015 13:15:44 -0400 Subject: [PATCH 1/5] Init Getting Started --- word_frequency.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 word_frequency.py diff --git a/word_frequency.py b/word_frequency.py new file mode 100644 index 0000000..fc0d7a7 --- /dev/null +++ b/word_frequency.py @@ -0,0 +1,3 @@ +def word_frequency(text): + re.sub(r'[^A-Za-z]\s', "", text).lower() + From b7e7ae4ca7f3d086ae5d32d7259940deafe3603c Mon Sep 17 00:00:00 2001 From: Bekk Blando Date: Thu, 28 May 2015 15:20:42 -0400 Subject: [PATCH 2/5] Passes The Test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doesn’t put it into an order yet --- __pycache__/word_frequency.cpython-34.pyc | Bin 0 -> 629 bytes word_frequency.py | 24 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 __pycache__/word_frequency.cpython-34.pyc diff --git a/__pycache__/word_frequency.cpython-34.pyc b/__pycache__/word_frequency.cpython-34.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12cc35b6ada6832712d80493f7b970253c74a82e GIT binary patch literal 629 zcmYjPO>Yx15PkOUrp+e^gb+uLIV2HBqzaWb=OR=^MGGZLcc)AKR)tMjjHW@?8CZK?L&jn&p=@Lp{ zArVW6G(Z~SJLd(U5NU*+z>nvMBT$4{4Lc4U;DU>N{6vQ!PCM{9Y=Xt}Ztb40!&z6j z<_DdUrgWGkHW6Os75>|&e3~hP1!mj_o5Tj#1(->H%PjD?72x)_605h^2n;XZU>jh= zCNZx;k33=b6*vC!fgauT!QOx8g6VlTJX<$Ee|_JJC%yF-bLnvJTk3{RYI1=ouUw>x zThcBvOO;s;gqBt|ZX}8(`S^j~7CId#MVVU{dj1o-$yNTG4n~Hw8Jy63exfFMS`3Co zQZ6XBW-!g{xAJP>^Ly8tZc56NyUHE!-Tv~uQNr}BV4Fyl4?I4E?Vj=x78CK dLCc2@j}}E*DthOC$#|*74Ll86ohRa%{0oc;ks<&9 literal 0 HcmV?d00001 diff --git a/word_frequency.py b/word_frequency.py index fc0d7a7..a23a95a 100644 --- a/word_frequency.py +++ b/word_frequency.py @@ -1,3 +1,23 @@ +import re +import collections +from collections import OrderedDict + + def word_frequency(text): - re.sub(r'[^A-Za-z]\s', "", text).lower() - + clean_text = re.sub(r'[^A-Za-z\s]', "", text).lower().split() + # print(type(clean_text)) + # print(clean_text) + word_count = {} + for word in clean_text: + if word in word_count: + word_count[word] = word_count[word] + 1 + else: + word_count[word] = 1 + #new_word_count = OrderedDict(reversed(sorted(word_count.items(), key=lambda t: t[1]))) + #print(new_word_count) + return word_count + +with open('sample.txt') as file_text: + text = file_text.read() + +print(word_frequency(text)) From 10149bacc1bf5d3640fa445a6c87d9e72694f831 Mon Sep 17 00:00:00 2001 From: Bekk Blando Date: Thu, 28 May 2015 16:26:09 -0400 Subject: [PATCH 3/5] Passes Test and Gives First Twenty --- __pycache__/word_frequency.cpython-34.pyc | Bin 629 -> 1002 bytes word_frequency.py | 21 +++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/__pycache__/word_frequency.cpython-34.pyc b/__pycache__/word_frequency.cpython-34.pyc index 12cc35b6ada6832712d80493f7b970253c74a82e..e7e4831ae050a0b5b83a479c0129f90c22b644d6 100644 GIT binary patch literal 1002 zcmY*Y%}(1u5T5lPA%8$As<^cWROJ9srCusUv<1#o6+%=78X0>CPBwPf-9Qp0C)#tR zUit`CAD}O@*Pgia0eb3;qXJ#cjOUx3*`04}|E|``-^ZQ9uP(qZc(BUkzaiB>$s{BK zBtUrQ3kXFKcR&j0I}kb`I+Wo;LgK+E@;yKw5+7~^d|8Ds01TkJ05`N%f+ITe{fE(O zz(qjHEOA=5%)8|5oo>Jhomzpg3RnTGLWf`t(1mUtu*ON^(!dceaf}X20{HB}I>!ke zG!|~Db=YqRTY0AvRG93gv55soX$7PUq`Rd08yN%8rBg^8m{JpTk;C0|UA8j;DOrc< z&0HA0f+=_T0%jgeNgU_}ppMxgxK3G5=5FiqCD+`8BjbVb!2}$ekAvn#V-KPbM}{K~=>e%Oks$)aWc{Gw>?@D(51=^+ zqnLGJ{_HjnH7ecQ!Cr4iMp=?K_wsny$IR$vCp8ztv*vZKlFf64mqX0raRJ+RaBDEO zrIw8PXGyfZ#ut;qC~MFC;<7j&X2y!tdJMyQhS%-;jeE|DCu8gXH@HYK z{g@1hXKo+@Q4>|M=G28Np8JY7CS@J<2NJi;$lP+Ev8?$2u$4HM5@VC*nJ$_SE9Yqz z$u@O?s;ISIKENzoJWnMSEn81vl&BT1_lTHM%j7Dbv40iGA%Qskr(Y1N$V8hJtJkJa>!+d7GmBN>4{4*W#Ax09+3wu>x248_E z09OX8fUm-g!1EfoDo_==8qBbifrd}u?1E;2Vp~S^pc52pjy8YWwiO&GqACV=7CPB| z93Q}xz;SaQ9%PuJOK_(Fs|$d16E|=C!SbL4$d;smu;wVI6XUXVRCL?L$yL%!;e+6G zM@c0{Q#l#lq-q$CC`;9(9|jx?3a4Z4=Dd)$qs6O~*OgJs{1;%x;)k4^+k#!+GCCO2 z$h+6Xtu`hgJF-SBVv=>qmYIdhB$u>%{g9UUm^(UCQA!Oj4nykkAdV7-Uj=3@TJly! mJTz7NEQmNWguG0qC}qpN*dK Date: Tue, 21 Jul 2015 20:54:49 -0400 Subject: [PATCH 4/5] Cleaned Up Code --- __pycache__/word_frequency.cpython-34.pyc | Bin 1002 -> 1798 bytes word_frequency.py | 29 ++++++++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/__pycache__/word_frequency.cpython-34.pyc b/__pycache__/word_frequency.cpython-34.pyc index e7e4831ae050a0b5b83a479c0129f90c22b644d6..613e306c83749a0e4bbbb5fcc7fcc142a9308ef6 100644 GIT binary patch literal 1798 zcmY*ZPjB5s5TEt?8^0t?0;TkV;839_Z4;FnQV69Dmr4Yos-&1E(LQ_Mdw#LK$?m$2 zqvU`J=Y9_E-1sKu$f*}TKu`R}A!)H^#xt`svpcgh{%3h5Jp5(;?BRguZ+h}o5o&!5fJ&gmP)EghR?*5UaHs{=cJl? zZKcXGU~U}rnKM%508`AQDgX(vmde5v4qi^Q3@18PrPJVbAt#78sbvgnU}3zBO_?P! z(aOps1!Dl(NuA;xR_Y9%`XzH9wG5}m%BeNEoM|s-@Sm#$=zuS^#L?Lc2t}F$@ii4= z8zhzKRHg;M8;P|r^)mHNE=uH+Y1LA2>9KA}R?A!gwanAm+{-raoQ?BZ7XO{GFtWg@ zLe9oa0gN8W8U}>GqG_`+CAg}PR<)v2NZ27h&`F2GyLnrU(4|ET$pEmopQBtL9)u;G zTBbP@UcmJ+MWsM%5M<^V4Zz6ISEx_vWTgt^%0i_9@Txdy#Zf?7IgRJwQ(S&EH;g$* zEvppwUg1t;ShYv2QU2PLj5ZeBOn8RC5%4k4-GSB`fG*Ix=+A(gc+p{cDad!< zX%Z*}T?TY^NEh7YKhkA~&c0)#-GguGvP%~oy6DnzK=ujw9*f-3%ynC?UT4om|9WNr z^fd_~e!}PZ=@SC%pMFLA?0|E?HN`1d04d|gleVj}4&*OQBj8lsqYpN@k)+k^Z&VS06x}%rxiM z3@wkR(@E2J#(I2vsn@w151X#nXFdvJOlNV_@*7wDbyU_hU5*`%u5|525gHXI#_t82 zi8NX*bk#0gsos^$F7XLg|DWFmbWVbh_)OduD|nYhh_@rwfx}=3zK@snw}VyaKJVF& zVSgSvm7ipK)1Ub_A3`x+w>%TgC^lK9W1pJBwOL1_XOSleg0_flo0j>0T;{;1`fzB++_V%ON{rZKL#Hr3&FZjv(7 cJKSFGBamezvDWD?^*hU-+!7nzdyv<~ziyJ~Qvd(} literal 1002 zcmY*Y%}(1u5T5lPA%8$As<^cWROJ9srCusUv<1#o6+%=78X0>CPBwPf-9Qp0C)#tR zUit`CAD}O@*Pgia0eb3;qXJ#cjOUx3*`04}|E|``-^ZQ9uP(qZc(BUkzaiB>$s{BK zBtUrQ3kXFKcR&j0I}kb`I+Wo;LgK+E@;yKw5+7~^d|8Ds01TkJ05`N%f+ITe{fE(O zz(qjHEOA=5%)8|5oo>Jhomzpg3RnTGLWf`t(1mUtu*ON^(!dceaf}X20{HB}I>!ke zG!|~Db=YqRTY0AvRG93gv55soX$7PUq`Rd08yN%8rBg^8m{JpTk;C0|UA8j;DOrc< z&0HA0f+=_T0%jgeNgU_}ppMxgxK3G5=5FiqCD+`8BjbVb!2}$ekAvn#V-KPbM}{K~=>e%Oks$)aWc{Gw>?@D(51=^+ zqnLGJ{_HjnH7ecQ!Cr4iMp=?K_wsny$IR$vCp8ztv*vZKlFf64mqX0raRJ+RaBDEO zrIw8PXGyfZ#ut;qC~MFC;<7j&X2y!tdJMyQhS%-;jeE|DCu8gXH@HYK z{g@1hXKo+@Q4>|M=G28Np8JY7CS@J<2NJi;$lP+Ev8?$2u$4HM5@VC*nJ$_SE9Yqz z$u@O?s;ISIKENzoJWnMSEn81vl&BT1_lTHM%j7Dbv40iGA% Date: Tue, 18 Aug 2015 10:04:30 -0400 Subject: [PATCH 5/5] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4af6898..9672c93 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +Learned more complicated python # Calculate word frequency ## Description