From 04bb589243eb1772b458e070adbc067154f84fdc Mon Sep 17 00:00:00 2001 From: SorenOlegnowicz Date: Thu, 28 May 2015 16:27:24 -0400 Subject: [PATCH 1/2] homework 3 --- .ipynb_checkpoints/Untitled-checkpoint.ipynb | 6 ++ Untitled.ipynb | 56 +++++++++++++++++++ __pycache__/word_frequency.cpython-34.pyc | Bin 0 -> 884 bytes text.txt | 16 ++++++ word_frequency.py | 21 +++++++ 5 files changed, 99 insertions(+) create mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb create mode 100644 Untitled.ipynb create mode 100644 __pycache__/word_frequency.cpython-34.pyc create mode 100644 text.txt create mode 100644 word_frequency.py diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000..286dcb3 --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..eb2c037 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Project', \"Gutenberg's\", 'The', 'Hound', 'of', 'the', 'Baskervilles,', 'by', 'A.', 'Conan', 'Doyle', 'This', 'eBook', 'is', 'for', 'the', 'use', 'of', 'anyone', 'anywhere', 'at', 'no', 'cost', 'and', 'with', 'almost', 'no', 'restrictions', 'whatsoever.', 'You', 'may', 'copy', 'it,', 'give', 'it', 'away', 'or', 're-use', 'it', 'under', 'the', 'terms', 'of', 'the', 'Project', 'Gutenberg', 'License', 'included', 'with', 'this', 'eBook', 'or', 'online', 'at', 'www.gutenberg.org', 'Title:', 'The', 'Hound', 'of', 'the', 'Baskervilles', 'Author:', 'A.', 'Conan', 'Doyle', 'Posting', 'Date:', 'December', '8,', '2008', '[EBook', '#2852]', 'Release', 'Date:', 'October,', '2001', 'Language:', 'English']\n" + ] + } + ], + "source": [ + "def word_frequency(text):\n", + " with open(text) as sample:\n", + " return sample.read()\n", + "print(word_frequency('text.txt').split())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/__pycache__/word_frequency.cpython-34.pyc b/__pycache__/word_frequency.cpython-34.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f11cffb5fecc783601b038429d31617328321932 GIT binary patch literal 884 zcmYjPOK%e~5FUFU329m$MMCAqagmaoP&q_Vt8$L0LRDBO(Kc(!rkmZ=cF04`DRAa@ z@Rxk;sTY0#Cq6qgbmf`-=Hc=98~?6v*AC_vCpV1fhVFe2fDa(*15}E|C?|@E1f!JG ziqX$qia7~RLyuMn`E-h;qJ27x~@A*2m9>jW}s6KB+T9x3-H^O#52QvUE`M6e^cTr36!~-5I#EKZmMWzv%3NF&3I_#(yCL==O$Gy5B}Ii8yxoAKXOvDEB95ewN1w$1A7G2dZw8=ei1 z97YInJiZ11gBm>EO%8)uvk5vdHVhAAS}|_928Ji3ADs3ut*-Gd#e7rQQe&(4AgT(5 zd7uHw^hOht-s zpiaEIwfRwCLXfCMYWzG?TH4oYf=r8%GAxOB944+(9O}MHu>9XY{qCq;_?3NV#9lJ_ z4F9Mqt!?~rEQ&Z>zf0pwk)+aI(Q#{nvCInXHr%_JJLTW?MrAt5MaNR4YEX6WA$!8> HwMYCfx5~hj literal 0 HcmV?d00001 diff --git a/text.txt b/text.txt new file mode 100644 index 0000000..aa29bca --- /dev/null +++ b/text.txt @@ -0,0 +1,16 @@ +Project Gutenberg's The Hound of the Baskervilles, by A. Conan Doyle + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: The Hound of the Baskervilles + +Author: A. Conan Doyle + +Posting Date: December 8, 2008 [EBook #2852] +Release Date: October, 2001 + +Language: English diff --git a/word_frequency.py b/word_frequency.py new file mode 100644 index 0000000..59ab826 --- /dev/null +++ b/word_frequency.py @@ -0,0 +1,21 @@ +import re + + +with open('sample.txt') as sample: + new_text = sample.read() + +def word_frequency(new_text): + edict = {} + regex_text = re.sub(r'[^A-Za-z\s]', "", new_text).lower().split() + for word in regex_text: + if word in edict: + edict[word] += 1 + else: + edict[word] = 1 + return edict + +print(word_frequency(new_text)) + +def max_dict(a_dict): + t_list = sorted(list(edict.items()), key=lambda x: x[1], reverse=True) + return a[:20] From be875b5e2557d5891698fc7684b8415971604818 Mon Sep 17 00:00:00 2001 From: SorenOlegnowicz Date: Thu, 28 May 2015 16:47:39 -0400 Subject: [PATCH 2/2] Delete text.txt --- text.txt | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 text.txt diff --git a/text.txt b/text.txt deleted file mode 100644 index aa29bca..0000000 --- a/text.txt +++ /dev/null @@ -1,16 +0,0 @@ -Project Gutenberg's The Hound of the Baskervilles, by A. Conan Doyle - -This eBook is for the use of anyone anywhere at no cost and with -almost no restrictions whatsoever. You may copy it, give it away or -re-use it under the terms of the Project Gutenberg License included -with this eBook or online at www.gutenberg.org - - -Title: The Hound of the Baskervilles - -Author: A. Conan Doyle - -Posting Date: December 8, 2008 [EBook #2852] -Release Date: October, 2001 - -Language: English