diff --git a/__pycache__/word_frequency.cpython-34.pyc b/__pycache__/word_frequency.cpython-34.pyc
new file mode 100644
index 0000000..7a1237c
Binary files /dev/null and b/__pycache__/word_frequency.cpython-34.pyc differ
diff --git a/__pycache__/word_frequency_test.cpython-34-PYTEST.pyc b/__pycache__/word_frequency_test.cpython-34-PYTEST.pyc
new file mode 100644
index 0000000..35da024
Binary files /dev/null and b/__pycache__/word_frequency_test.cpython-34-PYTEST.pyc differ
diff --git a/word_frequency.py b/word_frequency.py
new file mode 100644
index 0000000..d3dd693
--- /dev/null
+++ b/word_frequency.py
@@ -0,0 +1,43 @@
+# accept text from sample.txt
+# create a dictionary
+# collect top 20 words used
+# reverse top 20 words
+# print top 20 words
+# store results in an outside file
+
+import re
+
+with open ('/Users/lancerogers/homework/word_frequency/word-frequency/sample.txt','r') as file:
+    file_str = file.read()
+    # print(file_str)
+    def word_frequency(file_str):
+        h_dict = {}
+        # print(file_str)
+        words = re.sub(r'[^A-Za-z\s]',"", file_str).lower().split()
+        for string in words:
+            if string in h_dict:
+                h_dict[string] = h_dict[string] + 1
+            else:
+                h_dict[string] = 1
+        return h_dict
+
+h_dict = word_frequency(file_str)
+# print(h_dict)
+
+
+# create a function that takes in a dictionary and returns the top 20
+# occurances in that dictionary
+headache = sorted(h_dict.items(), key = lambda x: x[1], reverse = True)
+top_twenty = headache[:20]
+
+def strip_list(top_twenty):
+    for value in top_twenty:
+        if value != None:
+            tuple_str = value[0]
+            tuple_int = value[1]
+            print("{} {}".format(tuple_str, tuple_int))
+            continue
+        else:
+            break
+        return
+strip_list(top_twenty)