diff --git a/Jeff spam homework.ipynb b/Jeff spam homework.ipynb
new file mode 100644
index 0000000..94acdf5
--- /dev/null
+++ b/Jeff spam homework.ipynb
@@ -0,0 +1,472 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "import sklearn\n",
+ "from sklearn.cross_validation import train_test_split \n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.naive_bayes import MultinomialNB as mnb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"spambase/spambase.data\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 0.64 | \n",
+ " 0.64.1 | \n",
+ " 0.1 | \n",
+ " 0.32 | \n",
+ " 0.2 | \n",
+ " 0.3 | \n",
+ " 0.4 | \n",
+ " 0.5 | \n",
+ " 0.6 | \n",
+ " ... | \n",
+ " 0.40 | \n",
+ " 0.41 | \n",
+ " 0.42 | \n",
+ " 0.778 | \n",
+ " 0.43 | \n",
+ " 0.44 | \n",
+ " 3.756 | \n",
+ " 61 | \n",
+ " 278 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.21 | \n",
+ " 0.28 | \n",
+ " 0.50 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.28 | \n",
+ " 0.21 | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 0.94 | \n",
+ " ... | \n",
+ " 0.00 | \n",
+ " 0.132 | \n",
+ " 0 | \n",
+ " 0.372 | \n",
+ " 0.180 | \n",
+ " 0.048 | \n",
+ " 5.114 | \n",
+ " 101 | \n",
+ " 1028 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.06 | \n",
+ " 0.00 | \n",
+ " 0.71 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.19 | \n",
+ " 0.19 | \n",
+ " 0.12 | \n",
+ " 0.64 | \n",
+ " 0.25 | \n",
+ " ... | \n",
+ " 0.01 | \n",
+ " 0.143 | \n",
+ " 0 | \n",
+ " 0.276 | \n",
+ " 0.184 | \n",
+ " 0.010 | \n",
+ " 9.821 | \n",
+ " 485 | \n",
+ " 2259 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 58 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 0.64 0.64.1 0.1 0.32 0.2 0.3 0.4 0.5 0.6 ... 0.40 \\\n",
+ "0 0.21 0.28 0.50 0 0 0.28 0.21 0.07 0.00 0.94 ... 0.00 \n",
+ "1 0.06 0.00 0.71 0 0 0.19 0.19 0.12 0.64 0.25 ... 0.01 \n",
+ "\n",
+ " 0.41 0.42 0.778 0.43 0.44 3.756 61 278 1 \n",
+ "0 0.132 0 0.372 0.180 0.048 5.114 101 1028 1 \n",
+ "1 0.143 0 0.276 0.184 0.010 9.821 485 2259 1 \n",
+ "\n",
+ "[2 rows x 58 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "spam_train, spam_test = train_test_split(df, test_size=.4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 0.64 | \n",
+ " 0.64.1 | \n",
+ " 0.1 | \n",
+ " 0.32 | \n",
+ " 0.2 | \n",
+ " 0.3 | \n",
+ " 0.4 | \n",
+ " 0.5 | \n",
+ " 0.6 | \n",
+ " ... | \n",
+ " 0.40 | \n",
+ " 0.41 | \n",
+ " 0.42 | \n",
+ " 0.778 | \n",
+ " 0.43 | \n",
+ " 0.44 | \n",
+ " 3.756 | \n",
+ " 61 | \n",
+ " 278 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2215 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1.408 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2.6 | \n",
+ " 6 | \n",
+ " 13 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3310 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0.000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1.4 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 58 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 0.64 0.64.1 0.1 0.32 0.2 0.3 0.4 0.5 0.6 ... 0.40 0.41 \\\n",
+ "2215 0 0 0 0 0 0 0 0 0 0 ... 1.408 0 \n",
+ "3310 0 0 0 0 0 0 0 0 0 0 ... 0.000 0 \n",
+ "\n",
+ " 0.42 0.778 0.43 0.44 3.756 61 278 1 \n",
+ "2215 0 0 0 0 2.6 6 13 0 \n",
+ "3310 0 0 0 0 1.4 3 7 0 \n",
+ "\n",
+ "[2 rows x 58 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "spam_train.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 0.64 | \n",
+ " 0.64.1 | \n",
+ " 0.1 | \n",
+ " 0.32 | \n",
+ " 0.2 | \n",
+ " 0.3 | \n",
+ " 0.4 | \n",
+ " 0.5 | \n",
+ " 0.6 | \n",
+ " ... | \n",
+ " 0.40 | \n",
+ " 0.41 | \n",
+ " 0.42 | \n",
+ " 0.778 | \n",
+ " 0.43 | \n",
+ " 0.44 | \n",
+ " 3.756 | \n",
+ " 61 | \n",
+ " 278 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 871 | \n",
+ " 0.45 | \n",
+ " 0.00 | \n",
+ " 0.67 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.67 | \n",
+ " 0.0 | \n",
+ " 0.67 | \n",
+ " 0.22 | \n",
+ " 0.22 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0.111 | \n",
+ " 0 | \n",
+ " 1.599 | \n",
+ " 0.148 | \n",
+ " 0.000 | \n",
+ " 4.947 | \n",
+ " 102 | \n",
+ " 564 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1074 | \n",
+ " 0.00 | \n",
+ " 0.55 | \n",
+ " 0.55 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.55 | \n",
+ " 2.2 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.55 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0.165 | \n",
+ " 0 | \n",
+ " 0.496 | \n",
+ " 0.000 | \n",
+ " 0.082 | \n",
+ " 16.826 | \n",
+ " 148 | \n",
+ " 387 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 58 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 0.64 0.64.1 0.1 0.32 0.2 0.3 0.4 0.5 0.6 ... 0.40 \\\n",
+ "871 0.45 0.00 0.67 0 0 0.67 0.0 0.67 0.22 0.22 ... 0 \n",
+ "1074 0.00 0.55 0.55 0 0 0.55 2.2 0.00 0.00 0.55 ... 0 \n",
+ "\n",
+ " 0.41 0.42 0.778 0.43 0.44 3.756 61 278 1 \n",
+ "871 0.111 0 1.599 0.148 0.000 4.947 102 564 1 \n",
+ "1074 0.165 0 0.496 0.000 0.082 16.826 148 387 1 \n",
+ "\n",
+ "[2 rows x 58 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "spam_test.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "training_run = mnb()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "training_run.fit(spam_train.iloc[:, :57], spam_train.iloc[:, -1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.79619565217391308"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "training_run.score(spam_test.iloc[:, :57], spam_test.iloc[:, -1])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### The Multinomial Naive Bayes spam detector scores about 80% accuracy (0.796) on the held-out 40% test split."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.4.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/README.md b/README.md
index 3a8b6f2..0821c82 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
-# Classifying spam
+# Run `Jeff spam homework.ipynb` using IPython Notebook
+
+## Classifying spam
## Description