diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb
index 0808166..b361fa2 100644
--- a/your-code/challenge-1.ipynb
+++ b/your-code/challenge-1.ipynb
@@ -66,11 +66,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "This is a test string with a URL special characters numbers and multiple whitespaces\n"
+ ]
+ }
+ ],
"source": [
- "def clean_up(s):\n",
+ "import re\n",
+ "\n",
+ "\n",
+ "def clean_up(text):\n",
+ " text = re.sub(r'http\\S+|www\\.\\S+', '', text)\n",
+ " text = re.sub(r\"[^a-zA-Z\\s']\", ' ', text)\n",
+ " text = re.sub(r'\\s+', ' ', text).strip()\n",
+ " return text\n",
+ " \n",
+ " \n",
+ " \n",
" \"\"\"\n",
" Cleans up numbers, URLs, and special characters from a string.\n",
"\n",
@@ -79,7 +97,13 @@
"\n",
" Returns:\n",
" A string that has been cleaned up.\n",
- " \"\"\""
+ " \"\"\"\n",
+ " \n",
+ "test_string = \"This is a test string with a URL http://example.com, special characters #@!, numbers 123, and multiple whitespaces.\"\n",
+ "\n",
+ "cleaned_text = clean_up(test_string)\n",
+ "\n",
+ "print(cleaned_text)\n"
]
},
{
@@ -101,12 +125,54 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['ironhack', 's', 'q', 'website', 'is']\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to\n",
+ "[nltk_data] C:\\Users\\User\\AppData\\Roaming\\nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'\\n Tokenize a string.\\n\\n Args:\\n s: String to be tokenized.\\n\\n Returns:\\n A list of words as the result of tokenization.\\n'"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "def tokenize(s):\n",
- " \"\"\"\n",
+ "import nltk\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "\n",
+ "nltk.download('punkt')\n",
+ "\n",
+ "\n",
+ "def tokenize(text):\n",
+ " return word_tokenize(text) \n",
+ " \n",
+ "\n",
+ " \n",
+ "test_string = \"ironhack s q website is\"\n",
+ "tokens = tokenize(test_string)\n",
+ "print(tokens) \n",
+ " \n",
+ "\n",
+ "\"\"\"\n",
" Tokenize a string.\n",
"\n",
" Args:\n",
@@ -114,7 +180,7 @@
"\n",
" Returns:\n",
" A list of words as the result of tokenization.\n",
- " \"\"\""
+ "\"\"\""
]
},
{
@@ -145,11 +211,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
- "def stem_and_lemmatize(l):\n",
+ "import nltk\n",
+ "from nltk.stem import PorterStemmer, WordNetLemmatizer\n",
+ "\n",
+ "\n",
+ "\n",
+ "def stem_and_lemmatize(words):\n",
+ " \n",
+ " stemmer = PorterStemmer()\n",
+ " lemmatizer = WordNetLemmatizer()\n",
+ " \n",
+ " stemmed = [stemmer.stem(word) for word in words]\n",
+ " lemmatized = [lemmatizer.lemmatize(word) for word in words]\n",
+ " \n",
+ " return stemmed, lemmatized\n",
" \"\"\"\n",
" Perform stemming and lemmatization on a list of words.\n",
"\n",
@@ -176,11 +255,32 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package stopwords to\n",
+ "[nltk_data] C:\\Users\\User\\AppData\\Roaming\\nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n"
+ ]
+ }
+ ],
"source": [
- "def remove_stopwords(l):\n",
+ "import nltk\n",
+ "from nltk.corpus import stopwords\n",
+ "\n",
+ "nltk.download('stopwords')\n",
+ "\n",
+ "\n",
+ "def remove_stopwords(words):\n",
+ " stop_words = set(stopwords.words('english'))\n",
+ " \n",
+ " clean_words = [word for word in words if word.lower() not in stop_words]\n",
+ " \n",
+ " return clean_words\n",
" \"\"\"\n",
" Remove English stopwords from a list of strings.\n",
"\n",
@@ -204,7 +304,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -218,7 +318,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb
index 6b0e116..0770d69 100644
--- a/your-code/challenge-2.ipynb
+++ b/your-code/challenge-2.ipynb
@@ -46,11 +46,227 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 71,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target | \n",
+ " id | \n",
+ " date | \n",
+ " flag | \n",
+ " user | \n",
+ " text | \n",
+ " is_positive | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 1467810672 | \n",
+ " Mon Apr 06 22:19:49 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " scotthamilton | \n",
+ " is upset that he can't update his Facebook by ... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 1467810917 | \n",
+ " Mon Apr 06 22:19:53 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " mattycus | \n",
+ " @Kenichan I dived many times for the ball. Man... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0 | \n",
+ " 1467811184 | \n",
+ " Mon Apr 06 22:19:57 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " ElleCTF | \n",
+ " my whole body feels itchy and like its on fire | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0 | \n",
+ " 1467811193 | \n",
+ " Mon Apr 06 22:19:57 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " Karoli | \n",
+ " @nationwideclass no, it's not behaving at all.... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0 | \n",
+ " 1467811372 | \n",
+ " Mon Apr 06 22:20:00 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " joy_wolf | \n",
+ " @Kwesidei not the whole crew | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1599994 | \n",
+ " 4 | \n",
+ " 2193601966 | \n",
+ " Tue Jun 16 08:40:49 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " AmandaMarie1028 | \n",
+ " Just woke up. Having no school is the best fee... | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1599995 | \n",
+ " 4 | \n",
+ " 2193601969 | \n",
+ " Tue Jun 16 08:40:49 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " TheWDBoards | \n",
+ " TheWDB.com - Very cool to hear old Walt interv... | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1599996 | \n",
+ " 4 | \n",
+ " 2193601991 | \n",
+ " Tue Jun 16 08:40:49 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " bpbabe | \n",
+ " Are you ready for your MoJo Makeover? Ask me f... | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1599997 | \n",
+ " 4 | \n",
+ " 2193602064 | \n",
+ " Tue Jun 16 08:40:49 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " tinydiamondz | \n",
+ " Happy 38th Birthday to my boo of alll time!!! ... | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1599998 | \n",
+ " 4 | \n",
+ " 2193602129 | \n",
+ " Tue Jun 16 08:40:50 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " RyanTrevMorris | \n",
+ " happy #charitytuesday @theNSPCC @SparksCharity... | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1599999 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target id date flag \\\n",
+ "0 0 1467810672 Mon Apr 06 22:19:49 PDT 2009 NO_QUERY \n",
+ "1 0 1467810917 Mon Apr 06 22:19:53 PDT 2009 NO_QUERY \n",
+ "2 0 1467811184 Mon Apr 06 22:19:57 PDT 2009 NO_QUERY \n",
+ "3 0 1467811193 Mon Apr 06 22:19:57 PDT 2009 NO_QUERY \n",
+ "4 0 1467811372 Mon Apr 06 22:20:00 PDT 2009 NO_QUERY \n",
+ "... ... ... ... ... \n",
+ "1599994 4 2193601966 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
+ "1599995 4 2193601969 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
+ "1599996 4 2193601991 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
+ "1599997 4 2193602064 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
+ "1599998 4 2193602129 Tue Jun 16 08:40:50 PDT 2009 NO_QUERY \n",
+ "\n",
+ " user text \\\n",
+ "0 scotthamilton is upset that he can't update his Facebook by ... \n",
+ "1 mattycus @Kenichan I dived many times for the ball. Man... \n",
+ "2 ElleCTF my whole body feels itchy and like its on fire \n",
+ "3 Karoli @nationwideclass no, it's not behaving at all.... \n",
+ "4 joy_wolf @Kwesidei not the whole crew \n",
+ "... ... ... \n",
+ "1599994 AmandaMarie1028 Just woke up. Having no school is the best fee... \n",
+ "1599995 TheWDBoards TheWDB.com - Very cool to hear old Walt interv... \n",
+ "1599996 bpbabe Are you ready for your MoJo Makeover? Ask me f... \n",
+ "1599997 tinydiamondz Happy 38th Birthday to my boo of alll time!!! ... \n",
+ "1599998 RyanTrevMorris happy #charitytuesday @theNSPCC @SparksCharity... \n",
+ "\n",
+ " is_positive \n",
+ "0 False \n",
+ "1 False \n",
+ "2 False \n",
+ "3 False \n",
+ "4 False \n",
+ "... ... \n",
+ "1599994 True \n",
+ "1599995 True \n",
+ "1599996 True \n",
+ "1599997 True \n",
+ "1599998 True \n",
+ "\n",
+ "[1599999 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code here"
+ "import pandas as pd\n",
+ "import re\n",
+ "import nltk\n",
+ "from nltk.corpus import stopwords\n",
+ "from nltk.stem import PorterStemmer, WordNetLemmatizer\n",
+ "from nltk.probability import FreqDist\n",
+ "from nltk.classify import NaiveBayesClassifier\n",
+ "from nltk.classify.util import accuracy as nltk_accuracy\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "\n",
+ "# your code here\n",
+    "df = pd.read_csv(r'C:\Users\User\Desktop\iRonhack\Classes at Iron\Week 20\Day 1\lab-nlp\your-code\twitter_data.csv', encoding='ISO-8859-1')  # TODO(review): use a relative path so the notebook runs on other machines\n",
+ "\n",
+ "df.columns = ['target','id','date','flag','user','text']\n",
+ "df\n",
+ "\n",
+ "\n",
+ "#map\n",
+ "df['is_positive'] = df['target'].map({0: False, 4: True})\n",
+ "df"
]
},
{
@@ -76,11 +292,538 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 72,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target | \n",
+ " id | \n",
+ " date | \n",
+ " flag | \n",
+ " user | \n",
+ " text | \n",
+ " is_positive | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 541200 | \n",
+ " 0 | \n",
+ " 2200003313 | \n",
+ " Tue Jun 16 18:18:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " DEWGetMeTho77 | \n",
+ " @Nkluvr4eva My poor little dumpling In Holmde... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 750 | \n",
+ " 0 | \n",
+ " 1467998601 | \n",
+ " Mon Apr 06 23:11:18 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " Young_J | \n",
+ " I'm off too bed. I gotta wake up hella early t... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 766711 | \n",
+ " 0 | \n",
+ " 2300049112 | \n",
+ " Tue Jun 23 13:40:12 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " dougnawoschik | \n",
+ " I havent been able to listen to it yet My spe... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 285055 | \n",
+ " 0 | \n",
+ " 1993474319 | \n",
+ " Mon Jun 01 10:26:09 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " thireven | \n",
+ " now remembers why solving a relatively big equ... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 705995 | \n",
+ " 0 | \n",
+ " 2256551006 | \n",
+ " Sat Jun 20 12:56:51 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " taracollins086 | \n",
+ " Ate too much, feel sick | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1374482 | \n",
+ " 4 | \n",
+ " 2051447103 | \n",
+ " Fri Jun 05 22:02:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " _Jaska | \n",
+ " @girlwonder24 Thanks. | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 667014 | \n",
+ " 0 | \n",
+ " 2245469948 | \n",
+ " Fri Jun 19 16:10:39 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " julianicolao | \n",
+ " trying to study for the biggest test, next wee... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1451234 | \n",
+ " 4 | \n",
+ " 2063022808 | \n",
+ " Sun Jun 07 01:05:46 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " ElaineToni | \n",
+ " Just finished watching Your Song Presents: Boy... | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1181412 | \n",
+ " 4 | \n",
+ " 1982082859 | \n",
+ " Sun May 31 10:29:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " lindseyrd20 | \n",
+ " @janfran813 awww i can't wait to get one | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 517910 | \n",
+ " 0 | \n",
+ " 2191411932 | \n",
+ " Tue Jun 16 05:13:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " serraannisa | \n",
+ " doing nothing | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
20000 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target id date flag \\\n",
+ "541200 0 2200003313 Tue Jun 16 18:18:13 PDT 2009 NO_QUERY \n",
+ "750 0 1467998601 Mon Apr 06 23:11:18 PDT 2009 NO_QUERY \n",
+ "766711 0 2300049112 Tue Jun 23 13:40:12 PDT 2009 NO_QUERY \n",
+ "285055 0 1993474319 Mon Jun 01 10:26:09 PDT 2009 NO_QUERY \n",
+ "705995 0 2256551006 Sat Jun 20 12:56:51 PDT 2009 NO_QUERY \n",
+ "... ... ... ... ... \n",
+ "1374482 4 2051447103 Fri Jun 05 22:02:36 PDT 2009 NO_QUERY \n",
+ "667014 0 2245469948 Fri Jun 19 16:10:39 PDT 2009 NO_QUERY \n",
+ "1451234 4 2063022808 Sun Jun 07 01:05:46 PDT 2009 NO_QUERY \n",
+ "1181412 4 1982082859 Sun May 31 10:29:36 PDT 2009 NO_QUERY \n",
+ "517910 0 2191411932 Tue Jun 16 05:13:13 PDT 2009 NO_QUERY \n",
+ "\n",
+ " user text \\\n",
+ "541200 DEWGetMeTho77 @Nkluvr4eva My poor little dumpling In Holmde... \n",
+ "750 Young_J I'm off too bed. I gotta wake up hella early t... \n",
+ "766711 dougnawoschik I havent been able to listen to it yet My spe... \n",
+ "285055 thireven now remembers why solving a relatively big equ... \n",
+ "705995 taracollins086 Ate too much, feel sick \n",
+ "... ... ... \n",
+ "1374482 _Jaska @girlwonder24 Thanks. \n",
+ "667014 julianicolao trying to study for the biggest test, next wee... \n",
+ "1451234 ElaineToni Just finished watching Your Song Presents: Boy... \n",
+ "1181412 lindseyrd20 @janfran813 awww i can't wait to get one \n",
+ "517910 serraannisa doing nothing \n",
+ "\n",
+ " is_positive \n",
+ "541200 False \n",
+ "750 False \n",
+ "766711 False \n",
+ "285055 False \n",
+ "705995 False \n",
+ "... ... \n",
+ "1374482 True \n",
+ "667014 False \n",
+ "1451234 True \n",
+ "1181412 True \n",
+ "517910 False \n",
+ "\n",
+ "[20000 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code here"
+ "import re\n",
+ "import nltk\n",
+ "from nltk.tokenize import word_tokenize\n",
+    "\n",
+ "from nltk.stem import PorterStemmer, WordNetLemmatizer\n",
+    "\n",
+ "from nltk.corpus import stopwords\n",
+ "import random\n",
+ "\n",
+ "\n",
+ "# your code here\n",
+ "\n",
+ "\n",
+ "df_sampled = df.sample(n=20000, random_state=42)\n",
+ "df_sampled\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target | \n",
+ " id | \n",
+ " date | \n",
+ " flag | \n",
+ " user | \n",
+ " text | \n",
+ " is_positive | \n",
+ " clean | \n",
+ " token | \n",
+ " stemmed_and_lemmatized | \n",
+ " text_processed | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 541200 | \n",
+ " 0 | \n",
+ " 2200003313 | \n",
+ " Tue Jun 16 18:18:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " DEWGetMeTho77 | \n",
+ " @Nkluvr4eva My poor little dumpling In Holmde... | \n",
+ " False | \n",
+ " Nkluvr eva My poor little dumpling In Holmdel ... | \n",
+ " [Nkluvr, eva, My, poor, little, dumpling, In, ... | \n",
+ " [Nkluvr, eva, My, poor, little, dumpling, In, ... | \n",
+ " [Nkluvr, eva, poor, little, dumpling, Holmdel,... | \n",
+ "
\n",
+ " \n",
+ " | 750 | \n",
+ " 0 | \n",
+ " 1467998601 | \n",
+ " Mon Apr 06 23:11:18 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " Young_J | \n",
+ " I'm off too bed. I gotta wake up hella early t... | \n",
+ " False | \n",
+ " I'm off too bed I gotta wake up hella early to... | \n",
+ " [I, 'm, off, too, bed, I, got, ta, wake, up, h... | \n",
+ " [I, 'm, off, too, bed, I, got, ta, wake, up, h... | \n",
+ " ['m, bed, got, ta, wake, hella, early, tomorro... | \n",
+ "
\n",
+ " \n",
+ " | 766711 | \n",
+ " 0 | \n",
+ " 2300049112 | \n",
+ " Tue Jun 23 13:40:12 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " dougnawoschik | \n",
+ " I havent been able to listen to it yet My spe... | \n",
+ " False | \n",
+ " I havent been able to listen to it yet My spea... | \n",
+ " [I, havent, been, able, to, listen, to, it, ye... | \n",
+ " [I, havent, been, able, to, listen, to, it, ye... | \n",
+ " [havent, able, listen, yet, speaker, busted] | \n",
+ "
\n",
+ " \n",
+ " | 285055 | \n",
+ " 0 | \n",
+ " 1993474319 | \n",
+ " Mon Jun 01 10:26:09 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " thireven | \n",
+ " now remembers why solving a relatively big equ... | \n",
+ " False | \n",
+ " now remembers why solving a relatively big equ... | \n",
+ " [now, remembers, why, solving, a, relatively, ... | \n",
+ " [now, remembers, why, solving, a, relatively, ... | \n",
+ " [remembers, solving, relatively, big, equation... | \n",
+ "
\n",
+ " \n",
+ " | 705995 | \n",
+ " 0 | \n",
+ " 2256551006 | \n",
+ " Sat Jun 20 12:56:51 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " taracollins086 | \n",
+ " Ate too much, feel sick | \n",
+ " False | \n",
+ " Ate too much feel sick | \n",
+ " [Ate, too, much, feel, sick] | \n",
+ " [Ate, too, much, feel, sick] | \n",
+ " [Ate, much, feel, sick] | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1374482 | \n",
+ " 4 | \n",
+ " 2051447103 | \n",
+ " Fri Jun 05 22:02:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " _Jaska | \n",
+ " @girlwonder24 Thanks. | \n",
+ " True | \n",
+ " girlwonder Thanks | \n",
+ " [girlwonder, Thanks] | \n",
+ " [girlwonder, Thanks] | \n",
+ " [girlwonder, Thanks] | \n",
+ "
\n",
+ " \n",
+ " | 667014 | \n",
+ " 0 | \n",
+ " 2245469948 | \n",
+ " Fri Jun 19 16:10:39 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " julianicolao | \n",
+ " trying to study for the biggest test, next wee... | \n",
+ " False | \n",
+ " trying to study for the biggest test next week... | \n",
+ " [trying, to, study, for, the, biggest, test, n... | \n",
+ " [trying, to, study, for, the, biggest, test, n... | \n",
+ " [trying, study, biggest, test, next, week, n't... | \n",
+ "
\n",
+ " \n",
+ " | 1451234 | \n",
+ " 4 | \n",
+ " 2063022808 | \n",
+ " Sun Jun 07 01:05:46 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " ElaineToni | \n",
+ " Just finished watching Your Song Presents: Boy... | \n",
+ " True | \n",
+ " Just finished watching Your Song Presents Boys... | \n",
+ " [Just, finished, watching, Your, Song, Present... | \n",
+ " [Just, finished, watching, Your, Song, Present... | \n",
+ " [finished, watching, Song, Presents, Boystown] | \n",
+ "
\n",
+ " \n",
+ " | 1181412 | \n",
+ " 4 | \n",
+ " 1982082859 | \n",
+ " Sun May 31 10:29:36 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " lindseyrd20 | \n",
+ " @janfran813 awww i can't wait to get one | \n",
+ " True | \n",
+ " janfran awww i can't wait to get one | \n",
+ " [janfran, awww, i, ca, n't, wait, to, get, one] | \n",
+ " [janfran, awww, i, ca, n't, wait, to, get, one] | \n",
+ " [janfran, awww, ca, n't, wait, get, one] | \n",
+ "
\n",
+ " \n",
+ " | 517910 | \n",
+ " 0 | \n",
+ " 2191411932 | \n",
+ " Tue Jun 16 05:13:13 PDT 2009 | \n",
+ " NO_QUERY | \n",
+ " serraannisa | \n",
+ " doing nothing | \n",
+ " False | \n",
+ " doing nothing | \n",
+ " [doing, nothing] | \n",
+ " [doing, nothing] | \n",
+ " [nothing] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
20000 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target id date flag \\\n",
+ "541200 0 2200003313 Tue Jun 16 18:18:13 PDT 2009 NO_QUERY \n",
+ "750 0 1467998601 Mon Apr 06 23:11:18 PDT 2009 NO_QUERY \n",
+ "766711 0 2300049112 Tue Jun 23 13:40:12 PDT 2009 NO_QUERY \n",
+ "285055 0 1993474319 Mon Jun 01 10:26:09 PDT 2009 NO_QUERY \n",
+ "705995 0 2256551006 Sat Jun 20 12:56:51 PDT 2009 NO_QUERY \n",
+ "... ... ... ... ... \n",
+ "1374482 4 2051447103 Fri Jun 05 22:02:36 PDT 2009 NO_QUERY \n",
+ "667014 0 2245469948 Fri Jun 19 16:10:39 PDT 2009 NO_QUERY \n",
+ "1451234 4 2063022808 Sun Jun 07 01:05:46 PDT 2009 NO_QUERY \n",
+ "1181412 4 1982082859 Sun May 31 10:29:36 PDT 2009 NO_QUERY \n",
+ "517910 0 2191411932 Tue Jun 16 05:13:13 PDT 2009 NO_QUERY \n",
+ "\n",
+ " user text \\\n",
+ "541200 DEWGetMeTho77 @Nkluvr4eva My poor little dumpling In Holmde... \n",
+ "750 Young_J I'm off too bed. I gotta wake up hella early t... \n",
+ "766711 dougnawoschik I havent been able to listen to it yet My spe... \n",
+ "285055 thireven now remembers why solving a relatively big equ... \n",
+ "705995 taracollins086 Ate too much, feel sick \n",
+ "... ... ... \n",
+ "1374482 _Jaska @girlwonder24 Thanks. \n",
+ "667014 julianicolao trying to study for the biggest test, next wee... \n",
+ "1451234 ElaineToni Just finished watching Your Song Presents: Boy... \n",
+ "1181412 lindseyrd20 @janfran813 awww i can't wait to get one \n",
+ "517910 serraannisa doing nothing \n",
+ "\n",
+ " is_positive clean \\\n",
+ "541200 False Nkluvr eva My poor little dumpling In Holmdel ... \n",
+ "750 False I'm off too bed I gotta wake up hella early to... \n",
+ "766711 False I havent been able to listen to it yet My spea... \n",
+ "285055 False now remembers why solving a relatively big equ... \n",
+ "705995 False Ate too much feel sick \n",
+ "... ... ... \n",
+ "1374482 True girlwonder Thanks \n",
+ "667014 False trying to study for the biggest test next week... \n",
+ "1451234 True Just finished watching Your Song Presents Boys... \n",
+ "1181412 True janfran awww i can't wait to get one \n",
+ "517910 False doing nothing \n",
+ "\n",
+ " token \\\n",
+ "541200 [Nkluvr, eva, My, poor, little, dumpling, In, ... \n",
+ "750 [I, 'm, off, too, bed, I, got, ta, wake, up, h... \n",
+ "766711 [I, havent, been, able, to, listen, to, it, ye... \n",
+ "285055 [now, remembers, why, solving, a, relatively, ... \n",
+ "705995 [Ate, too, much, feel, sick] \n",
+ "... ... \n",
+ "1374482 [girlwonder, Thanks] \n",
+ "667014 [trying, to, study, for, the, biggest, test, n... \n",
+ "1451234 [Just, finished, watching, Your, Song, Present... \n",
+ "1181412 [janfran, awww, i, ca, n't, wait, to, get, one] \n",
+ "517910 [doing, nothing] \n",
+ "\n",
+ " stemmed_and_lemmatized \\\n",
+ "541200 [Nkluvr, eva, My, poor, little, dumpling, In, ... \n",
+ "750 [I, 'm, off, too, bed, I, got, ta, wake, up, h... \n",
+ "766711 [I, havent, been, able, to, listen, to, it, ye... \n",
+ "285055 [now, remembers, why, solving, a, relatively, ... \n",
+ "705995 [Ate, too, much, feel, sick] \n",
+ "... ... \n",
+ "1374482 [girlwonder, Thanks] \n",
+ "667014 [trying, to, study, for, the, biggest, test, n... \n",
+ "1451234 [Just, finished, watching, Your, Song, Present... \n",
+ "1181412 [janfran, awww, i, ca, n't, wait, to, get, one] \n",
+ "517910 [doing, nothing] \n",
+ "\n",
+ " text_processed \n",
+ "541200 [Nkluvr, eva, poor, little, dumpling, Holmdel,... \n",
+ "750 ['m, bed, got, ta, wake, hella, early, tomorro... \n",
+ "766711 [havent, able, listen, yet, speaker, busted] \n",
+ "285055 [remembers, solving, relatively, big, equation... \n",
+ "705995 [Ate, much, feel, sick] \n",
+ "... ... \n",
+ "1374482 [girlwonder, Thanks] \n",
+ "667014 [trying, study, biggest, test, next, week, n't... \n",
+ "1451234 [finished, watching, Song, Presents, Boystown] \n",
+ "1181412 [janfran, awww, ca, n't, wait, get, one] \n",
+ "517910 [nothing] \n",
+ "\n",
+ "[20000 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def clean_up(text):\n",
+ " text = re.sub(r'http\\S+|www\\.\\S+', '', text)\n",
+ " text = re.sub(r\"[^a-zA-Z\\s']\", ' ', text)\n",
+ " text = re.sub(r'\\s+', ' ', text).strip()\n",
+ " return text\n",
+ "\n",
+ "def tokenize(text):\n",
+ " return nltk.word_tokenize(text)\n",
+ "\n",
+ "def stem_and_lemmatize(words):\n",
+ " stemmer = PorterStemmer()\n",
+ " lemmatizer = WordNetLemmatizer()\n",
+    "    stemmed = [stemmer.stem(word) for word in words]  # NOTE(review): 'stemmed' is never returned -- only the lemmatized tokens flow downstream\n",
+ " lemmatized = [lemmatizer.lemmatize(word) for word in words]\n",
+ " return lemmatized \n",
+ "\n",
+ "def remove_stopwords(words):\n",
+ " stop_words = set(stopwords.words('english'))\n",
+ " return [word for word in words if word.lower() not in stop_words]\n",
+ "\n",
+ "\n",
+ "\n",
+ "df_sampled['clean'] = df_sampled['text'].apply(clean_up)\n",
+ "df_sampled['token'] = df_sampled['clean'].apply(tokenize)\n",
+ "df_sampled['stemmed_and_lemmatized'] = df_sampled['token'].apply(stem_and_lemmatize)\n",
+ "df_sampled['text_processed'] = df_sampled['stemmed_and_lemmatized'].apply(remove_stopwords)\n",
+ "\n",
+ "df_sampled\n",
+ "\n"
]
},
{
@@ -98,11 +841,1048 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 74,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top 5,000 words example (first 10): [(\"n't\", 2268), (\"'s\", 2150), (\"'m\", 1610), ('wa', 1412), ('day', 1217), ('get', 1044), ('like', 945), ('good', 933), ('quot', 921), ('go', 898), ('work', 824), ('today', 772), ('love', 756), ('got', 741), ('time', 729), ('one', 695), ('going', 693), ('u', 683), ('know', 680), ('back', 595), ('want', 571), ('amp', 567), ('really', 550), ('think', 535), ('night', 535), ('im', 532), ('see', 528), ('na', 506), (\"'ll\", 488), ('ca', 487), ('lol', 483), ('home', 477), ('new', 476), ('still', 475), ('well', 468), ('much', 466), ('ha', 462), ('need', 459), ('feel', 437), ('miss', 435), ('last', 409), ('make', 408), ('tomorrow', 400), (\"'re\", 375), ('great', 361), ('would', 359), ('morning', 357), ('bad', 337), ('fun', 326), ('sad', 323), ('sleep', 322), ('come', 315), ('wish', 313), ('week', 312), ('tonight', 307), ('say', 305), ('right', 303), ('thing', 298), ('oh', 297), ('friend', 296), ('could', 295), ('nice', 295), ('though', 294), ('haha', 294), (\"'\", 293), ('thanks', 290), (\"'ve\", 285), ('wait', 281), ('gon', 275), ('bed', 274), ('look', 274), ('hope', 274), ('better', 269), ('way', 268), ('lt', 267), ('getting', 266), ('hate', 259), ('twitter', 258), ('people', 251), ('hour', 249), ('sorry', 237), ('weekend', 232), ('Thanks', 228), ('show', 226), ('little', 225), ('happy', 225), ('next', 222), ('school', 221), ('Good', 220), ('doe', 218), ('Oh', 216), ('sick', 215), ('even', 214), ('take', 211), ('dont', 209), ('watching', 208), ('guy', 207), ('working', 207), ('LOL', 205), ('soon', 205)]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[\"n't\",\n",
+ " \"'s\",\n",
+ " \"'m\",\n",
+ " 'wa',\n",
+ " 'day',\n",
+ " 'get',\n",
+ " 'like',\n",
+ " 'good',\n",
+ " 'quot',\n",
+ " 'go',\n",
+ " 'work',\n",
+ " 'today',\n",
+ " 'love',\n",
+ " 'got',\n",
+ " 'time',\n",
+ " 'one',\n",
+ " 'going',\n",
+ " 'u',\n",
+ " 'know',\n",
+ " 'back',\n",
+ " 'want',\n",
+ " 'amp',\n",
+ " 'really',\n",
+ " 'think',\n",
+ " 'night',\n",
+ " 'im',\n",
+ " 'see',\n",
+ " 'na',\n",
+ " \"'ll\",\n",
+ " 'ca',\n",
+ " 'lol',\n",
+ " 'home',\n",
+ " 'new',\n",
+ " 'still',\n",
+ " 'well',\n",
+ " 'much',\n",
+ " 'ha',\n",
+ " 'need',\n",
+ " 'feel',\n",
+ " 'miss',\n",
+ " 'last',\n",
+ " 'make',\n",
+ " 'tomorrow',\n",
+ " \"'re\",\n",
+ " 'great',\n",
+ " 'would',\n",
+ " 'morning',\n",
+ " 'bad',\n",
+ " 'fun',\n",
+ " 'sad',\n",
+ " 'sleep',\n",
+ " 'come',\n",
+ " 'wish',\n",
+ " 'week',\n",
+ " 'tonight',\n",
+ " 'say',\n",
+ " 'right',\n",
+ " 'thing',\n",
+ " 'oh',\n",
+ " 'friend',\n",
+ " 'could',\n",
+ " 'nice',\n",
+ " 'though',\n",
+ " 'haha',\n",
+ " \"'\",\n",
+ " 'thanks',\n",
+ " \"'ve\",\n",
+ " 'wait',\n",
+ " 'gon',\n",
+ " 'bed',\n",
+ " 'look',\n",
+ " 'hope',\n",
+ " 'better',\n",
+ " 'way',\n",
+ " 'lt',\n",
+ " 'getting',\n",
+ " 'hate',\n",
+ " 'twitter',\n",
+ " 'people',\n",
+ " 'hour',\n",
+ " 'sorry',\n",
+ " 'weekend',\n",
+ " 'Thanks',\n",
+ " 'show',\n",
+ " 'little',\n",
+ " 'happy',\n",
+ " 'next',\n",
+ " 'school',\n",
+ " 'Good',\n",
+ " 'doe',\n",
+ " 'Oh',\n",
+ " 'sick',\n",
+ " 'even',\n",
+ " 'take',\n",
+ " 'dont',\n",
+ " 'watching',\n",
+ " 'guy',\n",
+ " 'working',\n",
+ " 'LOL',\n",
+ " 'soon',\n",
+ " 'life',\n",
+ " 'girl',\n",
+ " 'cant',\n",
+ " 'watch',\n",
+ " 'year',\n",
+ " 'x',\n",
+ " 'always',\n",
+ " 'movie',\n",
+ " 'already',\n",
+ " 'everyone',\n",
+ " 'tweet',\n",
+ " 'long',\n",
+ " 'yeah',\n",
+ " 'tired',\n",
+ " 'first',\n",
+ " 'never',\n",
+ " 'wan',\n",
+ " 'suck',\n",
+ " 'sure',\n",
+ " 'start',\n",
+ " 'awesome',\n",
+ " 'find',\n",
+ " 'let',\n",
+ " 'something',\n",
+ " 'yet',\n",
+ " 'phone',\n",
+ " 'best',\n",
+ " 'pretty',\n",
+ " \"'d\",\n",
+ " 'away',\n",
+ " 'done',\n",
+ " 'feeling',\n",
+ " 'cool',\n",
+ " 'old',\n",
+ " 'song',\n",
+ " 'man',\n",
+ " 'looking',\n",
+ " 'sun',\n",
+ " 'thought',\n",
+ " 'please',\n",
+ " 'yes',\n",
+ " 'another',\n",
+ " 'bit',\n",
+ " 'lot',\n",
+ " 'help',\n",
+ " 'hurt',\n",
+ " 'wo',\n",
+ " 'house',\n",
+ " 'made',\n",
+ " 'ever',\n",
+ " 'keep',\n",
+ " 'n',\n",
+ " 'ya',\n",
+ " 'game',\n",
+ " 'ready',\n",
+ " 'went',\n",
+ " 'guess',\n",
+ " 'th',\n",
+ " 'ok',\n",
+ " 'early',\n",
+ " 'mean',\n",
+ " 'follow',\n",
+ " 'sound',\n",
+ " 'hard',\n",
+ " 'pic',\n",
+ " 'rain',\n",
+ " 'left',\n",
+ " 'summer',\n",
+ " 'Im',\n",
+ " 'hey',\n",
+ " 'thank',\n",
+ " 'missed',\n",
+ " 'ur',\n",
+ " 'trying',\n",
+ " 'lost',\n",
+ " 'big',\n",
+ " 'stuff',\n",
+ " 'w',\n",
+ " 'ta',\n",
+ " 'said',\n",
+ " 'late',\n",
+ " 'Ca',\n",
+ " 'party',\n",
+ " 'call',\n",
+ " 'video',\n",
+ " 'tell',\n",
+ " 'someone',\n",
+ " 'baby',\n",
+ " 'yesterday',\n",
+ " 'play',\n",
+ " 'car',\n",
+ " 'found',\n",
+ " 'luck',\n",
+ " 'mom',\n",
+ " 'nothing',\n",
+ " 'maybe',\n",
+ " 'many',\n",
+ " 'birthday',\n",
+ " 'Twitter',\n",
+ " 'follower',\n",
+ " 'weather',\n",
+ " 'two',\n",
+ " 'also',\n",
+ " 'bored',\n",
+ " 'exam',\n",
+ " 'gone',\n",
+ " 'might',\n",
+ " 'waiting',\n",
+ " 'read',\n",
+ " 'funny',\n",
+ " 'hot',\n",
+ " 'world',\n",
+ " 'gt',\n",
+ " 'finally',\n",
+ " 'job',\n",
+ " 'damn',\n",
+ " 'since',\n",
+ " 'excited',\n",
+ " 'later',\n",
+ " 'amazing',\n",
+ " 'check',\n",
+ " 'hear',\n",
+ " 'family',\n",
+ " 'making',\n",
+ " 'live',\n",
+ " 'head',\n",
+ " 'saw',\n",
+ " 'talk',\n",
+ " 'Thank',\n",
+ " 'Got',\n",
+ " 'coming',\n",
+ " 'anything',\n",
+ " 'Going',\n",
+ " 'cold',\n",
+ " 'Well',\n",
+ " 'try',\n",
+ " 'almost',\n",
+ " 'end',\n",
+ " 'give',\n",
+ " 'around',\n",
+ " 'boy',\n",
+ " 'thats',\n",
+ " 'put',\n",
+ " 'till',\n",
+ " 'tho',\n",
+ " 'leave',\n",
+ " 'glad',\n",
+ " 'use',\n",
+ " 'beautiful',\n",
+ " 'dad',\n",
+ " 'far',\n",
+ " 'b',\n",
+ " 'book',\n",
+ " 'place',\n",
+ " 'fan',\n",
+ " 'lunch',\n",
+ " 'missing',\n",
+ " 'cry',\n",
+ " 'must',\n",
+ " 'stop',\n",
+ " 'least',\n",
+ " 'Hope',\n",
+ " 'stay',\n",
+ " 'music',\n",
+ " 'Happy',\n",
+ " 'free',\n",
+ " 'forward',\n",
+ " 'picture',\n",
+ " 'xx',\n",
+ " 'wanted',\n",
+ " 'food',\n",
+ " 'Hey',\n",
+ " 'iPhone',\n",
+ " 'update',\n",
+ " 'Sorry',\n",
+ " 'r',\n",
+ " 'class',\n",
+ " 'woke',\n",
+ " 'omg',\n",
+ " 'yay',\n",
+ " 'may',\n",
+ " 'eat',\n",
+ " 'kid',\n",
+ " 'busy',\n",
+ " 'cause',\n",
+ " 'anymore',\n",
+ " 'totally',\n",
+ " 'thinking',\n",
+ " 'headache',\n",
+ " 'actually',\n",
+ " 'dog',\n",
+ " 'dinner',\n",
+ " 'minute',\n",
+ " 'U',\n",
+ " 'okay',\n",
+ " 'sweet',\n",
+ " 'lovely',\n",
+ " 'shit',\n",
+ " 'New',\n",
+ " 'ill',\n",
+ " 'idea',\n",
+ " 'win',\n",
+ " 'poor',\n",
+ " 'without',\n",
+ " 'came',\n",
+ " 'month',\n",
+ " 'word',\n",
+ " 'Love',\n",
+ " 'hair',\n",
+ " 'cute',\n",
+ " 'wrong',\n",
+ " 'Day',\n",
+ " 'wow',\n",
+ " 'believe',\n",
+ " 'face',\n",
+ " 'anyone',\n",
+ " 'Sunday',\n",
+ " 'name',\n",
+ " 'every',\n",
+ " 'everything',\n",
+ " 'able',\n",
+ " 'playing',\n",
+ " 'didnt',\n",
+ " 'sooo',\n",
+ " 'kinda',\n",
+ " 'buy',\n",
+ " 'Yeah',\n",
+ " 'part',\n",
+ " 'p',\n",
+ " 'Morning',\n",
+ " 'mine',\n",
+ " 'finished',\n",
+ " 'room',\n",
+ " 'mileycyrus',\n",
+ " 'else',\n",
+ " 'alone',\n",
+ " 'eye',\n",
+ " 'listening',\n",
+ " 'either',\n",
+ " 'ticket',\n",
+ " 'enough',\n",
+ " 'heard',\n",
+ " 'stupid',\n",
+ " 'OMG',\n",
+ " 'outside',\n",
+ " 'following',\n",
+ " 'hug',\n",
+ " 'mind',\n",
+ " 'meet',\n",
+ " 'true',\n",
+ " 'eating',\n",
+ " 'Monday',\n",
+ " 'final',\n",
+ " 'coffee',\n",
+ " 'study',\n",
+ " 'real',\n",
+ " 'computer',\n",
+ " 'blog',\n",
+ " 'post',\n",
+ " 'break',\n",
+ " 'person',\n",
+ " 'enjoy',\n",
+ " 'Haha',\n",
+ " 'reading',\n",
+ " 'whole',\n",
+ " 'hand',\n",
+ " 'dream',\n",
+ " 'hehe',\n",
+ " 'talking',\n",
+ " 'album',\n",
+ " 'aww',\n",
+ " 'crazy',\n",
+ " 'Yes',\n",
+ " 'reply',\n",
+ " 'Watching',\n",
+ " 'Still',\n",
+ " 'probably',\n",
+ " 'photo',\n",
+ " 'add',\n",
+ " 'plan',\n",
+ " 'rest',\n",
+ " 'Damn',\n",
+ " 'half',\n",
+ " 'using',\n",
+ " 'taking',\n",
+ " 'text',\n",
+ " 'side',\n",
+ " 'hahaha',\n",
+ " 'Friday',\n",
+ " 'run',\n",
+ " 'fine',\n",
+ " 'stuck',\n",
+ " 'heart',\n",
+ " 'seen',\n",
+ " 'news',\n",
+ " 'full',\n",
+ " 'god',\n",
+ " 'forgot',\n",
+ " 'hit',\n",
+ " 'Great',\n",
+ " 'seems',\n",
+ " 'seeing',\n",
+ " 'hi',\n",
+ " 'trip',\n",
+ " 'course',\n",
+ " 'pain',\n",
+ " 'kind',\n",
+ " 'money',\n",
+ " 'change',\n",
+ " 'beach',\n",
+ " 'told',\n",
+ " 'nite',\n",
+ " 'started',\n",
+ " 'shopping',\n",
+ " 'hopefully',\n",
+ " 'super',\n",
+ " 'took',\n",
+ " 'problem',\n",
+ " 'brother',\n",
+ " 'site',\n",
+ " 'boring',\n",
+ " 'com',\n",
+ " 'send',\n",
+ " 'used',\n",
+ " 'train',\n",
+ " 'pm',\n",
+ " 'tried',\n",
+ " 'nap',\n",
+ " 'died',\n",
+ " 'quite',\n",
+ " 'remember',\n",
+ " 'reason',\n",
+ " 'pay',\n",
+ " 'finish',\n",
+ " 'soo',\n",
+ " 'bought',\n",
+ " 'afternoon',\n",
+ " 'sister',\n",
+ " 'link',\n",
+ " 'ago',\n",
+ " 'P',\n",
+ " 'raining',\n",
+ " 'LOVE',\n",
+ " 'instead',\n",
+ " 'rock',\n",
+ " 'til',\n",
+ " 'crap',\n",
+ " 'Back',\n",
+ " 'drink',\n",
+ " 'cuz',\n",
+ " 'couple',\n",
+ " 'point',\n",
+ " 'Get',\n",
+ " 'concert',\n",
+ " 'drive',\n",
+ " 'tommcfly',\n",
+ " 'dude',\n",
+ " 'jealous',\n",
+ " 'running',\n",
+ " 'lmao',\n",
+ " 'boo',\n",
+ " 'welcome',\n",
+ " 'test',\n",
+ " 'sore',\n",
+ " 'Yay',\n",
+ " 'tv',\n",
+ " 'loved',\n",
+ " 'evening',\n",
+ " 'hell',\n",
+ " 'page',\n",
+ " 'yea',\n",
+ " 'walk',\n",
+ " 'season',\n",
+ " 'wonder',\n",
+ " 'list',\n",
+ " 'store',\n",
+ " 'anyway',\n",
+ " 'awake',\n",
+ " 'move',\n",
+ " 'wont',\n",
+ " 'studying',\n",
+ " 'sunny',\n",
+ " 'breakfast',\n",
+ " 'friday',\n",
+ " 'definitely',\n",
+ " 'water',\n",
+ " 'wake',\n",
+ " 'Hi',\n",
+ " 'God',\n",
+ " 'monday',\n",
+ " 'cat',\n",
+ " 'asleep',\n",
+ " 'mum',\n",
+ " 'bring',\n",
+ " 'open',\n",
+ " 'le',\n",
+ " 'leaving',\n",
+ " 'ugh',\n",
+ " 'chocolate',\n",
+ " 'hr',\n",
+ " 'email',\n",
+ " 'moment',\n",
+ " 'office',\n",
+ " 'second',\n",
+ " 'shower',\n",
+ " 'smile',\n",
+ " 'Lol',\n",
+ " 'Ugh',\n",
+ " 'hungry',\n",
+ " 'broke',\n",
+ " 'Wish',\n",
+ " 'clean',\n",
+ " 'cut',\n",
+ " 'ddlovato',\n",
+ " 'gym',\n",
+ " 'ride',\n",
+ " 'Today',\n",
+ " 'watched',\n",
+ " 'visit',\n",
+ " 'Please',\n",
+ " 'R',\n",
+ " 'ask',\n",
+ " 'number',\n",
+ " 'red',\n",
+ " 'worth',\n",
+ " 'project',\n",
+ " 'Getting',\n",
+ " 'close',\n",
+ " 'saying',\n",
+ " 'One',\n",
+ " 'lucky',\n",
+ " 'sitting',\n",
+ " 'worse',\n",
+ " 'seriously',\n",
+ " 'online',\n",
+ " 'church',\n",
+ " 'shirt',\n",
+ " 'dance',\n",
+ " 'set',\n",
+ " 'bout',\n",
+ " 'together',\n",
+ " 'wonderful',\n",
+ " 'wear',\n",
+ " 'team',\n",
+ " 'answer',\n",
+ " 'top',\n",
+ " 'June',\n",
+ " 'tea',\n",
+ " 'longer',\n",
+ " 'E',\n",
+ " 'soooo',\n",
+ " 'worry',\n",
+ " 'care',\n",
+ " 'meeting',\n",
+ " 'forget',\n",
+ " 'min',\n",
+ " 'Go',\n",
+ " 'sunday',\n",
+ " 'internet',\n",
+ " 'hang',\n",
+ " 'cream',\n",
+ " 'st',\n",
+ " 'starting',\n",
+ " 'mood',\n",
+ " 'v',\n",
+ " 'fast',\n",
+ " 'horrible',\n",
+ " 'date',\n",
+ " 'c',\n",
+ " 'Enjoy',\n",
+ " 'via',\n",
+ " 'happen',\n",
+ " 'earlier',\n",
+ " 'fucking',\n",
+ " 'ate',\n",
+ " 'favorite',\n",
+ " 'followfriday',\n",
+ " 'driving',\n",
+ " 'happened',\n",
+ " 'TV',\n",
+ " 'Saturday',\n",
+ " 'doesnt',\n",
+ " 'high',\n",
+ " 'mother',\n",
+ " 'town',\n",
+ " 'enjoying',\n",
+ " 'agree',\n",
+ " 'turn',\n",
+ " 'chance',\n",
+ " 'Wow',\n",
+ " 'parent',\n",
+ " 'website',\n",
+ " 'Finally',\n",
+ " 'question',\n",
+ " 'Ok',\n",
+ " 'Glad',\n",
+ " 'broken',\n",
+ " 'tweeting',\n",
+ " 'black',\n",
+ " 'rainy',\n",
+ " 'co',\n",
+ " 'ice',\n",
+ " 'Goodnight',\n",
+ " 'pool',\n",
+ " 'heading',\n",
+ " 'sigh',\n",
+ " 'B',\n",
+ " 'drinking',\n",
+ " 'park',\n",
+ " 'fall',\n",
+ " 'slept',\n",
+ " 'YAY',\n",
+ " 'small',\n",
+ " 'LA',\n",
+ " 'laptop',\n",
+ " 'chat',\n",
+ " 'knew',\n",
+ " 'fail',\n",
+ " 'da',\n",
+ " 'chicken',\n",
+ " 'goin',\n",
+ " 'slow',\n",
+ " 'throat',\n",
+ " 'episode',\n",
+ " 'e',\n",
+ " 'business',\n",
+ " 'garden',\n",
+ " 'homework',\n",
+ " 'passed',\n",
+ " 'upset',\n",
+ " 'comment',\n",
+ " 'sleeping',\n",
+ " 'airport',\n",
+ " 'saturday',\n",
+ " 'Let',\n",
+ " 'taken',\n",
+ " 'understand',\n",
+ " 'due',\n",
+ " 'shop',\n",
+ " 'Work',\n",
+ " 'hello',\n",
+ " 'listen',\n",
+ " 'k',\n",
+ " 'woman',\n",
+ " 'support',\n",
+ " 'aw',\n",
+ " 'Night',\n",
+ " 'star',\n",
+ " 'Time',\n",
+ " 'vote',\n",
+ " 'story',\n",
+ " 'scared',\n",
+ " 'message',\n",
+ " 'holiday',\n",
+ " 'foot',\n",
+ " 'nd',\n",
+ " 'Maybe',\n",
+ " 'awww',\n",
+ " 'line',\n",
+ " 'L',\n",
+ " 'weird',\n",
+ " 'sunshine',\n",
+ " 'fell',\n",
+ " 'seem',\n",
+ " 'english',\n",
+ " 'lady',\n",
+ " 'award',\n",
+ " 'pick',\n",
+ " 'bus',\n",
+ " 'glass',\n",
+ " 'worst',\n",
+ " 'called',\n",
+ " 'Feeling',\n",
+ " 'account',\n",
+ " 'congrats',\n",
+ " 'goodnight',\n",
+ " 'company',\n",
+ " 'Really',\n",
+ " 'xD',\n",
+ " 'son',\n",
+ " 'Poor',\n",
+ " 'dear',\n",
+ " 'mad',\n",
+ " 'Need',\n",
+ " 'order',\n",
+ " 'Last',\n",
+ " 'fuck',\n",
+ " 'past',\n",
+ " 'facebook',\n",
+ " 'rather',\n",
+ " 'havent',\n",
+ " 'spent',\n",
+ " 'load',\n",
+ " 'fix',\n",
+ " 'bag',\n",
+ " 'short',\n",
+ " 'May',\n",
+ " 'leg',\n",
+ " 'interesting',\n",
+ " 'gave',\n",
+ " 'dead',\n",
+ " 'Nice',\n",
+ " 'hoping',\n",
+ " 'different',\n",
+ " 'loving',\n",
+ " 'catch',\n",
+ " 'ipod',\n",
+ " 'absolutely',\n",
+ " 'perfect',\n",
+ " 'case',\n",
+ " 'Miss',\n",
+ " 'officially',\n",
+ " 'writing',\n",
+ " 'sometimes',\n",
+ " 'meant',\n",
+ " 'ah',\n",
+ " 'cleaning',\n",
+ " 'forever',\n",
+ " 'X',\n",
+ " 'issue',\n",
+ " 'G',\n",
+ " 'window',\n",
+ " 'dress',\n",
+ " 'idk',\n",
+ " 'Looking',\n",
+ " 'deal',\n",
+ " 'inside',\n",
+ " 'needed',\n",
+ " 'si',\n",
+ " 'moving',\n",
+ " 'profile',\n",
+ " 'write',\n",
+ " 'C',\n",
+ " 'graduation',\n",
+ " 'fight',\n",
+ " 'bday',\n",
+ " 'met',\n",
+ " 'power',\n",
+ " 'box',\n",
+ " 'cousin',\n",
+ " 'sent',\n",
+ " 'Sad',\n",
+ " 'bet',\n",
+ " 'looked',\n",
+ " 'living',\n",
+ " 'worried',\n",
+ " 'bye',\n",
+ " 'wedding',\n",
+ " 'iphone',\n",
+ " 'college',\n",
+ " 'btw',\n",
+ " 'Welcome',\n",
+ " 'touch',\n",
+ " 'kill',\n",
+ " 'Awesome',\n",
+ " 'Another',\n",
+ " 'youtube',\n",
+ " 'fantastic',\n",
+ " 'camera',\n",
+ " 'group',\n",
+ " 'vip',\n",
+ " 'cake',\n",
+ " 'sort',\n",
+ " 'cup',\n",
+ " 'especially',\n",
+ " 'city',\n",
+ " 'gorgeous',\n",
+ " 'clothes',\n",
+ " 'version',\n",
+ " 'finger',\n",
+ " 'band',\n",
+ " 'Everyone',\n",
+ " 'liked',\n",
+ " 'unfortunately',\n",
+ " 'beer',\n",
+ " 'shoot',\n",
+ " 'lonely',\n",
+ " 'bitch',\n",
+ " 'shoe',\n",
+ " 'singing',\n",
+ " 'interview',\n",
+ " 'random',\n",
+ " 'drop',\n",
+ " 'ppl',\n",
+ " 'yr',\n",
+ " 'gay',\n",
+ " 'sleepy',\n",
+ " 'white',\n",
+ " 'body',\n",
+ " 'fb',\n",
+ " 'Someone',\n",
+ " 'supposed',\n",
+ " 'info',\n",
+ " 'paper',\n",
+ " 'David',\n",
+ " 'Like',\n",
+ " 'Birthday',\n",
+ " 'lil',\n",
+ " 'Trying',\n",
+ " 'Awww',\n",
+ " 'ai',\n",
+ " 'shot',\n",
+ " 'Also',\n",
+ " 'special',\n",
+ " 'door',\n",
+ " 'sign',\n",
+ " 'hubby',\n",
+ " 'puppy',\n",
+ " 'plz',\n",
+ " 'alright',\n",
+ " 'shall',\n",
+ " 'thx',\n",
+ " 'save',\n",
+ " 'everybody',\n",
+ " 'OK',\n",
+ " 'arm',\n",
+ " 'bro',\n",
+ " 'learn',\n",
+ " 'web',\n",
+ " 'asked',\n",
+ " 'ive',\n",
+ " 'peep',\n",
+ " 'green',\n",
+ " 'See',\n",
+ " 'quick',\n",
+ " 'june',\n",
+ " 'sit',\n",
+ " 'K',\n",
+ " 'confused',\n",
+ " 'laugh',\n",
+ " 'promise',\n",
+ " 'Okay',\n",
+ " 'voice',\n",
+ " 'flight',\n",
+ " 'tear',\n",
+ " 'realize',\n",
+ " 'nose',\n",
+ " 'babe',\n",
+ " 'note',\n",
+ " 'relaxing',\n",
+ " 'Looks',\n",
+ " 'Ah',\n",
+ " 'future',\n",
+ " 'lesson',\n",
+ " 'em',\n",
+ " 'easy',\n",
+ " 'Hopefully',\n",
+ " 'Man',\n",
+ " 'yummy',\n",
+ " 'hold',\n",
+ " 'spend',\n",
+ " 'Life',\n",
+ " 'light',\n",
+ " 'Follow',\n",
+ " 'xoxo',\n",
+ " 'xxx',\n",
+ " 'blue',\n",
+ " 'Aww',\n",
+ " 'lazy',\n",
+ " 'smell',\n",
+ " 'Listening',\n",
+ " 'along',\n",
+ " 'English',\n",
+ " 'father',\n",
+ " 'WTF',\n",
+ " 'freaking',\n",
+ " 'Home',\n",
+ " 'except',\n",
+ " 'math',\n",
+ " 'tour',\n",
+ " 'age',\n",
+ " 'plane',\n",
+ " 'hanging',\n",
+ " 'share',\n",
+ " 'join',\n",
+ " 'wine',\n",
+ " 'OH',\n",
+ " 'download',\n",
+ " 'fact',\n",
+ " 'Gon',\n",
+ " 'bloody',\n",
+ " 'ahh',\n",
+ " 'stand',\n",
+ " 'none',\n",
+ " 'Hello',\n",
+ " 'fly',\n",
+ " 'j',\n",
+ " 'vacation',\n",
+ " 'wishing',\n",
+ " 'myspace',\n",
+ " 'Facebook',\n",
+ " 'paid',\n",
+ " 'N',\n",
+ " 'exciting',\n",
+ " 'matter',\n",
+ " 'mile',\n",
+ " 'round',\n",
+ " 'huge',\n",
+ " 'Went',\n",
+ " 'lame',\n",
+ " 'jus',\n",
+ " 'hospital',\n",
+ " 'figure',\n",
+ " 'giving',\n",
+ " 'topic',\n",
+ " 'ear',\n",
+ " 'afford',\n",
+ " 'nearly',\n",
+ " 'ended',\n",
+ " 'Come',\n",
+ " 'hahah',\n",
+ " 'air',\n",
+ " 'Mr',\n",
+ " 'proud',\n",
+ " 'whatever',\n",
+ " 'three',\n",
+ " 'lately',\n",
+ " 'kitty',\n",
+ " 'club',\n",
+ " 'XD',\n",
+ " 'f',\n",
+ " 'sing',\n",
+ " 'showing',\n",
+ " 'road',\n",
+ " 'Take',\n",
+ " 'beat',\n",
+ " 'London',\n",
+ " 'sale',\n",
+ " 'country',\n",
+ " 'gunna',\n",
+ " 'DM',\n",
+ " 'warm',\n",
+ " 'stopped',\n",
+ " 'worked',\n",
+ " 'boot',\n",
+ " 'fever',\n",
+ " 'Tuesday',\n",
+ " 'cheese',\n",
+ " 'exactly',\n",
+ " 'Star',\n",
+ " 'service',\n",
+ " 'tan',\n",
+ " 'cover',\n",
+ " 'Heading',\n",
+ " 'iPod',\n",
+ " 'Working',\n",
+ " 'low',\n",
+ " 'helping',\n",
+ " 'Mother',\n",
+ " 'type',\n",
+ " 'fair',\n",
+ " 'trouble',\n",
+ " 'honey',\n",
+ " 'g',\n",
+ " 'storm',\n",
+ " 'fam',\n",
+ " 'joke',\n",
+ " 'enjoyed',\n",
+ " 'sold',\n",
+ " 'mouth',\n",
+ " 'wearing',\n",
+ " 'depressing',\n",
+ " 'luv',\n",
+ " 'Boo',\n",
+ " 'card',\n",
+ " 'child',\n",
+ " 'delicious',\n",
+ " 'mall',\n",
+ " 'degree',\n",
+ " 'radio',\n",
+ " 'sooooo',\n",
+ " 'boyfriend',\n",
+ " 'packing',\n",
+ " 'played',\n",
+ " 'ahead',\n",
+ " 'Ha',\n",
+ " 'spot',\n",
+ " 'snow',\n",
+ " 'floor',\n",
+ " 'afraid',\n",
+ " 'bike',\n",
+ " 'happens',\n",
+ " 'young',\n",
+ " 'others',\n",
+ " 'drunk',\n",
+ " 'l',\n",
+ " 'GOOD',\n",
+ " 'Check',\n",
+ " 'REALLY',\n",
+ " 'flu',\n",
+ " 'tweeps',\n",
+ " 'safe',\n",
+ " 'yo',\n",
+ " 'mail',\n",
+ " 'indeed',\n",
+ " 'Fuck',\n",
+ " 'UK',\n",
+ " 'Guess',\n",
+ " 'piece',\n",
+ " 'changed',\n",
+ " ...]"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code here"
+ "from nltk.probability import FreqDist\n",
+ "\n",
+ "all_words = [word for words in df_sampled['text_processed'] for word in words]\n",
+ "\n",
+ "freq_dist = FreqDist(all_words)\n",
+ "\n",
+ "top_5000_words = freq_dist.most_common(5000)\n",
+ "\n",
+ "print(\"Top 5,000 words example (first 10):\", top_5000_words[:100])\n",
+ "\n",
+ "top5000 = []\n",
+ "\n",
+ "for whatever in top_5000_words:\n",
+ " for name in whatever:\n",
+ " top5000.append(name)\n",
+ " \n",
+ "top5000 = top5000[::2]\n",
+ "\n",
+ "top5000\n",
+ "\n"
]
},
{
@@ -167,11 +1947,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
- "# your code here"
+ "\n",
+ "def find_features(document):\n",
+ " words = set(document)\n",
+ " features = {word: (word in words) for word in top5000}\n",
+ " return features\n",
+ "\n",
+ "\n",
+ "documents = list(zip(df_sampled['text_processed'], df_sampled['is_positive']))\n",
+ "\n",
+ "\n",
+ "featuresets = [(find_features(doc), category) for (doc, category) in documents]"
]
},
{
@@ -210,11 +2000,17 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
- "# your code here"
+ "\n",
+ "\n",
+ "random.seed(42)\n",
+ "random.shuffle(featuresets)\n",
+ "train_set, test_set = train_test_split(featuresets, test_size=0.2)\n",
+ "\n",
+ "classifier = NaiveBayesClassifier.train(train_set)"
]
},
{
@@ -230,75 +2026,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 77,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy: 0.7160\n",
+ "Most Informative Features\n",
+ " Ugh = True False : True = 24.9 : 1.0\n",
+ " throat = True False : True = 20.1 : 1.0\n",
+ " Poor = True False : True = 18.7 : 1.0\n",
+ " laugh = True True : False = 16.0 : 1.0\n",
+ " Welcome = True True : False = 15.3 : 1.0\n",
+ " Follow = True True : False = 14.0 : 1.0\n",
+ " sad = True False : True = 13.7 : 1.0\n",
+ " horrible = True False : True = 12.9 : 1.0\n",
+ " ugh = True False : True = 12.9 : 1.0\n",
+ " Hi = True True : False = 12.7 : 1.0\n"
+ ]
+ }
+ ],
"source": [
- "# your code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Bonus Question 1: Improve Model Performance\n",
- "\n",
- "If you are still not exhausted so far and want to dig deeper, try to improve your classifier performance. There are many aspects you can dig into, for example:\n",
- "\n",
- "* Improve stemming and lemmatization. Inspect your bag of words and the most important features. Are there any words you should furuther remove from analysis? You can append these words to further remove to the stop words list.\n",
+ "# your code here\n",
"\n",
- "* Remember we only used the top 5,000 features to build model? Try using different numbers of top features. The bottom line is to use as few features as you can without compromising your model performance. The fewer features you select into your model, the faster your model is trained. Then you can use a larger sample size to improve your model accuracy score."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# your code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Bonus Question 2: Machine Learning Pipeline\n",
+ "accuracy = nltk_accuracy(classifier, test_set)\n",
+ "print(f\"Accuracy: {accuracy:.4f}\")\n",
"\n",
- "In a new Jupyter Notebook, combine all your codes into a function (or a class). Your new function will execute the complete machine learning pipeline job by receiving the dataset location and output the classifier. This will allow you to use your function to predict the sentiment of any tweet in real time. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# your code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Bonus Question 3: Apache Spark\n",
"\n",
- "If you have completed the Apache Spark advanced topic lab, what you can do is to migrate your pipeline from local to a Databricks Notebook. Share your notebook with your instructor and classmates to show off your achievements!"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# your code here"
+ "classifier.show_most_informative_features(10)\n"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -312,7 +2075,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.5"
}
},
"nbformat": 4,