ironhack-datalabs · Polymorphism36 · Apr 5, 2020 · Apr 6, 2020 · Apr 6, 2020 · Apr 6, 2020
diff --git a/module-1/lab-advanced-web-scraping/your-code/Learning.ipynb b/module-1/lab-advanced-web-scraping/your-code/Learning.ipynb
@@ -298,7 +298,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.2"
+   "version": "3.6.9"
   }
  },
  "nbformat": 4,

diff --git a/module-1/lab-advanced-web-scraping/your-code/main.ipynb b/module-1/lab-advanced-web-scraping/your-code/main.ipynb
@@ -235,7 +235,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.2"
+   "version": "3.6.9"
   }
  },
  "nbformat": 4,

diff --git a/module-1/lab-bag-of-words/your-code/main.ipynb b/module-1/lab-bag-of-words/your-code/main.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -25,13 +25,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Ironhack is cool.', 'I love Ironhack.', 'I am a student at Ironhack.']\n"
+     ]
+    }
+   ],
    "source": [
     "corpus = []\n",
     "\n",
-    "# Write your code here"
+    "# Write your code here\n",
+    "# Read every file listed in docs, in order, into corpus.\n",
+    "for doc_path in docs:\n",
+    "    with open(doc_path, 'r') as handle:\n",
+    "        corpus.append(handle.read())\n",
+    "print(corpus)\n"
    ]
   },
   {
@@ -43,10 +56,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['ironhack is cool', 'i love ironhack', 'i am a student at ironhack']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Normalise each document: lowercase it, then strip the periods.\n",
+    "corpus = [doc.lower().replace('.', '') for doc in corpus]\n",
+    "print(corpus)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -84,13 +109,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
     "bag_of_words = []\n",
     "\n",
-    "# Write your code here"
+    "# Write your code here\n",
+    "for doc in corpus:\n",
+    "    # split() with no argument treats any run of whitespace as a single\n",
+    "    # separator, so repeated spaces or a trailing newline add no tokens.\n",
+    "    for word in doc.split():\n",
+    "        if word not in bag_of_words: bag_of_words.append(word)"
    ]
   },
   {
@@ -104,10 +134,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['ironhack', 'is', 'cool', 'i', 'love', 'am', 'a', 'student', 'at']\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(bag_of_words)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -118,13 +158,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
     "term_freq = []\n",
     "\n",
-    "# Write your code here"
+    "# Write your code here\n",
+    "\n",
+    "# Each row describes one document; each column matches the word at the\n",
+    "# same index in bag_of_words.\n",
+    "for doc in corpus:\n",
+    "    tokens = doc.split()\n",
+    "    # Count occurrences rather than testing membership, so a word that\n",
+    "    # appears twice in a document is recorded as 2 instead of 1.\n",
+    "    term_freq.append([tokens.count(word) for word in bag_of_words])\n",
+    "\n",
+    "# Sanity check: one row per document, one column per vocabulary word.\n",
+    "assert len(term_freq) == len(corpus)\n",
+    "assert all(len(row) == len(bag_of_words) for row in term_freq)"
    ]
   },
   {
@@ -169,7 +221,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.4"
+   "version": "3.6.9"
   }
  },
  "nbformat": 4,