Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion module-1/lab-advanced-web-scraping/your-code/main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down
88 changes: 70 additions & 18 deletions module-1/lab-bag-of-words/your-code/main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -25,13 +25,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Ironhack is cool.', 'I love Ironhack.', 'I am a student at Ironhack.']\n"
]
}
],
"source": [
"corpus = []\n",
"\n",
"# Write your code here"
"# Write your code here\n",
"for e in docs:\n",
" with open(e, 'r') as y:\n",
" corpus.append(y.read())\n",
"\n",
"print (corpus)\n"
]
},
{
Expand All @@ -43,10 +56,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ironhack is cool', 'i love ironhack', 'i am a student at ironhack']\n"
]
}
],
"source": [
"corpus=[corpus[i].lower() for i in range(len(corpus))]\n",
"corpus=[corpus[i].replace('.', '') for i in range(len(corpus))]\n",
"print(corpus)"
]
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -84,13 +109,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"bag_of_words = []\n",
"\n",
"# Write your code here"
"# Write your code here\n",
"\n",
"for e in corpus:\n",
" f=e.split(' ')\n",
" for g in f:\n",
" if g not in bag_of_words : bag_of_words.append(g)"
]
},
{
Expand All @@ -104,10 +134,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ironhack', 'is', 'cool', 'i', 'love', 'am', 'a', 'student', 'at']\n"
]
}
],
"source": [
"print(bag_of_words)"
]
},
{
"cell_type": "markdown",
Expand All @@ -118,13 +158,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"term_freq = []\n",
"\n",
"# Write your code here"
"# Write your code here\n",
"\n",
"for e in corpus:\n",
" lista=[]\n",
" e=e.split()\n",
" for f in bag_of_words:\n",
" if f in e:\n",
" lista.append(1)\n",
" else:\n",
" lista.append(0)\n",
" term_freq.append(lista)\n",
"\n",
" "
]
},
{
Expand Down Expand Up @@ -169,7 +221,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down
Loading