diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
index 812f7a4..5a88557 100644
--- a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -40,9 +40,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: bs4 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (0.0.1)\n",
+ "Requirement already satisfied: requests in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (2.22.0)\n",
+ "Requirement already satisfied: pandas in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (0.24.2)\n",
+ "Requirement already satisfied: html5lib in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (1.0.1)\n",
+ "Requirement already satisfied: lxml in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.3.4)\n",
+ "Requirement already satisfied: beautifulsoup4 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from bs4) (4.7.1)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (1.25.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (2019.6.16)\n",
+ "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (2.8)\n",
+ "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (3.0.4)\n",
+ "Requirement already satisfied: numpy>=1.12.0 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from pandas) (1.16.4)\n",
+ "Requirement already satisfied: pytz>=2011k in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from pandas) (2019.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.5.0 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from pandas) (2.8.0)\n",
+ "Requirement already satisfied: webencodings in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from html5lib) (0.5.1)\n",
+ "Requirement already satisfied: six>=1.9 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from html5lib) (1.12.0)\n",
+ "Requirement already satisfied: soupsieve>=1.2 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from beautifulsoup4->bs4) (1.9.2)\n"
+ ]
+ }
+ ],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
@@ -52,9 +75,11 @@
"# from lxml.html import fromstring\n",
"# import urllib.request\n",
"# from urllib.request import urlopen\n",
- "# import random\n",
+ "import random\n",
"# import re\n",
- "# import scrapy"
+ "#import scrapy\n",
+ "import sys\n",
+ "!{sys.executable} -m pip install bs4 requests pandas html5lib lxml"
]
},
{
@@ -66,12 +91,3793 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://github.com/trending/developers'"
+ "url = 'https://github.com/trending/developers'\n",
+ "html = requests.get(url).content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "Trending developers on GitHub today · GitHub \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Trending \n",
+ "
\n",
+ " These are the\n",
+ " developers\n",
+ " building the hot tools today.\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ " 1\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "ericmjl \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " For PyCon, PyData, ODSC, and beyond!\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 2\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "bfred-it \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "📱 Make videos playable inline on the iPhone (prevents automatic fullscreen)\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 3\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "iRoachie \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Material Design implementation of Tabs\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 4\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " wav2c makes arrays out of sounds - useful for Arduino SMAPLER v2 (and for GameBoy Advance) etc\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 5\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "sobolevn \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A curated list of cryptography resources and links.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 6\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "fthomas \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Simple refinement types for Scala\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 7\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "syuilo \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "🌎 A federated blogging platform 🚀 \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 8\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "CompuIves \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 9\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "balloob \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Library for Python 3 to communicate with the Google Chromecast.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 10\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "sdras \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A curated list of awesome actions to use on GitHub\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 11\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "skmp \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Module to export Word, Excel & PowerPoint to PDF. Requires windows and installed office 2013\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 12\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "hovancik \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " break time reminder app\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 13\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "amueller \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A little word cloud generator in Python\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 14\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A process viewer GUI in rust\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 15\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "mholt \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Fast and powerful CSV (delimited text) parser that gracefully handles large files and malformed input\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 16\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " PicoRV32 - A Size-Optimized RISC-V CPU\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 17\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "frenck \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A curated list of amazingly awesome Home Assistant resources.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 18\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "unixorn \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A collection of ZSH frameworks, plugins & themes inspired by the various awesome list collections out there.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 19\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "aneagoie \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Tutorial for udemy course - React\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 20\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "randombit \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Crypto and TLS for Modern C++\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 21\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "rauchg \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Public Slack organizations made easy\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 22\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "tgriesser \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A query builder for PostgreSQL, MySQL and SQLite3, designed to be flexible, portable, and fun to use.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 23\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "vtjnash \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Posix-compliant file name pattern matching\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 24\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "andersy005 \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Deep Learning Specialization by Andrew Ng on Coursera.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 25\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "djrtwo \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Gas Costs from Ethereum Yellow Paper\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " You can’t perform that action at this time.\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
You signed in with another tab or window. Reload to refresh your session. \n",
+ "
You signed out in another tab or window. Reload to refresh your session. \n",
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "
\n",
+ "\n",
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#printing the content from the Trending Developers\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "soup_html"
]
},
{
@@ -79,9 +3885,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "#your code"
- ]
+ "source": []
},
{
"cell_type": "markdown",
@@ -134,11 +3938,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Eric Ma',\n",
+ " 'Federico Brigante',\n",
+ " 'Kyle Roach',\n",
+ " 'Olle Jonsson',\n",
+ " 'Nikita Sobolev',\n",
+ " 'Frank S. Thomas',\n",
+ " 'syuilo',\n",
+ " 'Ives van Hoorne',\n",
+ " 'Paulus Schoutsen',\n",
+ " 'Sarah Drasner',\n",
+ " 'Stefanos Kornilios Mitsis Poiitidis',\n",
+ " 'Jan Hovancik',\n",
+ " 'Andreas Mueller',\n",
+ " 'Guillaume Gomez',\n",
+ " 'Matt Holt',\n",
+ " 'Clifford Wolf',\n",
+ " 'Franck Nijhof',\n",
+ " 'Joe Block',\n",
+ " 'Andrei Neagoie',\n",
+ " 'Jack Lloyd',\n",
+ " 'Guillermo Rauch',\n",
+ " 'Tim Griesser',\n",
+ " 'Jameson Nash',\n",
+ " 'Anderson Banihirwe',\n",
+ " 'Danny Ryan']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "#To reach the names\n",
+ "tags = ['h1'] #esto es una lista!\n",
+ "texto = soup_html.find_all(tags,{'class':'h3 lh-condensed'})\n",
+ "\n",
+ "#Método largo\n",
+ "#names = []\n",
+ "#for t in texto:\n",
+ "# names.append(t.text)\n",
+ "#names \n",
+ " \n",
+ "#list comprenhension\n",
+ "names = [t.text for t in texto]\n",
+ "names"
]
},
{
@@ -152,7 +4003,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -162,37 +4013,148 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 9,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "b'\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n Trending Python repositories on GitHub today \\xc2\\xb7 GitHub \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n\\n\\n \\n\\n \\n\\n \\n \\n \\n\\n \\n\\n\\n\\n\\n \\n\\n\\n \\n\\n\\n\\n \\n\\n \\n \\n\\n \\n \\n\\n \\n\\n \\n\\n \\n \\n\\n \\n\\n\\n \\n\\n\\n \\n\\n \\n\\n \\n \\n\\n \\n\\n\\n\\n\\n\\n \\n\\n \\n\\n \\n \\n\\n \\n\\n
\\n\\n\\n \\n\\n
\\n\\n\\n\\n \\n
\\n\\n\\n\\n\\n
\\n
Trending \\n
See what the GitHub community is most excited about today.
\\n
\\n
\\n\\n\\n\\n
\\n \\n
\\n
\\n \\n\\n \\n\\n \\n Comprehensive Python Cheatsheet\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Fully automated offensive security framework for reconnaissance and vulnerability scanning\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 1,189\\n \\n\\n
\\n \\n 211\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n\\n
\\n \\n 188 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Ludwig is a toolbox built on top of TensorFlow that allows to train and test deep learning models without the need to write code.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 5,146\\n \\n\\n
\\n \\n 547\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 74 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Official python implementation for \"A Baseline for 3D Multi-Object Tracking\"\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n StyleGAN - Official TensorFlow Implementation\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Dagster is an open-source system for building data applications.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 507\\n \\n\\n
\\n \\n 31\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 24 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Models and examples built with TensorFlow\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n text detection mainly based on ctpn model in tensorflow, id card detect, connectionist text proposal network\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n \\n\\xf0\\x9f\\x94\\x8e Find usernames across social networks\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 5,022\\n \\n\\n
\\n \\n 414\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 273 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Deepfakes Software For All\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n pytorch implementation for \"Deep Flow-Guided Video Inpainting\"(CVPR\\'19)\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n BCC - Tools for BPF-based Linux IO analysis, networking, monitoring, and more\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Crypto Trading Bots in Python - Triangular Arbitrage, Beginner & Advanced Cryptocurrency Trading Bots Written in Python\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Deep Learning Examples\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 930\\n \\n\\n
\\n \\n 276\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 41 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Putting TensorFlow back in PyTorch, back in TensorFlow (differentiable TensorFlow PyTorch adapters).\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Text recognition (optical character recognition) with deep learning methods.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 321\\n \\n\\n
\\n \\n 92\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n\\n
\\n \\n 18 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Unofficial PyTorch implementation of BlazeFace\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n A library for encrypted, privacy preserving deep learning\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 3,542\\n \\n\\n
\\n \\n 790\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 8 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n A collective list of free APIs for use in software and web development.\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n \\xe3\\x80\\x8a\\xe5\\x8a\\xa8\\xe6\\x89\\x8b\\xe5\\xad\\xa6\\xe6\\xb7\\xb1\\xe5\\xba\\xa6\\xe5\\xad\\xa6\\xe4\\xb9\\xa0\\xe3\\x80\\x8b\\xef\\xbc\\x9a\\xe9\\x9d\\xa2\\xe5\\x90\\x91\\xe4\\xb8\\xad\\xe6\\x96\\x87\\xe8\\xaf\\xbb\\xe8\\x80\\x85\\xe3\\x80\\x81\\xe8\\x83\\xbd\\xe8\\xbf\\x90\\xe8\\xa1\\x8c\\xe3\\x80\\x81\\xe5\\x8f\\xaf\\xe8\\xae\\xa8\\xe8\\xae\\xba\\xe3\\x80\\x82\\xe8\\x8b\\xb1\\xe6\\x96\\x87\\xe7\\x89\\x88\\xe5\\x8d\\xb3\\xe4\\xbc\\xaf\\xe5\\x85\\x8b\\xe5\\x88\\xa9\\xe2\\x80\\x9c\\xe6\\xb7\\xb1\\xe5\\xba\\xa6\\xe5\\xad\\xa6\\xe4\\xb9\\xa0\\xe5\\xaf\\xbc\\xe8\\xae\\xba\\xef\\xbc\\x88STAT 157\\xef\\xbc\\x89\\xe2\\x80\\x9d\\xe6\\x95\\x99\\xe6\\x9d\\x90\\xe3\\x80\\x82\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Apache Airflow\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Project thu th\\xe1\\xba\\xadp \\xc4\\x91i\\xe1\\xbb\\x83m chu\\xe1\\xba\\xa9n \\xc4\\x91\\xe1\\xba\\xa1i h\\xe1\\xbb\\x8dc 2014 - 2018 v\\xc3\\xa0 ph\\xc3\\xa2n t\\xc3\\xadch d\\xe1\\xbb\\xaf li\\xe1\\xbb\\x87u\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 61\\n \\n\\n
\\n \\n 12\\n \\n\\n \\n
\\n Built by\\n \\n \\n\\n
\\n \\n 5 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Advanced information gathering & OSINT tool for phone numbers.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 631\\n \\n\\n
\\n \\n 149\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n\\n
\\n \\n 10 stars today\\n \\n
\\n \\n\\n
\\n
\\n
\\n \\n
\\n\\n
\\n\\n \\n\\n\\n\\n\\n \\n
\\n
\\n \\n \\n You can\\xe2\\x80\\x99t perform that action at this time.\\n
\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n
\\n
You signed in with another tab or window. Reload to refresh your session. \\n
You signed out in another tab or window. Reload to refresh your session. \\n
\\n \\n \\n \\n \\n \\n \\n \\n
\\n \\n \\n \\n\\n \\n\\n
\\n\\n \\n\\n\\n'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "#your code\n",
+ "html = requests.get(url).content\n",
+ "html"
]
},
{
- "cell_type": "markdown",
- "metadata": {},
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['gto76/python-cheatsheet',\n",
+ " 'j3ssie/Osmedeus',\n",
+ " 'tangzixiang0304/Shielded_detector',\n",
+ " 'uber/ludwig',\n",
+ " 'xinshuoweng/AB3DMOT',\n",
+ " 'NVlabs/stylegan',\n",
+ " 'dagster-io/dagster',\n",
+ " 'tensorflow/models',\n",
+ " 'eragonruan/text-detection-ctpn',\n",
+ " 'sherlock-project/sherlock',\n",
+ " 'deepfakes/faceswap',\n",
+ " 'nbei/Deep-Flow-Guided-Video-Inpainting',\n",
+ " 'iovisor/bcc',\n",
+ " 'Roibal/Cryptocurrency-Trading-Bots-Python-Beginner-Advance',\n",
+ " 'NVIDIA/DeepLearningExamples',\n",
+ " 'BlackHC/tfpyth',\n",
+ " 'clovaai/deep-text-recognition-benchmark',\n",
+ " 'tkat0/PyTorch_BlazeFace',\n",
+ " 'OpenMined/PySyft',\n",
+ " 'CoreyMSchafer/code_snippets',\n",
+ " 'public-apis/public-apis',\n",
+ " 'd2l-ai/d2l-zh',\n",
+ " 'apache/airflow',\n",
+ " 'beecost/bee-university',\n",
+ " 'sundowndev/PhoneInfoga']"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#### Display all the image links from Walt Disney wikipedia page"
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "tags = ['h1']\n",
+ "texto = soup_html.find_all(tags,{'class':'h3 lh-condensed'})\n",
+ "\n",
+ "names = [t.text.replace('\\n','').replace(' ','') for t in texto]\n",
+ "names\n",
+ "\n"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/Walt_Disney'"
+ "#### Display all the image links from Walt Disney wikipedia page"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 11,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Cscr-featured.svg/20px-Cscr-featured.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/df/Walt_Disney_1946.JPG/220px-Walt_Disney_1946.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/87/Walt_Disney_1942_signature.svg/150px-Walt_Disney_1942_signature.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/da/Animation_disc.svg/30px-Animation_disc.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/6/69/P_vip.svg/29px-P_vip.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Magic_Kingdom_castle.jpg/24px-Magic_Kingdom_castle.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Video-x-generic.svg/30px-Video-x-generic.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/Flag_of_Los_Angeles_County%2C_California.svg/30px-Flag_of_Los_Angeles_County%2C_California.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/USA_flag_on_television.svg/30px-USA_flag_on_television.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/22px-Commons-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/25px-Wikiquote-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/30px-Wikidata-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//en.wikipedia.org/wiki/Special:CentralAutoLogin/start?type=1x1',\n",
+ " '/static/images/wikimedia-button.png',\n",
+ " '/static/images/poweredby_mediawiki_88x31.png']"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# This is the url you will scrape in this exercise\n",
+ "url = 'https://en.wikipedia.org/wiki/Walt_Disney'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "tags = ['img']\n",
+ "images = soup_html.find_all(tags)\n",
+ "\n",
+ "image_list = [i.attrs['src'] for i in images]\n",
+ "image_list"
]
},
{
@@ -204,21 +4166,189 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://en.wikipedia.org/wiki/Python' "
+ "url ='https://en.wikipedia.org/wiki/Python'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 13,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['https://en.wiktionary.org/wiki/Python',\n",
+ " 'https://en.wiktionary.org/wiki/python',\n",
+ " '/w/index.php?title=Python&action=edit&section=1',\n",
+ " '/wiki/Pythonidae',\n",
+ " '/wiki/Python_(genus)',\n",
+ " '/w/index.php?title=Python&action=edit&section=2',\n",
+ " '/wiki/Python_(mythology)',\n",
+ " '/wiki/Python_of_Aenus',\n",
+ " '/wiki/Python_(painter)',\n",
+ " '/wiki/Python_of_Byzantium',\n",
+ " '/wiki/Python_of_Catana',\n",
+ " '/w/index.php?title=Python&action=edit&section=3',\n",
+ " '/wiki/Python_(film)',\n",
+ " '/wiki/Pythons_2',\n",
+ " '/wiki/Monty_Python',\n",
+ " '/wiki/Python_(Monty)_Pictures',\n",
+ " '/w/index.php?title=Python&action=edit&section=4',\n",
+ " '/wiki/Python_(programming_language)',\n",
+ " '/wiki/CPython',\n",
+ " '/wiki/CMU_Common_Lisp',\n",
+ " '/wiki/PERQ#PERQ_3',\n",
+ " '/w/index.php?title=Python&action=edit&section=5',\n",
+ " '/w/index.php?title=Python&action=edit&section=6',\n",
+ " '/wiki/Python_(Busch_Gardens_Tampa_Bay)',\n",
+ " '/wiki/Python_(Coney_Island,_Cincinnati,_Ohio)',\n",
+ " '/wiki/Python_(Efteling)',\n",
+ " '/w/index.php?title=Python&action=edit&section=7',\n",
+ " '/wiki/Python_(automobile_maker)',\n",
+ " '/wiki/Python_(Ford_prototype)',\n",
+ " '/w/index.php?title=Python&action=edit&section=8',\n",
+ " '/wiki/Colt_Python',\n",
+ " '/wiki/Python_(missile)',\n",
+ " '/wiki/Python_(nuclear_primary)',\n",
+ " '/w/index.php?title=Python&action=edit&section=9',\n",
+ " '/wiki/Python_Anghelo',\n",
+ " '/w/index.php?title=Python&action=edit&section=10',\n",
+ " '/wiki/PYTHON',\n",
+ " '/w/index.php?title=Python&action=edit&section=11',\n",
+ " '/wiki/Cython',\n",
+ " '/wiki/Pyton',\n",
+ " '/wiki/File:Disambig_gray.svg',\n",
+ " '/wiki/Help:Disambiguation',\n",
+ " '//en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Python&namespace=0',\n",
+ " 'https://en.wikipedia.org/w/index.php?title=Python&oldid=905477736',\n",
+ " '/wiki/Help:Category',\n",
+ " '/wiki/Category:Disambiguation_pages',\n",
+ " '/wiki/Category:Disambiguation_pages_with_short_description',\n",
+ " '/wiki/Category:All_article_disambiguation_pages',\n",
+ " '/wiki/Category:All_disambiguation_pages',\n",
+ " '/wiki/Category:Animal_common_name_disambiguation_pages',\n",
+ " '/wiki/Special:MyTalk',\n",
+ " '/wiki/Special:MyContributions',\n",
+ " '/w/index.php?title=Special:CreateAccount&returnto=Python',\n",
+ " '/w/index.php?title=Special:UserLogin&returnto=Python',\n",
+ " '/wiki/Python',\n",
+ " '/wiki/Talk:Python',\n",
+ " '/wiki/Python',\n",
+ " '/w/index.php?title=Python&action=edit',\n",
+ " '/w/index.php?title=Python&action=history',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Portal:Contents',\n",
+ " '/wiki/Portal:Featured_content',\n",
+ " '/wiki/Portal:Current_events',\n",
+ " '/wiki/Special:Random',\n",
+ " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n",
+ " '//shop.wikimedia.org',\n",
+ " '/wiki/Help:Contents',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:Community_portal',\n",
+ " '/wiki/Special:RecentChanges',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n",
+ " '/wiki/Special:WhatLinksHere/Python',\n",
+ " '/wiki/Special:RecentChangesLinked/Python',\n",
+ " '/wiki/Wikipedia:File_Upload_Wizard',\n",
+ " '/wiki/Special:SpecialPages',\n",
+ " '/w/index.php?title=Python&oldid=905477736',\n",
+ " '/w/index.php?title=Python&action=info',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452',\n",
+ " '/w/index.php?title=Special:CiteThisPage&page=Python&id=905477736',\n",
+ " 'https://commons.wikimedia.org/wiki/Category:Python',\n",
+ " '/w/index.php?title=Special:Book&bookcmd=book_creator&referer=Python',\n",
+ " '/w/index.php?title=Special:ElectronPdf&page=Python&action=show-download-screen',\n",
+ " '/w/index.php?title=Python&printable=yes',\n",
+ " 'https://af.wikipedia.org/wiki/Python',\n",
+ " 'https://als.wikipedia.org/wiki/Python',\n",
+ " 'https://az.wikipedia.org/wiki/Python',\n",
+ " 'https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%A6%E0%A7%8D%E0%A6%AC%E0%A7%8D%E0%A6%AF%E0%A6%B0%E0%A7%8D%E0%A6%A5%E0%A6%A4%E0%A6%BE_%E0%A6%A8%E0%A6%BF%E0%A6%B0%E0%A6%B8%E0%A6%A8)',\n",
+ " 'https://be.wikipedia.org/wiki/Python',\n",
+ " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BF%D0%BE%D1%8F%D1%81%D0%BD%D0%B5%D0%BD%D0%B8%D0%B5)',\n",
+ " 'https://cs.wikipedia.org/wiki/Python_(rozcestn%C3%ADk)',\n",
+ " 'https://da.wikipedia.org/wiki/Python',\n",
+ " 'https://de.wikipedia.org/wiki/Python',\n",
+ " 'https://eo.wikipedia.org/wiki/Pitono_(apartigilo)',\n",
+ " 'https://eu.wikipedia.org/wiki/Python_(argipena)',\n",
+ " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86',\n",
+ " 'https://fr.wikipedia.org/wiki/Python',\n",
+ " 'https://ko.wikipedia.org/wiki/%ED%8C%8C%EC%9D%B4%EC%84%A0',\n",
+ " 'https://hr.wikipedia.org/wiki/Python_(razdvojba)',\n",
+ " 'https://io.wikipedia.org/wiki/Pitono',\n",
+ " 'https://id.wikipedia.org/wiki/Python',\n",
+ " 'https://ia.wikipedia.org/wiki/Python_(disambiguation)',\n",
+ " 'https://is.wikipedia.org/wiki/Python_(a%C3%B0greining)',\n",
+ " 'https://it.wikipedia.org/wiki/Python_(disambigua)',\n",
+ " 'https://he.wikipedia.org/wiki/%D7%A4%D7%99%D7%AA%D7%95%D7%9F',\n",
+ " 'https://ka.wikipedia.org/wiki/%E1%83%9E%E1%83%98%E1%83%97%E1%83%9D%E1%83%9C%E1%83%98_(%E1%83%9B%E1%83%A0%E1%83%90%E1%83%95%E1%83%90%E1%83%9A%E1%83%9B%E1%83%9C%E1%83%98%E1%83%A8%E1%83%95%E1%83%9C%E1%83%94%E1%83%9A%E1%83%9D%E1%83%95%E1%83%90%E1%83%9C%E1%83%98)',\n",
+ " 'https://kg.wikipedia.org/wiki/Mboma_(nyoka)',\n",
+ " 'https://la.wikipedia.org/wiki/Python_(discretiva)',\n",
+ " 'https://lb.wikipedia.org/wiki/Python',\n",
+ " 'https://hu.wikipedia.org/wiki/Python_(egy%C3%A9rtelm%C5%B1s%C3%ADt%C5%91_lap)',\n",
+ " 'https://mr.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%AF%E0%A4%A5%E0%A5%89%E0%A4%A8_(%E0%A4%86%E0%A4%9C%E0%A5%8D%E0%A4%9E%E0%A4%BE%E0%A4%B5%E0%A4%B2%E0%A5%80_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE)',\n",
+ " 'https://nl.wikipedia.org/wiki/Python',\n",
+ " 'https://ja.wikipedia.org/wiki/%E3%83%91%E3%82%A4%E3%82%BD%E3%83%B3',\n",
+ " 'https://no.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pl.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pt.wikipedia.org/wiki/Python_(desambigua%C3%A7%C3%A3o)',\n",
+ " 'https://ru.wikipedia.org/wiki/Python_(%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D1%8F)',\n",
+ " 'https://sd.wikipedia.org/wiki/%D8%A7%D8%B1%DA%99',\n",
+ " 'https://sk.wikipedia.org/wiki/Python',\n",
+ " 'https://sh.wikipedia.org/wiki/Python',\n",
+ " 'https://fi.wikipedia.org/wiki/Python',\n",
+ " 'https://sv.wikipedia.org/wiki/Pyton',\n",
+ " 'https://th.wikipedia.org/wiki/%E0%B9%84%E0%B8%9E%E0%B8%97%E0%B8%AD%E0%B8%99',\n",
+ " 'https://tr.wikipedia.org/wiki/Python',\n",
+ " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n",
+ " 'https://ur.wikipedia.org/wiki/%D9%BE%D8%A7%D8%A6%DB%8C%D8%AA%DA%BE%D9%88%D9%86',\n",
+ " 'https://vi.wikipedia.org/wiki/Python',\n",
+ " 'https://zh.wikipedia.org/wiki/Python_(%E6%B6%88%E6%AD%A7%E4%B9%89)',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452#sitelinks-wikipedia',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License',\n",
+ " '//creativecommons.org/licenses/by-sa/3.0/',\n",
+ " '//foundation.wikimedia.org/wiki/Terms_of_Use',\n",
+ " '//foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " '//www.wikimediafoundation.org/',\n",
+ " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:General_disclaimer',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n",
+ " 'https://www.mediawiki.org/wiki/Special:MyLanguage/How_to_contribute',\n",
+ " 'https://foundation.wikimedia.org/wiki/Cookie_statement',\n",
+ " '//en.m.wikipedia.org/w/index.php?title=Python&mobileaction=toggle_view_mobile',\n",
+ " 'https://wikimediafoundation.org/',\n",
+ " 'https://www.mediawiki.org/']"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "wiki_links = soup_html.find_all('a',{'href':True})\n",
+ "\n",
+ "wiki_links_list = [w['href'] for w in wiki_links if not w['href'].startswith('#')]\n",
+ "wiki_links_list\n",
+ "\n",
+ "#wiki_links_list = []\n",
+ "#for i in wiki_links:\n",
+ "# try:\n",
+ "# wiki_links_list.append(i.attrs['href'])\n",
+ "# except:\n",
+ "# pass\n",
+ "#wiki_links"
]
},
{
@@ -230,21 +4360,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Title 2 - The Congress',\n",
+ " 'Title 6 - Domestic Security',\n",
+ " 'Title 7 - Agriculture',\n",
+ " 'Title 15 - Commerce and Trade',\n",
+ " 'Title 16 - Conservation',\n",
+ " 'Title 19 - Customs Duties',\n",
+ " 'Title 21 - Food and Drugs',\n",
+ " 'Title 26 - Internal Revenue Code',\n",
+ " 'Title 34 - Crime Control and Law Enforcement',\n",
+ " \"Title 38 - Veterans' Benefits\",\n",
+ " 'Title 42 - The Public Health and Welfare',\n",
+ " 'Title 43 - Public Lands',\n",
+ " 'Title 48 - Territories and Insular Possessions',\n",
+ " 'Title 49 - Transportation',\n",
+ " 'Title 50 - War and National Defense']"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'http://uscode.house.gov/download/download.shtml'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "url = 'http://uscode.house.gov/download/download.shtml'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "titles = soup_html.find_all('div',{'class':'usctitlechanged'})\n",
+ "\n",
+ "titles_list = [t.text.replace('\\n\\n ','').replace('\\n\\n ','') for t in titles]\n",
+ "titles_list"
]
},
{
@@ -256,21 +4409,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ALEJANDRO ROSALES CASTILLO',\n",
+ " 'YASER ABDEL SAID',\n",
+ " 'JASON DEREK BROWN',\n",
+ " 'RAFAEL CARO-QUINTERO',\n",
+ " 'ALEXIS FLORES',\n",
+ " 'EUGENE PALMER',\n",
+ " 'SANTIAGO VILLALBA MEDEROS',\n",
+ " 'ROBERT WILLIAM FISHER',\n",
+ " 'BHADRESHKUMAR CHETANBHAI PATEL',\n",
+ " 'ARNOLDO JIMENEZ']"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.fbi.gov/wanted/topten'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code "
+ "url = 'https://www.fbi.gov/wanted/topten'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html5lib')\n",
+ "\n",
+ "wanted = soup_html.find_all('h3',{'class':'title'})\n",
+ "wanted_list = [w.text.replace('\\n','') for w in wanted]\n",
+ "wanted_list"
]
},
{
@@ -282,21 +4452,654 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 16,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date & Time UTC \n",
+ " Latitude degrees \n",
+ " Longitude degrees \n",
+ " Region name [+] \n",
+ " \n",
+ " \n",
+ " \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910».1 \n",
+ " 12345678910» \n",
+ " 12345678910».1 \n",
+ " 12345678910» \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2019-07-16 20:52:18.608min ago \n",
+ " 36.07 \n",
+ " N \n",
+ " 117.84 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2019-07-16 20:49:09.111min ago \n",
+ " 36.07 \n",
+ " N \n",
+ " 117.65 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2019-07-16 20:33:52.927min ago \n",
+ " 40.09 \n",
+ " N \n",
+ " 19.91 \n",
+ " E \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2019-07-16 20:31:33.329min ago \n",
+ " 23.45 \n",
+ " S \n",
+ " 66.86 \n",
+ " W \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2019-07-16 20:29:07.731min ago \n",
+ " 35.86 \n",
+ " N \n",
+ " 117.69 \n",
+ " W \n",
+ " 2.2 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2019-07-16 20:23:34.737min ago \n",
+ " 36.07 \n",
+ " N \n",
+ " 117.84 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2019-07-16 20:19:00.142min ago \n",
+ " 33.10 \n",
+ " N \n",
+ " 12.42 \n",
+ " W \n",
+ " 2.8 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2019-07-16 20:17:51.643min ago \n",
+ " 35.55 \n",
+ " N \n",
+ " 117.43 \n",
+ " W \n",
+ " 2.8 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2019-07-16 20:15:36.845min ago \n",
+ " 35.78 \n",
+ " N \n",
+ " 117.62 \n",
+ " W \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2019-07-16 20:11:01.550min ago \n",
+ " 37.82 \n",
+ " N \n",
+ " 121.77 \n",
+ " W \n",
+ " 4.3 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 2019-07-16 19:42:25.91hr 18min ago \n",
+ " 35.61 \n",
+ " N \n",
+ " 117.47 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 2019-07-16 19:35:57.01hr 25min ago \n",
+ " 35.62 \n",
+ " N \n",
+ " 117.45 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 2019-07-16 19:23:50.11hr 37min ago \n",
+ " 36.19 \n",
+ " N \n",
+ " 117.89 \n",
+ " W \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 13 \n",
+ " 2019-07-16 19:20:21.41hr 40min ago \n",
+ " 38.39 \n",
+ " N \n",
+ " 16.94 \n",
+ " E \n",
+ " 3.1 \n",
+ " \n",
+ " \n",
+ " 14 \n",
+ " 2019-07-16 19:16:53.81hr 44min ago \n",
+ " 38.45 \n",
+ " N \n",
+ " 16.91 \n",
+ " E \n",
+ " 2.6 \n",
+ " \n",
+ " \n",
+ " 15 \n",
+ " 2019-07-16 19:16:15.91hr 44min ago \n",
+ " 61.27 \n",
+ " N \n",
+ " 152.44 \n",
+ " W \n",
+ " 2.4 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 2019-07-16 19:11:48.91hr 49min ago \n",
+ " 36.03 \n",
+ " N \n",
+ " 117.87 \n",
+ " W \n",
+ " 2.5 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 2019-07-16 19:04:00.21hr 57min ago \n",
+ " 35.96 \n",
+ " N \n",
+ " 117.71 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 18 \n",
+ " 2019-07-16 19:01:48.01hr 59min ago \n",
+ " 39.56 \n",
+ " N \n",
+ " 67.17 \n",
+ " E \n",
+ " 3.6 \n",
+ " \n",
+ " \n",
+ " 19 \n",
+ " 2019-07-16 19:01:00.82hr 00min ago \n",
+ " 35.68 \n",
+ " N \n",
+ " 117.54 \n",
+ " W \n",
+ " 2.5 \n",
+ " \n",
+ " \n",
+ " 20 \n",
+ " 2019-07-16 18:53:32.02hr 07min ago \n",
+ " 0.68 \n",
+ " S \n",
+ " 126.36 \n",
+ " E \n",
+ " 4.0 \n",
+ " \n",
+ " \n",
+ " 21 \n",
+ " 2019-07-16 18:50:16.22hr 10min ago \n",
+ " 43.62 \n",
+ " N \n",
+ " 75.40 \n",
+ " E \n",
+ " 3.2 \n",
+ " \n",
+ " \n",
+ " 22 \n",
+ " 2019-07-16 18:47:48.92hr 13min ago \n",
+ " 35.59 \n",
+ " N \n",
+ " 117.42 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 23 \n",
+ " 2019-07-16 18:36:26.82hr 24min ago \n",
+ " 35.74 \n",
+ " N \n",
+ " 117.56 \n",
+ " W \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 24 \n",
+ " 2019-07-16 18:22:31.92hr 38min ago \n",
+ " 35.65 \n",
+ " N \n",
+ " 117.52 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 25 \n",
+ " 2019-07-16 18:15:26.52hr 45min ago \n",
+ " 28.47 \n",
+ " N \n",
+ " 56.76 \n",
+ " E \n",
+ " 4.3 \n",
+ " \n",
+ " \n",
+ " 26 \n",
+ " 2019-07-16 18:10:01.02hr 51min ago \n",
+ " 34.41 \n",
+ " S \n",
+ " 150.73 \n",
+ " E \n",
+ " 2.4 \n",
+ " \n",
+ " \n",
+ " 27 \n",
+ " 2019-07-16 17:48:24.03hr 12min ago \n",
+ " 9.93 \n",
+ " S \n",
+ " 118.23 \n",
+ " E \n",
+ " 4.1 \n",
+ " \n",
+ " \n",
+ " 28 \n",
+ " 2019-07-16 17:42:29.93hr 18min ago \n",
+ " 35.67 \n",
+ " N \n",
+ " 117.54 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 29 \n",
+ " 2019-07-16 17:39:43.03hr 21min ago \n",
+ " 0.54 \n",
+ " S \n",
+ " 127.86 \n",
+ " E \n",
+ " 4.1 \n",
+ " \n",
+ " \n",
+ " 30 \n",
+ " 2019-07-16 17:31:56.03hr 29min ago \n",
+ " 35.67 \n",
+ " N \n",
+ " 117.47 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 31 \n",
+ " 2019-07-16 17:05:45.03hr 55min ago \n",
+ " 36.20 \n",
+ " N \n",
+ " 117.90 \n",
+ " W \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 32 \n",
+ " 2019-07-16 17:05:08.03hr 55min ago \n",
+ " 15.40 \n",
+ " N \n",
+ " 94.64 \n",
+ " W \n",
+ " 4.2 \n",
+ " \n",
+ " \n",
+ " 33 \n",
+ " 2019-07-16 17:01:30.83hr 59min ago \n",
+ " 36.10 \n",
+ " N \n",
+ " 117.90 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 34 \n",
+ " 2019-07-16 16:45:56.54hr 15min ago \n",
+ " 0.68 \n",
+ " S \n",
+ " 127.58 \n",
+ " E \n",
+ " 4.7 \n",
+ " \n",
+ " \n",
+ " 35 \n",
+ " 2019-07-16 16:45:55.04hr 15min ago \n",
+ " 18.99 \n",
+ " N \n",
+ " 70.09 \n",
+ " W \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 36 \n",
+ " 2019-07-16 16:43:40.94hr 17min ago \n",
+ " 37.23 \n",
+ " N \n",
+ " 28.27 \n",
+ " E \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 37 \n",
+ " 2019-07-16 16:43:21.74hr 17min ago \n",
+ " 36.03 \n",
+ " N \n",
+ " 117.79 \n",
+ " W \n",
+ " 2.4 \n",
+ " \n",
+ " \n",
+ " 38 \n",
+ " 2019-07-16 16:36:41.54hr 24min ago \n",
+ " 35.96 \n",
+ " N \n",
+ " 117.30 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 39 \n",
+ " 2019-07-16 16:28:38.14hr 32min ago \n",
+ " 35.92 \n",
+ " N \n",
+ " 117.68 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 40 \n",
+ " 2019-07-16 16:27:59.04hr 33min ago \n",
+ " 18.51 \n",
+ " S \n",
+ " 120.55 \n",
+ " E \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 41 \n",
+ " 2019-07-16 16:26:00.54hr 35min ago \n",
+ " 30.57 \n",
+ " N \n",
+ " 141.98 \n",
+ " E \n",
+ " 4.8 \n",
+ " \n",
+ " \n",
+ " 42 \n",
+ " 2019-07-16 16:21:05.14hr 39min ago \n",
+ " 28.45 \n",
+ " N \n",
+ " 56.70 \n",
+ " E \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 43 \n",
+ " 2019-07-16 16:01:04.34hr 59min ago \n",
+ " 62.22 \n",
+ " N \n",
+ " 150.00 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 44 \n",
+ " 2019-07-16 15:59:52.05hr 01min ago \n",
+ " 35.81 \n",
+ " N \n",
+ " 117.63 \n",
+ " W \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 45 \n",
+ " 2019-07-16 15:57:04.45hr 03min ago \n",
+ " 36.10 \n",
+ " N \n",
+ " 117.82 \n",
+ " W \n",
+ " 3.0 \n",
+ " \n",
+ " \n",
+ " 46 \n",
+ " 2019-07-16 15:28:58.05hr 32min ago \n",
+ " 18.26 \n",
+ " S \n",
+ " 120.37 \n",
+ " E \n",
+ " 3.3 \n",
+ " \n",
+ " \n",
+ " 47 \n",
+ " 2019-07-16 15:26:26.65hr 34min ago \n",
+ " 35.68 \n",
+ " N \n",
+ " 117.51 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 48 \n",
+ " 2019-07-16 15:25:42.65hr 35min ago \n",
+ " 37.16 \n",
+ " N \n",
+ " 141.54 \n",
+ " E \n",
+ " 4.6 \n",
+ " \n",
+ " \n",
+ " 49 \n",
+ " 2019-07-16 15:13:22.15hr 47min ago \n",
+ " 35.90 \n",
+ " N \n",
+ " 117.67 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 50 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " 51 \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " \n",
+ " \n",
+ " 52 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date & Time UTC Latitude degrees \\\n",
+ " 12345678910» 12345678910» 12345678910».1 \n",
+ "0 2019-07-16 20:52:18.608min ago 36.07 N \n",
+ "1 2019-07-16 20:49:09.111min ago 36.07 N \n",
+ "2 2019-07-16 20:33:52.927min ago 40.09 N \n",
+ "3 2019-07-16 20:31:33.329min ago 23.45 S \n",
+ "4 2019-07-16 20:29:07.731min ago 35.86 N \n",
+ "5 2019-07-16 20:23:34.737min ago 36.07 N \n",
+ "6 2019-07-16 20:19:00.142min ago 33.10 N \n",
+ "7 2019-07-16 20:17:51.643min ago 35.55 N \n",
+ "8 2019-07-16 20:15:36.845min ago 35.78 N \n",
+ "9 2019-07-16 20:11:01.550min ago 37.82 N \n",
+ "10 2019-07-16 19:42:25.91hr 18min ago 35.61 N \n",
+ "11 2019-07-16 19:35:57.01hr 25min ago 35.62 N \n",
+ "12 2019-07-16 19:23:50.11hr 37min ago 36.19 N \n",
+ "13 2019-07-16 19:20:21.41hr 40min ago 38.39 N \n",
+ "14 2019-07-16 19:16:53.81hr 44min ago 38.45 N \n",
+ "15 2019-07-16 19:16:15.91hr 44min ago 61.27 N \n",
+ "16 2019-07-16 19:11:48.91hr 49min ago 36.03 N \n",
+ "17 2019-07-16 19:04:00.21hr 57min ago 35.96 N \n",
+ "18 2019-07-16 19:01:48.01hr 59min ago 39.56 N \n",
+ "19 2019-07-16 19:01:00.82hr 00min ago 35.68 N \n",
+ "20 2019-07-16 18:53:32.02hr 07min ago 0.68 S \n",
+ "21 2019-07-16 18:50:16.22hr 10min ago 43.62 N \n",
+ "22 2019-07-16 18:47:48.92hr 13min ago 35.59 N \n",
+ "23 2019-07-16 18:36:26.82hr 24min ago 35.74 N \n",
+ "24 2019-07-16 18:22:31.92hr 38min ago 35.65 N \n",
+ "25 2019-07-16 18:15:26.52hr 45min ago 28.47 N \n",
+ "26 2019-07-16 18:10:01.02hr 51min ago 34.41 S \n",
+ "27 2019-07-16 17:48:24.03hr 12min ago 9.93 S \n",
+ "28 2019-07-16 17:42:29.93hr 18min ago 35.67 N \n",
+ "29 2019-07-16 17:39:43.03hr 21min ago 0.54 S \n",
+ "30 2019-07-16 17:31:56.03hr 29min ago 35.67 N \n",
+ "31 2019-07-16 17:05:45.03hr 55min ago 36.20 N \n",
+ "32 2019-07-16 17:05:08.03hr 55min ago 15.40 N \n",
+ "33 2019-07-16 17:01:30.83hr 59min ago 36.10 N \n",
+ "34 2019-07-16 16:45:56.54hr 15min ago 0.68 S \n",
+ "35 2019-07-16 16:45:55.04hr 15min ago 18.99 N \n",
+ "36 2019-07-16 16:43:40.94hr 17min ago 37.23 N \n",
+ "37 2019-07-16 16:43:21.74hr 17min ago 36.03 N \n",
+ "38 2019-07-16 16:36:41.54hr 24min ago 35.96 N \n",
+ "39 2019-07-16 16:28:38.14hr 32min ago 35.92 N \n",
+ "40 2019-07-16 16:27:59.04hr 33min ago 18.51 S \n",
+ "41 2019-07-16 16:26:00.54hr 35min ago 30.57 N \n",
+ "42 2019-07-16 16:21:05.14hr 39min ago 28.45 N \n",
+ "43 2019-07-16 16:01:04.34hr 59min ago 62.22 N \n",
+ "44 2019-07-16 15:59:52.05hr 01min ago 35.81 N \n",
+ "45 2019-07-16 15:57:04.45hr 03min ago 36.10 N \n",
+ "46 2019-07-16 15:28:58.05hr 32min ago 18.26 S \n",
+ "47 2019-07-16 15:26:26.65hr 34min ago 35.68 N \n",
+ "48 2019-07-16 15:25:42.65hr 35min ago 37.16 N \n",
+ "49 2019-07-16 15:13:22.15hr 47min ago 35.90 N \n",
+ "50 NaN NaN NaN \n",
+ "51 12345678910» 12345678910» 12345678910» \n",
+ "52 NaN NaN NaN \n",
+ "\n",
+ " Longitude degrees Region name [+] \n",
+ " 12345678910» 12345678910».1 12345678910» \n",
+ "0 117.84 W 2.0 \n",
+ "1 117.65 W 2.0 \n",
+ "2 19.91 E 2.7 \n",
+ "3 66.86 W 4.5 \n",
+ "4 117.69 W 2.2 \n",
+ "5 117.84 W 2.0 \n",
+ "6 12.42 W 2.8 \n",
+ "7 117.43 W 2.8 \n",
+ "8 117.62 W 4.5 \n",
+ "9 121.77 W 4.3 \n",
+ "10 117.47 W 2.3 \n",
+ "11 117.45 W 2.1 \n",
+ "12 117.89 W 2.7 \n",
+ "13 16.94 E 3.1 \n",
+ "14 16.91 E 2.6 \n",
+ "15 152.44 W 2.4 \n",
+ "16 117.87 W 2.5 \n",
+ "17 117.71 W 2.0 \n",
+ "18 67.17 E 3.6 \n",
+ "19 117.54 W 2.5 \n",
+ "20 126.36 E 4.0 \n",
+ "21 75.40 E 3.2 \n",
+ "22 117.42 W 2.1 \n",
+ "23 117.56 W 2.7 \n",
+ "24 117.52 W 2.0 \n",
+ "25 56.76 E 4.3 \n",
+ "26 150.73 E 2.4 \n",
+ "27 118.23 E 4.1 \n",
+ "28 117.54 W 2.0 \n",
+ "29 127.86 E 4.1 \n",
+ "30 117.47 W 2.3 \n",
+ "31 117.90 W 2.9 \n",
+ "32 94.64 W 4.2 \n",
+ "33 117.90 W 2.1 \n",
+ "34 127.58 E 4.7 \n",
+ "35 70.09 W 2.9 \n",
+ "36 28.27 E 2.7 \n",
+ "37 117.79 W 2.4 \n",
+ "38 117.30 W 2.1 \n",
+ "39 117.68 W 2.3 \n",
+ "40 120.55 E 2.9 \n",
+ "41 141.98 E 4.8 \n",
+ "42 56.70 E 4.5 \n",
+ "43 150.00 W 2.3 \n",
+ "44 117.63 W 2.9 \n",
+ "45 117.82 W 3.0 \n",
+ "46 120.37 E 3.3 \n",
+ "47 117.51 W 2.0 \n",
+ "48 141.54 E 4.6 \n",
+ "49 117.67 W 2.1 \n",
+ "50 NaN NaN NaN \n",
+ "51 12345678910» 12345678910» 12345678910» \n",
+ "52 NaN NaN NaN "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.emsc-csem.org/Earthquake/'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "url = 'https://www.emsc-csem.org/Earthquake/'\n",
+ "html = requests.get(url).content\n",
+ "df_list = pd.read_html(html)\n",
+ "df_list[3][['Date & Time UTC','Latitude degrees','Longitude degrees','Region name [+]']]"
]
},
{
@@ -308,21 +5111,88 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Code Geist Hackathon by SefrWahed \n",
+ " [7/29/2019] \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " The Code Factor \n",
+ " [5/21/2019] \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " TECHFEST MUNICH \n",
+ " [9/6/2019] \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Galileo App Competition \n",
+ " [1/31/2019] \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1\n",
+ "0 Code Geist Hackathon by SefrWahed [7/29/2019]\n",
+ "1 The Code Factor [5/21/2019]\n",
+ "2 TECHFEST MUNICH [9/6/2019]\n",
+ "3 Galileo App Competition [1/31/2019]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://hackevents.co/hackathons'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "import re\n",
+ "\n",
+ "url ='https://hackevents.co/hackathons'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html5lib')\n",
+ "\n",
+ "hacks_titles = soup_html.find_all('h5',{'class':'card-title'})\n",
+ "hacks_fechas = soup_html.find_all('p',{'class':'card-text'})\n",
+ "\n",
+ "hacks_list = [[j.text,re.findall(r'\\d+/\\d+/\\d+',hacks_fechas[i].text)] for i,j in enumerate(hacks_titles)]\n",
+ "#hacks_list\n",
+ "\n",
+ "df = pd.DataFrame(hacks_list)\n",
+ "df"
]
},
{
@@ -342,22 +5212,47 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Ingresa tu usuario de twitter: paolalean\n"
+ ]
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
+ "usuario = input('Ingresa tu usuario de twitter: ')\n",
+ "url = f'https://twitter.com/{usuario}'\n",
+ "#para prueba de cuenta sin tweets usar la de victor: vik54076455"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "307\n"
+ ]
+ }
+ ],
"source": [
- "#your code"
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "tweets = soup_html.find_all('span',{'class':'ProfileNav-value'})\n",
+ "\n",
+ "try:\n",
+ " print(int(tweets[0].text))\n",
+ "except:\n",
+ " print(\"no tienes tweets\")\n"
]
},
{
@@ -377,22 +5272,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Ingresa tu usuario de twitter: paolalean\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Tienes la increíble cantidad de 27 followers\n"
+ ]
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "usuario = input('Ingresa tu usuario de twitter: ')\n",
+ "url = f'https://twitter.com/{usuario}'\n",
+ "\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "tweets = soup_html.find_all('span',{'class':'ProfileNav-value'})\n",
+ "\n",
+ "try:\n",
+ " print(f\"Tienes la increíble cantidad de {int(tweets[1].text)} followers\")\n",
+ "except:\n",
+ " print(\"Sin followers\")"
]
},
{
@@ -404,12 +5315,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[['The Free Encyclopedia', '5892000'],\n",
+ " ['English', '1159000'],\n",
+ " ['日本語', '1532000'],\n",
+ " ['Español', '2323000'],\n",
+ " ['Deutsch', '1556000'],\n",
+ " ['Русский', '2123000'],\n",
+ " ['Français', '1541000'],\n",
+ " ['Italiano', '1065000'],\n",
+ " ['中文', '1010000'],\n",
+ " ['Português', '1346000'],\n",
+ " ['Polski', '1000000'],\n",
+ " ['Wikipedia apps are now available:', '100000']]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.wikipedia.org/'"
+ "url = 'https://www.wikipedia.org/'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "language = soup_html.find_all('strong')\n",
+ "number = soup_html.find_all('bdi')\n",
+ "\n",
+ "lista = [[j.text,''.join(re.findall('\\d+', number[i].text))] for i,j in enumerate(language)]\n",
+ "lista"
]
},
{
@@ -430,21 +5371,40 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 52,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Business and economy',\n",
+ " 'Crime and justice',\n",
+ " 'Defence',\n",
+ " 'Education',\n",
+ " 'Environment',\n",
+ " 'Government',\n",
+ " 'Government spending',\n",
+ " 'Health',\n",
+ " 'Mapping',\n",
+ " 'Society',\n",
+ " 'Towns and cities',\n",
+ " 'Transport']"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://data.gov.uk/'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code "
+ "url = 'https://data.gov.uk/'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "datasets = soup_html.find_all('h2')\n",
+ "datas = [d.text for d in datasets]\n",
+ "datas"
]
},
{
@@ -456,12 +5416,135 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 57,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Rank \n",
+ " Language \n",
+ " Speakers(millions) \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " — \n",
+ " Chinese (macrolanguage) \n",
+ " 1311.0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " Mandarin \n",
+ " 918.0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2 \n",
+ " Spanish \n",
+ " 460.0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 3 \n",
+ " English \n",
+ " 379.0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 4 \n",
+ " Hindi \n",
+ " 341.0 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " — \n",
+ " Arabic (macrolanguage) \n",
+ " 319.0 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 5 \n",
+ " Bengali \n",
+ " 228.0 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 6 \n",
+ " Portuguese \n",
+ " 221.0 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 7 \n",
+ " Russian \n",
+ " 154.0 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 8 \n",
+ " Japanese \n",
+ " 128.0 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " — \n",
+ " Lahnda (macrolanguage) \n",
+ " 119.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Rank Language Speakers(millions)\n",
+ "0 — Chinese (macrolanguage) 1311.0\n",
+ "1 1 Mandarin 918.0\n",
+ "2 2 Spanish 460.0\n",
+ "3 3 English 379.0\n",
+ "4 4 Hindi 341.0\n",
+ "5 — Arabic (macrolanguage) 319.0\n",
+ "6 5 Bengali 228.0\n",
+ "7 6 Portuguese 221.0\n",
+ "8 7 Russian 154.0\n",
+ "9 8 Japanese 128.0\n",
+ "10 — Lahnda (macrolanguage) 119.0"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'"
+ "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'\n",
+ "html = requests.get(url).content\n",
+ "\n",
+ "df_read_tables = pd.read_html(html)\n",
+ "df_read_tables[0][['Rank','Language','Speakers(millions)']].head(11)\n"
]
},
{
@@ -608,5 +5691,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 812f7a4..5a88557 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -40,9 +40,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: bs4 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (0.0.1)\n",
+ "Requirement already satisfied: requests in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (2.22.0)\n",
+ "Requirement already satisfied: pandas in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (0.24.2)\n",
+ "Requirement already satisfied: html5lib in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (1.0.1)\n",
+ "Requirement already satisfied: lxml in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.3.4)\n",
+ "Requirement already satisfied: beautifulsoup4 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from bs4) (4.7.1)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (1.25.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (2019.6.16)\n",
+ "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (2.8)\n",
+ "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (3.0.4)\n",
+ "Requirement already satisfied: numpy>=1.12.0 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from pandas) (1.16.4)\n",
+ "Requirement already satisfied: pytz>=2011k in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from pandas) (2019.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.5.0 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from pandas) (2.8.0)\n",
+ "Requirement already satisfied: webencodings in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from html5lib) (0.5.1)\n",
+ "Requirement already satisfied: six>=1.9 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from html5lib) (1.12.0)\n",
+ "Requirement already satisfied: soupsieve>=1.2 in c:\\users\\paola\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from beautifulsoup4->bs4) (1.9.2)\n"
+ ]
+ }
+ ],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
@@ -52,9 +75,11 @@
"# from lxml.html import fromstring\n",
"# import urllib.request\n",
"# from urllib.request import urlopen\n",
- "# import random\n",
+ "import random\n",
"# import re\n",
- "# import scrapy"
+ "#import scrapy\n",
+ "import sys\n",
+ "!{sys.executable} -m pip install bs4 requests pandas html5lib lxml"
]
},
{
@@ -66,12 +91,3793 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://github.com/trending/developers'"
+ "url = 'https://github.com/trending/developers'\n",
+ "html = requests.get(url).content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "Trending developers on GitHub today · GitHub \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
Trending \n",
+ "
\n",
+ " These are the\n",
+ " developers\n",
+ " building the hot tools today.\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ " 1\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "ericmjl \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " For PyCon, PyData, ODSC, and beyond!\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 2\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "bfred-it \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "📱 Make videos playable inline on the iPhone (prevents automatic fullscreen)\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 3\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "iRoachie \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Material Design implementation of Tabs\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 4\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " wav2c makes arrays out of sounds - useful for Arduino SMAPLER v2 (and for GameBoy Advance) etc\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 5\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "sobolevn \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A curated list of cryptography resources and links.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 6\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "fthomas \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Simple refinement types for Scala\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 7\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "syuilo \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "🌎 A federated blogging platform 🚀 \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 8\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "CompuIves \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 9\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "balloob \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Library for Python 3 to communicate with the Google Chromecast.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 10\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "sdras \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A curated list of awesome actions to use on GitHub\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 11\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "skmp \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Module to export Word, Excel & PowerPoint to PDF. Requires windows and installed office 2013\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 12\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "hovancik \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " break time reminder app\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 13\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "amueller \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A little word cloud generator in Python\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 14\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A process viewer GUI in rust\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 15\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "mholt \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Fast and powerful CSV (delimited text) parser that gracefully handles large files and malformed input\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 16\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " PicoRV32 - A Size-Optimized RISC-V CPU\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 17\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "frenck \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A curated list of amazingly awesome Home Assistant resources.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 18\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "unixorn \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A collection of ZSH frameworks, plugins & themes inspired by the various awesome list collections out there.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 19\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "aneagoie \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Tutorial for udemy course - React\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 20\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "randombit \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Crypto and TLS for Modern C++\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 21\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "rauchg \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Public Slack organizations made easy\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 22\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "tgriesser \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " A query builder for PostgreSQL, MySQL and SQLite3, designed to be flexible, portable, and fun to use.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 23\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "vtjnash \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Posix-compliant file name pattern matching\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 24\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "andersy005 \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Deep Learning Specialization by Andrew Ng on Coursera.\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "\n",
+ " 25\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "djrtwo \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " Gas Costs from Ethereum Yellow Paper\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Follow \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " You can’t perform that action at this time.\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
You signed in with another tab or window. Reload to refresh your session. \n",
+ "
You signed out in another tab or window. Reload to refresh your session. \n",
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "
\n",
+ "\n",
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#printing the content from the Trending Developers\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "soup_html"
]
},
{
@@ -79,9 +3885,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "#your code"
- ]
+ "source": []
},
{
"cell_type": "markdown",
@@ -134,11 +3938,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Eric Ma',\n",
+ " 'Federico Brigante',\n",
+ " 'Kyle Roach',\n",
+ " 'Olle Jonsson',\n",
+ " 'Nikita Sobolev',\n",
+ " 'Frank S. Thomas',\n",
+ " 'syuilo',\n",
+ " 'Ives van Hoorne',\n",
+ " 'Paulus Schoutsen',\n",
+ " 'Sarah Drasner',\n",
+ " 'Stefanos Kornilios Mitsis Poiitidis',\n",
+ " 'Jan Hovancik',\n",
+ " 'Andreas Mueller',\n",
+ " 'Guillaume Gomez',\n",
+ " 'Matt Holt',\n",
+ " 'Clifford Wolf',\n",
+ " 'Franck Nijhof',\n",
+ " 'Joe Block',\n",
+ " 'Andrei Neagoie',\n",
+ " 'Jack Lloyd',\n",
+ " 'Guillermo Rauch',\n",
+ " 'Tim Griesser',\n",
+ " 'Jameson Nash',\n",
+ " 'Anderson Banihirwe',\n",
+ " 'Danny Ryan']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "#To reach the names\n",
+ "tags = ['h1'] #esto es una lista!\n",
+ "texto = soup_html.find_all(tags,{'class':'h3 lh-condensed'})\n",
+ "\n",
+ "#Método largo\n",
+ "#names = []\n",
+ "#for t in texto:\n",
+ "# names.append(t.text)\n",
+ "#names \n",
+ " \n",
+ "#list comprenhension\n",
+ "names = [t.text for t in texto]\n",
+ "names"
]
},
{
@@ -152,7 +4003,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -162,37 +4013,148 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 9,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "b'\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n Trending Python repositories on GitHub today \\xc2\\xb7 GitHub \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n\\n\\n \\n\\n \\n\\n \\n \\n \\n\\n \\n\\n\\n\\n\\n \\n\\n\\n \\n\\n\\n\\n \\n\\n \\n \\n\\n \\n \\n\\n \\n\\n \\n\\n \\n \\n\\n \\n\\n\\n \\n\\n\\n \\n\\n \\n\\n \\n \\n\\n \\n\\n\\n\\n\\n\\n \\n\\n \\n\\n \\n \\n\\n \\n\\n
\\n\\n\\n \\n\\n
\\n\\n\\n\\n \\n
\\n\\n\\n\\n\\n
\\n
Trending \\n
See what the GitHub community is most excited about today.
\\n
\\n
\\n\\n\\n\\n
\\n \\n
\\n
\\n \\n\\n \\n\\n \\n Comprehensive Python Cheatsheet\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Fully automated offensive security framework for reconnaissance and vulnerability scanning\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 1,189\\n \\n\\n
\\n \\n 211\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n\\n
\\n \\n 188 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Ludwig is a toolbox built on top of TensorFlow that allows to train and test deep learning models without the need to write code.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 5,146\\n \\n\\n
\\n \\n 547\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 74 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Official python implementation for \"A Baseline for 3D Multi-Object Tracking\"\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n StyleGAN - Official TensorFlow Implementation\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Dagster is an open-source system for building data applications.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 507\\n \\n\\n
\\n \\n 31\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 24 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Models and examples built with TensorFlow\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n text detection mainly based on ctpn model in tensorflow, id card detect, connectionist text proposal network\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n \\n\\xf0\\x9f\\x94\\x8e Find usernames across social networks\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 5,022\\n \\n\\n
\\n \\n 414\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 273 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Deepfakes Software For All\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n pytorch implementation for \"Deep Flow-Guided Video Inpainting\"(CVPR\\'19)\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n BCC - Tools for BPF-based Linux IO analysis, networking, monitoring, and more\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Crypto Trading Bots in Python - Triangular Arbitrage, Beginner & Advanced Cryptocurrency Trading Bots Written in Python\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Deep Learning Examples\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 930\\n \\n\\n
\\n \\n 276\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 41 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Putting TensorFlow back in PyTorch, back in TensorFlow (differentiable TensorFlow PyTorch adapters).\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Text recognition (optical character recognition) with deep learning methods.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 321\\n \\n\\n
\\n \\n 92\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n\\n
\\n \\n 18 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Unofficial PyTorch implementation of BlazeFace\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n A library for encrypted, privacy preserving deep learning\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 3,542\\n \\n\\n
\\n \\n 790\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n \\n\\n
\\n \\n 8 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n A collective list of free APIs for use in software and web development.\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n \\xe3\\x80\\x8a\\xe5\\x8a\\xa8\\xe6\\x89\\x8b\\xe5\\xad\\xa6\\xe6\\xb7\\xb1\\xe5\\xba\\xa6\\xe5\\xad\\xa6\\xe4\\xb9\\xa0\\xe3\\x80\\x8b\\xef\\xbc\\x9a\\xe9\\x9d\\xa2\\xe5\\x90\\x91\\xe4\\xb8\\xad\\xe6\\x96\\x87\\xe8\\xaf\\xbb\\xe8\\x80\\x85\\xe3\\x80\\x81\\xe8\\x83\\xbd\\xe8\\xbf\\x90\\xe8\\xa1\\x8c\\xe3\\x80\\x81\\xe5\\x8f\\xaf\\xe8\\xae\\xa8\\xe8\\xae\\xba\\xe3\\x80\\x82\\xe8\\x8b\\xb1\\xe6\\x96\\x87\\xe7\\x89\\x88\\xe5\\x8d\\xb3\\xe4\\xbc\\xaf\\xe5\\x85\\x8b\\xe5\\x88\\xa9\\xe2\\x80\\x9c\\xe6\\xb7\\xb1\\xe5\\xba\\xa6\\xe5\\xad\\xa6\\xe4\\xb9\\xa0\\xe5\\xaf\\xbc\\xe8\\xae\\xba\\xef\\xbc\\x88STAT 157\\xef\\xbc\\x89\\xe2\\x80\\x9d\\xe6\\x95\\x99\\xe6\\x9d\\x90\\xe3\\x80\\x82\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Apache Airflow\\n
\\n\\n \\n \\n\\n
\\n \\n\\n \\n\\n \\n Project thu th\\xe1\\xba\\xadp \\xc4\\x91i\\xe1\\xbb\\x83m chu\\xe1\\xba\\xa9n \\xc4\\x91\\xe1\\xba\\xa1i h\\xe1\\xbb\\x8dc 2014 - 2018 v\\xc3\\xa0 ph\\xc3\\xa2n t\\xc3\\xadch d\\xe1\\xbb\\xaf li\\xe1\\xbb\\x87u\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 61\\n \\n\\n
\\n \\n 12\\n \\n\\n \\n
\\n Built by\\n \\n \\n\\n
\\n \\n 5 stars today\\n \\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n Advanced information gathering & OSINT tool for phone numbers.\\n
\\n\\n \\n
\\n \\n Python \\n \\n\\n\\n
\\n \\n 631\\n \\n\\n
\\n \\n 149\\n \\n\\n \\n
\\n Built by\\n \\n \\n \\n \\n \\n\\n
\\n \\n 10 stars today\\n \\n
\\n \\n\\n
\\n
\\n
\\n \\n
\\n\\n
\\n\\n \\n\\n\\n\\n\\n \\n
\\n
\\n \\n \\n You can\\xe2\\x80\\x99t perform that action at this time.\\n
\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n
\\n
You signed in with another tab or window. Reload to refresh your session. \\n
You signed out in another tab or window. Reload to refresh your session. \\n
\\n \\n \\n \\n \\n \\n \\n \\n
\\n \\n \\n \\n\\n \\n\\n
\\n\\n \\n\\n\\n'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "#your code\n",
+ "html = requests.get(url).content\n",
+ "html"
]
},
{
- "cell_type": "markdown",
- "metadata": {},
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['gto76/python-cheatsheet',\n",
+ " 'j3ssie/Osmedeus',\n",
+ " 'tangzixiang0304/Shielded_detector',\n",
+ " 'uber/ludwig',\n",
+ " 'xinshuoweng/AB3DMOT',\n",
+ " 'NVlabs/stylegan',\n",
+ " 'dagster-io/dagster',\n",
+ " 'tensorflow/models',\n",
+ " 'eragonruan/text-detection-ctpn',\n",
+ " 'sherlock-project/sherlock',\n",
+ " 'deepfakes/faceswap',\n",
+ " 'nbei/Deep-Flow-Guided-Video-Inpainting',\n",
+ " 'iovisor/bcc',\n",
+ " 'Roibal/Cryptocurrency-Trading-Bots-Python-Beginner-Advance',\n",
+ " 'NVIDIA/DeepLearningExamples',\n",
+ " 'BlackHC/tfpyth',\n",
+ " 'clovaai/deep-text-recognition-benchmark',\n",
+ " 'tkat0/PyTorch_BlazeFace',\n",
+ " 'OpenMined/PySyft',\n",
+ " 'CoreyMSchafer/code_snippets',\n",
+ " 'public-apis/public-apis',\n",
+ " 'd2l-ai/d2l-zh',\n",
+ " 'apache/airflow',\n",
+ " 'beecost/bee-university',\n",
+ " 'sundowndev/PhoneInfoga']"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#### Display all the image links from Walt Disney wikipedia page"
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "tags = ['h1']\n",
+ "texto = soup_html.find_all(tags,{'class':'h3 lh-condensed'})\n",
+ "\n",
+ "names = [t.text.replace('\\n','').replace(' ','') for t in texto]\n",
+ "names\n",
+ "\n"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/Walt_Disney'"
+ "#### Display all the image links from Walt Disney wikipedia page"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 11,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Cscr-featured.svg/20px-Cscr-featured.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/df/Walt_Disney_1946.JPG/220px-Walt_Disney_1946.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/87/Walt_Disney_1942_signature.svg/150px-Walt_Disney_1942_signature.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/da/Animation_disc.svg/30px-Animation_disc.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/6/69/P_vip.svg/29px-P_vip.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Magic_Kingdom_castle.jpg/24px-Magic_Kingdom_castle.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Video-x-generic.svg/30px-Video-x-generic.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/Flag_of_Los_Angeles_County%2C_California.svg/30px-Flag_of_Los_Angeles_County%2C_California.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/USA_flag_on_television.svg/30px-USA_flag_on_television.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/22px-Commons-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/25px-Wikiquote-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/30px-Wikidata-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//en.wikipedia.org/wiki/Special:CentralAutoLogin/start?type=1x1',\n",
+ " '/static/images/wikimedia-button.png',\n",
+ " '/static/images/poweredby_mediawiki_88x31.png']"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# This is the url you will scrape in this exercise\n",
+ "url = 'https://en.wikipedia.org/wiki/Walt_Disney'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "tags = ['img']\n",
+ "images = soup_html.find_all(tags)\n",
+ "\n",
+ "image_list = [i.attrs['src'] for i in images]\n",
+ "image_list"
]
},
{
@@ -204,21 +4166,189 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://en.wikipedia.org/wiki/Python' "
+ "url ='https://en.wikipedia.org/wiki/Python'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 13,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['https://en.wiktionary.org/wiki/Python',\n",
+ " 'https://en.wiktionary.org/wiki/python',\n",
+ " '/w/index.php?title=Python&action=edit§ion=1',\n",
+ " '/wiki/Pythonidae',\n",
+ " '/wiki/Python_(genus)',\n",
+ " '/w/index.php?title=Python&action=edit§ion=2',\n",
+ " '/wiki/Python_(mythology)',\n",
+ " '/wiki/Python_of_Aenus',\n",
+ " '/wiki/Python_(painter)',\n",
+ " '/wiki/Python_of_Byzantium',\n",
+ " '/wiki/Python_of_Catana',\n",
+ " '/w/index.php?title=Python&action=edit§ion=3',\n",
+ " '/wiki/Python_(film)',\n",
+ " '/wiki/Pythons_2',\n",
+ " '/wiki/Monty_Python',\n",
+ " '/wiki/Python_(Monty)_Pictures',\n",
+ " '/w/index.php?title=Python&action=edit§ion=4',\n",
+ " '/wiki/Python_(programming_language)',\n",
+ " '/wiki/CPython',\n",
+ " '/wiki/CMU_Common_Lisp',\n",
+ " '/wiki/PERQ#PERQ_3',\n",
+ " '/w/index.php?title=Python&action=edit§ion=5',\n",
+ " '/w/index.php?title=Python&action=edit§ion=6',\n",
+ " '/wiki/Python_(Busch_Gardens_Tampa_Bay)',\n",
+ " '/wiki/Python_(Coney_Island,_Cincinnati,_Ohio)',\n",
+ " '/wiki/Python_(Efteling)',\n",
+ " '/w/index.php?title=Python&action=edit§ion=7',\n",
+ " '/wiki/Python_(automobile_maker)',\n",
+ " '/wiki/Python_(Ford_prototype)',\n",
+ " '/w/index.php?title=Python&action=edit§ion=8',\n",
+ " '/wiki/Colt_Python',\n",
+ " '/wiki/Python_(missile)',\n",
+ " '/wiki/Python_(nuclear_primary)',\n",
+ " '/w/index.php?title=Python&action=edit§ion=9',\n",
+ " '/wiki/Python_Anghelo',\n",
+ " '/w/index.php?title=Python&action=edit§ion=10',\n",
+ " '/wiki/PYTHON',\n",
+ " '/w/index.php?title=Python&action=edit§ion=11',\n",
+ " '/wiki/Cython',\n",
+ " '/wiki/Pyton',\n",
+ " '/wiki/File:Disambig_gray.svg',\n",
+ " '/wiki/Help:Disambiguation',\n",
+ " '//en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Python&namespace=0',\n",
+ " 'https://en.wikipedia.org/w/index.php?title=Python&oldid=905477736',\n",
+ " '/wiki/Help:Category',\n",
+ " '/wiki/Category:Disambiguation_pages',\n",
+ " '/wiki/Category:Disambiguation_pages_with_short_description',\n",
+ " '/wiki/Category:All_article_disambiguation_pages',\n",
+ " '/wiki/Category:All_disambiguation_pages',\n",
+ " '/wiki/Category:Animal_common_name_disambiguation_pages',\n",
+ " '/wiki/Special:MyTalk',\n",
+ " '/wiki/Special:MyContributions',\n",
+ " '/w/index.php?title=Special:CreateAccount&returnto=Python',\n",
+ " '/w/index.php?title=Special:UserLogin&returnto=Python',\n",
+ " '/wiki/Python',\n",
+ " '/wiki/Talk:Python',\n",
+ " '/wiki/Python',\n",
+ " '/w/index.php?title=Python&action=edit',\n",
+ " '/w/index.php?title=Python&action=history',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Portal:Contents',\n",
+ " '/wiki/Portal:Featured_content',\n",
+ " '/wiki/Portal:Current_events',\n",
+ " '/wiki/Special:Random',\n",
+ " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n",
+ " '//shop.wikimedia.org',\n",
+ " '/wiki/Help:Contents',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:Community_portal',\n",
+ " '/wiki/Special:RecentChanges',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n",
+ " '/wiki/Special:WhatLinksHere/Python',\n",
+ " '/wiki/Special:RecentChangesLinked/Python',\n",
+ " '/wiki/Wikipedia:File_Upload_Wizard',\n",
+ " '/wiki/Special:SpecialPages',\n",
+ " '/w/index.php?title=Python&oldid=905477736',\n",
+ " '/w/index.php?title=Python&action=info',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452',\n",
+ " '/w/index.php?title=Special:CiteThisPage&page=Python&id=905477736',\n",
+ " 'https://commons.wikimedia.org/wiki/Category:Python',\n",
+ " '/w/index.php?title=Special:Book&bookcmd=book_creator&referer=Python',\n",
+ " '/w/index.php?title=Special:ElectronPdf&page=Python&action=show-download-screen',\n",
+ " '/w/index.php?title=Python&printable=yes',\n",
+ " 'https://af.wikipedia.org/wiki/Python',\n",
+ " 'https://als.wikipedia.org/wiki/Python',\n",
+ " 'https://az.wikipedia.org/wiki/Python',\n",
+ " 'https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%A6%E0%A7%8D%E0%A6%AC%E0%A7%8D%E0%A6%AF%E0%A6%B0%E0%A7%8D%E0%A6%A5%E0%A6%A4%E0%A6%BE_%E0%A6%A8%E0%A6%BF%E0%A6%B0%E0%A6%B8%E0%A6%A8)',\n",
+ " 'https://be.wikipedia.org/wiki/Python',\n",
+ " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BF%D0%BE%D1%8F%D1%81%D0%BD%D0%B5%D0%BD%D0%B8%D0%B5)',\n",
+ " 'https://cs.wikipedia.org/wiki/Python_(rozcestn%C3%ADk)',\n",
+ " 'https://da.wikipedia.org/wiki/Python',\n",
+ " 'https://de.wikipedia.org/wiki/Python',\n",
+ " 'https://eo.wikipedia.org/wiki/Pitono_(apartigilo)',\n",
+ " 'https://eu.wikipedia.org/wiki/Python_(argipena)',\n",
+ " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86',\n",
+ " 'https://fr.wikipedia.org/wiki/Python',\n",
+ " 'https://ko.wikipedia.org/wiki/%ED%8C%8C%EC%9D%B4%EC%84%A0',\n",
+ " 'https://hr.wikipedia.org/wiki/Python_(razdvojba)',\n",
+ " 'https://io.wikipedia.org/wiki/Pitono',\n",
+ " 'https://id.wikipedia.org/wiki/Python',\n",
+ " 'https://ia.wikipedia.org/wiki/Python_(disambiguation)',\n",
+ " 'https://is.wikipedia.org/wiki/Python_(a%C3%B0greining)',\n",
+ " 'https://it.wikipedia.org/wiki/Python_(disambigua)',\n",
+ " 'https://he.wikipedia.org/wiki/%D7%A4%D7%99%D7%AA%D7%95%D7%9F',\n",
+ " 'https://ka.wikipedia.org/wiki/%E1%83%9E%E1%83%98%E1%83%97%E1%83%9D%E1%83%9C%E1%83%98_(%E1%83%9B%E1%83%A0%E1%83%90%E1%83%95%E1%83%90%E1%83%9A%E1%83%9B%E1%83%9C%E1%83%98%E1%83%A8%E1%83%95%E1%83%9C%E1%83%94%E1%83%9A%E1%83%9D%E1%83%95%E1%83%90%E1%83%9C%E1%83%98)',\n",
+ " 'https://kg.wikipedia.org/wiki/Mboma_(nyoka)',\n",
+ " 'https://la.wikipedia.org/wiki/Python_(discretiva)',\n",
+ " 'https://lb.wikipedia.org/wiki/Python',\n",
+ " 'https://hu.wikipedia.org/wiki/Python_(egy%C3%A9rtelm%C5%B1s%C3%ADt%C5%91_lap)',\n",
+ " 'https://mr.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%AF%E0%A4%A5%E0%A5%89%E0%A4%A8_(%E0%A4%86%E0%A4%9C%E0%A5%8D%E0%A4%9E%E0%A4%BE%E0%A4%B5%E0%A4%B2%E0%A5%80_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE)',\n",
+ " 'https://nl.wikipedia.org/wiki/Python',\n",
+ " 'https://ja.wikipedia.org/wiki/%E3%83%91%E3%82%A4%E3%82%BD%E3%83%B3',\n",
+ " 'https://no.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pl.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pt.wikipedia.org/wiki/Python_(desambigua%C3%A7%C3%A3o)',\n",
+ " 'https://ru.wikipedia.org/wiki/Python_(%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D1%8F)',\n",
+ " 'https://sd.wikipedia.org/wiki/%D8%A7%D8%B1%DA%99',\n",
+ " 'https://sk.wikipedia.org/wiki/Python',\n",
+ " 'https://sh.wikipedia.org/wiki/Python',\n",
+ " 'https://fi.wikipedia.org/wiki/Python',\n",
+ " 'https://sv.wikipedia.org/wiki/Pyton',\n",
+ " 'https://th.wikipedia.org/wiki/%E0%B9%84%E0%B8%9E%E0%B8%97%E0%B8%AD%E0%B8%99',\n",
+ " 'https://tr.wikipedia.org/wiki/Python',\n",
+ " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n",
+ " 'https://ur.wikipedia.org/wiki/%D9%BE%D8%A7%D8%A6%DB%8C%D8%AA%DA%BE%D9%88%D9%86',\n",
+ " 'https://vi.wikipedia.org/wiki/Python',\n",
+ " 'https://zh.wikipedia.org/wiki/Python_(%E6%B6%88%E6%AD%A7%E4%B9%89)',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452#sitelinks-wikipedia',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License',\n",
+ " '//creativecommons.org/licenses/by-sa/3.0/',\n",
+ " '//foundation.wikimedia.org/wiki/Terms_of_Use',\n",
+ " '//foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " '//www.wikimediafoundation.org/',\n",
+ " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:General_disclaimer',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n",
+ " 'https://www.mediawiki.org/wiki/Special:MyLanguage/How_to_contribute',\n",
+ " 'https://foundation.wikimedia.org/wiki/Cookie_statement',\n",
+ " '//en.m.wikipedia.org/w/index.php?title=Python&mobileaction=toggle_view_mobile',\n",
+ " 'https://wikimediafoundation.org/',\n",
+ " 'https://www.mediawiki.org/']"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "wiki_links = soup_html.find_all('a',{'href':True})\n",
+ "\n",
+ "wiki_links_list = [w['href'] for w in wiki_links if not w['href'].startswith('#')]\n",
+ "wiki_links_list\n",
+ "\n",
+ "#wiki_links_list = []\n",
+ "#for i in wiki_links:\n",
+ "# try:\n",
+ "# wiki_links_list.append(i.attrs['href'])\n",
+ "# except:\n",
+ "# pass\n",
+ "#wiki_links"
]
},
{
@@ -230,21 +4360,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Title 2 - The Congress',\n",
+ " 'Title 6 - Domestic Security',\n",
+ " 'Title 7 - Agriculture',\n",
+ " 'Title 15 - Commerce and Trade',\n",
+ " 'Title 16 - Conservation',\n",
+ " 'Title 19 - Customs Duties',\n",
+ " 'Title 21 - Food and Drugs',\n",
+ " 'Title 26 - Internal Revenue Code',\n",
+ " 'Title 34 - Crime Control and Law Enforcement',\n",
+ " \"Title 38 - Veterans' Benefits\",\n",
+ " 'Title 42 - The Public Health and Welfare',\n",
+ " 'Title 43 - Public Lands',\n",
+ " 'Title 48 - Territories and Insular Possessions',\n",
+ " 'Title 49 - Transportation',\n",
+ " 'Title 50 - War and National Defense']"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'http://uscode.house.gov/download/download.shtml'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "url = 'http://uscode.house.gov/download/download.shtml'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "titles = soup_html.find_all('div',{'class':'usctitlechanged'})\n",
+ "\n",
+ "titles_list = [t.text.replace('\\n\\n ','').replace('\\n\\n ','') for t in titles]\n",
+ "titles_list"
]
},
{
@@ -256,21 +4409,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ALEJANDRO ROSALES CASTILLO',\n",
+ " 'YASER ABDEL SAID',\n",
+ " 'JASON DEREK BROWN',\n",
+ " 'RAFAEL CARO-QUINTERO',\n",
+ " 'ALEXIS FLORES',\n",
+ " 'EUGENE PALMER',\n",
+ " 'SANTIAGO VILLALBA MEDEROS',\n",
+ " 'ROBERT WILLIAM FISHER',\n",
+ " 'BHADRESHKUMAR CHETANBHAI PATEL',\n",
+ " 'ARNOLDO JIMENEZ']"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.fbi.gov/wanted/topten'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code "
+ "url = 'https://www.fbi.gov/wanted/topten'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html5lib')\n",
+ "\n",
+ "wanted = soup_html.find_all('h3',{'class':'title'})\n",
+ "wanted_list = [w.text.replace('\\n','') for w in wanted]\n",
+ "wanted_list"
]
},
{
@@ -282,21 +4452,654 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 16,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Date & Time UTC \n",
+ " Latitude degrees \n",
+ " Longitude degrees \n",
+ " Region name [+] \n",
+ " \n",
+ " \n",
+ " \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910».1 \n",
+ " 12345678910» \n",
+ " 12345678910».1 \n",
+ " 12345678910» \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2019-07-16 20:52:18.608min ago \n",
+ " 36.07 \n",
+ " N \n",
+ " 117.84 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2019-07-16 20:49:09.111min ago \n",
+ " 36.07 \n",
+ " N \n",
+ " 117.65 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2019-07-16 20:33:52.927min ago \n",
+ " 40.09 \n",
+ " N \n",
+ " 19.91 \n",
+ " E \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2019-07-16 20:31:33.329min ago \n",
+ " 23.45 \n",
+ " S \n",
+ " 66.86 \n",
+ " W \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2019-07-16 20:29:07.731min ago \n",
+ " 35.86 \n",
+ " N \n",
+ " 117.69 \n",
+ " W \n",
+ " 2.2 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2019-07-16 20:23:34.737min ago \n",
+ " 36.07 \n",
+ " N \n",
+ " 117.84 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2019-07-16 20:19:00.142min ago \n",
+ " 33.10 \n",
+ " N \n",
+ " 12.42 \n",
+ " W \n",
+ " 2.8 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2019-07-16 20:17:51.643min ago \n",
+ " 35.55 \n",
+ " N \n",
+ " 117.43 \n",
+ " W \n",
+ " 2.8 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2019-07-16 20:15:36.845min ago \n",
+ " 35.78 \n",
+ " N \n",
+ " 117.62 \n",
+ " W \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2019-07-16 20:11:01.550min ago \n",
+ " 37.82 \n",
+ " N \n",
+ " 121.77 \n",
+ " W \n",
+ " 4.3 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 2019-07-16 19:42:25.91hr 18min ago \n",
+ " 35.61 \n",
+ " N \n",
+ " 117.47 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 2019-07-16 19:35:57.01hr 25min ago \n",
+ " 35.62 \n",
+ " N \n",
+ " 117.45 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 2019-07-16 19:23:50.11hr 37min ago \n",
+ " 36.19 \n",
+ " N \n",
+ " 117.89 \n",
+ " W \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 13 \n",
+ " 2019-07-16 19:20:21.41hr 40min ago \n",
+ " 38.39 \n",
+ " N \n",
+ " 16.94 \n",
+ " E \n",
+ " 3.1 \n",
+ " \n",
+ " \n",
+ " 14 \n",
+ " 2019-07-16 19:16:53.81hr 44min ago \n",
+ " 38.45 \n",
+ " N \n",
+ " 16.91 \n",
+ " E \n",
+ " 2.6 \n",
+ " \n",
+ " \n",
+ " 15 \n",
+ " 2019-07-16 19:16:15.91hr 44min ago \n",
+ " 61.27 \n",
+ " N \n",
+ " 152.44 \n",
+ " W \n",
+ " 2.4 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 2019-07-16 19:11:48.91hr 49min ago \n",
+ " 36.03 \n",
+ " N \n",
+ " 117.87 \n",
+ " W \n",
+ " 2.5 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 2019-07-16 19:04:00.21hr 57min ago \n",
+ " 35.96 \n",
+ " N \n",
+ " 117.71 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 18 \n",
+ " 2019-07-16 19:01:48.01hr 59min ago \n",
+ " 39.56 \n",
+ " N \n",
+ " 67.17 \n",
+ " E \n",
+ " 3.6 \n",
+ " \n",
+ " \n",
+ " 19 \n",
+ " 2019-07-16 19:01:00.82hr 00min ago \n",
+ " 35.68 \n",
+ " N \n",
+ " 117.54 \n",
+ " W \n",
+ " 2.5 \n",
+ " \n",
+ " \n",
+ " 20 \n",
+ " 2019-07-16 18:53:32.02hr 07min ago \n",
+ " 0.68 \n",
+ " S \n",
+ " 126.36 \n",
+ " E \n",
+ " 4.0 \n",
+ " \n",
+ " \n",
+ " 21 \n",
+ " 2019-07-16 18:50:16.22hr 10min ago \n",
+ " 43.62 \n",
+ " N \n",
+ " 75.40 \n",
+ " E \n",
+ " 3.2 \n",
+ " \n",
+ " \n",
+ " 22 \n",
+ " 2019-07-16 18:47:48.92hr 13min ago \n",
+ " 35.59 \n",
+ " N \n",
+ " 117.42 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 23 \n",
+ " 2019-07-16 18:36:26.82hr 24min ago \n",
+ " 35.74 \n",
+ " N \n",
+ " 117.56 \n",
+ " W \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 24 \n",
+ " 2019-07-16 18:22:31.92hr 38min ago \n",
+ " 35.65 \n",
+ " N \n",
+ " 117.52 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 25 \n",
+ " 2019-07-16 18:15:26.52hr 45min ago \n",
+ " 28.47 \n",
+ " N \n",
+ " 56.76 \n",
+ " E \n",
+ " 4.3 \n",
+ " \n",
+ " \n",
+ " 26 \n",
+ " 2019-07-16 18:10:01.02hr 51min ago \n",
+ " 34.41 \n",
+ " S \n",
+ " 150.73 \n",
+ " E \n",
+ " 2.4 \n",
+ " \n",
+ " \n",
+ " 27 \n",
+ " 2019-07-16 17:48:24.03hr 12min ago \n",
+ " 9.93 \n",
+ " S \n",
+ " 118.23 \n",
+ " E \n",
+ " 4.1 \n",
+ " \n",
+ " \n",
+ " 28 \n",
+ " 2019-07-16 17:42:29.93hr 18min ago \n",
+ " 35.67 \n",
+ " N \n",
+ " 117.54 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 29 \n",
+ " 2019-07-16 17:39:43.03hr 21min ago \n",
+ " 0.54 \n",
+ " S \n",
+ " 127.86 \n",
+ " E \n",
+ " 4.1 \n",
+ " \n",
+ " \n",
+ " 30 \n",
+ " 2019-07-16 17:31:56.03hr 29min ago \n",
+ " 35.67 \n",
+ " N \n",
+ " 117.47 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 31 \n",
+ " 2019-07-16 17:05:45.03hr 55min ago \n",
+ " 36.20 \n",
+ " N \n",
+ " 117.90 \n",
+ " W \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 32 \n",
+ " 2019-07-16 17:05:08.03hr 55min ago \n",
+ " 15.40 \n",
+ " N \n",
+ " 94.64 \n",
+ " W \n",
+ " 4.2 \n",
+ " \n",
+ " \n",
+ " 33 \n",
+ " 2019-07-16 17:01:30.83hr 59min ago \n",
+ " 36.10 \n",
+ " N \n",
+ " 117.90 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 34 \n",
+ " 2019-07-16 16:45:56.54hr 15min ago \n",
+ " 0.68 \n",
+ " S \n",
+ " 127.58 \n",
+ " E \n",
+ " 4.7 \n",
+ " \n",
+ " \n",
+ " 35 \n",
+ " 2019-07-16 16:45:55.04hr 15min ago \n",
+ " 18.99 \n",
+ " N \n",
+ " 70.09 \n",
+ " W \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 36 \n",
+ " 2019-07-16 16:43:40.94hr 17min ago \n",
+ " 37.23 \n",
+ " N \n",
+ " 28.27 \n",
+ " E \n",
+ " 2.7 \n",
+ " \n",
+ " \n",
+ " 37 \n",
+ " 2019-07-16 16:43:21.74hr 17min ago \n",
+ " 36.03 \n",
+ " N \n",
+ " 117.79 \n",
+ " W \n",
+ " 2.4 \n",
+ " \n",
+ " \n",
+ " 38 \n",
+ " 2019-07-16 16:36:41.54hr 24min ago \n",
+ " 35.96 \n",
+ " N \n",
+ " 117.30 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 39 \n",
+ " 2019-07-16 16:28:38.14hr 32min ago \n",
+ " 35.92 \n",
+ " N \n",
+ " 117.68 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 40 \n",
+ " 2019-07-16 16:27:59.04hr 33min ago \n",
+ " 18.51 \n",
+ " S \n",
+ " 120.55 \n",
+ " E \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 41 \n",
+ " 2019-07-16 16:26:00.54hr 35min ago \n",
+ " 30.57 \n",
+ " N \n",
+ " 141.98 \n",
+ " E \n",
+ " 4.8 \n",
+ " \n",
+ " \n",
+ " 42 \n",
+ " 2019-07-16 16:21:05.14hr 39min ago \n",
+ " 28.45 \n",
+ " N \n",
+ " 56.70 \n",
+ " E \n",
+ " 4.5 \n",
+ " \n",
+ " \n",
+ " 43 \n",
+ " 2019-07-16 16:01:04.34hr 59min ago \n",
+ " 62.22 \n",
+ " N \n",
+ " 150.00 \n",
+ " W \n",
+ " 2.3 \n",
+ " \n",
+ " \n",
+ " 44 \n",
+ " 2019-07-16 15:59:52.05hr 01min ago \n",
+ " 35.81 \n",
+ " N \n",
+ " 117.63 \n",
+ " W \n",
+ " 2.9 \n",
+ " \n",
+ " \n",
+ " 45 \n",
+ " 2019-07-16 15:57:04.45hr 03min ago \n",
+ " 36.10 \n",
+ " N \n",
+ " 117.82 \n",
+ " W \n",
+ " 3.0 \n",
+ " \n",
+ " \n",
+ " 46 \n",
+ " 2019-07-16 15:28:58.05hr 32min ago \n",
+ " 18.26 \n",
+ " S \n",
+ " 120.37 \n",
+ " E \n",
+ " 3.3 \n",
+ " \n",
+ " \n",
+ " 47 \n",
+ " 2019-07-16 15:26:26.65hr 34min ago \n",
+ " 35.68 \n",
+ " N \n",
+ " 117.51 \n",
+ " W \n",
+ " 2.0 \n",
+ " \n",
+ " \n",
+ " 48 \n",
+ " 2019-07-16 15:25:42.65hr 35min ago \n",
+ " 37.16 \n",
+ " N \n",
+ " 141.54 \n",
+ " E \n",
+ " 4.6 \n",
+ " \n",
+ " \n",
+ " 49 \n",
+ " 2019-07-16 15:13:22.15hr 47min ago \n",
+ " 35.90 \n",
+ " N \n",
+ " 117.67 \n",
+ " W \n",
+ " 2.1 \n",
+ " \n",
+ " \n",
+ " 50 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " 51 \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " 12345678910» \n",
+ " \n",
+ " \n",
+ " 52 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date & Time UTC Latitude degrees \\\n",
+ " 12345678910» 12345678910» 12345678910».1 \n",
+ "0 2019-07-16 20:52:18.608min ago 36.07 N \n",
+ "1 2019-07-16 20:49:09.111min ago 36.07 N \n",
+ "2 2019-07-16 20:33:52.927min ago 40.09 N \n",
+ "3 2019-07-16 20:31:33.329min ago 23.45 S \n",
+ "4 2019-07-16 20:29:07.731min ago 35.86 N \n",
+ "5 2019-07-16 20:23:34.737min ago 36.07 N \n",
+ "6 2019-07-16 20:19:00.142min ago 33.10 N \n",
+ "7 2019-07-16 20:17:51.643min ago 35.55 N \n",
+ "8 2019-07-16 20:15:36.845min ago 35.78 N \n",
+ "9 2019-07-16 20:11:01.550min ago 37.82 N \n",
+ "10 2019-07-16 19:42:25.91hr 18min ago 35.61 N \n",
+ "11 2019-07-16 19:35:57.01hr 25min ago 35.62 N \n",
+ "12 2019-07-16 19:23:50.11hr 37min ago 36.19 N \n",
+ "13 2019-07-16 19:20:21.41hr 40min ago 38.39 N \n",
+ "14 2019-07-16 19:16:53.81hr 44min ago 38.45 N \n",
+ "15 2019-07-16 19:16:15.91hr 44min ago 61.27 N \n",
+ "16 2019-07-16 19:11:48.91hr 49min ago 36.03 N \n",
+ "17 2019-07-16 19:04:00.21hr 57min ago 35.96 N \n",
+ "18 2019-07-16 19:01:48.01hr 59min ago 39.56 N \n",
+ "19 2019-07-16 19:01:00.82hr 00min ago 35.68 N \n",
+ "20 2019-07-16 18:53:32.02hr 07min ago 0.68 S \n",
+ "21 2019-07-16 18:50:16.22hr 10min ago 43.62 N \n",
+ "22 2019-07-16 18:47:48.92hr 13min ago 35.59 N \n",
+ "23 2019-07-16 18:36:26.82hr 24min ago 35.74 N \n",
+ "24 2019-07-16 18:22:31.92hr 38min ago 35.65 N \n",
+ "25 2019-07-16 18:15:26.52hr 45min ago 28.47 N \n",
+ "26 2019-07-16 18:10:01.02hr 51min ago 34.41 S \n",
+ "27 2019-07-16 17:48:24.03hr 12min ago 9.93 S \n",
+ "28 2019-07-16 17:42:29.93hr 18min ago 35.67 N \n",
+ "29 2019-07-16 17:39:43.03hr 21min ago 0.54 S \n",
+ "30 2019-07-16 17:31:56.03hr 29min ago 35.67 N \n",
+ "31 2019-07-16 17:05:45.03hr 55min ago 36.20 N \n",
+ "32 2019-07-16 17:05:08.03hr 55min ago 15.40 N \n",
+ "33 2019-07-16 17:01:30.83hr 59min ago 36.10 N \n",
+ "34 2019-07-16 16:45:56.54hr 15min ago 0.68 S \n",
+ "35 2019-07-16 16:45:55.04hr 15min ago 18.99 N \n",
+ "36 2019-07-16 16:43:40.94hr 17min ago 37.23 N \n",
+ "37 2019-07-16 16:43:21.74hr 17min ago 36.03 N \n",
+ "38 2019-07-16 16:36:41.54hr 24min ago 35.96 N \n",
+ "39 2019-07-16 16:28:38.14hr 32min ago 35.92 N \n",
+ "40 2019-07-16 16:27:59.04hr 33min ago 18.51 S \n",
+ "41 2019-07-16 16:26:00.54hr 35min ago 30.57 N \n",
+ "42 2019-07-16 16:21:05.14hr 39min ago 28.45 N \n",
+ "43 2019-07-16 16:01:04.34hr 59min ago 62.22 N \n",
+ "44 2019-07-16 15:59:52.05hr 01min ago 35.81 N \n",
+ "45 2019-07-16 15:57:04.45hr 03min ago 36.10 N \n",
+ "46 2019-07-16 15:28:58.05hr 32min ago 18.26 S \n",
+ "47 2019-07-16 15:26:26.65hr 34min ago 35.68 N \n",
+ "48 2019-07-16 15:25:42.65hr 35min ago 37.16 N \n",
+ "49 2019-07-16 15:13:22.15hr 47min ago 35.90 N \n",
+ "50 NaN NaN NaN \n",
+ "51 12345678910» 12345678910» 12345678910» \n",
+ "52 NaN NaN NaN \n",
+ "\n",
+ " Longitude degrees Region name [+] \n",
+ " 12345678910» 12345678910».1 12345678910» \n",
+ "0 117.84 W 2.0 \n",
+ "1 117.65 W 2.0 \n",
+ "2 19.91 E 2.7 \n",
+ "3 66.86 W 4.5 \n",
+ "4 117.69 W 2.2 \n",
+ "5 117.84 W 2.0 \n",
+ "6 12.42 W 2.8 \n",
+ "7 117.43 W 2.8 \n",
+ "8 117.62 W 4.5 \n",
+ "9 121.77 W 4.3 \n",
+ "10 117.47 W 2.3 \n",
+ "11 117.45 W 2.1 \n",
+ "12 117.89 W 2.7 \n",
+ "13 16.94 E 3.1 \n",
+ "14 16.91 E 2.6 \n",
+ "15 152.44 W 2.4 \n",
+ "16 117.87 W 2.5 \n",
+ "17 117.71 W 2.0 \n",
+ "18 67.17 E 3.6 \n",
+ "19 117.54 W 2.5 \n",
+ "20 126.36 E 4.0 \n",
+ "21 75.40 E 3.2 \n",
+ "22 117.42 W 2.1 \n",
+ "23 117.56 W 2.7 \n",
+ "24 117.52 W 2.0 \n",
+ "25 56.76 E 4.3 \n",
+ "26 150.73 E 2.4 \n",
+ "27 118.23 E 4.1 \n",
+ "28 117.54 W 2.0 \n",
+ "29 127.86 E 4.1 \n",
+ "30 117.47 W 2.3 \n",
+ "31 117.90 W 2.9 \n",
+ "32 94.64 W 4.2 \n",
+ "33 117.90 W 2.1 \n",
+ "34 127.58 E 4.7 \n",
+ "35 70.09 W 2.9 \n",
+ "36 28.27 E 2.7 \n",
+ "37 117.79 W 2.4 \n",
+ "38 117.30 W 2.1 \n",
+ "39 117.68 W 2.3 \n",
+ "40 120.55 E 2.9 \n",
+ "41 141.98 E 4.8 \n",
+ "42 56.70 E 4.5 \n",
+ "43 150.00 W 2.3 \n",
+ "44 117.63 W 2.9 \n",
+ "45 117.82 W 3.0 \n",
+ "46 120.37 E 3.3 \n",
+ "47 117.51 W 2.0 \n",
+ "48 141.54 E 4.6 \n",
+ "49 117.67 W 2.1 \n",
+ "50 NaN NaN NaN \n",
+ "51 12345678910» 12345678910» 12345678910» \n",
+ "52 NaN NaN NaN "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.emsc-csem.org/Earthquake/'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "url = 'https://www.emsc-csem.org/Earthquake/'\n",
+ "html = requests.get(url).content\n",
+ "df_list = pd.read_html(html)\n",
+ "df_list[3][['Date & Time UTC','Latitude degrees','Longitude degrees','Region name [+]']]"
]
},
{
@@ -308,21 +5111,88 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Code Geist Hackathon by SefrWahed \n",
+ " [7/29/2019] \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " The Code Factor \n",
+ " [5/21/2019] \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " TECHFEST MUNICH \n",
+ " [9/6/2019] \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Galileo App Competition \n",
+ " [1/31/2019] \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1\n",
+ "0 Code Geist Hackathon by SefrWahed [7/29/2019]\n",
+ "1 The Code Factor [5/21/2019]\n",
+ "2 TECHFEST MUNICH [9/6/2019]\n",
+ "3 Galileo App Competition [1/31/2019]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://hackevents.co/hackathons'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "import re\n",
+ "\n",
+ "url ='https://hackevents.co/hackathons'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html5lib')\n",
+ "\n",
+ "hacks_titles = soup_html.find_all('h5',{'class':'card-title'})\n",
+ "hacks_fechas = soup_html.find_all('p',{'class':'card-text'})\n",
+ "\n",
+ "hacks_list = [[j.text,re.findall(r'\\d+/\\d+/\\d+',hacks_fechas[i].text)] for i,j in enumerate(hacks_titles)]\n",
+ "#hacks_list\n",
+ "\n",
+ "df = pd.DataFrame(hacks_list)\n",
+ "df"
]
},
{
@@ -342,22 +5212,47 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Ingresa tu usuario de twitter: paolalean\n"
+ ]
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
+ "usuario = input('Ingresa tu usuario de twitter: ')\n",
+ "url = f'https://twitter.com/{usuario}'\n",
+ "#para prueba de cuenta sin tweets usar la de victor: vik54076455"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "307\n"
+ ]
+ }
+ ],
"source": [
- "#your code"
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "tweets = soup_html.find_all('span',{'class':'ProfileNav-value'})\n",
+ "\n",
+ "try:\n",
+ " print(int(tweets[0].text))\n",
+ "except:\n",
+ " print(\"no tienes tweets\")\n"
]
},
{
@@ -377,22 +5272,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Ingresa tu usuario de twitter: paolalean\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Tienes la increíble cantidad de 27 followers\n"
+ ]
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code"
+ "usuario = input('Ingresa tu usuario de twitter: ')\n",
+ "url = f'https://twitter.com/{usuario}'\n",
+ "\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "tweets = soup_html.find_all('span',{'class':'ProfileNav-value'})\n",
+ "\n",
+ "try:\n",
+ " print(f\"Tienes la increíble cantidad de {int(tweets[1].text)} followers\")\n",
+ "except:\n",
+ " print(\"Sin followers\")"
]
},
{
@@ -404,12 +5315,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[['The Free Encyclopedia', '5892000'],\n",
+ " ['English', '1159000'],\n",
+ " ['日本語', '1532000'],\n",
+ " ['Español', '2323000'],\n",
+ " ['Deutsch', '1556000'],\n",
+ " ['Русский', '2123000'],\n",
+ " ['Français', '1541000'],\n",
+ " ['Italiano', '1065000'],\n",
+ " ['中文', '1010000'],\n",
+ " ['Português', '1346000'],\n",
+ " ['Polski', '1000000'],\n",
+ " ['Wikipedia apps are now available:', '100000']]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.wikipedia.org/'"
+ "url = 'https://www.wikipedia.org/'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "language = soup_html.find_all('strong')\n",
+ "number = soup_html.find_all('bdi')\n",
+ "\n",
+ "lista = [[j.text,''.join(re.findall('\\d+', number[i].text))] for i,j in enumerate(language)]\n",
+ "lista"
]
},
{
@@ -430,21 +5371,40 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 52,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Business and economy',\n",
+ " 'Crime and justice',\n",
+ " 'Defence',\n",
+ " 'Education',\n",
+ " 'Environment',\n",
+ " 'Government',\n",
+ " 'Government spending',\n",
+ " 'Health',\n",
+ " 'Mapping',\n",
+ " 'Society',\n",
+ " 'Towns and cities',\n",
+ " 'Transport']"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://data.gov.uk/'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code "
+ "url = 'https://data.gov.uk/'\n",
+ "html = requests.get(url).content\n",
+ "soup_html = BeautifulSoup(html,'html')\n",
+ "\n",
+ "datasets = soup_html.find_all('h2')\n",
+ "datas = [d.text for d in datasets]\n",
+ "datas"
]
},
{
@@ -456,12 +5416,135 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 57,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Rank \n",
+ " Language \n",
+ " Speakers(millions) \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " — \n",
+ " Chinese (macrolanguage) \n",
+ " 1311.0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " Mandarin \n",
+ " 918.0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2 \n",
+ " Spanish \n",
+ " 460.0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 3 \n",
+ " English \n",
+ " 379.0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 4 \n",
+ " Hindi \n",
+ " 341.0 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " — \n",
+ " Arabic (macrolanguage) \n",
+ " 319.0 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 5 \n",
+ " Bengali \n",
+ " 228.0 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 6 \n",
+ " Portuguese \n",
+ " 221.0 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 7 \n",
+ " Russian \n",
+ " 154.0 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 8 \n",
+ " Japanese \n",
+ " 128.0 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " — \n",
+ " Lahnda (macrolanguage) \n",
+ " 119.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Rank Language Speakers(millions)\n",
+ "0 — Chinese (macrolanguage) 1311.0\n",
+ "1 1 Mandarin 918.0\n",
+ "2 2 Spanish 460.0\n",
+ "3 3 English 379.0\n",
+ "4 4 Hindi 341.0\n",
+ "5 — Arabic (macrolanguage) 319.0\n",
+ "6 5 Bengali 228.0\n",
+ "7 6 Portuguese 221.0\n",
+ "8 7 Russian 154.0\n",
+ "9 8 Japanese 128.0\n",
+ "10 — Lahnda (macrolanguage) 119.0"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'"
+ "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'\n",
+ "html = requests.get(url).content\n",
+ "\n",
+ "df_read_tables = pd.read_html(html)\n",
+ "df_read_tables[0][['Rank','Language','Speakers(millions)']].head(11)\n"
]
},
{
@@ -608,5 +5691,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}