diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
index 812f7a4..4138883 100644
--- a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -53,7 +53,7 @@
"# import urllib.request\n",
"# from urllib.request import urlopen\n",
"# import random\n",
- "# import re\n",
+ "import re\n",
"# import scrapy"
]
},
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -76,11 +76,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "# Fetch the page and parse it; naming the parser keeps the tree identical across machines\n",
+ "html = requests.get(url, timeout=30).content\n",
+ "TrenDev = BeautifulSoup(html, 'html.parser')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "#TrenDev  # commented out because it printed too much output"
]
},
{
@@ -134,11 +147,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "# Grab every <h1> tag that carries the 'h3' class\n",
+ "user = TrenDev.find_all('h1', class_='h3')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Render the matched tags as a single string\n",
+ "userlist = '{}'.format(user)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Frank Denis', 'Vladimir Mihailenco', 'Henrique Dias', 'Kyle Roach', 'Erik Rasmussen', 'Franck Nijhof', 'Robert Wagner', 'François Beaufort', 'Pascal Birchler', 'Francois Zaninotto', 'Olle Jonsson', 'Samuel Reed', 'Robert Mosolgo', 'William Durand', 'Felix Rieseberg', 'Felix Angelov', 'Artur Arseniev', 'Michael Skelton', 'Jack Lloyd', 'Federico Brigante', 'Raphaël Benitte', 'Richard Littauer', 'Steven Macenski']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Read each name straight from its tag and normalise the whitespace.\n",
+ "# A fixed two-word regex would skip names of any other shape.\n",
+ "user_names = [' '.join(tag.get_text().split()) for tag in user]\n",
+ "print(user_names)"
]
},
{
@@ -152,21 +192,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://github.com/trending/python?since=daily'"
+ "url2 = 'https://github.com/trending/python?since=daily'\n",
+ "html2 = requests.get(url2, timeout=30).content  # time out instead of hanging forever\n",
+ "TrenRepo = BeautifulSoup(html2, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "# Grab every <h1> tag that carries the 'h3' class\n",
+ "repo = TrenRepo.find_all('h1', class_='h3')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'[
, , , , , , , , , , , , , , , , , , , , , , , , ]'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Render the matched tags as a single string and echo it as the cell result\n",
+ "repostr = '{}'.format(repo)\n",
+ "repostr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Osmedeus', 'Shielded_detector', 'ludwig', 'AB3DMOT', 'stylegan', 'dagster', 'models', 'sherlock', 'faceswap', 'bcc', 'DeepLearningExamples', 'tfpyth', 'PyTorch_BlazeFace', 'PySyft', 'code_snippets', 'airflow', 'PhoneInfoga']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Take the last path segment of each repository link. A \\w*-only regex\n",
+ "# drops names containing '-' or '.', which \\w does not match.\n",
+ "anchors = [h.find('a', href=True) for h in repo]\n",
+ "reponame = [a['href'].rstrip('/').rsplit('/', 1)[-1] for a in anchors if a is not None]\n",
+ "\n",
+ "print(reponame)"
]
},
{
@@ -178,21 +260,56 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/Walt_Disney'"
+ "url3 = 'https://en.wikipedia.org/wiki/Walt_Disney'\n",
+ "html3 = requests.get(url3, timeout=30).content  # time out instead of hanging forever\n",
+ "image = BeautifulSoup(html3, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 15,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['http://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg']"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Collect the image URL from every 'thumbinner' box on the page\n",
+ "links = image.find_all('div', {'class' : 'thumbinner'})\n",
+ "dire = 'http:'  # the src values are protocol-relative ('//upload...'), so prefix a scheme\n",
+ "\n",
+ "# Skip boxes that hold no <img> instead of raising TypeError on None['src']\n",
+ "linkimage = [dire + img['src'] for img in (box.find('img') for box in links) if img is not None]\n",
+ "linkimage"
]
},
{
@@ -204,21 +321,97 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://en.wikipedia.org/wiki/Python' "
+ "url4 = 'https://en.wikipedia.org/wiki/Python'\n",
+ "html4 = requests.get(url4, timeout=30).content  # time out instead of hanging forever\n",
+ "link = BeautifulSoup(html4, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['https://en.wiktionary.org/wiki/Python',\n",
+ " 'https://en.wiktionary.org/wiki/python',\n",
+ " 'https://en.wikipedia.org/w/index.php?title=Python&oldid=905477736',\n",
+ " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452',\n",
+ " 'https://commons.wikimedia.org/wiki/Category:Python',\n",
+ " 'https://af.wikipedia.org/wiki/Python',\n",
+ " 'https://als.wikipedia.org/wiki/Python',\n",
+ " 'https://az.wikipedia.org/wiki/Python',\n",
+ " 'https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%A6%E0%A7%8D%E0%A6%AC%E0%A7%8D%E0%A6%AF%E0%A6%B0%E0%A7%8D%E0%A6%A5%E0%A6%A4%E0%A6%BE_%E0%A6%A8%E0%A6%BF%E0%A6%B0%E0%A6%B8%E0%A6%A8)',\n",
+ " 'https://be.wikipedia.org/wiki/Python',\n",
+ " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BF%D0%BE%D1%8F%D1%81%D0%BD%D0%B5%D0%BD%D0%B8%D0%B5)',\n",
+ " 'https://cs.wikipedia.org/wiki/Python_(rozcestn%C3%ADk)',\n",
+ " 'https://da.wikipedia.org/wiki/Python',\n",
+ " 'https://de.wikipedia.org/wiki/Python',\n",
+ " 'https://eo.wikipedia.org/wiki/Pitono_(apartigilo)',\n",
+ " 'https://eu.wikipedia.org/wiki/Python_(argipena)',\n",
+ " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86',\n",
+ " 'https://fr.wikipedia.org/wiki/Python',\n",
+ " 'https://ko.wikipedia.org/wiki/%ED%8C%8C%EC%9D%B4%EC%84%A0',\n",
+ " 'https://hr.wikipedia.org/wiki/Python_(razdvojba)',\n",
+ " 'https://io.wikipedia.org/wiki/Pitono',\n",
+ " 'https://id.wikipedia.org/wiki/Python',\n",
+ " 'https://ia.wikipedia.org/wiki/Python_(disambiguation)',\n",
+ " 'https://is.wikipedia.org/wiki/Python_(a%C3%B0greining)',\n",
+ " 'https://it.wikipedia.org/wiki/Python_(disambigua)',\n",
+ " 'https://he.wikipedia.org/wiki/%D7%A4%D7%99%D7%AA%D7%95%D7%9F',\n",
+ " 'https://ka.wikipedia.org/wiki/%E1%83%9E%E1%83%98%E1%83%97%E1%83%9D%E1%83%9C%E1%83%98_(%E1%83%9B%E1%83%A0%E1%83%90%E1%83%95%E1%83%90%E1%83%9A%E1%83%9B%E1%83%9C%E1%83%98%E1%83%A8%E1%83%95%E1%83%9C%E1%83%94%E1%83%9A%E1%83%9D%E1%83%95%E1%83%90%E1%83%9C%E1%83%98)',\n",
+ " 'https://kg.wikipedia.org/wiki/Mboma_(nyoka)',\n",
+ " 'https://la.wikipedia.org/wiki/Python_(discretiva)',\n",
+ " 'https://lb.wikipedia.org/wiki/Python',\n",
+ " 'https://hu.wikipedia.org/wiki/Python_(egy%C3%A9rtelm%C5%B1s%C3%ADt%C5%91_lap)',\n",
+ " 'https://mr.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%AF%E0%A4%A5%E0%A5%89%E0%A4%A8_(%E0%A4%86%E0%A4%9C%E0%A5%8D%E0%A4%9E%E0%A4%BE%E0%A4%B5%E0%A4%B2%E0%A5%80_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE)',\n",
+ " 'https://nl.wikipedia.org/wiki/Python',\n",
+ " 'https://ja.wikipedia.org/wiki/%E3%83%91%E3%82%A4%E3%82%BD%E3%83%B3',\n",
+ " 'https://no.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pl.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pt.wikipedia.org/wiki/Python_(desambigua%C3%A7%C3%A3o)',\n",
+ " 'https://ru.wikipedia.org/wiki/Python_(%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D1%8F)',\n",
+ " 'https://sd.wikipedia.org/wiki/%D8%A7%D8%B1%DA%99',\n",
+ " 'https://sk.wikipedia.org/wiki/Python',\n",
+ " 'https://sh.wikipedia.org/wiki/Python',\n",
+ " 'https://fi.wikipedia.org/wiki/Python',\n",
+ " 'https://sv.wikipedia.org/wiki/Pyton',\n",
+ " 'https://th.wikipedia.org/wiki/%E0%B9%84%E0%B8%9E%E0%B8%97%E0%B8%AD%E0%B8%99',\n",
+ " 'https://tr.wikipedia.org/wiki/Python',\n",
+ " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n",
+ " 'https://ur.wikipedia.org/wiki/%D9%BE%D8%A7%D8%A6%DB%8C%D8%AA%DA%BE%D9%88%D9%86',\n",
+ " 'https://vi.wikipedia.org/wiki/Python',\n",
+ " 'https://zh.wikipedia.org/wiki/Python_(%E6%B6%88%E6%AD%A7%E4%B9%89)',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452#sitelinks-wikipedia',\n",
+ " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " 'https://www.mediawiki.org/wiki/Special:MyLanguage/How_to_contribute',\n",
+ " 'https://foundation.wikimedia.org/wiki/Cookie_statement',\n",
+ " 'https://wikimediafoundation.org/',\n",
+ " 'https://www.mediawiki.org/']"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "links2 = link.find_all('a', {'href':True})\n",
+ "dire = 'http'\n",
+ "\n",
+ "# Keep absolute links only. startswith() is used because a substring test\n",
+ "# would also accept relative hrefs that merely contain 'http' further along,\n",
+ "# for example inside a query string.\n",
+ "linkl2 = [a['href'] for a in links2 if a['href'].startswith(dire)]\n",
+ "\n",
+ "linkl2"
]
},
{
@@ -230,21 +423,56 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'http://uscode.house.gov/download/download.shtml'"
+ "url5 = 'http://uscode.house.gov/download/download.shtml'\n",
+ "html5 = requests.get(url5, timeout=30).content  # time out instead of hanging forever\n",
+ "link5 = BeautifulSoup(html5, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 86,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Title 2 - The Congress',\n",
+ " ' Title 6 - Domestic Security',\n",
+ " ' Title 7 - Agriculture',\n",
+ " ' Title 15 - Commerce and Trade',\n",
+ " ' Title 16 - Conservation',\n",
+ " ' Title 19 - Customs Duties',\n",
+ " ' Title 21 - Food and Drugs',\n",
+ " ' Title 26 - Internal Revenue Code',\n",
+ " ' Title 34 - Crime Control and Law Enforcement',\n",
+ " \" Title 38 - Veterans' Benefits\",\n",
+ " ' Title 42 - The Public Health and Welfare',\n",
+ " ' Title 43 - Public Lands',\n",
+ " ' Title 48 - Territories and Insular Possessions',\n",
+ " ' Title 49 - Transportation',\n",
+ " ' Title 50 - War and National Defense']"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "items = link5.find_all('div', {'class': 'usctitlechanged'})\n",
+ "\n",
+ "# Read each title straight from its tag and collapse runs of whitespace.\n",
+ "# Stringifying the whole list and splitting on ',' would cut any title that\n",
+ "# itself contains a comma and leaves a stray leading space on every entry\n",
+ "# after the first.\n",
+ "titles_final = [' '.join(div.get_text().split()) for div in items]\n",
+ "\n",
+ "titles_final"
]
},
{
@@ -256,21 +484,64 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.fbi.gov/wanted/topten'"
+ "url6 = 'https://www.fbi.gov/wanted/topten'\n",
+ "html6 = requests.get(url6, timeout=30).content  # time out instead of hanging forever\n",
+ "link6 = BeautifulSoup(html6, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
- "#your code "
+ "# Find every <h3> with class 'title' and keep the result in string form\n",
+ "wanted = str(link6.find_all('h3', class_='title'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Remove everything between '<' and '>' so only the text outside the tags is left\n",
+ "namesw = re.compile(r'<(.*?)>').sub('', wanted)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Alejandro Rosales Castillo',\n",
+ " ' Yaser Abdel Said',\n",
+ " ' Jason Derek Brown',\n",
+ " ' Rafael Caro-Quintero',\n",
+ " ' Alexis Flores',\n",
+ " ' Eugene Palmer',\n",
+ " ' Santiago Villalba Mederos',\n",
+ " ' Robert William Fisher',\n",
+ " ' Bhadreshkumar Chetanbhai Patel',\n",
+ " ' Arnoldo Jimenez']"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "namesw = namesw.replace('\\n','').replace('[','').replace(']','').title()\n",
+ "namesw2 = [name.strip() for name in namesw.split(',')]  # trim the space split(',') leaves in front of each name\n",
+ "namesw2"
]
},
{
@@ -282,21 +553,629 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.emsc-csem.org/Earthquake/'"
+ "url7 = 'https://www.emsc-csem.org/Earthquake/'\n",
+ "html7 = requests.get(url7, timeout=30).content  # time out instead of hanging forever\n",
+ "link7 = BeautifulSoup(html7, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "# Pick the elements out of the page by CSS class, in document order\n",
+ "date = link7.find_all(class_='tabev6')\n",
+ "laton = link7.find_all(class_='tabev1')\n",
+ "reg = link7.find_all(class_='tb_region')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "date_l = [i.select('a') for i in date]\n",
+ "date2 = str(date_l)\n",
+ "\n",
+ "# The decimal point is escaped so that it matches a literal '.' only.\n",
+ "date3 = re.findall(r'\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}\\.\\d', date2)\n",
+ "\n",
+ "# findall already returns a list of strings, so each match is cleaned directly\n",
+ "# (non-breaking spaces become plain spaces); no str(list)/split round trip needed.\n",
+ "date_final = [stamp.replace('\\xa0', ' ') for stamp in date3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read each coordinate straight from its cell and drop the padding\n",
+ "# (non-breaking and plain spaces) around the number, rather than parsing\n",
+ "# the string form of the whole list.\n",
+ "laton_final = [cell.get_text().replace('\\xa0','').replace(' ','') for cell in laton]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Even positions of laton_final go to latitude, odd positions to longitude.\n",
+ "# Slicing is used because list.index() returns the FIRST match, which files a\n",
+ "# repeated value under the wrong list whenever its first occurrence sits at\n",
+ "# the other parity (and costs a full scan per element).\n",
+ "latitude = laton_final[0::2]\n",
+ "longitude = laton_final[1::2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read each region straight from its cell and normalise the whitespace.\n",
+ "# Splitting the stringified list on ',' would cut regions that contain a\n",
+ "# comma (e.g. 'ISLAND OF HAWAII, HAWAII') into two entries, so the list\n",
+ "# would no longer line up row-for-row with the dates and coordinates.\n",
+ "reg_final = [' '.join(cell.get_text().split()) for cell in reg]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Wrap each list in its own single-column DataFrame\n",
+ "date_df, latitude_df, longitude_df, reg_df = (\n",
+ "    pd.DataFrame(values) for values in (date_final, latitude, longitude, reg_final)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Place the four single-column frames side by side; concat aligns them on the row index.\n",
+ "frames = [date_df, latitude_df, longitude_df, reg_df]\n",
+ "earthquake_df = pd.concat(frames, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date and time | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " Region | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2019-07-17 12:11:09.0 | \n",
+ " 35.67 | \n",
+ " 117.42 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2019-07-17 12:10:41.0 | \n",
+ " 46.12 | \n",
+ " 2.11 | \n",
+ " FRANCE | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2019-07-17 11:44:28.1 | \n",
+ " 36.12 | \n",
+ " 117.84 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2019-07-17 11:32:47.6 | \n",
+ " 36.11 | \n",
+ " 117.88 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2019-07-17 11:29:31.0 | \n",
+ " 36.11 | \n",
+ " 117.89 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 2019-07-17 11:18:39.2 | \n",
+ " 19.40 | \n",
+ " 155.28 | \n",
+ " ISLAND OF HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 2019-07-17 11:12:28.5 | \n",
+ " 35.93 | \n",
+ " 117.68 | \n",
+ " HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 2019-07-17 10:48:30.2 | \n",
+ " 36.14 | \n",
+ " 117.95 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 2019-07-17 10:44:10.0 | \n",
+ " 18.19 | \n",
+ " 120.25 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 2019-07-17 10:41:30.3 | \n",
+ " 66.32 | \n",
+ " 157.08 | \n",
+ " WESTERN AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2019-07-17 10:20:09.9 | \n",
+ " 18.44 | \n",
+ " 120.24 | \n",
+ " NORTHERN ALASKA | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2019-07-17 10:19:17.2 | \n",
+ " 40.42 | \n",
+ " 41.75 | \n",
+ " WESTERN AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 2019-07-17 10:09:19.5 | \n",
+ " 15.23 | \n",
+ " 173.40 | \n",
+ " EASTERN TURKEY | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2019-07-17 10:00:55.9 | \n",
+ " 35.87 | \n",
+ " 117.74 | \n",
+ " TONGA | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 2019-07-17 09:57:38.0 | \n",
+ " 39.72 | \n",
+ " 41.37 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 2019-07-17 09:53:41.9 | \n",
+ " 19.14 | \n",
+ " 155.53 | \n",
+ " EASTERN TURKEY | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 2019-07-17 09:46:06.6 | \n",
+ " 35.65 | \n",
+ " 117.45 | \n",
+ " ISLAND OF HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 2019-07-17 09:44:17.9 | \n",
+ " 3.30 | \n",
+ " 83.06 | \n",
+ " HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 2019-07-17 09:43:40.2 | \n",
+ " 18.00 | \n",
+ " 66.86 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 2019-07-17 09:41:09.0 | \n",
+ " 39.63 | \n",
+ " 38.60 | \n",
+ " OFF COAST OF CENTRAL AMERICA | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 2019-07-17 09:37:56.8 | \n",
+ " 19.16 | \n",
+ " 155.44 | \n",
+ " PUERTO RICO | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 2019-07-17 09:27:04.2 | \n",
+ " 35.59 | \n",
+ " 117.36 | \n",
+ " EASTERN TURKEY | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 2019-07-17 09:21:21.0 | \n",
+ " 18.65 | \n",
+ " 69.48 | \n",
+ " ISLAND OF HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 2019-07-17 09:16:53.6 | \n",
+ " 36.20 | \n",
+ " 117.89 | \n",
+ " HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 2019-07-17 09:13:30.2 | \n",
+ " 36.61 | \n",
+ " 112.39 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 2019-07-17 09:00:20.5 | \n",
+ " 39.01 | \n",
+ " 141.68 | \n",
+ " DOMINICAN REPUBLIC | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 2019-07-17 08:54:30.0 | \n",
+ " 9.55 | \n",
+ " 84.17 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 2019-07-17 08:16:20.3 | \n",
+ " 37.17 | \n",
+ " 55.76 | \n",
+ " ARIZONA | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 2019-07-17 08:15:20.0 | \n",
+ " 15.53 | \n",
+ " 95.08 | \n",
+ " EASTERN HONSHU | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 2019-07-17 08:09:01.1 | \n",
+ " 39.88 | \n",
+ " 98.33 | \n",
+ " JAPAN | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 2019-07-17 08:02:48.0 | \n",
+ " 25.95 | \n",
+ " 112.90 | \n",
+ " COSTA RICA | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 2019-07-17 07:41:21.4 | \n",
+ " 36.52 | \n",
+ " 121.11 | \n",
+ " NORTHERN IRAN | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 2019-07-17 07:34:44.6 | \n",
+ " 38.82 | \n",
+ " 20.58 | \n",
+ " OFFSHORE OAXACA | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 2019-07-17 07:22:09.1 | \n",
+ " 35.67 | \n",
+ " 117.52 | \n",
+ " MEXICO | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 2019-07-17 07:16:44.0 | \n",
+ " 16.94 | \n",
+ " 119.68 | \n",
+ " KANSAS | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " 2019-07-17 07:08:57.6 | \n",
+ " 43.37 | \n",
+ " 127.10 | \n",
+ " WESTERN AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " 2019-07-17 07:02:38.9 | \n",
+ " 36.07 | \n",
+ " 117.84 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " 2019-07-17 06:48:54.0 | \n",
+ " 35.65 | \n",
+ " 117.51 | \n",
+ " GREECE | \n",
+ "
\n",
+ " \n",
+ " | 38 | \n",
+ " 2019-07-17 06:44:26.6 | \n",
+ " 35.86 | \n",
+ " 117.68 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 39 | \n",
+ " 2019-07-17 06:37:55.1 | \n",
+ " 19.49 | \n",
+ " 65.40 | \n",
+ " NORTHWEST OF AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " 2019-07-17 06:29:38.9 | \n",
+ " 34.76 | \n",
+ " 24.58 | \n",
+ " OFF COAST OF OREGON | \n",
+ "
\n",
+ " \n",
+ " | 41 | \n",
+ " 2019-07-17 06:28:33.7 | \n",
+ " 37.40 | \n",
+ " 26.97 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " 2019-07-17 06:25:50.1 | \n",
+ " 15.81 | \n",
+ " 94.86 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 2019-07-17 06:24:40.3 | \n",
+ " 33.08 | \n",
+ " 115.78 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " 2019-07-17 06:10:46.4 | \n",
+ " 17.08 | \n",
+ " 176.92 | \n",
+ " PUERTO RICO REGION | \n",
+ "
\n",
+ " \n",
+ " | 45 | \n",
+ " 2019-07-17 06:08:17.5 | \n",
+ " 35.82 | \n",
+ " 117.64 | \n",
+ " CRETE | \n",
+ "
\n",
+ " \n",
+ " | 46 | \n",
+ " 2019-07-17 06:04:20.7 | \n",
+ " 35.54 | \n",
+ " 117.44 | \n",
+ " GREECE | \n",
+ "
\n",
+ " \n",
+ " | 47 | \n",
+ " 2019-07-17 05:58:08.7 | \n",
+ " 3.76 | \n",
+ " 151.35 | \n",
+ " DODECANESE ISLANDS | \n",
+ "
\n",
+ " \n",
+ " | 48 | \n",
+ " 2019-07-17 05:49:52.3 | \n",
+ " 35.86 | \n",
+ " 117.69 | \n",
+ " GREECE | \n",
+ "
\n",
+ " \n",
+ " | 49 | \n",
+ " 2019-07-17 05:24:23.7 | \n",
+ " 36.32 | \n",
+ " 89.50 | \n",
+ " OFFSHORE OAXACA | \n",
+ "
\n",
+ " \n",
+ " | 50 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " MEXICO | \n",
+ "
\n",
+ " \n",
+ " | 51 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 52 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FIJI REGION | \n",
+ "
\n",
+ " \n",
+ " | 53 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 54 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 55 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NEW IRELAND REGION | \n",
+ "
\n",
+ " \n",
+ " | 56 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " P.N.G. | \n",
+ "
\n",
+ " \n",
+ " | 57 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 58 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " TENNESSEE | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date and time Latitude Longitude Region\n",
+ "0 2019-07-17 12:11:09.0 35.67 117.42 SOUTHERN CALIFORNIA\n",
+ "1 2019-07-17 12:10:41.0 46.12 2.11 FRANCE\n",
+ "2 2019-07-17 11:44:28.1 36.12 117.84 CENTRAL CALIFORNIA\n",
+ "3 2019-07-17 11:32:47.6 36.11 117.88 CENTRAL CALIFORNIA\n",
+ "4 2019-07-17 11:29:31.0 36.11 117.89 CENTRAL CALIFORNIA\n",
+ "5 2019-07-17 11:18:39.2 19.40 155.28 ISLAND OF HAWAII\n",
+ "6 2019-07-17 11:12:28.5 35.93 117.68 HAWAII\n",
+ "7 2019-07-17 10:48:30.2 36.14 117.95 CENTRAL CALIFORNIA\n",
+ "8 2019-07-17 10:44:10.0 18.19 120.25 CENTRAL CALIFORNIA\n",
+ "9 2019-07-17 10:41:30.3 66.32 157.08 WESTERN AUSTRALIA\n",
+ "10 2019-07-17 10:20:09.9 18.44 120.24 NORTHERN ALASKA\n",
+ "11 2019-07-17 10:19:17.2 40.42 41.75 WESTERN AUSTRALIA\n",
+ "12 2019-07-17 10:09:19.5 15.23 173.40 EASTERN TURKEY\n",
+ "13 2019-07-17 10:00:55.9 35.87 117.74 TONGA\n",
+ "14 2019-07-17 09:57:38.0 39.72 41.37 CENTRAL CALIFORNIA\n",
+ "15 2019-07-17 09:53:41.9 19.14 155.53 EASTERN TURKEY\n",
+ "16 2019-07-17 09:46:06.6 35.65 117.45 ISLAND OF HAWAII\n",
+ "17 2019-07-17 09:44:17.9 3.30 83.06 HAWAII\n",
+ "18 2019-07-17 09:43:40.2 18.00 66.86 SOUTHERN CALIFORNIA\n",
+ "19 2019-07-17 09:41:09.0 39.63 38.60 OFF COAST OF CENTRAL AMERICA\n",
+ "20 2019-07-17 09:37:56.8 19.16 155.44 PUERTO RICO\n",
+ "21 2019-07-17 09:27:04.2 35.59 117.36 EASTERN TURKEY\n",
+ "22 2019-07-17 09:21:21.0 18.65 69.48 ISLAND OF HAWAII\n",
+ "23 2019-07-17 09:16:53.6 36.20 117.89 HAWAII\n",
+ "24 2019-07-17 09:13:30.2 36.61 112.39 SOUTHERN CALIFORNIA\n",
+ "25 2019-07-17 09:00:20.5 39.01 141.68 DOMINICAN REPUBLIC\n",
+ "26 2019-07-17 08:54:30.0 9.55 84.17 CENTRAL CALIFORNIA\n",
+ "27 2019-07-17 08:16:20.3 37.17 55.76 ARIZONA\n",
+ "28 2019-07-17 08:15:20.0 15.53 95.08 EASTERN HONSHU\n",
+ "29 2019-07-17 08:09:01.1 39.88 98.33 JAPAN\n",
+ "30 2019-07-17 08:02:48.0 25.95 112.90 COSTA RICA\n",
+ "31 2019-07-17 07:41:21.4 36.52 121.11 NORTHERN IRAN\n",
+ "32 2019-07-17 07:34:44.6 38.82 20.58 OFFSHORE OAXACA\n",
+ "33 2019-07-17 07:22:09.1 35.67 117.52 MEXICO\n",
+ "34 2019-07-17 07:16:44.0 16.94 119.68 KANSAS\n",
+ "35 2019-07-17 07:08:57.6 43.37 127.10 WESTERN AUSTRALIA\n",
+ "36 2019-07-17 07:02:38.9 36.07 117.84 CENTRAL CALIFORNIA\n",
+ "37 2019-07-17 06:48:54.0 35.65 117.51 GREECE\n",
+ "38 2019-07-17 06:44:26.6 35.86 117.68 SOUTHERN CALIFORNIA\n",
+ "39 2019-07-17 06:37:55.1 19.49 65.40 NORTHWEST OF AUSTRALIA\n",
+ "40 2019-07-17 06:29:38.9 34.76 24.58 OFF COAST OF OREGON\n",
+ "41 2019-07-17 06:28:33.7 37.40 26.97 CENTRAL CALIFORNIA\n",
+ "42 2019-07-17 06:25:50.1 15.81 94.86 SOUTHERN CALIFORNIA\n",
+ "43 2019-07-17 06:24:40.3 33.08 115.78 CENTRAL CALIFORNIA\n",
+ "44 2019-07-17 06:10:46.4 17.08 176.92 PUERTO RICO REGION\n",
+ "45 2019-07-17 06:08:17.5 35.82 117.64 CRETE\n",
+ "46 2019-07-17 06:04:20.7 35.54 117.44 GREECE\n",
+ "47 2019-07-17 05:58:08.7 3.76 151.35 DODECANESE ISLANDS\n",
+ "48 2019-07-17 05:49:52.3 35.86 117.69 GREECE\n",
+ "49 2019-07-17 05:24:23.7 36.32 89.50 OFFSHORE OAXACA\n",
+ "50 NaN NaN NaN MEXICO\n",
+ "51 NaN NaN NaN SOUTHERN CALIFORNIA\n",
+ "52 NaN NaN NaN FIJI REGION\n",
+ "53 NaN NaN NaN CENTRAL CALIFORNIA\n",
+ "54 NaN NaN NaN SOUTHERN CALIFORNIA\n",
+ "55 NaN NaN NaN NEW IRELAND REGION\n",
+ "56 NaN NaN NaN P.N.G.\n",
+ "57 NaN NaN NaN CENTRAL CALIFORNIA\n",
+ "58 NaN NaN NaN TENNESSEE"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Label the four concatenated columns, then display the table\n",
+ "earthquake_df.columns = columns = ['Date and time','Latitude','Longitude','Region']\n",
+ "\n",
+ "earthquake_df"
]
},
{
@@ -308,21 +1187,92 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://hackevents.co/hackathons'"
+ "url8 ='https://hackevents.co/hackathons'\n",
+ "html8 = requests.get(url8, timeout=30).content  # time out instead of hanging forever\n",
+ "link8 = BeautifulSoup(html8, \"html5lib\")"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date | \n",
+ " Title | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 7/29/2019 | \n",
+ " Cairo, Egypt | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 5/21/2019 | \n",
+ " Milano, Italy | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 9/6/2019 | \n",
+ " Munich, Germany | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1/31/2019 | \n",
+ " Prague, Czech Republic | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Title\n",
+ "0 7/29/2019 Cairo, Egypt\n",
+ "1 5/21/2019 Milano, Italy\n",
+ "2 9/6/2019 Munich, Germany\n",
+ "3 1/31/2019 Prague, Czech Republic"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Each 'card-text' paragraph is split on newlines into three fields\n",
+ "cards = link8.find_all(\"p\", class_=\"card-text\")\n",
+ "hack1 = [card.text.replace(\"\\u2003\", \"\").split(\"\\n\") for card in cards]\n",
+ "cols = [\"Date\", \"NA\", \"Title\"]\n",
+ "\n",
+ "# Build the table, then discard the middle field\n",
+ "df = pd.DataFrame(hack1, columns = cols).drop(columns=\"NA\")\n",
+ "df"
]
},
{
@@ -348,7 +1298,9 @@
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
+ "url9 = 'https://twitter.com/'\n",
+ "html9 = requests.get(url9, timeout=30).content  # time out instead of hanging forever\n",
+ "link9 = BeautifulSoup(html9, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
@@ -383,7 +1335,9 @@
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
+ "url10 = 'https://twitter.com/'\n",
+ "html10 = requests.get(url10, timeout=30).content  # time out instead of hanging forever\n",
+ "link10 = BeautifulSoup(html10, 'html.parser')  # explicit parser: same tree on every machine"
]
},
{
@@ -404,21 +1358,94 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.wikipedia.org/'"
+ "url11 = 'https://www.wikipedia.org/'\n",
+ "html11 = requests.get(url11).content\n",
+ "link11 = BeautifulSoup(html11)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 105,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Português',\n",
+ " '5892000+',\n",
+ " '2123000+',\n",
+ " 'العربية',\n",
+ " 'ܐܬܘܪܝܐ',\n",
+ " 'كوردی',\n",
+ " 'مازِرونی',\n",
+ " '1010000+',\n",
+ " 'Русский',\n",
+ " 'هَوُسَا',\n",
+ " 'ދިވެހިބަސް',\n",
+ " 'مصرى',\n",
+ " 'سنڌي',\n",
+ " 'ייִדיש',\n",
+ " 'ئۇيغۇرچه',\n",
+ " '1159000+',\n",
+ " 'Deutsch',\n",
+ " 'پنجابی(شاہمکھی)',\n",
+ " 'עברית',\n",
+ " '1556000+',\n",
+ " '中文',\n",
+ " '1532000+',\n",
+ " '2323000+',\n",
+ " '日本語',\n",
+ " 'فارسی',\n",
+ " '1346000+',\n",
+ " 'کوردییناوەندی',\n",
+ " 'Français',\n",
+ " 'لۊریشومالی',\n",
+ " '1541000+',\n",
+ " 'تۆرکجه',\n",
+ " 'Italiano',\n",
+ " '1065000+',\n",
+ " 'לאדינו',\n",
+ " 'Español',\n",
+ " 'پښتو',\n",
+ " 'كشميري',\n",
+ " 'اردو',\n",
+ " 'گیلکی',\n",
+ " 'قازاقشا',\n",
+ " 'English',\n",
+ " 'Polski']"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "lang1 = link11.select('a strong')\n",
+ "lang2 = link11.select('a bdi')\n",
+ "lang3 = lang2[0:10]\n",
+ "\n",
+ "langf1 = []\n",
+ "langf2 = []\n",
+ "\n",
+ "for element in lang1:\n",
+ " langf1.append(element.text)\n",
+ "for element in lang2:\n",
+ " langf2.append(element)\n",
+ "\n",
+ "Language = list(set(langf1 + langf2))\n",
+ "\n",
+ "# TODO: still need to remove the numeric entries (e.g. '5892000+') from the list\n",
+ "langstr = str(Language)\n",
+ "langstr1 = re.sub(r'<(.*?)>','', langstr)\n",
+ "langstr1 = langstr1.replace('\\xa0', '').replace('[\\'','').replace('\\']','').replace(' ','').replace('\\'','')\n",
+ "langstr2 = langstr1.split(',')\n",
+ "langstr2"
]
},
{
@@ -430,21 +1457,52 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://data.gov.uk/'"
+ "url12 = 'https://data.gov.uk/'\n",
+ "html12 = requests.get(url12).content\n",
+ "link12 = BeautifulSoup(html12)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 103,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Business and economy',\n",
+ " ' Crime and justice',\n",
+ " ' Defence',\n",
+ " ' Education',\n",
+ " ' Environment',\n",
+ " ' Government',\n",
+ " ' Government spending',\n",
+ " ' Health',\n",
+ " ' Mapping',\n",
+ " ' Society',\n",
+ " ' Towns and cities',\n",
+ " ' Transport']"
+ ]
+ },
+ "execution_count": 103,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code "
+ "links12 = link12.find_all('a', {'href':True})\n",
+ "links12 = list(links12)\n",
+ "links12 = links12[11:23]\n",
+ "links_list = str(links12) \n",
+ "links_list = re.sub(r'<(.*?)>','', links_list)\n",
+ "links_list = links_list.replace('[','').replace(']','')\n",
+ "links_list = links_list.split(',')\n",
+ "links_list"
]
},
{
@@ -456,21 +1514,62 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'"
+ "url13 = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'\n",
+ "html13 = requests.get(url13).content\n",
+ "link13 = BeautifulSoup(html13)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Chinese',\n",
+ " 'China',\n",
+ " 'Sino-Tibetan',\n",
+ " 'Arabic',\n",
+ " 'Saudi Arabia',\n",
+ " 'Afroasiatic',\n",
+ " 'Lahnda',\n",
+ " 'Pakistan',\n",
+ " 'Indo-European',\n",
+ " 'Malay',\n",
+ " 'Malaysia',\n",
+ " 'Austronesian',\n",
+ " 'Persian',\n",
+ " 'Iran',\n",
+ " 'Indo-European',\n",
+ " 'Pushto',\n",
+ " 'Pakistan',\n",
+ " 'Indo-European',\n",
+ " 'Oriya',\n",
+ " 'India']"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "#your code\n",
+ "PDLeng= link13.select('.wikitable td > i > a')\n",
+ "\n",
+ "leng_df = []\n",
+ "\n",
+ "for element in PDLeng:\n",
+ " leng_df.append(element.text)\n",
+ " \n",
+ "leng_df = leng_df[0:20]\n",
+ "leng_df"
]
},
{
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 812f7a4..4138883 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -53,7 +53,7 @@
"# import urllib.request\n",
"# from urllib.request import urlopen\n",
"# import random\n",
- "# import re\n",
+ "import re\n",
"# import scrapy"
]
},
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -76,11 +76,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "#your code\n",
+ "html = requests.get(url).content\n",
+ "TrenDev = BeautifulSoup(html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "#TrenDev  # commented out because it printed too much data"
]
},
{
@@ -134,11 +147,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "user = TrenDev.findAll('h1',{'class': 'h3'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "userlist = str(user)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Frank Denis', 'Vladimir Mihailenco', 'Henrique Dias', 'Kyle Roach', 'Erik Rasmussen', 'Franck Nijhof', 'Robert Wagner', 'François Beaufort', 'Pascal Birchler', 'Francois Zaninotto', 'Olle Jonsson', 'Samuel Reed', 'Robert Mosolgo', 'William Durand', 'Felix Rieseberg', 'Felix Angelov', 'Artur Arseniev', 'Michael Skelton', 'Jack Lloyd', 'Federico Brigante', 'Raphaël Benitte', 'Richard Littauer', 'Steven Macenski']\n"
+ ]
+ }
+ ],
+ "source": [
+ "user_names = re.findall(r'[A-Z]\\w*\\s\\w*\\b', userlist)\n",
+ "print(user_names)"
]
},
{
@@ -152,21 +192,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://github.com/trending/python?since=daily'"
+ "url2 = 'https://github.com/trending/python?since=daily'\n",
+ "html2 = requests.get(url2).content\n",
+ "TrenRepo = BeautifulSoup(html2)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "repo = TrenRepo.findAll('h1',{'class': 'h3'})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'[, , , , , , , , , , , , , , , , , , , , , , , , ]'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "repostr = str(repo)\n",
+ "repostr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Osmedeus', 'Shielded_detector', 'ludwig', 'AB3DMOT', 'stylegan', 'dagster', 'models', 'sherlock', 'faceswap', 'bcc', 'DeepLearningExamples', 'tfpyth', 'PyTorch_BlazeFace', 'PySyft', 'code_snippets', 'airflow', 'PhoneInfoga']\n"
+ ]
+ }
+ ],
+ "source": [
+ "reponame = re.findall(r'\\/(\\w*)\"', repostr)\n",
+ "\n",
+ "print(reponame)"
]
},
{
@@ -178,21 +260,56 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/Walt_Disney'"
+ "url3 = 'https://en.wikipedia.org/wiki/Walt_Disney'\n",
+ "html3 = requests.get(url3).content\n",
+ "image = BeautifulSoup(html3)"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 15,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['http://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n",
+ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg']"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "links = image.find_all('div', {'class' : 'thumbinner'})\n",
+ "linkimage = []\n",
+ "dire = 'http:'\n",
+ "\n",
+ "for i in links:\n",
+ " linkimage.append(dire + i.find('img')['src'])\n",
+ "linkimage "
]
},
{
@@ -204,21 +321,97 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://en.wikipedia.org/wiki/Python' "
+ "url4 ='https://en.wikipedia.org/wiki/Python'\n",
+ "html4 = requests.get(url4).content\n",
+ "link = BeautifulSoup(html4)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['https://en.wiktionary.org/wiki/Python',\n",
+ " 'https://en.wiktionary.org/wiki/python',\n",
+ " 'https://en.wikipedia.org/w/index.php?title=Python&oldid=905477736',\n",
+ " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452',\n",
+ " 'https://commons.wikimedia.org/wiki/Category:Python',\n",
+ " 'https://af.wikipedia.org/wiki/Python',\n",
+ " 'https://als.wikipedia.org/wiki/Python',\n",
+ " 'https://az.wikipedia.org/wiki/Python',\n",
+ " 'https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%A6%E0%A7%8D%E0%A6%AC%E0%A7%8D%E0%A6%AF%E0%A6%B0%E0%A7%8D%E0%A6%A5%E0%A6%A4%E0%A6%BE_%E0%A6%A8%E0%A6%BF%E0%A6%B0%E0%A6%B8%E0%A6%A8)',\n",
+ " 'https://be.wikipedia.org/wiki/Python',\n",
+ " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BF%D0%BE%D1%8F%D1%81%D0%BD%D0%B5%D0%BD%D0%B8%D0%B5)',\n",
+ " 'https://cs.wikipedia.org/wiki/Python_(rozcestn%C3%ADk)',\n",
+ " 'https://da.wikipedia.org/wiki/Python',\n",
+ " 'https://de.wikipedia.org/wiki/Python',\n",
+ " 'https://eo.wikipedia.org/wiki/Pitono_(apartigilo)',\n",
+ " 'https://eu.wikipedia.org/wiki/Python_(argipena)',\n",
+ " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86',\n",
+ " 'https://fr.wikipedia.org/wiki/Python',\n",
+ " 'https://ko.wikipedia.org/wiki/%ED%8C%8C%EC%9D%B4%EC%84%A0',\n",
+ " 'https://hr.wikipedia.org/wiki/Python_(razdvojba)',\n",
+ " 'https://io.wikipedia.org/wiki/Pitono',\n",
+ " 'https://id.wikipedia.org/wiki/Python',\n",
+ " 'https://ia.wikipedia.org/wiki/Python_(disambiguation)',\n",
+ " 'https://is.wikipedia.org/wiki/Python_(a%C3%B0greining)',\n",
+ " 'https://it.wikipedia.org/wiki/Python_(disambigua)',\n",
+ " 'https://he.wikipedia.org/wiki/%D7%A4%D7%99%D7%AA%D7%95%D7%9F',\n",
+ " 'https://ka.wikipedia.org/wiki/%E1%83%9E%E1%83%98%E1%83%97%E1%83%9D%E1%83%9C%E1%83%98_(%E1%83%9B%E1%83%A0%E1%83%90%E1%83%95%E1%83%90%E1%83%9A%E1%83%9B%E1%83%9C%E1%83%98%E1%83%A8%E1%83%95%E1%83%9C%E1%83%94%E1%83%9A%E1%83%9D%E1%83%95%E1%83%90%E1%83%9C%E1%83%98)',\n",
+ " 'https://kg.wikipedia.org/wiki/Mboma_(nyoka)',\n",
+ " 'https://la.wikipedia.org/wiki/Python_(discretiva)',\n",
+ " 'https://lb.wikipedia.org/wiki/Python',\n",
+ " 'https://hu.wikipedia.org/wiki/Python_(egy%C3%A9rtelm%C5%B1s%C3%ADt%C5%91_lap)',\n",
+ " 'https://mr.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%AF%E0%A4%A5%E0%A5%89%E0%A4%A8_(%E0%A4%86%E0%A4%9C%E0%A5%8D%E0%A4%9E%E0%A4%BE%E0%A4%B5%E0%A4%B2%E0%A5%80_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE)',\n",
+ " 'https://nl.wikipedia.org/wiki/Python',\n",
+ " 'https://ja.wikipedia.org/wiki/%E3%83%91%E3%82%A4%E3%82%BD%E3%83%B3',\n",
+ " 'https://no.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pl.wikipedia.org/wiki/Pyton',\n",
+ " 'https://pt.wikipedia.org/wiki/Python_(desambigua%C3%A7%C3%A3o)',\n",
+ " 'https://ru.wikipedia.org/wiki/Python_(%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D1%8F)',\n",
+ " 'https://sd.wikipedia.org/wiki/%D8%A7%D8%B1%DA%99',\n",
+ " 'https://sk.wikipedia.org/wiki/Python',\n",
+ " 'https://sh.wikipedia.org/wiki/Python',\n",
+ " 'https://fi.wikipedia.org/wiki/Python',\n",
+ " 'https://sv.wikipedia.org/wiki/Pyton',\n",
+ " 'https://th.wikipedia.org/wiki/%E0%B9%84%E0%B8%9E%E0%B8%97%E0%B8%AD%E0%B8%99',\n",
+ " 'https://tr.wikipedia.org/wiki/Python',\n",
+ " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n",
+ " 'https://ur.wikipedia.org/wiki/%D9%BE%D8%A7%D8%A6%DB%8C%D8%AA%DA%BE%D9%88%D9%86',\n",
+ " 'https://vi.wikipedia.org/wiki/Python',\n",
+ " 'https://zh.wikipedia.org/wiki/Python_(%E6%B6%88%E6%AD%A7%E4%B9%89)',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452#sitelinks-wikipedia',\n",
+ " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " 'https://www.mediawiki.org/wiki/Special:MyLanguage/How_to_contribute',\n",
+ " 'https://foundation.wikimedia.org/wiki/Cookie_statement',\n",
+ " 'https://wikimediafoundation.org/',\n",
+ " 'https://www.mediawiki.org/']"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Collect the absolute links on the page. A substring test ('http' in href)\n",
+ "# would also accept relative URLs that merely contain 'http' somewhere.\n",
+ "links2 = link.find_all('a', {'href':True})\n",
+ "linkl2 = [\n",
+ "    a['href']\n",
+ "    for a in links2\n",
+ "    if a['href'].startswith(('http://', 'https://'))\n",
+ "]\n",
+ "linkl2"
]
},
{
@@ -230,21 +423,56 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'http://uscode.house.gov/download/download.shtml'"
+ "url5 = 'http://uscode.house.gov/download/download.shtml'\n",
+ "html5 = requests.get(url5).content\n",
+ "link5 = BeautifulSoup(html5)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 86,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Title 2 - The Congress',\n",
+ " ' Title 6 - Domestic Security',\n",
+ " ' Title 7 - Agriculture',\n",
+ " ' Title 15 - Commerce and Trade',\n",
+ " ' Title 16 - Conservation',\n",
+ " ' Title 19 - Customs Duties',\n",
+ " ' Title 21 - Food and Drugs',\n",
+ " ' Title 26 - Internal Revenue Code',\n",
+ " ' Title 34 - Crime Control and Law Enforcement',\n",
+ " \" Title 38 - Veterans' Benefits\",\n",
+ " ' Title 42 - The Public Health and Welfare',\n",
+ " ' Title 43 - Public Lands',\n",
+ " ' Title 48 - Territories and Insular Possessions',\n",
+ " ' Title 49 - Transportation',\n",
+ " ' Title 50 - War and National Defense']"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "items = link5.find_all('div', {'class': 'usctitlechanged'})\n",
+ "items = str(items)\n",
+ "\n",
+ "titles = re.sub(r'<(.*?)>','', items)\n",
+ "\n",
+ "titles = titles.replace('\\n', '').replace(' ', '').replace(' ','')\n",
+ "titles_final = titles.split(',')\n",
+ "titles_final = [i.replace('[','').replace(']','') for i in titles_final]\n",
+ "titles_final"
]
},
{
@@ -256,21 +484,64 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.fbi.gov/wanted/topten'"
+ "url6 = 'https://www.fbi.gov/wanted/topten'\n",
+ "html6 = requests.get(url6).content\n",
+ "link6 = BeautifulSoup(html6)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
- "#your code "
+ "wanted = link6.findAll('h3',{'class': 'title'})\n",
+ "wanted = str(wanted)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "namesw = re.sub(r'<(.*?)>','', wanted)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Alejandro Rosales Castillo',\n",
+ " ' Yaser Abdel Said',\n",
+ " ' Jason Derek Brown',\n",
+ " ' Rafael Caro-Quintero',\n",
+ " ' Alexis Flores',\n",
+ " ' Eugene Palmer',\n",
+ " ' Santiago Villalba Mederos',\n",
+ " ' Robert William Fisher',\n",
+ " ' Bhadreshkumar Chetanbhai Patel',\n",
+ " ' Arnoldo Jimenez']"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "namesw = namesw.replace('\\n','').replace('[','').replace(']','').title()\n",
+ "namesw2 = namesw.split(',')\n",
+ "namesw2"
]
},
{
@@ -282,21 +553,629 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.emsc-csem.org/Earthquake/'"
+ "url7 = 'https://www.emsc-csem.org/Earthquake/'\n",
+ "html7 = requests.get(url7).content\n",
+ "link7 = BeautifulSoup(html7)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "#date = link7.findAll('td', {'class':'tabev6'})\n",
+ "#laton = link7.findAll('td', {'class':'tabev1'})\n",
+ "#reg = link7.findAll('td',{'class':'tb_region'})\n",
+ "\n",
+ "date = link7.select('.tabev6')\n",
+ "laton = link7.select('.tabev1')\n",
+ "reg = link7.select('.tb_region')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "date_l = [i.select('a') for i in date]\n",
+ "date2 = str(date_l)\n",
+ "\n",
+ "date3 = re.findall(r'\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}.\\d', date2)\n",
+ "date4 = str(date3)\n",
+ "date4 = date4.replace('\\xa0','').replace('\\\\xa0',' ')\n",
+ "date_final = date4.split(',')\n",
+ "date_final = [i.replace('[\\'','').replace(' \\'','').replace('\\']','').replace('\\'','') for i in date_final]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "laton2 = str(laton)\n",
+ "laton3 = re.sub(r'<(.*?)>','', laton2)\n",
+ "laton4 = laton3.replace('\\xa0','')\n",
+ "laton_final = laton4.split(',')\n",
+ "laton_final = [i.replace(' ','').replace('[','').replace(']','') for i in laton_final]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "latitude = laton_final[0::2]   # even positions; slice by position because list.index() returns the first match and misfiles repeated values\n",
+ "longitude = laton_final[1::2]  # odd positions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Take each cell's text directly. The previous str(list) -> split(',') round trip\n",
+ "# cut regions that contain a comma (e.g. 'ISLAND OF HAWAII, HAWAII') into two\n",
+ "# entries, so the Region column ran longer than the others and slid out of step.\n",
+ "reg_final = [cell.get_text().replace('\\xa0', '').strip() for cell in reg]\n",
+ "# One entry per matched .tb_region cell, in page order."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "date_df = pd.DataFrame(date_final)\n",
+ "latitude_df = pd.DataFrame(latitude)\n",
+ "longitude_df = pd.DataFrame(longitude)\n",
+ "reg_df = pd.DataFrame(reg_final)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Join the four single-column frames side by side (aligned on their integer index).\n",
+ "# Column labels are assigned in the following cell, so no label list is built here.\n",
+ "earthquake_df = pd.concat([date_df, latitude_df, longitude_df, reg_df], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date and time | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " Region | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2019-07-17 12:11:09.0 | \n",
+ " 35.67 | \n",
+ " 117.42 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2019-07-17 12:10:41.0 | \n",
+ " 46.12 | \n",
+ " 2.11 | \n",
+ " FRANCE | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2019-07-17 11:44:28.1 | \n",
+ " 36.12 | \n",
+ " 117.84 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2019-07-17 11:32:47.6 | \n",
+ " 36.11 | \n",
+ " 117.88 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2019-07-17 11:29:31.0 | \n",
+ " 36.11 | \n",
+ " 117.89 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 2019-07-17 11:18:39.2 | \n",
+ " 19.40 | \n",
+ " 155.28 | \n",
+ " ISLAND OF HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 2019-07-17 11:12:28.5 | \n",
+ " 35.93 | \n",
+ " 117.68 | \n",
+ " HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 2019-07-17 10:48:30.2 | \n",
+ " 36.14 | \n",
+ " 117.95 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 2019-07-17 10:44:10.0 | \n",
+ " 18.19 | \n",
+ " 120.25 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 2019-07-17 10:41:30.3 | \n",
+ " 66.32 | \n",
+ " 157.08 | \n",
+ " WESTERN AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 2019-07-17 10:20:09.9 | \n",
+ " 18.44 | \n",
+ " 120.24 | \n",
+ " NORTHERN ALASKA | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 2019-07-17 10:19:17.2 | \n",
+ " 40.42 | \n",
+ " 41.75 | \n",
+ " WESTERN AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 2019-07-17 10:09:19.5 | \n",
+ " 15.23 | \n",
+ " 173.40 | \n",
+ " EASTERN TURKEY | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 2019-07-17 10:00:55.9 | \n",
+ " 35.87 | \n",
+ " 117.74 | \n",
+ " TONGA | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 2019-07-17 09:57:38.0 | \n",
+ " 39.72 | \n",
+ " 41.37 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 2019-07-17 09:53:41.9 | \n",
+ " 19.14 | \n",
+ " 155.53 | \n",
+ " EASTERN TURKEY | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 2019-07-17 09:46:06.6 | \n",
+ " 35.65 | \n",
+ " 117.45 | \n",
+ " ISLAND OF HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 2019-07-17 09:44:17.9 | \n",
+ " 3.30 | \n",
+ " 83.06 | \n",
+ " HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 2019-07-17 09:43:40.2 | \n",
+ " 18.00 | \n",
+ " 66.86 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 2019-07-17 09:41:09.0 | \n",
+ " 39.63 | \n",
+ " 38.60 | \n",
+ " OFF COAST OF CENTRAL AMERICA | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 2019-07-17 09:37:56.8 | \n",
+ " 19.16 | \n",
+ " 155.44 | \n",
+ " PUERTO RICO | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 2019-07-17 09:27:04.2 | \n",
+ " 35.59 | \n",
+ " 117.36 | \n",
+ " EASTERN TURKEY | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 2019-07-17 09:21:21.0 | \n",
+ " 18.65 | \n",
+ " 69.48 | \n",
+ " ISLAND OF HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 2019-07-17 09:16:53.6 | \n",
+ " 36.20 | \n",
+ " 117.89 | \n",
+ " HAWAII | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 2019-07-17 09:13:30.2 | \n",
+ " 36.61 | \n",
+ " 112.39 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 2019-07-17 09:00:20.5 | \n",
+ " 39.01 | \n",
+ " 141.68 | \n",
+ " DOMINICAN REPUBLIC | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 2019-07-17 08:54:30.0 | \n",
+ " 9.55 | \n",
+ " 84.17 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 2019-07-17 08:16:20.3 | \n",
+ " 37.17 | \n",
+ " 55.76 | \n",
+ " ARIZONA | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 2019-07-17 08:15:20.0 | \n",
+ " 15.53 | \n",
+ " 95.08 | \n",
+ " EASTERN HONSHU | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 2019-07-17 08:09:01.1 | \n",
+ " 39.88 | \n",
+ " 98.33 | \n",
+ " JAPAN | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 2019-07-17 08:02:48.0 | \n",
+ " 25.95 | \n",
+ " 112.90 | \n",
+ " COSTA RICA | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 2019-07-17 07:41:21.4 | \n",
+ " 36.52 | \n",
+ " 121.11 | \n",
+ " NORTHERN IRAN | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 2019-07-17 07:34:44.6 | \n",
+ " 38.82 | \n",
+ " 20.58 | \n",
+ " OFFSHORE OAXACA | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 2019-07-17 07:22:09.1 | \n",
+ " 35.67 | \n",
+ " 117.52 | \n",
+ " MEXICO | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 2019-07-17 07:16:44.0 | \n",
+ " 16.94 | \n",
+ " 119.68 | \n",
+ " KANSAS | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " 2019-07-17 07:08:57.6 | \n",
+ " 43.37 | \n",
+ " 127.10 | \n",
+ " WESTERN AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " 2019-07-17 07:02:38.9 | \n",
+ " 36.07 | \n",
+ " 117.84 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " 2019-07-17 06:48:54.0 | \n",
+ " 35.65 | \n",
+ " 117.51 | \n",
+ " GREECE | \n",
+ "
\n",
+ " \n",
+ " | 38 | \n",
+ " 2019-07-17 06:44:26.6 | \n",
+ " 35.86 | \n",
+ " 117.68 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 39 | \n",
+ " 2019-07-17 06:37:55.1 | \n",
+ " 19.49 | \n",
+ " 65.40 | \n",
+ " NORTHWEST OF AUSTRALIA | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " 2019-07-17 06:29:38.9 | \n",
+ " 34.76 | \n",
+ " 24.58 | \n",
+ " OFF COAST OF OREGON | \n",
+ "
\n",
+ " \n",
+ " | 41 | \n",
+ " 2019-07-17 06:28:33.7 | \n",
+ " 37.40 | \n",
+ " 26.97 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " 2019-07-17 06:25:50.1 | \n",
+ " 15.81 | \n",
+ " 94.86 | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 2019-07-17 06:24:40.3 | \n",
+ " 33.08 | \n",
+ " 115.78 | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " 2019-07-17 06:10:46.4 | \n",
+ " 17.08 | \n",
+ " 176.92 | \n",
+ " PUERTO RICO REGION | \n",
+ "
\n",
+ " \n",
+ " | 45 | \n",
+ " 2019-07-17 06:08:17.5 | \n",
+ " 35.82 | \n",
+ " 117.64 | \n",
+ " CRETE | \n",
+ "
\n",
+ " \n",
+ " | 46 | \n",
+ " 2019-07-17 06:04:20.7 | \n",
+ " 35.54 | \n",
+ " 117.44 | \n",
+ " GREECE | \n",
+ "
\n",
+ " \n",
+ " | 47 | \n",
+ " 2019-07-17 05:58:08.7 | \n",
+ " 3.76 | \n",
+ " 151.35 | \n",
+ " DODECANESE ISLANDS | \n",
+ "
\n",
+ " \n",
+ " | 48 | \n",
+ " 2019-07-17 05:49:52.3 | \n",
+ " 35.86 | \n",
+ " 117.69 | \n",
+ " GREECE | \n",
+ "
\n",
+ " \n",
+ " | 49 | \n",
+ " 2019-07-17 05:24:23.7 | \n",
+ " 36.32 | \n",
+ " 89.50 | \n",
+ " OFFSHORE OAXACA | \n",
+ "
\n",
+ " \n",
+ " | 50 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " MEXICO | \n",
+ "
\n",
+ " \n",
+ " | 51 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 52 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FIJI REGION | \n",
+ "
\n",
+ " \n",
+ " | 53 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 54 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " SOUTHERN CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 55 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NEW IRELAND REGION | \n",
+ "
\n",
+ " \n",
+ " | 56 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " P.N.G. | \n",
+ "
\n",
+ " \n",
+ " | 57 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " CENTRAL CALIFORNIA | \n",
+ "
\n",
+ " \n",
+ " | 58 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " TENNESSEE | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date and time Latitude Longitude Region\n",
+ "0 2019-07-17 12:11:09.0 35.67 117.42 SOUTHERN CALIFORNIA\n",
+ "1 2019-07-17 12:10:41.0 46.12 2.11 FRANCE\n",
+ "2 2019-07-17 11:44:28.1 36.12 117.84 CENTRAL CALIFORNIA\n",
+ "3 2019-07-17 11:32:47.6 36.11 117.88 CENTRAL CALIFORNIA\n",
+ "4 2019-07-17 11:29:31.0 36.11 117.89 CENTRAL CALIFORNIA\n",
+ "5 2019-07-17 11:18:39.2 19.40 155.28 ISLAND OF HAWAII\n",
+ "6 2019-07-17 11:12:28.5 35.93 117.68 HAWAII\n",
+ "7 2019-07-17 10:48:30.2 36.14 117.95 CENTRAL CALIFORNIA\n",
+ "8 2019-07-17 10:44:10.0 18.19 120.25 CENTRAL CALIFORNIA\n",
+ "9 2019-07-17 10:41:30.3 66.32 157.08 WESTERN AUSTRALIA\n",
+ "10 2019-07-17 10:20:09.9 18.44 120.24 NORTHERN ALASKA\n",
+ "11 2019-07-17 10:19:17.2 40.42 41.75 WESTERN AUSTRALIA\n",
+ "12 2019-07-17 10:09:19.5 15.23 173.40 EASTERN TURKEY\n",
+ "13 2019-07-17 10:00:55.9 35.87 117.74 TONGA\n",
+ "14 2019-07-17 09:57:38.0 39.72 41.37 CENTRAL CALIFORNIA\n",
+ "15 2019-07-17 09:53:41.9 19.14 155.53 EASTERN TURKEY\n",
+ "16 2019-07-17 09:46:06.6 35.65 117.45 ISLAND OF HAWAII\n",
+ "17 2019-07-17 09:44:17.9 3.30 83.06 HAWAII\n",
+ "18 2019-07-17 09:43:40.2 18.00 66.86 SOUTHERN CALIFORNIA\n",
+ "19 2019-07-17 09:41:09.0 39.63 38.60 OFF COAST OF CENTRAL AMERICA\n",
+ "20 2019-07-17 09:37:56.8 19.16 155.44 PUERTO RICO\n",
+ "21 2019-07-17 09:27:04.2 35.59 117.36 EASTERN TURKEY\n",
+ "22 2019-07-17 09:21:21.0 18.65 69.48 ISLAND OF HAWAII\n",
+ "23 2019-07-17 09:16:53.6 36.20 117.89 HAWAII\n",
+ "24 2019-07-17 09:13:30.2 36.61 112.39 SOUTHERN CALIFORNIA\n",
+ "25 2019-07-17 09:00:20.5 39.01 141.68 DOMINICAN REPUBLIC\n",
+ "26 2019-07-17 08:54:30.0 9.55 84.17 CENTRAL CALIFORNIA\n",
+ "27 2019-07-17 08:16:20.3 37.17 55.76 ARIZONA\n",
+ "28 2019-07-17 08:15:20.0 15.53 95.08 EASTERN HONSHU\n",
+ "29 2019-07-17 08:09:01.1 39.88 98.33 JAPAN\n",
+ "30 2019-07-17 08:02:48.0 25.95 112.90 COSTA RICA\n",
+ "31 2019-07-17 07:41:21.4 36.52 121.11 NORTHERN IRAN\n",
+ "32 2019-07-17 07:34:44.6 38.82 20.58 OFFSHORE OAXACA\n",
+ "33 2019-07-17 07:22:09.1 35.67 117.52 MEXICO\n",
+ "34 2019-07-17 07:16:44.0 16.94 119.68 KANSAS\n",
+ "35 2019-07-17 07:08:57.6 43.37 127.10 WESTERN AUSTRALIA\n",
+ "36 2019-07-17 07:02:38.9 36.07 117.84 CENTRAL CALIFORNIA\n",
+ "37 2019-07-17 06:48:54.0 35.65 117.51 GREECE\n",
+ "38 2019-07-17 06:44:26.6 35.86 117.68 SOUTHERN CALIFORNIA\n",
+ "39 2019-07-17 06:37:55.1 19.49 65.40 NORTHWEST OF AUSTRALIA\n",
+ "40 2019-07-17 06:29:38.9 34.76 24.58 OFF COAST OF OREGON\n",
+ "41 2019-07-17 06:28:33.7 37.40 26.97 CENTRAL CALIFORNIA\n",
+ "42 2019-07-17 06:25:50.1 15.81 94.86 SOUTHERN CALIFORNIA\n",
+ "43 2019-07-17 06:24:40.3 33.08 115.78 CENTRAL CALIFORNIA\n",
+ "44 2019-07-17 06:10:46.4 17.08 176.92 PUERTO RICO REGION\n",
+ "45 2019-07-17 06:08:17.5 35.82 117.64 CRETE\n",
+ "46 2019-07-17 06:04:20.7 35.54 117.44 GREECE\n",
+ "47 2019-07-17 05:58:08.7 3.76 151.35 DODECANESE ISLANDS\n",
+ "48 2019-07-17 05:49:52.3 35.86 117.69 GREECE\n",
+ "49 2019-07-17 05:24:23.7 36.32 89.50 OFFSHORE OAXACA\n",
+ "50 NaN NaN NaN MEXICO\n",
+ "51 NaN NaN NaN SOUTHERN CALIFORNIA\n",
+ "52 NaN NaN NaN FIJI REGION\n",
+ "53 NaN NaN NaN CENTRAL CALIFORNIA\n",
+ "54 NaN NaN NaN SOUTHERN CALIFORNIA\n",
+ "55 NaN NaN NaN NEW IRELAND REGION\n",
+ "56 NaN NaN NaN P.N.G.\n",
+ "57 NaN NaN NaN CENTRAL CALIFORNIA\n",
+ "58 NaN NaN NaN TENNESSEE"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "columns = ['Date and time','Latitude','Longitude','Region']\n",
+ "earthquake_df.columns = columns\n",
+ "\n",
+ "earthquake_df"
]
},
{
@@ -308,21 +1187,92 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://hackevents.co/hackathons'"
+ "url8 = 'https://hackevents.co/hackathons'\n",
+ "html8 = requests.get(url8, timeout=30).content  # bounded wait: requests has no default timeout\n",
+ "link8 = BeautifulSoup(html8, \"html5lib\")  # parse the raw response bytes with the html5lib parser"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date | \n",
+ " Title | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 7/29/2019 | \n",
+ " Cairo, Egypt | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 5/21/2019 | \n",
+ " Milano, Italy | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 9/6/2019 | \n",
+ " Munich, Germany | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1/31/2019 | \n",
+ " Prague, Czech Republic | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Title\n",
+ "0 7/29/2019 Cairo, Egypt\n",
+ "1 5/21/2019 Milano, Italy\n",
+ "2 9/6/2019 Munich, Germany\n",
+ "3 1/31/2019 Prague, Czech Republic"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Each <p class=\"card-text\"> is expected to give three newline-separated fields\n",
+ "# once the em-space (U+2003) is removed; the middle field is discarded below.\n",
+ "hack1 = [p_tag.get_text().replace(\"\\u2003\", \"\").split(\"\\n\")\n",
+ "         for p_tag in link8.find_all(\"p\", {\"class\":\"card-text\"})]\n",
+ "cols = [\"Date\", \"NA\", \"Title\"]\n",
+ "df = pd.DataFrame(data=hack1, columns=cols).drop(\"NA\", axis=1)\n",
+ "df"
]
},
{
@@ -348,7 +1298,9 @@
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
+ "url9 = 'https://twitter.com/'\n",
+ "html9 = requests.get(url9, timeout=30).content  # bounded wait: requests has no default timeout\n",
+ "link9 = BeautifulSoup(html9)  # no parser named: bs4 falls back to whichever installed parser it prefers"
]
},
{
@@ -383,7 +1335,9 @@
"source": [
"# This is the url you will scrape in this exercise \n",
"# You will need to add the account credentials to this url\n",
- "url = 'https://twitter.com/'"
+ "url10 = 'https://twitter.com/'\n",
+ "html10 = requests.get(url10, timeout=30).content  # bounded wait: requests has no default timeout\n",
+ "link10 = BeautifulSoup(html10)  # no parser named: bs4 falls back to whichever installed parser it prefers"
]
},
{
@@ -404,21 +1358,94 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.wikipedia.org/'"
+ "url11 = 'https://www.wikipedia.org/'\n",
+ "html11 = requests.get(url11, timeout=30).content  # bounded wait: requests has no default timeout\n",
+ "link11 = BeautifulSoup(html11)  # no parser named: bs4 falls back to whichever installed parser it prefers"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 105,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Português',\n",
+ " '5892000+',\n",
+ " '2123000+',\n",
+ " 'العربية',\n",
+ " 'ܐܬܘܪܝܐ',\n",
+ " 'كوردی',\n",
+ " 'مازِرونی',\n",
+ " '1010000+',\n",
+ " 'Русский',\n",
+ " 'هَوُسَا',\n",
+ " 'ދިވެހިބަސް',\n",
+ " 'مصرى',\n",
+ " 'سنڌي',\n",
+ " 'ייִדיש',\n",
+ " 'ئۇيغۇرچه',\n",
+ " '1159000+',\n",
+ " 'Deutsch',\n",
+ " 'پنجابی(شاہمکھی)',\n",
+ " 'עברית',\n",
+ " '1556000+',\n",
+ " '中文',\n",
+ " '1532000+',\n",
+ " '2323000+',\n",
+ " '日本語',\n",
+ " 'فارسی',\n",
+ " '1346000+',\n",
+ " 'کوردییناوەندی',\n",
+ " 'Français',\n",
+ " 'لۊریشومالی',\n",
+ " '1541000+',\n",
+ " 'تۆرکجه',\n",
+ " 'Italiano',\n",
+ " '1065000+',\n",
+ " 'לאדינו',\n",
+ " 'Español',\n",
+ " 'پښتو',\n",
+ " 'كشميري',\n",
+ " 'اردو',\n",
+ " 'گیلکی',\n",
+ " 'قازاقشا',\n",
+ " 'English',\n",
+ " 'Polski']"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Language names and article counts scraped from the wikipedia.org portal\n",
+ "# (link11 is the parsed page).\n",
+ "lang1 = link11.select('a strong')\n",
+ "lang2 = link11.select('a bdi')\n",
+ "\n",
+ "# Keep each match's text, not the Tag itself: the earlier str()/regex round\n",
+ "# trip split on commas and deleted the spaces inside multi-word names.\n",
+ "langf1 = [element.text.strip() for element in lang1]\n",
+ "# '\\xa0' (non-breaking space) is removed as before so counts read like 5892000+\n",
+ "langf2 = [element.text.replace('\\xa0', '').strip() for element in lang2]\n",
+ "\n",
+ "# De-duplicate while keeping page order; set() returned an arbitrary order.\n",
+ "Language = []\n",
+ "for element in langf1 + langf2:\n",
+ "    if element not in Language:\n",
+ "        Language.append(element)\n",
+ "\n",
+ "# TODO: the article counts still have to be removed from the list\n",
+ "langstr2 = Language\n",
+ "langstr2"
]
},
{
@@ -430,21 +1457,52 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://data.gov.uk/'"
+ "url12 = 'https://data.gov.uk/'\n",
+ "html12 = requests.get(url12, timeout=30).content  # bounded wait: requests has no default timeout\n",
+ "link12 = BeautifulSoup(html12)  # no parser named: bs4 falls back to whichever installed parser it prefers"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 103,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Business and economy',\n",
+ " ' Crime and justice',\n",
+ " ' Defence',\n",
+ " ' Education',\n",
+ " ' Environment',\n",
+ " ' Government',\n",
+ " ' Government spending',\n",
+ " ' Health',\n",
+ " ' Mapping',\n",
+ " ' Society',\n",
+ " ' Towns and cities',\n",
+ " ' Transport']"
+ ]
+ },
+ "execution_count": 103,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code "
+ "links12 = link12.find_all('a', {'href':True})\n",
+ "# Anchors 11-22 are the dataset topic links (Business and economy ... Transport);\n",
+ "# these positions depend on the page layout at scrape time.\n",
+ "links12 = links12[11:23]\n",
+ "# Read each anchor's text directly: the str()/split(',') round trip left a leading\n",
+ "# space on every entry after the first and would break on names containing commas.\n",
+ "links_list = [link.text.strip() for link in links12]\n",
+ "links_list"
]
},
{
@@ -456,21 +1514,62 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'"
+ "url13 = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'\n",
+ "html13 = requests.get(url13, timeout=30).content  # bounded wait: requests has no default timeout\n",
+ "link13 = BeautifulSoup(html13)  # no parser named: bs4 falls back to whichever installed parser it prefers"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Chinese',\n",
+ " 'China',\n",
+ " 'Sino-Tibetan',\n",
+ " 'Arabic',\n",
+ " 'Saudi Arabia',\n",
+ " 'Afroasiatic',\n",
+ " 'Lahnda',\n",
+ " 'Pakistan',\n",
+ " 'Indo-European',\n",
+ " 'Malay',\n",
+ " 'Malaysia',\n",
+ " 'Austronesian',\n",
+ " 'Persian',\n",
+ " 'Iran',\n",
+ " 'Indo-European',\n",
+ " 'Pushto',\n",
+ " 'Pakistan',\n",
+ " 'Indo-European',\n",
+ " 'Oriya',\n",
+ " 'India']"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Text of the first 20 links matched by the selector below\n",
+ "# (an <a> directly inside an <i> directly inside a td of a .wikitable).\n",
+ "PDLeng = link13.select('.wikitable td > i > a')\n",
+ "\n",
+ "# NOTE: despite the _df suffix this is a plain list of strings.\n",
+ "leng_df = [anchor.text for anchor in PDLeng]\n",
+ "\n",
+ "# Keep only the first 20 entries.\n",
+ "leng_df = leng_df[:20]\n",
+ "leng_df"
]
},
{