From 33cae4037a6bf840e367c4d36133c5dd91550fec Mon Sep 17 00:00:00 2001 From: Gustavo Kiske Date: Tue, 16 Jul 2019 19:09:13 -0500 Subject: [PATCH 1/2] ya no faltan tantos pero no puede con los earthquakes --- .../.ipynb_checkpoints/main-checkpoint.ipynb | 759 ++++++++++++++++-- your-code/main.ipynb | 759 ++++++++++++++++-- 2 files changed, 1412 insertions(+), 106 deletions(-) diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb index 812f7a4..1ac1946 100644 --- a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -53,7 +53,7 @@ "# import urllib.request\n", "# from urllib.request import urlopen\n", "# import random\n", - "# import re\n", + "import re\n", "# import scrapy" ] }, @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -76,11 +76,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#your code" + "#your code\n", + "html = requests.get(url).content\n", + "TrenDev = BeautifulSoup(html)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "#TrenDev #lo comentamos porque salian demasiados datos" ] }, { @@ -134,11 +147,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "#your code" + "user = TrenDev.findAll('h1',{'class': 'h3'})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "userlist = str(user)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Frank Denis', 'Vladimir Mihailenco', 'Henrique Dias', 'Kyle Roach', 'Erik Rasmussen', 'Franck Nijhof', 'Robert Wagner', 'François Beaufort', 'Pascal Birchler', 'Francois Zaninotto', 'Olle Jonsson', 'Samuel Reed', 'Robert Mosolgo', 'William Durand', 'Felix Rieseberg', 'Felix Angelov', 'Artur Arseniev', 'Michael Skelton', 'Jack Lloyd', 'Federico Brigante', 'Raphaël Benitte', 'Richard Littauer', 'Steven Macenski']\n" + ] + } + ], + "source": [ + "user_names = re.findall(r'[A-Z]\\w*\\s\\w*\\b', userlist)\n", + "print(user_names)" ] }, { @@ -152,21 +192,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://github.com/trending/python?since=daily'" + "url2 = 'https://github.com/trending/python?since=daily'\n", + "html2 = requests.get(url2).content\n", + "TrenRepo = BeautifulSoup(html2)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "#your code" + "repo = TrenRepo.findAll('h1',{'class': 'h3'})" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[

\\n\\n\\nsherlock-project / sherlock\\n

,

\\n\\n\\nj3ssie / Osmedeus\\n

,

\\n\\n\\nytdl-org / youtube-dl\\n

,

\\n\\n\\nuber / ludwig\\n

,

\\n\\n\\ntkat0 / PyTorch_BlazeFace\\n

,

\\n\\n\\nKyubyong / dc_tts\\n

,

\\n\\n\\ngto76 / python-cheatsheet\\n

,

\\n\\n\\nbbfamily / abu\\n

,

\\n\\n\\nDrDonk / unlocker\\n

,

\\n\\n\\nBlackHC / tfpyth\\n

,

\\n\\n\\nsundowndev / PhoneInfoga\\n

,

\\n\\n\\npublic-apis / public-apis\\n

,

\\n\\n\\ntwintproject / twint\\n

,

\\n\\n\\nliuhuanyong / QASystemOnMedicalKG\\n

,

\\n\\n\\nvaexio / vaex\\n

,

\\n\\n\\nMrS0m30n3 / youtube-dl-gui\\n

,

\\n\\n\\nMozillaSecurity / grizzly\\n

,

\\n\\n\\nscikit-learn / scikit-learn\\n

,

\\n\\n\\nbitcoin / bips\\n

,

\\n\\n\\nxinshuoweng / AB3DMOT\\n

,

\\n\\n\\nsmartHomeHub / SmartIR\\n

,

\\n\\n\\nsfyc23 / EverydayWechat\\n

,

\\n\\n\\nhome-assistant / home-assistant\\n

,

\\n\\n\\nmsgi / nlp-journey\\n

,

\\n\\n\\ngunthercox / ChatterBot\\n

]'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "repostr = str(repo)\n", + "repostr" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['sherlock', 'Osmedeus', 'ludwig', 'PyTorch_BlazeFace', 'dc_tts', 'abu', 'unlocker', 'tfpyth', 'PhoneInfoga', 'twint', 'QASystemOnMedicalKG', 'vaex', 'grizzly', 'bips', 'AB3DMOT', 'SmartIR', 'EverydayWechat', 'ChatterBot']\n" + ] + } + ], + "source": [ + "reponame = re.findall(r'\\/(\\w*)\"', repostr)\n", + "\n", + "print(reponame)" ] }, { @@ -178,21 +260,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://en.wikipedia.org/wiki/Walt_Disney'" + "url3 = 'https://en.wikipedia.org/wiki/Walt_Disney'\n", + "html3 = requests.get(url3).content\n", + "image = BeautifulSoup(html3)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['http://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "links = image.find_all('div', {'class' : 'thumbinner'})\n", + "linkimage = []\n", + "dire = 'http:'\n", + "\n", + "for i in links:\n", + " linkimage.append(dire + i.find('img')['src'])\n", + "linkimage " ] }, { @@ -204,21 +321,97 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://en.wikipedia.org/wiki/Python' " + "url4 ='https://en.wikipedia.org/wiki/Python'\n", + "html4 = requests.get(url4).content\n", + "link = BeautifulSoup(html4)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['https://en.wiktionary.org/wiki/Python',\n", + " 'https://en.wiktionary.org/wiki/python',\n", + " 'https://en.wikipedia.org/w/index.php?title=Python&oldid=905477736',\n", + " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452',\n", + " 'https://commons.wikimedia.org/wiki/Category:Python',\n", + " 'https://af.wikipedia.org/wiki/Python',\n", + " 'https://als.wikipedia.org/wiki/Python',\n", + " 'https://az.wikipedia.org/wiki/Python',\n", + " 'https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%A6%E0%A7%8D%E0%A6%AC%E0%A7%8D%E0%A6%AF%E0%A6%B0%E0%A7%8D%E0%A6%A5%E0%A6%A4%E0%A6%BE_%E0%A6%A8%E0%A6%BF%E0%A6%B0%E0%A6%B8%E0%A6%A8)',\n", + " 'https://be.wikipedia.org/wiki/Python',\n", + " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BF%D0%BE%D1%8F%D1%81%D0%BD%D0%B5%D0%BD%D0%B8%D0%B5)',\n", + " 'https://cs.wikipedia.org/wiki/Python_(rozcestn%C3%ADk)',\n", + " 'https://da.wikipedia.org/wiki/Python',\n", + " 'https://de.wikipedia.org/wiki/Python',\n", + " 'https://eo.wikipedia.org/wiki/Pitono_(apartigilo)',\n", + " 'https://eu.wikipedia.org/wiki/Python_(argipena)',\n", + " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86',\n", + " 'https://fr.wikipedia.org/wiki/Python',\n", + " 'https://ko.wikipedia.org/wiki/%ED%8C%8C%EC%9D%B4%EC%84%A0',\n", + " 'https://hr.wikipedia.org/wiki/Python_(razdvojba)',\n", + " 'https://io.wikipedia.org/wiki/Pitono',\n", + " 'https://id.wikipedia.org/wiki/Python',\n", + " 'https://ia.wikipedia.org/wiki/Python_(disambiguation)',\n", + " 'https://is.wikipedia.org/wiki/Python_(a%C3%B0greining)',\n", + " 'https://it.wikipedia.org/wiki/Python_(disambigua)',\n", + " 'https://he.wikipedia.org/wiki/%D7%A4%D7%99%D7%AA%D7%95%D7%9F',\n", + " 'https://ka.wikipedia.org/wiki/%E1%83%9E%E1%83%98%E1%83%97%E1%83%9D%E1%83%9C%E1%83%98_(%E1%83%9B%E1%83%A0%E1%83%90%E1%83%95%E1%83%90%E1%83%9A%E1%83%9B%E1%83%9C%E1%83%98%E1%83%A8%E1%83%95%E1%83%9C%E1%83%94%E1%83%9A%E1%83%9D%E1%83%95%E1%83%90%E1%83%9C%E1%83%98)',\n", + " 'https://kg.wikipedia.org/wiki/Mboma_(nyoka)',\n", + " 'https://la.wikipedia.org/wiki/Python_(discretiva)',\n", + " 'https://lb.wikipedia.org/wiki/Python',\n", + " 'https://hu.wikipedia.org/wiki/Python_(egy%C3%A9rtelm%C5%B1s%C3%ADt%C5%91_lap)',\n", + " 'https://mr.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%AF%E0%A4%A5%E0%A5%89%E0%A4%A8_(%E0%A4%86%E0%A4%9C%E0%A5%8D%E0%A4%9E%E0%A4%BE%E0%A4%B5%E0%A4%B2%E0%A5%80_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE)',\n", + " 'https://nl.wikipedia.org/wiki/Python',\n", + " 'https://ja.wikipedia.org/wiki/%E3%83%91%E3%82%A4%E3%82%BD%E3%83%B3',\n", + " 'https://no.wikipedia.org/wiki/Pyton',\n", + " 'https://pl.wikipedia.org/wiki/Pyton',\n", + " 'https://pt.wikipedia.org/wiki/Python_(desambigua%C3%A7%C3%A3o)',\n", + " 'https://ru.wikipedia.org/wiki/Python_(%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D1%8F)',\n", + " 'https://sd.wikipedia.org/wiki/%D8%A7%D8%B1%DA%99',\n", + " 'https://sk.wikipedia.org/wiki/Python',\n", + " 'https://sh.wikipedia.org/wiki/Python',\n", + " 'https://fi.wikipedia.org/wiki/Python',\n", + " 'https://sv.wikipedia.org/wiki/Pyton',\n", + " 'https://th.wikipedia.org/wiki/%E0%B9%84%E0%B8%9E%E0%B8%97%E0%B8%AD%E0%B8%99',\n", + " 'https://tr.wikipedia.org/wiki/Python',\n", + " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n", + " 'https://ur.wikipedia.org/wiki/%D9%BE%D8%A7%D8%A6%DB%8C%D8%AA%DA%BE%D9%88%D9%86',\n", + " 'https://vi.wikipedia.org/wiki/Python',\n", + " 'https://zh.wikipedia.org/wiki/Python_(%E6%B6%88%E6%AD%A7%E4%B9%89)',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452#sitelinks-wikipedia',\n", + " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n", + " 'https://www.mediawiki.org/wiki/Special:MyLanguage/How_to_contribute',\n", + " 'https://foundation.wikimedia.org/wiki/Cookie_statement',\n", + " 'https://wikimediafoundation.org/',\n", + " 'https://www.mediawiki.org/']" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "links2 = link.find_all('a', {'href':True})\n", + "linkl2 = []\n", + "dire = 'http'\n", + "\n", + "for i in links2:\n", + " if 'http' in i['href']:\n", + " linkl2.append(i['href'])\n", + "\n", + "linkl2" ] }, { @@ -230,21 +423,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'http://uscode.house.gov/download/download.shtml'" + "url5 = 'http://uscode.house.gov/download/download.shtml'\n", + "html5 = requests.get(url5).content\n", + "link5 = BeautifulSoup(html5)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 158, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['[Title 2 - The Congress',\n", + " ' Title 6 - Domestic Security',\n", + " ' Title 7 - Agriculture',\n", + " ' Title 15 - Commerce and Trade',\n", + " ' Title 16 - Conservation',\n", + " ' Title 19 - Customs Duties',\n", + " ' Title 21 - Food and Drugs',\n", + " ' Title 26 - Internal Revenue Code',\n", + " ' Title 34 - Crime Control and Law Enforcement',\n", + " \" Title 38 - Veterans' Benefits\",\n", + " ' Title 42 - The Public Health and Welfare',\n", + " ' Title 43 - Public Lands',\n", + " ' Title 48 - Territories and Insular Possessions',\n", + " ' Title 49 - Transportation',\n", + " ' Title 50 - War and National Defense]']" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "items = link5.find_all('div', {'class': 'usctitlechanged'})\n", + "items = str(items)\n", + "\n", + "titles = re.sub(r'<(.*?)>','', items)\n", + "\n", + "titles = titles.replace('\\n', '').replace(' ', '').replace(' ','')\n", + "titles_final = titles.split(',')\n", + "titles_final" ] }, { @@ -256,21 +483,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.fbi.gov/wanted/topten'" + "url6 = 'https://www.fbi.gov/wanted/topten'\n", + "html6 = requests.get(url6).content\n", + "link6 = BeautifulSoup(html6)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ - "#your code " + "wanted = link6.findAll('h3',{'class': 'title'})\n", + "wanted = str(wanted)" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "namesw = re.sub(r'<(.*?)>','', wanted)" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['[ALEJANDRO ROSALES CASTILLO',\n", + " ' YASER ABDEL SAID',\n", + " ' JASON DEREK BROWN',\n", + " ' RAFAEL CARO-QUINTERO',\n", + " ' ALEXIS FLORES',\n", + " ' EUGENE PALMER',\n", + " ' SANTIAGO VILLALBA MEDEROS',\n", + " ' ROBERT WILLIAM FISHER',\n", + " ' BHADRESHKUMAR CHETANBHAI PATEL',\n", + " ' ARNOLDO JIMENEZ]']" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "namesw = namesw.replace('\\n','')\n", + "namesw2 = namesw.split(',')\n", + "namesw2" ] }, { @@ -282,21 +552,213 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 159, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.emsc-csem.org/Earthquake/'" + "url7 = 'https://www.emsc-csem.org/Earthquake/'\n", + "html7 = requests.get(url7).content\n", + "link7 = BeautifulSoup(html7)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 199, "metadata": {}, "outputs": [], "source": [ - "#your code" + "#date = link7.findAll('td', {'class':'tabev6'})\n", + "#laton = link7.findAll('td', {'class':'tabev1'})\n", + "#reg = link7.findAll('td',{'class':'tb_region'})\n", + "\n", + "date = link7.select('.tabev6')\n", + "laton = link7.select('.tabev1')\n", + "reg = link7.select('.tb_region')" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\"['2019-07-16 21:42:41.7'\",\n", + " \" '2019-07-16 21:07:37.5'\",\n", + " \" '2019-07-16 21:06:21.5'\",\n", + " \" '2019-07-16 21:03:36.7'\",\n", + " \" '2019-07-16 20:52:18.6'\",\n", + " \" '2019-07-16 20:49:09.1'\",\n", + " \" '2019-07-16 20:41:11.0'\",\n", + " \" '2019-07-16 20:33:52.9'\",\n", + " \" '2019-07-16 20:31:33.3'\",\n", + " \" '2019-07-16 20:29:07.7'\",\n", + " \" '2019-07-16 20:24:16.9'\",\n", + " \" '2019-07-16 20:19:00.1'\",\n", + " \" '2019-07-16 20:17:51.6'\",\n", + " \" '2019-07-16 20:15:59.0'\",\n", + " \" '2019-07-16 20:11:01.5'\",\n", + " \" '2019-07-16 19:51:06.5'\",\n", + " \" '2019-07-16 19:42:25.9'\",\n", + " \" '2019-07-16 19:35:57.0'\",\n", + " \" '2019-07-16 19:23:50.1'\",\n", + " \" '2019-07-16 19:20:21.4'\",\n", + " \" '2019-07-16 19:16:53.8'\",\n", + " \" '2019-07-16 19:16:15.9'\",\n", + " \" '2019-07-16 19:11:48.9'\",\n", + " \" '2019-07-16 19:04:00.2'\",\n", + " \" '2019-07-16 19:01:48.0'\",\n", + " \" '2019-07-16 19:01:00.8'\",\n", + " \" '2019-07-16 18:53:32.0'\",\n", + " \" '2019-07-16 18:50:16.2'\",\n", + " \" '2019-07-16 18:47:48.9'\",\n", + " \" '2019-07-16 18:36:26.8'\",\n", + " \" '2019-07-16 18:22:31.9'\",\n", + " \" '2019-07-16 18:15:26.5'\",\n", + " \" '2019-07-16 18:10:01.0'\",\n", + " \" '2019-07-16 17:48:24.0'\",\n", + " \" '2019-07-16 17:42:29.9'\",\n", + " \" '2019-07-16 17:39:43.0'\",\n", + " \" '2019-07-16 17:31:56.0'\",\n", + " \" '2019-07-16 17:05:45.0'\",\n", + " \" '2019-07-16 17:05:08.0'\",\n", + " \" '2019-07-16 17:01:30.8'\",\n", + " \" '2019-07-16 16:45:56.5'\",\n", + " \" '2019-07-16 16:45:55.0'\",\n", + " \" '2019-07-16 16:43:40.9'\",\n", + " \" '2019-07-16 16:43:21.7'\",\n", + " \" '2019-07-16 16:36:41.5'\",\n", + " \" '2019-07-16 16:28:38.1'\",\n", + " \" '2019-07-16 16:27:59.0'\",\n", + " \" '2019-07-16 16:26:00.5'\",\n", + " \" '2019-07-16 16:21:05.1'\",\n", + " \" '2019-07-16 16:01:04.3']\"]" + ] + }, + "execution_count": 269, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date_l = [i.select('a') for i in date]\n", + "date2 = str(date_l)\n", + "\n", + "date3 = re.findall(r'\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}.\\d', date2)\n", + "date4 = str(date3)\n", + "date4 = date4.replace('\\xa0','').replace('\\\\xa0',' ')\n", + "date_final = date4.split(',')\n", + "date_final\n" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[69.27, 144.73, 40.25, 19.76, 35.88, 117.69, 37.81, 121.76, 36.07, 117.84, 36.07, 117.65, 16.85, 100.25, 40.09, 19.91, 23.45, 66.86, 35.86, 117.69, 37.82, 121.77, 33.10, 12.42, 35.55, 117.43, 35.68, 117.52, 37.82, 121.77, 6.26, 148.65, 35.61, 117.47, 35.62, 117.45, 36.19, 117.89, 38.39, 16.94, 38.45, 16.91, 61.27, 152.44, 36.03, 117.87, 35.96, 117.71, 39.56, 67.17, 35.68, 117.54, 0.68, 126.36, 43.62, 75.40, 35.59, 117.42, 35.74, 117.56, 35.65, 117.52, 28.47, 56.76, 34.41, 150.73, 9.93, 118.23, 35.67, 117.54, 0.54, 127.86, 35.67, 117.47, 36.20, 117.90, 15.40, 94.64, 36.10, 117.90, 0.68, 127.58, 18.99, 70.09, 37.23, 28.27, 36.03, 117.79, 35.96, 117.30, 35.92, 117.68, 18.51, 120.55, 30.57, 141.98, 28.45, 56.70, 62.22, 150.00]'" + ] + }, + "execution_count": 251, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "laton2 = str(laton)\n", + "laton3 = re.sub(r'<(.*?)>','', laton2)\n", + "laton4 = laton3.replace('\\xa0','')\n", + "laton4" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['[NORTHERN ALASKA',\n", + " ' ALBANIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' SAN FRANCISCO BAY AREA',\n", + " ' CALIF.',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' OFFSHORE GUERRERO',\n", + " ' MEXICO',\n", + " ' ALBANIA',\n", + " ' JUJUY',\n", + " ' ARGENTINA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' SAN FRANCISCO BAY AREA',\n", + " ' CALIF.',\n", + " ' MADEIRA ISLANDS',\n", + " ' PORTUGAL REGION',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SAN FRANCISCO BAY AREA',\n", + " ' CALIF.',\n", + " ' NEW BRITAIN REGION',\n", + " ' P.N.G.',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' SOUTHERN ITALY',\n", + " ' SOUTHERN ITALY',\n", + " ' SOUTHERN ALASKA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' EASTERN UZBEKISTAN',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' MOLUCCA SEA',\n", + " ' EASTERN KAZAKHSTAN',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN IRAN',\n", + " ' NEAR S.E. COAST OF AUSTRALIA',\n", + " ' SUMBAWA REGION',\n", + " ' INDONESIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' HALMAHERA',\n", + " ' INDONESIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' OFFSHORE OAXACA',\n", + " ' MEXICO',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' HALMAHERA',\n", + " ' INDONESIA',\n", + " ' DOMINICAN REPUBLIC',\n", + " ' WESTERN TURKEY',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' WESTERN AUSTRALIA',\n", + " ' IZU ISLANDS',\n", + " ' JAPAN REGION',\n", + " ' SOUTHERN IRAN',\n", + " ' CENTRAL ALASKA]']" + ] + }, + "execution_count": 271, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg1 = str(reg)\n", + "reg2 = re.sub(r'<(.*?)>','', reg1)\n", + "reg3 = reg2.replace('\\xa0','')\n", + "reg4 = reg3.split(',')\n", + "reg4" ] }, { @@ -308,21 +770,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 297, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://hackevents.co/hackathons'" + "url8 ='https://hackevents.co/hackathons'\n", + "html8 = requests.get(url8).content\n", + "link8 = BeautifulSoup(html8, \"html5lib\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 298, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTitle
07/29/2019Cairo, Egypt
15/21/2019Milano, Italy
29/6/2019Munich, Germany
31/31/2019Prague, Czech Republic
\n", + "
" + ], + "text/plain": [ + " Date Title\n", + "0 7/29/2019 Cairo, Egypt\n", + "1 5/21/2019 Milano, Italy\n", + "2 9/6/2019 Munich, Germany\n", + "3 1/31/2019 Prague, Czech Republic" + ] + }, + "execution_count": 298, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "hack1 = link8.find_all(\"p\", {\"class\":\"card-text\"})\n", + "hack1 = [element.text.replace(\"\\u2003\", \"\").split(\"\\n\") for element in hack1]\n", + "cols = [\"Date\", \"NA\", \"Title\"]\n", + "\n", + "df = pd.DataFrame(hack1, columns = cols)\n", + "del df[\"NA\"]\n", + "df" ] }, { @@ -348,7 +881,9 @@ "source": [ "# This is the url you will scrape in this exercise \n", "# You will need to add the account credentials to this url\n", - "url = 'https://twitter.com/'" + "url9 = 'https://twitter.com/'\n", + "html9 = requests.get(url9).content\n", + "link9 = BeautifulSoup(html9)" ] }, { @@ -383,7 +918,9 @@ "source": [ "# This is the url you will scrape in this exercise \n", "# You will need to add the account credentials to this url\n", - "url = 'https://twitter.com/'" + "url10 = 'https://twitter.com/'\n", + "html10 = requests.get(url10).content\n", + "link10 = BeautifulSoup(html10)" ] }, { @@ -404,21 +941,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 299, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.wikipedia.org/'" + "url11 = 'https://www.wikipedia.org/'\n", + "html11 = requests.get(url11).content\n", + "link11 = BeautifulSoup(html11)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 308, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[\"['Italiano'\",\n", + " ' لۊری شومالی',\n", + " ' كشميري',\n", + " \" 'Polski'\",\n", + " \" 'Русский'\",\n", + " ' 2123000+',\n", + " \" '中文'\",\n", + " ' ދިވެހިބަސް',\n", + " ' اردو',\n", + " ' مصرى',\n", + " ' לאדינו',\n", + " ' 1010000+',\n", + " \" 'Português'\",\n", + " ' العربية',\n", + " ' 5892000+',\n", + " ' 2323000+',\n", + " ' ייִדיש',\n", + " ' گیلکی',\n", + " ' 1346000+',\n", + " \" 'English'\",\n", + " ' 1541000+',\n", + " ' فارسی',\n", + " ' کوردیی ناوەندی',\n", + " ' كوردی',\n", + " ' עברית',\n", + " ' پنجابی (شاہ مکھی)',\n", + " ' ܐܬܘܪܝܐ',\n", + " ' مازِرونی',\n", + " ' 1532000+',\n", + " ' تۆرکجه',\n", + " ' پښتو',\n", + " \" '日本語'\",\n", + " ' 1159000+',\n", + " \" 'Français'\",\n", + " ' قازاقشا',\n", + " \" 'Español'\",\n", + " ' هَوُسَا',\n", + " ' 1556000+',\n", + " ' 1065000+',\n", + " ' ئۇيغۇرچه',\n", + " \" 'Deutsch'\",\n", + " ' سنڌي]']" + ] + }, + "execution_count": 308, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "lang1 = link11.select('a strong')\n", + "lang2 = link11.select('a bdi')\n", + "lang3 = lang2[0:10]\n", + "\n", + "langf1 = []\n", + "langf2 = []\n", + "\n", + "for element in lang1:\n", + " langf1.append(element.text)\n", + "for element in lang2:\n", + " langf2.append(element)\n", + "\n", + "Language = list(set(langf1 + langf2))\n", + "\n", + "#Falta quitar los numeros del str\n", + "langstr = str(Language)\n", + "langstr1 = re.sub(r'<(.*?)>','', langstr)\n", + "langstr1 = langstr1.replace('\\xa0', '')\n", + "langstr2 = langstr1.split(',')\n", + "langstr2" ] }, { @@ -435,7 +1045,9 @@ "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://data.gov.uk/'" + "url12 = 'https://data.gov.uk/'\n", + "html12 = requests.get(url12).content\n", + "link12 = BeautifulSoup(html12)" ] }, { @@ -456,21 +1068,62 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 310, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'" + "url13 = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'\n", + "html13 = requests.get(url13).content\n", + "link13 = BeautifulSoup(html13)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 314, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Chinese',\n", + " 'China',\n", + " 'Sino-Tibetan',\n", + " 'Arabic',\n", + " 'Saudi Arabia',\n", + " 'Afroasiatic',\n", + " 'Lahnda',\n", + " 'Pakistan',\n", + " 'Indo-European',\n", + " 'Malay',\n", + " 'Malaysia',\n", + " 'Austronesian',\n", + " 'Persian',\n", + " 'Iran',\n", + " 'Indo-European',\n", + " 'Pushto',\n", + " 'Pakistan',\n", + " 'Indo-European',\n", + " 'Oriya',\n", + " 'India']" + ] + }, + "execution_count": 314, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "PDLeng= link13.select('.wikitable td > i > a')\n", + "\n", + "leng_df = []\n", + "\n", + "for element in PDLeng:\n", + " leng_df.append(element.text)\n", + " \n", + "leng_df = leng_df[0:20]\n", + "leng_df" ] }, { diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 812f7a4..1ac1946 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -53,7 +53,7 @@ "# import urllib.request\n", "# from urllib.request import urlopen\n", "# import random\n", - "# import re\n", + "import re\n", "# import scrapy" ] }, @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -76,11 +76,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#your code" + "#your code\n", + "html = requests.get(url).content\n", + "TrenDev = BeautifulSoup(html)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "#TrenDev #lo comentamos porque salian demasiados datos" ] }, { @@ -134,11 +147,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "#your code" + "user = TrenDev.findAll('h1',{'class': 'h3'})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "userlist = str(user)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Frank Denis', 'Vladimir Mihailenco', 'Henrique Dias', 'Kyle Roach', 'Erik Rasmussen', 'Franck Nijhof', 'Robert Wagner', 'François Beaufort', 'Pascal Birchler', 'Francois Zaninotto', 'Olle Jonsson', 'Samuel Reed', 'Robert Mosolgo', 'William Durand', 'Felix Rieseberg', 'Felix Angelov', 'Artur Arseniev', 'Michael Skelton', 'Jack Lloyd', 'Federico Brigante', 'Raphaël Benitte', 'Richard Littauer', 'Steven Macenski']\n" + ] + } + ], + "source": [ + "user_names = re.findall(r'[A-Z]\\w*\\s\\w*\\b', userlist)\n", + "print(user_names)" ] }, { @@ -152,21 +192,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://github.com/trending/python?since=daily'" + "url2 = 'https://github.com/trending/python?since=daily'\n", + "html2 = requests.get(url2).content\n", + "TrenRepo = BeautifulSoup(html2)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "#your code" + "repo = TrenRepo.findAll('h1',{'class': 'h3'})" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[

\\n\\n\\nsherlock-project / sherlock\\n

,

\\n\\n\\nj3ssie / Osmedeus\\n

,

\\n\\n\\nytdl-org / youtube-dl\\n

,

\\n\\n\\nuber / ludwig\\n

,

\\n\\n\\ntkat0 / PyTorch_BlazeFace\\n

,

\\n\\n\\nKyubyong / dc_tts\\n

,

\\n\\n\\ngto76 / python-cheatsheet\\n

,

\\n\\n\\nbbfamily / abu\\n

,

\\n\\n\\nDrDonk / unlocker\\n

,

\\n\\n\\nBlackHC / tfpyth\\n

,

\\n\\n\\nsundowndev / PhoneInfoga\\n

,

\\n\\n\\npublic-apis / public-apis\\n

,

\\n\\n\\ntwintproject / twint\\n

,

\\n\\n\\nliuhuanyong / QASystemOnMedicalKG\\n

,

\\n\\n\\nvaexio / vaex\\n

,

\\n\\n\\nMrS0m30n3 / youtube-dl-gui\\n

,

\\n\\n\\nMozillaSecurity / grizzly\\n

,

\\n\\n\\nscikit-learn / scikit-learn\\n

,

\\n\\n\\nbitcoin / bips\\n

,

\\n\\n\\nxinshuoweng / AB3DMOT\\n

,

\\n\\n\\nsmartHomeHub / SmartIR\\n

,

\\n\\n\\nsfyc23 / EverydayWechat\\n

,

\\n\\n\\nhome-assistant / home-assistant\\n

,

\\n\\n\\nmsgi / nlp-journey\\n

,

\\n\\n\\ngunthercox / ChatterBot\\n

]'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "repostr = str(repo)\n", + "repostr" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['sherlock', 'Osmedeus', 'ludwig', 'PyTorch_BlazeFace', 'dc_tts', 'abu', 'unlocker', 'tfpyth', 'PhoneInfoga', 'twint', 'QASystemOnMedicalKG', 'vaex', 'grizzly', 'bips', 'AB3DMOT', 'SmartIR', 'EverydayWechat', 'ChatterBot']\n" + ] + } + ], + "source": [ + "reponame = re.findall(r'\\/(\\w*)\"', repostr)\n", + "\n", + "print(reponame)" ] }, { @@ -178,21 +260,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://en.wikipedia.org/wiki/Walt_Disney'" + "url3 = 'https://en.wikipedia.org/wiki/Walt_Disney'\n", + "html3 = requests.get(url3).content\n", + "image = BeautifulSoup(html3)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['http://upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n", + " 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "links = image.find_all('div', {'class' : 'thumbinner'})\n", + "linkimage = []\n", + "dire = 'http:'\n", + "\n", + "for i in links:\n", + " linkimage.append(dire + i.find('img')['src'])\n", + "linkimage " ] }, { @@ -204,21 +321,97 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://en.wikipedia.org/wiki/Python' " + "url4 ='https://en.wikipedia.org/wiki/Python'\n", + "html4 = requests.get(url4).content\n", + "link = BeautifulSoup(html4)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['https://en.wiktionary.org/wiki/Python',\n", + " 'https://en.wiktionary.org/wiki/python',\n", + " 'https://en.wikipedia.org/w/index.php?title=Python&oldid=905477736',\n", + " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452',\n", + " 'https://commons.wikimedia.org/wiki/Category:Python',\n", + " 'https://af.wikipedia.org/wiki/Python',\n", + " 'https://als.wikipedia.org/wiki/Python',\n", + " 'https://az.wikipedia.org/wiki/Python',\n", + " 'https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%A6%E0%A7%8D%E0%A6%AC%E0%A7%8D%E0%A6%AF%E0%A6%B0%E0%A7%8D%E0%A6%A5%E0%A6%A4%E0%A6%BE_%E0%A6%A8%E0%A6%BF%E0%A6%B0%E0%A6%B8%E0%A6%A8)',\n", + " 'https://be.wikipedia.org/wiki/Python',\n", + " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BF%D0%BE%D1%8F%D1%81%D0%BD%D0%B5%D0%BD%D0%B8%D0%B5)',\n", + " 'https://cs.wikipedia.org/wiki/Python_(rozcestn%C3%ADk)',\n", + " 'https://da.wikipedia.org/wiki/Python',\n", + " 'https://de.wikipedia.org/wiki/Python',\n", + " 'https://eo.wikipedia.org/wiki/Pitono_(apartigilo)',\n", + " 'https://eu.wikipedia.org/wiki/Python_(argipena)',\n", + " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86',\n", + " 'https://fr.wikipedia.org/wiki/Python',\n", + " 'https://ko.wikipedia.org/wiki/%ED%8C%8C%EC%9D%B4%EC%84%A0',\n", + " 'https://hr.wikipedia.org/wiki/Python_(razdvojba)',\n", + " 'https://io.wikipedia.org/wiki/Pitono',\n", + " 'https://id.wikipedia.org/wiki/Python',\n", + " 'https://ia.wikipedia.org/wiki/Python_(disambiguation)',\n", + " 'https://is.wikipedia.org/wiki/Python_(a%C3%B0greining)',\n", + " 'https://it.wikipedia.org/wiki/Python_(disambigua)',\n", + " 'https://he.wikipedia.org/wiki/%D7%A4%D7%99%D7%AA%D7%95%D7%9F',\n", + " 'https://ka.wikipedia.org/wiki/%E1%83%9E%E1%83%98%E1%83%97%E1%83%9D%E1%83%9C%E1%83%98_(%E1%83%9B%E1%83%A0%E1%83%90%E1%83%95%E1%83%90%E1%83%9A%E1%83%9B%E1%83%9C%E1%83%98%E1%83%A8%E1%83%95%E1%83%9C%E1%83%94%E1%83%9A%E1%83%9D%E1%83%95%E1%83%90%E1%83%9C%E1%83%98)',\n", + " 'https://kg.wikipedia.org/wiki/Mboma_(nyoka)',\n", + " 'https://la.wikipedia.org/wiki/Python_(discretiva)',\n", + " 'https://lb.wikipedia.org/wiki/Python',\n", + " 'https://hu.wikipedia.org/wiki/Python_(egy%C3%A9rtelm%C5%B1s%C3%ADt%C5%91_lap)',\n", + " 'https://mr.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%AF%E0%A4%A5%E0%A5%89%E0%A4%A8_(%E0%A4%86%E0%A4%9C%E0%A5%8D%E0%A4%9E%E0%A4%BE%E0%A4%B5%E0%A4%B2%E0%A5%80_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE)',\n", + " 'https://nl.wikipedia.org/wiki/Python',\n", + " 'https://ja.wikipedia.org/wiki/%E3%83%91%E3%82%A4%E3%82%BD%E3%83%B3',\n", + " 'https://no.wikipedia.org/wiki/Pyton',\n", + " 'https://pl.wikipedia.org/wiki/Pyton',\n", + " 'https://pt.wikipedia.org/wiki/Python_(desambigua%C3%A7%C3%A3o)',\n", + " 'https://ru.wikipedia.org/wiki/Python_(%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D1%8F)',\n", + " 'https://sd.wikipedia.org/wiki/%D8%A7%D8%B1%DA%99',\n", + " 'https://sk.wikipedia.org/wiki/Python',\n", + " 'https://sh.wikipedia.org/wiki/Python',\n", + " 'https://fi.wikipedia.org/wiki/Python',\n", + " 'https://sv.wikipedia.org/wiki/Pyton',\n", + " 'https://th.wikipedia.org/wiki/%E0%B9%84%E0%B8%9E%E0%B8%97%E0%B8%AD%E0%B8%99',\n", + " 'https://tr.wikipedia.org/wiki/Python',\n", + " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n", + " 'https://ur.wikipedia.org/wiki/%D9%BE%D8%A7%D8%A6%DB%8C%D8%AA%DA%BE%D9%88%D9%86',\n", + " 'https://vi.wikipedia.org/wiki/Python',\n", + " 'https://zh.wikipedia.org/wiki/Python_(%E6%B6%88%E6%AD%A7%E4%B9%89)',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q747452#sitelinks-wikipedia',\n", + " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n", + " 'https://www.mediawiki.org/wiki/Special:MyLanguage/How_to_contribute',\n", + " 'https://foundation.wikimedia.org/wiki/Cookie_statement',\n", + " 'https://wikimediafoundation.org/',\n", + " 'https://www.mediawiki.org/']" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "links2 = link.find_all('a', {'href':True})\n", + "linkl2 = []\n", + "dire = 'http'\n", + "\n", + "for i in links2:\n", + " if 'http' in i['href']:\n", + " linkl2.append(i['href'])\n", + "\n", + "linkl2" ] }, { @@ -230,21 +423,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'http://uscode.house.gov/download/download.shtml'" + "url5 = 'http://uscode.house.gov/download/download.shtml'\n", + "html5 = requests.get(url5).content\n", + "link5 = BeautifulSoup(html5)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 158, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['[Title 2 - The Congress',\n", + " ' Title 6 - Domestic Security',\n", + " ' Title 7 - Agriculture',\n", + " ' Title 15 - Commerce and Trade',\n", + " ' Title 16 - Conservation',\n", + " ' Title 19 - Customs Duties',\n", + " ' Title 21 - Food and Drugs',\n", + " ' Title 26 - Internal Revenue Code',\n", + " ' Title 34 - Crime Control and Law Enforcement',\n", + " \" Title 38 - Veterans' Benefits\",\n", + " ' Title 42 - The Public Health and Welfare',\n", + " ' Title 43 - Public Lands',\n", + " ' Title 48 - Territories and Insular Possessions',\n", + " ' Title 49 - Transportation',\n", + " ' Title 50 - War and National Defense]']" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "items = link5.find_all('div', {'class': 'usctitlechanged'})\n", + "items = str(items)\n", + "\n", + "titles = re.sub(r'<(.*?)>','', items)\n", + "\n", + "titles = titles.replace('\\n', '').replace(' ', '').replace(' ','')\n", + "titles_final = titles.split(',')\n", + "titles_final" ] }, { @@ -256,21 +483,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.fbi.gov/wanted/topten'" + "url6 = 'https://www.fbi.gov/wanted/topten'\n", + "html6 = requests.get(url6).content\n", + "link6 = BeautifulSoup(html6)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ - "#your code " + "wanted = link6.findAll('h3',{'class': 'title'})\n", + "wanted = str(wanted)" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "namesw = re.sub(r'<(.*?)>','', wanted)" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['[ALEJANDRO ROSALES CASTILLO',\n", + " ' YASER ABDEL SAID',\n", + " ' JASON DEREK BROWN',\n", + " ' RAFAEL CARO-QUINTERO',\n", + " ' ALEXIS FLORES',\n", + " ' EUGENE PALMER',\n", + " ' SANTIAGO VILLALBA MEDEROS',\n", + " ' ROBERT WILLIAM FISHER',\n", + " ' BHADRESHKUMAR CHETANBHAI PATEL',\n", + " ' ARNOLDO JIMENEZ]']" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "namesw = namesw.replace('\\n','')\n", + "namesw2 = namesw.split(',')\n", + "namesw2" ] }, { @@ -282,21 +552,213 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 159, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.emsc-csem.org/Earthquake/'" + "url7 = 'https://www.emsc-csem.org/Earthquake/'\n", + "html7 = requests.get(url7).content\n", + "link7 = BeautifulSoup(html7)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 199, "metadata": {}, "outputs": [], "source": [ - "#your code" + "#date = link7.findAll('td', {'class':'tabev6'})\n", + "#laton = link7.findAll('td', {'class':'tabev1'})\n", + "#reg = link7.findAll('td',{'class':'tb_region'})\n", + "\n", + "date = link7.select('.tabev6')\n", + "laton = link7.select('.tabev1')\n", + "reg = link7.select('.tb_region')" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\"['2019-07-16 21:42:41.7'\",\n", + " \" '2019-07-16 21:07:37.5'\",\n", + " \" '2019-07-16 21:06:21.5'\",\n", + " \" '2019-07-16 21:03:36.7'\",\n", + " \" '2019-07-16 20:52:18.6'\",\n", + " \" '2019-07-16 20:49:09.1'\",\n", + " \" '2019-07-16 20:41:11.0'\",\n", + " \" '2019-07-16 20:33:52.9'\",\n", + " \" '2019-07-16 20:31:33.3'\",\n", + " \" '2019-07-16 20:29:07.7'\",\n", + " \" '2019-07-16 20:24:16.9'\",\n", + " \" '2019-07-16 20:19:00.1'\",\n", + " \" '2019-07-16 20:17:51.6'\",\n", + " \" '2019-07-16 20:15:59.0'\",\n", + " \" '2019-07-16 20:11:01.5'\",\n", + " \" '2019-07-16 19:51:06.5'\",\n", + " \" '2019-07-16 19:42:25.9'\",\n", + " \" '2019-07-16 19:35:57.0'\",\n", + " \" '2019-07-16 19:23:50.1'\",\n", + " \" '2019-07-16 19:20:21.4'\",\n", + " \" '2019-07-16 19:16:53.8'\",\n", + " \" '2019-07-16 19:16:15.9'\",\n", + " \" '2019-07-16 19:11:48.9'\",\n", + " \" '2019-07-16 19:04:00.2'\",\n", + " \" '2019-07-16 19:01:48.0'\",\n", + " \" '2019-07-16 19:01:00.8'\",\n", + " \" '2019-07-16 18:53:32.0'\",\n", + " \" '2019-07-16 18:50:16.2'\",\n", + " \" '2019-07-16 18:47:48.9'\",\n", + " \" '2019-07-16 18:36:26.8'\",\n", + " \" '2019-07-16 18:22:31.9'\",\n", + " \" '2019-07-16 18:15:26.5'\",\n", + " \" '2019-07-16 18:10:01.0'\",\n", + " \" '2019-07-16 17:48:24.0'\",\n", + " \" '2019-07-16 17:42:29.9'\",\n", + " \" '2019-07-16 17:39:43.0'\",\n", + " \" '2019-07-16 17:31:56.0'\",\n", + " \" '2019-07-16 17:05:45.0'\",\n", + " \" '2019-07-16 17:05:08.0'\",\n", + " \" '2019-07-16 17:01:30.8'\",\n", + " \" '2019-07-16 16:45:56.5'\",\n", + " \" '2019-07-16 16:45:55.0'\",\n", + " \" '2019-07-16 16:43:40.9'\",\n", + " \" '2019-07-16 16:43:21.7'\",\n", + " \" '2019-07-16 16:36:41.5'\",\n", + " \" '2019-07-16 16:28:38.1'\",\n", + " \" '2019-07-16 16:27:59.0'\",\n", + " \" '2019-07-16 16:26:00.5'\",\n", + " \" '2019-07-16 16:21:05.1'\",\n", + " \" '2019-07-16 16:01:04.3']\"]" + ] + }, + "execution_count": 269, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date_l = [i.select('a') for i in date]\n", + "date2 = str(date_l)\n", + "\n", + "date3 = re.findall(r'\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}.\\d', date2)\n", + "date4 = str(date3)\n", + "date4 = date4.replace('\\xa0','').replace('\\\\xa0',' ')\n", + "date_final = date4.split(',')\n", + "date_final\n" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'[69.27, 144.73, 40.25, 19.76, 35.88, 117.69, 37.81, 121.76, 36.07, 117.84, 36.07, 117.65, 16.85, 100.25, 40.09, 19.91, 23.45, 66.86, 35.86, 117.69, 37.82, 121.77, 33.10, 12.42, 35.55, 117.43, 35.68, 117.52, 37.82, 121.77, 6.26, 148.65, 35.61, 117.47, 35.62, 117.45, 36.19, 117.89, 38.39, 16.94, 38.45, 16.91, 61.27, 152.44, 36.03, 117.87, 35.96, 117.71, 39.56, 67.17, 35.68, 117.54, 0.68, 126.36, 43.62, 75.40, 35.59, 117.42, 35.74, 117.56, 35.65, 117.52, 28.47, 56.76, 34.41, 150.73, 9.93, 118.23, 35.67, 117.54, 0.54, 127.86, 35.67, 117.47, 36.20, 117.90, 15.40, 94.64, 36.10, 117.90, 0.68, 127.58, 18.99, 70.09, 37.23, 28.27, 36.03, 117.79, 35.96, 117.30, 35.92, 117.68, 18.51, 120.55, 30.57, 141.98, 28.45, 56.70, 62.22, 150.00]'" + ] + }, + "execution_count": 251, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "laton2 = str(laton)\n", + "laton3 = re.sub(r'<(.*?)>','', laton2)\n", + "laton4 = laton3.replace('\\xa0','')\n", + "laton4" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['[NORTHERN ALASKA',\n", + " ' ALBANIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' SAN FRANCISCO BAY AREA',\n", + " ' CALIF.',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' OFFSHORE GUERRERO',\n", + " ' MEXICO',\n", + " ' ALBANIA',\n", + " ' JUJUY',\n", + " ' ARGENTINA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' SAN FRANCISCO BAY AREA',\n", + " ' CALIF.',\n", + " ' MADEIRA ISLANDS',\n", + " ' PORTUGAL REGION',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SAN FRANCISCO BAY AREA',\n", + " ' CALIF.',\n", + " ' NEW BRITAIN REGION',\n", + " ' P.N.G.',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' SOUTHERN ITALY',\n", + " ' SOUTHERN ITALY',\n", + " ' SOUTHERN ALASKA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' EASTERN UZBEKISTAN',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' MOLUCCA SEA',\n", + " ' EASTERN KAZAKHSTAN',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' SOUTHERN IRAN',\n", + " ' NEAR S.E. COAST OF AUSTRALIA',\n", + " ' SUMBAWA REGION',\n", + " ' INDONESIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' HALMAHERA',\n", + " ' INDONESIA',\n", + " ' SOUTHERN CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' OFFSHORE OAXACA',\n", + " ' MEXICO',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' HALMAHERA',\n", + " ' INDONESIA',\n", + " ' DOMINICAN REPUBLIC',\n", + " ' WESTERN TURKEY',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' CENTRAL CALIFORNIA',\n", + " ' WESTERN AUSTRALIA',\n", + " ' IZU ISLANDS',\n", + " ' JAPAN REGION',\n", + " ' SOUTHERN IRAN',\n", + " ' CENTRAL ALASKA]']" + ] + }, + "execution_count": 271, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg1 = str(reg)\n", + "reg2 = re.sub(r'<(.*?)>','', reg1)\n", + "reg3 = reg2.replace('\\xa0','')\n", + "reg4 = reg3.split(',')\n", + "reg4" ] }, { @@ -308,21 +770,92 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 297, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://hackevents.co/hackathons'" + "url8 ='https://hackevents.co/hackathons'\n", + "html8 = requests.get(url8).content\n", + "link8 = BeautifulSoup(html8, \"html5lib\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 298, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateTitle
07/29/2019Cairo, Egypt
15/21/2019Milano, Italy
29/6/2019Munich, Germany
31/31/2019Prague, Czech Republic
\n", + "
" + ], + "text/plain": [ + " Date Title\n", + "0 7/29/2019 Cairo, Egypt\n", + "1 5/21/2019 Milano, Italy\n", + "2 9/6/2019 Munich, Germany\n", + "3 1/31/2019 Prague, Czech Republic" + ] + }, + "execution_count": 298, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "hack1 = link8.find_all(\"p\", {\"class\":\"card-text\"})\n", + "hack1 = [element.text.replace(\"\\u2003\", \"\").split(\"\\n\") for element in hack1]\n", + "cols = [\"Date\", \"NA\", \"Title\"]\n", + "\n", + "df = pd.DataFrame(hack1, columns = cols)\n", + "del df[\"NA\"]\n", + "df" ] }, { @@ -348,7 +881,9 @@ "source": [ "# This is the url you will scrape in this exercise \n", "# You will need to add the account credentials to this url\n", - "url = 'https://twitter.com/'" + "url9 = 'https://twitter.com/'\n", + "html9 = requests.get(url9).content\n", + "link9 = BeautifulSoup(html9)" ] }, { @@ -383,7 +918,9 @@ "source": [ "# This is the url you will scrape in this exercise \n", "# You will need to add the account credentials to this url\n", - "url = 'https://twitter.com/'" + "url10 = 'https://twitter.com/'\n", + "html10 = requests.get(url10).content\n", + "link10 = BeautifulSoup(html10)" ] }, { @@ -404,21 +941,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 299, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.wikipedia.org/'" + "url11 = 'https://www.wikipedia.org/'\n", + "html11 = requests.get(url11).content\n", + "link11 = BeautifulSoup(html11)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 308, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[\"['Italiano'\",\n", + " ' لۊری شومالی',\n", + " ' كشميري',\n", + " \" 'Polski'\",\n", + " \" 'Русский'\",\n", + " ' 2123000+',\n", + " \" '中文'\",\n", + " ' ދިވެހިބަސް',\n", + " ' اردو',\n", + " ' مصرى',\n", + " ' לאדינו',\n", + " ' 1010000+',\n", + " \" 'Português'\",\n", + " ' العربية',\n", + " ' 5892000+',\n", + " ' 2323000+',\n", + " ' ייִדיש',\n", + " ' گیلکی',\n", + " ' 1346000+',\n", + " \" 'English'\",\n", + " ' 1541000+',\n", + " ' فارسی',\n", + " ' کوردیی ناوەندی',\n", + " ' كوردی',\n", + " ' עברית',\n", + " ' پنجابی (شاہ مکھی)',\n", + " ' ܐܬܘܪܝܐ',\n", + " ' مازِرونی',\n", + " ' 1532000+',\n", + " ' تۆرکجه',\n", + " ' پښتو',\n", + " \" '日本語'\",\n", + " ' 1159000+',\n", + " \" 'Français'\",\n", + " ' قازاقشا',\n", + " \" 'Español'\",\n", + " ' هَوُسَا',\n", + " ' 1556000+',\n", + " ' 1065000+',\n", + " ' ئۇيغۇرچه',\n", + " \" 'Deutsch'\",\n", + " ' سنڌي]']" + ] + }, + "execution_count": 308, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "lang1 = link11.select('a strong')\n", + "lang2 = link11.select('a bdi')\n", + "lang3 = lang2[0:10]\n", + "\n", + "langf1 = []\n", + "langf2 = []\n", + "\n", + "for element in lang1:\n", + " langf1.append(element.text)\n", + "for element in lang2:\n", + " langf2.append(element)\n", + "\n", + "Language = list(set(langf1 + langf2))\n", + "\n", + "#Falta quitar los numeros del str\n", + "langstr = str(Language)\n", + "langstr1 = re.sub(r'<(.*?)>','', langstr)\n", + "langstr1 = langstr1.replace('\\xa0', '')\n", + "langstr2 = langstr1.split(',')\n", + "langstr2" ] }, { @@ -435,7 +1045,9 @@ "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://data.gov.uk/'" + "url12 = 'https://data.gov.uk/'\n", + "html12 = requests.get(url12).content\n", + "link12 = BeautifulSoup(html12)" ] }, { @@ -456,21 +1068,62 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 310, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'" + "url13 = 'https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers'\n", + "html13 = requests.get(url13).content\n", + "link13 = BeautifulSoup(html13)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 314, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Chinese',\n", + " 'China',\n", + " 'Sino-Tibetan',\n", + " 'Arabic',\n", + " 'Saudi Arabia',\n", + " 'Afroasiatic',\n", + " 'Lahnda',\n", + " 'Pakistan',\n", + " 'Indo-European',\n", + " 'Malay',\n", + " 'Malaysia',\n", + " 'Austronesian',\n", + " 'Persian',\n", + " 'Iran',\n", + " 'Indo-European',\n", + " 'Pushto',\n", + " 'Pakistan',\n", + " 'Indo-European',\n", + " 'Oriya',\n", + " 'India']" + ] + }, + "execution_count": 314, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "PDLeng= link13.select('.wikitable td > i > a')\n", + "\n", + "leng_df = []\n", + "\n", + "for element in PDLeng:\n", + " leng_df.append(element.text)\n", + " \n", + "leng_df = leng_df[0:20]\n", + "leng_df" ] }, { From 7891816d863b52e8a81eab79e55cd0901f055ea5 Mon Sep 17 00:00:00 2001 From: Alejandro Arroyo Yamin Date: Wed, 17 Jul 2019 08:15:18 -0500 Subject: [PATCH 2/2] =?UTF-8?q?Terminanos=20todo=20excepto=20Twitter=20y?= =?UTF-8?q?=20bonus.=20La=20autenticaci=C3=B3n=20en=20Twitter=20no=20funci?= =?UTF-8?q?on=C3=B3.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../.ipynb_checkpoints/main-checkpoint.ipynb | 936 +++++++++++++----- your-code/main.ipynb | 936 +++++++++++++----- 2 files changed, 1382 insertions(+), 490 deletions(-) diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb index 1ac1946..4138883 100644 --- a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": { "scrolled": false }, @@ -147,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -165,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -204,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -213,16 +213,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'[

\\n\\n\\nsherlock-project / sherlock\\n

,

\\n\\n\\nj3ssie / Osmedeus\\n

,

\\n\\n\\nytdl-org / youtube-dl\\n

,

\\n\\n\\nuber / ludwig\\n

,

\\n\\n\\ntkat0 / PyTorch_BlazeFace\\n

,

\\n\\n\\nKyubyong / dc_tts\\n

,

\\n\\n\\ngto76 / python-cheatsheet\\n

,

\\n\\n\\nbbfamily / abu\\n

,

\\n\\n\\nDrDonk / unlocker\\n

,

\\n\\n\\nBlackHC / tfpyth\\n

,

\\n\\n\\nsundowndev / PhoneInfoga\\n

,

\\n\\n\\npublic-apis / public-apis\\n

,

\\n\\n\\ntwintproject / twint\\n

,

\\n\\n\\nliuhuanyong / QASystemOnMedicalKG\\n

,

\\n\\n\\nvaexio / vaex\\n

,

\\n\\n\\nMrS0m30n3 / youtube-dl-gui\\n

,

\\n\\n\\nMozillaSecurity / grizzly\\n

,

\\n\\n\\nscikit-learn / scikit-learn\\n

,

\\n\\n\\nbitcoin / bips\\n

,

\\n\\n\\nxinshuoweng / AB3DMOT\\n

,

\\n\\n\\nsmartHomeHub / SmartIR\\n

,

\\n\\n\\nsfyc23 / EverydayWechat\\n

,

\\n\\n\\nhome-assistant / home-assistant\\n

,

\\n\\n\\nmsgi / nlp-journey\\n

,

\\n\\n\\ngunthercox / ChatterBot\\n

]'" + "'[

\\n\\n\\ngto76 / python-cheatsheet\\n

,

\\n\\n\\nj3ssie / Osmedeus\\n

,

\\n\\n\\ntangzixiang0304 / Shielded_detector\\n

,

\\n\\n\\nuber / ludwig\\n

,

\\n\\n\\nxinshuoweng / AB3DMOT\\n

,

\\n\\n\\nNVlabs / stylegan\\n

,

\\n\\n\\ndagster-io / dagster\\n

,

\\n\\n\\ntensorflow / models\\n

,

\\n\\n\\neragonruan / text-detection-ctpn\\n

,

\\n\\n\\nsherlock-project / sherlock\\n

,

\\n\\n\\ndeepfakes / faceswap\\n

,

\\n\\n\\nnbei / Deep-Flow-Guided-Video-Inpainting\\n

,

\\n\\n\\niovisor / bcc\\n

,

\\n\\n\\nRoibal / Cryptocurrency-Trading-Bots-Python-Beginner-Advance\\n

,

\\n\\n\\nNVIDIA / DeepLearningExamples\\n

,

\\n\\n\\nBlackHC / tfpyth\\n

,

\\n\\n\\nclovaai / deep-text-recognition-benchmark\\n

,

\\n\\n\\ntkat0 / PyTorch_BlazeFace\\n

,

\\n\\n\\nOpenMined / PySyft\\n

,

\\n\\n\\nCoreyMSchafer / code_snippets\\n

,

\\n\\n\\npublic-apis / public-apis\\n

,

\\n\\n\\nd2l-ai / d2l-zh\\n

,

\\n\\n\\napache / airflow\\n

,

\\n\\n\\nbeecost / bee-university\\n

,

\\n\\n\\nsundowndev / PhoneInfoga\\n

]'" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -234,14 +234,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['sherlock', 'Osmedeus', 'ludwig', 'PyTorch_BlazeFace', 'dc_tts', 'abu', 'unlocker', 'tfpyth', 'PhoneInfoga', 'twint', 'QASystemOnMedicalKG', 'vaex', 'grizzly', 'bips', 'AB3DMOT', 'SmartIR', 'EverydayWechat', 'ChatterBot']\n" + "['Osmedeus', 'Shielded_detector', 'ludwig', 'AB3DMOT', 'stylegan', 'dagster', 'models', 'sherlock', 'faceswap', 'bcc', 'DeepLearningExamples', 'tfpyth', 'PyTorch_BlazeFace', 'PySyft', 'code_snippets', 'airflow', 'PhoneInfoga']\n" ] } ], @@ -260,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -272,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": { "scrolled": true }, @@ -297,7 +297,7 @@ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg']" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -333,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -397,7 +397,7 @@ " 'https://www.mediawiki.org/']" ] }, - "execution_count": 56, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -423,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -435,13 +435,13 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['[Title 2 - The Congress',\n", + "['Title 2 - The Congress',\n", " ' Title 6 - Domestic Security',\n", " ' Title 7 - Agriculture',\n", " ' Title 15 - Commerce and Trade',\n", @@ -455,10 +455,10 @@ " ' Title 43 - Public Lands',\n", " ' Title 48 - Territories and Insular Possessions',\n", " ' Title 49 - Transportation',\n", - " ' Title 50 - War and National Defense]']" + " ' Title 50 - War and National Defense']" ] }, - "execution_count": 158, + "execution_count": 86, "metadata": {}, "output_type": "execute_result" } @@ -471,6 +471,7 @@ "\n", "titles = titles.replace('\\n', '').replace(' ', '').replace(' ','')\n", "titles_final = titles.split(',')\n", + "titles_final = [i.replace('[','').replace(']','') for i in titles_final]\n", "titles_final" ] }, @@ -483,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -495,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -505,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -514,31 +515,31 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['[ALEJANDRO ROSALES CASTILLO',\n", - " ' YASER ABDEL SAID',\n", - " ' JASON DEREK BROWN',\n", - " ' RAFAEL CARO-QUINTERO',\n", - " ' ALEXIS FLORES',\n", - " ' EUGENE PALMER',\n", - " ' SANTIAGO VILLALBA MEDEROS',\n", - " ' ROBERT WILLIAM FISHER',\n", - " ' BHADRESHKUMAR CHETANBHAI PATEL',\n", - " ' ARNOLDO JIMENEZ]']" + "['Alejandro Rosales Castillo',\n", + " ' Yaser Abdel Said',\n", + " ' Jason Derek Brown',\n", + " ' Rafael Caro-Quintero',\n", + " ' Alexis Flores',\n", + " ' Eugene Palmer',\n", + " ' Santiago Villalba Mederos',\n", + " ' Robert William Fisher',\n", + " ' Bhadreshkumar Chetanbhai Patel',\n", + " ' Arnoldo Jimenez']" ] }, - "execution_count": 157, + "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "namesw = namesw.replace('\\n','')\n", + "namesw = namesw.replace('\\n','').replace('[','').replace(']','').title()\n", "namesw2 = namesw.split(',')\n", "namesw2" ] @@ -552,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -564,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 199, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -579,69 +580,9 @@ }, { "cell_type": "code", - "execution_count": 269, + "execution_count": 67, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\"['2019-07-16 21:42:41.7'\",\n", - " \" '2019-07-16 21:07:37.5'\",\n", - " \" '2019-07-16 21:06:21.5'\",\n", - " \" '2019-07-16 21:03:36.7'\",\n", - " \" '2019-07-16 20:52:18.6'\",\n", - " \" '2019-07-16 20:49:09.1'\",\n", - " \" '2019-07-16 20:41:11.0'\",\n", - " \" '2019-07-16 20:33:52.9'\",\n", - " \" '2019-07-16 20:31:33.3'\",\n", - " \" '2019-07-16 20:29:07.7'\",\n", - " \" '2019-07-16 20:24:16.9'\",\n", - " \" '2019-07-16 20:19:00.1'\",\n", - " \" '2019-07-16 20:17:51.6'\",\n", - " \" '2019-07-16 20:15:59.0'\",\n", - " \" '2019-07-16 20:11:01.5'\",\n", - " \" '2019-07-16 19:51:06.5'\",\n", - " \" '2019-07-16 19:42:25.9'\",\n", - " \" '2019-07-16 19:35:57.0'\",\n", - " \" '2019-07-16 19:23:50.1'\",\n", - " \" '2019-07-16 19:20:21.4'\",\n", - " \" '2019-07-16 19:16:53.8'\",\n", - " \" '2019-07-16 19:16:15.9'\",\n", - " \" '2019-07-16 19:11:48.9'\",\n", - " \" '2019-07-16 19:04:00.2'\",\n", - " \" '2019-07-16 19:01:48.0'\",\n", - " \" '2019-07-16 19:01:00.8'\",\n", - " \" '2019-07-16 18:53:32.0'\",\n", - " \" '2019-07-16 18:50:16.2'\",\n", - " \" '2019-07-16 18:47:48.9'\",\n", - " \" '2019-07-16 18:36:26.8'\",\n", - " \" '2019-07-16 18:22:31.9'\",\n", - " \" '2019-07-16 18:15:26.5'\",\n", - " \" '2019-07-16 18:10:01.0'\",\n", - " \" '2019-07-16 17:48:24.0'\",\n", - " \" '2019-07-16 17:42:29.9'\",\n", - " \" '2019-07-16 17:39:43.0'\",\n", - " \" '2019-07-16 17:31:56.0'\",\n", - " \" '2019-07-16 17:05:45.0'\",\n", - " \" '2019-07-16 17:05:08.0'\",\n", - " \" '2019-07-16 17:01:30.8'\",\n", - " \" '2019-07-16 16:45:56.5'\",\n", - " \" '2019-07-16 16:45:55.0'\",\n", - " \" '2019-07-16 16:43:40.9'\",\n", - " \" '2019-07-16 16:43:21.7'\",\n", - " \" '2019-07-16 16:36:41.5'\",\n", - " \" '2019-07-16 16:28:38.1'\",\n", - " \" '2019-07-16 16:27:59.0'\",\n", - " \" '2019-07-16 16:26:00.5'\",\n", - " \" '2019-07-16 16:21:05.1'\",\n", - " \" '2019-07-16 16:01:04.3']\"]" - ] - }, - "execution_count": 269, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "date_l = [i.select('a') for i in date]\n", "date2 = str(date_l)\n", @@ -650,115 +591,591 @@ "date4 = str(date3)\n", "date4 = date4.replace('\\xa0','').replace('\\\\xa0',' ')\n", "date_final = date4.split(',')\n", - "date_final\n" + "date_final = [i.replace('[\\'','').replace(' \\'','').replace('\\']','').replace('\\'','') for i in date_final]" ] }, { "cell_type": "code", - "execution_count": 251, + "execution_count": 73, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'[69.27, 144.73, 40.25, 19.76, 35.88, 117.69, 37.81, 121.76, 36.07, 117.84, 36.07, 117.65, 16.85, 100.25, 40.09, 19.91, 23.45, 66.86, 35.86, 117.69, 37.82, 121.77, 33.10, 12.42, 35.55, 117.43, 35.68, 117.52, 37.82, 121.77, 6.26, 148.65, 35.61, 117.47, 35.62, 117.45, 36.19, 117.89, 38.39, 16.94, 38.45, 16.91, 61.27, 152.44, 36.03, 117.87, 35.96, 117.71, 39.56, 67.17, 35.68, 117.54, 0.68, 126.36, 43.62, 75.40, 35.59, 117.42, 35.74, 117.56, 35.65, 117.52, 28.47, 56.76, 34.41, 150.73, 9.93, 118.23, 35.67, 117.54, 0.54, 127.86, 35.67, 117.47, 36.20, 117.90, 15.40, 94.64, 36.10, 117.90, 0.68, 127.58, 18.99, 70.09, 37.23, 28.27, 36.03, 117.79, 35.96, 117.30, 35.92, 117.68, 18.51, 120.55, 30.57, 141.98, 28.45, 56.70, 62.22, 150.00]'" - ] - }, - "execution_count": 251, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "laton2 = str(laton)\n", "laton3 = re.sub(r'<(.*?)>','', laton2)\n", "laton4 = laton3.replace('\\xa0','')\n", - "laton4" + "laton_final = laton4.split(',')\n", + "laton_final = [i.replace(' ','').replace('[','').replace(']','') for i in laton_final]" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "latitude = [i for i in laton_final if laton_final.index(i) % 2 == 0]\n", + "longitude = [i for i in laton_final if laton_final.index(i) % 2 != 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "reg1 = str(reg)\n", + "reg2 = re.sub(r'<(.*?)>','', reg1)\n", + "reg3 = reg2.replace('\\xa0','')\n", + "reg_final = reg3.split(',')\n", + "reg_final = [i.replace('[','').replace(']','') for i in reg_final]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "date_df = pd.DataFrame(date_final)\n", + "latitude_df = pd.DataFrame(latitude)\n", + "longitude_df = pd.DataFrame(longitude)\n", + "reg_df = pd.DataFrame(reg_final)" ] }, { "cell_type": "code", - "execution_count": 271, + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "columns = ['Date and time','Latitude and longitude', 'Region']\n", + "\n", + "earthquake_df = pd.concat([date_df, latitude_df, longitude_df, reg_df], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Date and timeLatitudeLongitudeRegion
02019-07-17 12:11:09.035.67117.42SOUTHERN CALIFORNIA
12019-07-17 12:10:41.046.122.11FRANCE
22019-07-17 11:44:28.136.12117.84CENTRAL CALIFORNIA
32019-07-17 11:32:47.636.11117.88CENTRAL CALIFORNIA
42019-07-17 11:29:31.036.11117.89CENTRAL CALIFORNIA
52019-07-17 11:18:39.219.40155.28ISLAND OF HAWAII
62019-07-17 11:12:28.535.93117.68HAWAII
72019-07-17 10:48:30.236.14117.95CENTRAL CALIFORNIA
82019-07-17 10:44:10.018.19120.25CENTRAL CALIFORNIA
92019-07-17 10:41:30.366.32157.08WESTERN AUSTRALIA
102019-07-17 10:20:09.918.44120.24NORTHERN ALASKA
112019-07-17 10:19:17.240.4241.75WESTERN AUSTRALIA
122019-07-17 10:09:19.515.23173.40EASTERN TURKEY
132019-07-17 10:00:55.935.87117.74TONGA
142019-07-17 09:57:38.039.7241.37CENTRAL CALIFORNIA
152019-07-17 09:53:41.919.14155.53EASTERN TURKEY
162019-07-17 09:46:06.635.65117.45ISLAND OF HAWAII
172019-07-17 09:44:17.93.3083.06HAWAII
182019-07-17 09:43:40.218.0066.86SOUTHERN CALIFORNIA
192019-07-17 09:41:09.039.6338.60OFF COAST OF CENTRAL AMERICA
202019-07-17 09:37:56.819.16155.44PUERTO RICO
212019-07-17 09:27:04.235.59117.36EASTERN TURKEY
222019-07-17 09:21:21.018.6569.48ISLAND OF HAWAII
232019-07-17 09:16:53.636.20117.89HAWAII
242019-07-17 09:13:30.236.61112.39SOUTHERN CALIFORNIA
252019-07-17 09:00:20.539.01141.68DOMINICAN REPUBLIC
262019-07-17 08:54:30.09.5584.17CENTRAL CALIFORNIA
272019-07-17 08:16:20.337.1755.76ARIZONA
282019-07-17 08:15:20.015.5395.08EASTERN HONSHU
292019-07-17 08:09:01.139.8898.33JAPAN
302019-07-17 08:02:48.025.95112.90COSTA RICA
312019-07-17 07:41:21.436.52121.11NORTHERN IRAN
322019-07-17 07:34:44.638.8220.58OFFSHORE OAXACA
332019-07-17 07:22:09.135.67117.52MEXICO
342019-07-17 07:16:44.016.94119.68KANSAS
352019-07-17 07:08:57.643.37127.10WESTERN AUSTRALIA
362019-07-17 07:02:38.936.07117.84CENTRAL CALIFORNIA
372019-07-17 06:48:54.035.65117.51GREECE
382019-07-17 06:44:26.635.86117.68SOUTHERN CALIFORNIA
392019-07-17 06:37:55.119.4965.40NORTHWEST OF AUSTRALIA
402019-07-17 06:29:38.934.7624.58OFF COAST OF OREGON
412019-07-17 06:28:33.737.4026.97CENTRAL CALIFORNIA
422019-07-17 06:25:50.115.8194.86SOUTHERN CALIFORNIA
432019-07-17 06:24:40.333.08115.78CENTRAL CALIFORNIA
442019-07-17 06:10:46.417.08176.92PUERTO RICO REGION
452019-07-17 06:08:17.535.82117.64CRETE
462019-07-17 06:04:20.735.54117.44GREECE
472019-07-17 05:58:08.73.76151.35DODECANESE ISLANDS
482019-07-17 05:49:52.335.86117.69GREECE
492019-07-17 05:24:23.736.3289.50OFFSHORE OAXACA
50NaNNaNNaNMEXICO
51NaNNaNNaNSOUTHERN CALIFORNIA
52NaNNaNNaNFIJI REGION
53NaNNaNNaNCENTRAL CALIFORNIA
54NaNNaNNaNSOUTHERN CALIFORNIA
55NaNNaNNaNNEW IRELAND REGION
56NaNNaNNaNP.N.G.
57NaNNaNNaNCENTRAL CALIFORNIA
58NaNNaNNaNTENNESSEE
\n", + "
" + ], "text/plain": [ - "['[NORTHERN ALASKA',\n", - " ' ALBANIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' SAN FRANCISCO BAY AREA',\n", - " ' CALIF.',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' OFFSHORE GUERRERO',\n", - " ' MEXICO',\n", - " ' ALBANIA',\n", - " ' JUJUY',\n", - " ' ARGENTINA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' SAN FRANCISCO BAY AREA',\n", - " ' CALIF.',\n", - " ' MADEIRA ISLANDS',\n", - " ' PORTUGAL REGION',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SAN FRANCISCO BAY AREA',\n", - " ' CALIF.',\n", - " ' NEW BRITAIN REGION',\n", - " ' P.N.G.',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' SOUTHERN ITALY',\n", - " ' SOUTHERN ITALY',\n", - " ' SOUTHERN ALASKA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' EASTERN UZBEKISTAN',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' MOLUCCA SEA',\n", - " ' EASTERN KAZAKHSTAN',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN IRAN',\n", - " ' NEAR S.E. COAST OF AUSTRALIA',\n", - " ' SUMBAWA REGION',\n", - " ' INDONESIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' HALMAHERA',\n", - " ' INDONESIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' OFFSHORE OAXACA',\n", - " ' MEXICO',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' HALMAHERA',\n", - " ' INDONESIA',\n", - " ' DOMINICAN REPUBLIC',\n", - " ' WESTERN TURKEY',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' WESTERN AUSTRALIA',\n", - " ' IZU ISLANDS',\n", - " ' JAPAN REGION',\n", - " ' SOUTHERN IRAN',\n", - " ' CENTRAL ALASKA]']" + " Date and time Latitude Longitude Region\n", + "0 2019-07-17 12:11:09.0 35.67 117.42 SOUTHERN CALIFORNIA\n", + "1 2019-07-17 12:10:41.0 46.12 2.11 FRANCE\n", + "2 2019-07-17 11:44:28.1 36.12 117.84 CENTRAL CALIFORNIA\n", + "3 2019-07-17 11:32:47.6 36.11 117.88 CENTRAL CALIFORNIA\n", + "4 2019-07-17 11:29:31.0 36.11 117.89 CENTRAL CALIFORNIA\n", + "5 2019-07-17 11:18:39.2 19.40 155.28 ISLAND OF HAWAII\n", + "6 2019-07-17 11:12:28.5 35.93 117.68 HAWAII\n", + "7 2019-07-17 10:48:30.2 36.14 117.95 CENTRAL CALIFORNIA\n", + "8 2019-07-17 10:44:10.0 18.19 120.25 CENTRAL CALIFORNIA\n", + "9 2019-07-17 10:41:30.3 66.32 157.08 WESTERN AUSTRALIA\n", + "10 2019-07-17 10:20:09.9 18.44 120.24 NORTHERN ALASKA\n", + "11 2019-07-17 10:19:17.2 40.42 41.75 WESTERN AUSTRALIA\n", + "12 2019-07-17 10:09:19.5 15.23 173.40 EASTERN TURKEY\n", + "13 2019-07-17 10:00:55.9 35.87 117.74 TONGA\n", + "14 2019-07-17 09:57:38.0 39.72 41.37 CENTRAL CALIFORNIA\n", + "15 2019-07-17 09:53:41.9 19.14 155.53 EASTERN TURKEY\n", + "16 2019-07-17 09:46:06.6 35.65 117.45 ISLAND OF HAWAII\n", + "17 2019-07-17 09:44:17.9 3.30 83.06 HAWAII\n", + "18 2019-07-17 09:43:40.2 18.00 66.86 SOUTHERN CALIFORNIA\n", + "19 2019-07-17 09:41:09.0 39.63 38.60 OFF COAST OF CENTRAL AMERICA\n", + "20 2019-07-17 09:37:56.8 19.16 155.44 PUERTO RICO\n", + "21 2019-07-17 09:27:04.2 35.59 117.36 EASTERN TURKEY\n", + "22 2019-07-17 09:21:21.0 18.65 69.48 ISLAND OF HAWAII\n", + "23 2019-07-17 09:16:53.6 36.20 117.89 HAWAII\n", + "24 2019-07-17 09:13:30.2 36.61 112.39 SOUTHERN CALIFORNIA\n", + "25 2019-07-17 09:00:20.5 39.01 141.68 DOMINICAN REPUBLIC\n", + "26 2019-07-17 08:54:30.0 9.55 84.17 CENTRAL CALIFORNIA\n", + "27 2019-07-17 08:16:20.3 37.17 55.76 ARIZONA\n", + "28 2019-07-17 08:15:20.0 15.53 95.08 EASTERN HONSHU\n", + "29 2019-07-17 08:09:01.1 39.88 98.33 JAPAN\n", + "30 2019-07-17 08:02:48.0 25.95 112.90 COSTA RICA\n", + "31 2019-07-17 07:41:21.4 36.52 121.11 NORTHERN IRAN\n", + "32 2019-07-17 07:34:44.6 38.82 20.58 OFFSHORE OAXACA\n", + "33 2019-07-17 07:22:09.1 35.67 117.52 MEXICO\n", + "34 2019-07-17 07:16:44.0 16.94 119.68 KANSAS\n", + "35 2019-07-17 07:08:57.6 43.37 127.10 WESTERN AUSTRALIA\n", + "36 2019-07-17 07:02:38.9 36.07 117.84 CENTRAL CALIFORNIA\n", + "37 2019-07-17 06:48:54.0 35.65 117.51 GREECE\n", + "38 2019-07-17 06:44:26.6 35.86 117.68 SOUTHERN CALIFORNIA\n", + "39 2019-07-17 06:37:55.1 19.49 65.40 NORTHWEST OF AUSTRALIA\n", + "40 2019-07-17 06:29:38.9 34.76 24.58 OFF COAST OF OREGON\n", + "41 2019-07-17 06:28:33.7 37.40 26.97 CENTRAL CALIFORNIA\n", + "42 2019-07-17 06:25:50.1 15.81 94.86 SOUTHERN CALIFORNIA\n", + "43 2019-07-17 06:24:40.3 33.08 115.78 CENTRAL CALIFORNIA\n", + "44 2019-07-17 06:10:46.4 17.08 176.92 PUERTO RICO REGION\n", + "45 2019-07-17 06:08:17.5 35.82 117.64 CRETE\n", + "46 2019-07-17 06:04:20.7 35.54 117.44 GREECE\n", + "47 2019-07-17 05:58:08.7 3.76 151.35 DODECANESE ISLANDS\n", + "48 2019-07-17 05:49:52.3 35.86 117.69 GREECE\n", + "49 2019-07-17 05:24:23.7 36.32 89.50 OFFSHORE OAXACA\n", + "50 NaN NaN NaN MEXICO\n", + "51 NaN NaN NaN SOUTHERN CALIFORNIA\n", + "52 NaN NaN NaN FIJI REGION\n", + "53 NaN NaN NaN CENTRAL CALIFORNIA\n", + "54 NaN NaN NaN SOUTHERN CALIFORNIA\n", + "55 NaN NaN NaN NEW IRELAND REGION\n", + "56 NaN NaN NaN P.N.G.\n", + "57 NaN NaN NaN CENTRAL CALIFORNIA\n", + "58 NaN NaN NaN TENNESSEE" ] }, - "execution_count": 271, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "reg1 = str(reg)\n", - "reg2 = re.sub(r'<(.*?)>','', reg1)\n", - "reg3 = reg2.replace('\\xa0','')\n", - "reg4 = reg3.split(',')\n", - "reg4" + "columns = ['Date and time','Latitude','Longitude','Region']\n", + "earthquake_df.columns = columns\n", + "\n", + "earthquake_df" ] }, { @@ -770,7 +1187,7 @@ }, { "cell_type": "code", - "execution_count": 297, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -782,7 +1199,7 @@ }, { "cell_type": "code", - "execution_count": 298, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -843,7 +1260,7 @@ "3 1/31/2019 Prague, Czech Republic" ] }, - "execution_count": 298, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -941,7 +1358,7 @@ }, { "cell_type": "code", - "execution_count": 299, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -953,57 +1370,57 @@ }, { "cell_type": "code", - "execution_count": 308, + "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[\"['Italiano'\",\n", - " ' لۊری شومالی',\n", - " ' كشميري',\n", - " \" 'Polski'\",\n", - " \" 'Русский'\",\n", - " ' 2123000+',\n", - " \" '中文'\",\n", - " ' ދިވެހިބަސް',\n", - " ' اردو',\n", - " ' مصرى',\n", - " ' לאדינו',\n", - " ' 1010000+',\n", - " \" 'Português'\",\n", - " ' العربية',\n", - " ' 5892000+',\n", - " ' 2323000+',\n", - " ' ייִדיש',\n", - " ' گیلکی',\n", - " ' 1346000+',\n", - " \" 'English'\",\n", - " ' 1541000+',\n", - " ' فارسی',\n", - " ' کوردیی ناوەندی',\n", - " ' كوردی',\n", - " ' עברית',\n", - " ' پنجابی (شاہ مکھی)',\n", - " ' ܐܬܘܪܝܐ',\n", - " ' مازِرونی',\n", - " ' 1532000+',\n", - " ' تۆرکجه',\n", - " ' پښتو',\n", - " \" '日本語'\",\n", - " ' 1159000+',\n", - " \" 'Français'\",\n", - " ' قازاقشا',\n", - " \" 'Español'\",\n", - " ' هَوُسَا',\n", - " ' 1556000+',\n", - " ' 1065000+',\n", - " ' ئۇيغۇرچه',\n", - " \" 'Deutsch'\",\n", - " ' سنڌي]']" + "['Português',\n", + " '5892000+',\n", + " '2123000+',\n", + " 'العربية',\n", + " 'ܐܬܘܪܝܐ',\n", + " 'كوردی',\n", + " 'مازِرونی',\n", + " '1010000+',\n", + " 'Русский',\n", + " 'هَوُسَا',\n", + " 'ދިވެހިބަސް',\n", + " 'مصرى',\n", + " 'سنڌي',\n", + " 'ייִדיש',\n", + " 'ئۇيغۇرچه',\n", + " '1159000+',\n", + " 'Deutsch',\n", + " 'پنجابی(شاہمکھی)',\n", + " 'עברית',\n", + " '1556000+',\n", + " '中文',\n", + " '1532000+',\n", + " '2323000+',\n", + " '日本語',\n", + " 'فارسی',\n", + " '1346000+',\n", + " 'کوردییناوەندی',\n", + " 'Français',\n", + " 'لۊریشومالی',\n", + " '1541000+',\n", + " 'تۆرکجه',\n", + " 'Italiano',\n", + " '1065000+',\n", + " 'לאדינו',\n", + " 'Español',\n", + " 'پښتو',\n", + " 'كشميري',\n", + " 'اردو',\n", + " 'گیلکی',\n", + " 'قازاقشا',\n", + " 'English',\n", + " 'Polski']" ] }, - "execution_count": 308, + "execution_count": 105, "metadata": {}, "output_type": "execute_result" } @@ -1026,7 +1443,7 @@ "#Falta quitar los numeros del str\n", "langstr = str(Language)\n", "langstr1 = re.sub(r'<(.*?)>','', langstr)\n", - "langstr1 = langstr1.replace('\\xa0', '')\n", + "langstr1 = langstr1.replace('\\xa0', '').replace('[\\'','').replace('\\']','').replace(' ','').replace('\\'','')\n", "langstr2 = langstr1.split(',')\n", "langstr2" ] @@ -1040,7 +1457,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 87, "metadata": {}, "outputs": [], "source": [ @@ -1052,11 +1469,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 103, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Business and economy',\n", + " ' Crime and justice',\n", + " ' Defence',\n", + " ' Education',\n", + " ' Environment',\n", + " ' Government',\n", + " ' Government spending',\n", + " ' Health',\n", + " ' Mapping',\n", + " ' Society',\n", + " ' Towns and cities',\n", + " ' Transport']" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code " + "links12 = link12.find_all('a', {'href':True})\n", + "links12 = list(links12)\n", + "links12 = links12[11:23]\n", + "links_list = str(links12) \n", + "links_list = re.sub(r'<(.*?)>','', links_list)\n", + "links_list = links_list.replace('[','').replace(']','')\n", + "links_list = links_list.split(',')\n", + "links_list" ] }, { @@ -1068,7 +1514,7 @@ }, { "cell_type": "code", - "execution_count": 310, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -1080,7 +1526,7 @@ }, { "cell_type": "code", - "execution_count": 314, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1108,7 +1554,7 @@ " 'India']" ] }, - "execution_count": 314, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 1ac1946..4138883 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": { "scrolled": false }, @@ -147,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -165,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -204,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -213,16 +213,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'[

\\n\\n\\nsherlock-project / sherlock\\n

,

\\n\\n\\nj3ssie / Osmedeus\\n

,

\\n\\n\\nytdl-org / youtube-dl\\n

,

\\n\\n\\nuber / ludwig\\n

,

\\n\\n\\ntkat0 / PyTorch_BlazeFace\\n

,

\\n\\n\\nKyubyong / dc_tts\\n

,

\\n\\n\\ngto76 / python-cheatsheet\\n

,

\\n\\n\\nbbfamily / abu\\n

,

\\n\\n\\nDrDonk / unlocker\\n

,

\\n\\n\\nBlackHC / tfpyth\\n

,

\\n\\n\\nsundowndev / PhoneInfoga\\n

,

\\n\\n\\npublic-apis / public-apis\\n

,

\\n\\n\\ntwintproject / twint\\n

,

\\n\\n\\nliuhuanyong / QASystemOnMedicalKG\\n

,

\\n\\n\\nvaexio / vaex\\n

,

\\n\\n\\nMrS0m30n3 / youtube-dl-gui\\n

,

\\n\\n\\nMozillaSecurity / grizzly\\n

,

\\n\\n\\nscikit-learn / scikit-learn\\n

,

\\n\\n\\nbitcoin / bips\\n

,

\\n\\n\\nxinshuoweng / AB3DMOT\\n

,

\\n\\n\\nsmartHomeHub / SmartIR\\n

,

\\n\\n\\nsfyc23 / EverydayWechat\\n

,

\\n\\n\\nhome-assistant / home-assistant\\n

,

\\n\\n\\nmsgi / nlp-journey\\n

,

\\n\\n\\ngunthercox / ChatterBot\\n

]'" + "'[

\\n\\n\\ngto76 / python-cheatsheet\\n

,

\\n\\n\\nj3ssie / Osmedeus\\n

,

\\n\\n\\ntangzixiang0304 / Shielded_detector\\n

,

\\n\\n\\nuber / ludwig\\n

,

\\n\\n\\nxinshuoweng / AB3DMOT\\n

,

\\n\\n\\nNVlabs / stylegan\\n

,

\\n\\n\\ndagster-io / dagster\\n

,

\\n\\n\\ntensorflow / models\\n

,

\\n\\n\\neragonruan / text-detection-ctpn\\n

,

\\n\\n\\nsherlock-project / sherlock\\n

,

\\n\\n\\ndeepfakes / faceswap\\n

,

\\n\\n\\nnbei / Deep-Flow-Guided-Video-Inpainting\\n

,

\\n\\n\\niovisor / bcc\\n

,

\\n\\n\\nRoibal / Cryptocurrency-Trading-Bots-Python-Beginner-Advance\\n

,

\\n\\n\\nNVIDIA / DeepLearningExamples\\n

,

\\n\\n\\nBlackHC / tfpyth\\n

,

\\n\\n\\nclovaai / deep-text-recognition-benchmark\\n

,

\\n\\n\\ntkat0 / PyTorch_BlazeFace\\n

,

\\n\\n\\nOpenMined / PySyft\\n

,

\\n\\n\\nCoreyMSchafer / code_snippets\\n

,

\\n\\n\\npublic-apis / public-apis\\n

,

\\n\\n\\nd2l-ai / d2l-zh\\n

,

\\n\\n\\napache / airflow\\n

,

\\n\\n\\nbeecost / bee-university\\n

,

\\n\\n\\nsundowndev / PhoneInfoga\\n

]'" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -234,14 +234,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['sherlock', 'Osmedeus', 'ludwig', 'PyTorch_BlazeFace', 'dc_tts', 'abu', 'unlocker', 'tfpyth', 'PhoneInfoga', 'twint', 'QASystemOnMedicalKG', 'vaex', 'grizzly', 'bips', 'AB3DMOT', 'SmartIR', 'EverydayWechat', 'ChatterBot']\n" + "['Osmedeus', 'Shielded_detector', 'ludwig', 'AB3DMOT', 'stylegan', 'dagster', 'models', 'sherlock', 'faceswap', 'bcc', 'DeepLearningExamples', 'tfpyth', 'PyTorch_BlazeFace', 'PySyft', 'code_snippets', 'airflow', 'PhoneInfoga']\n" ] } ], @@ -260,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -272,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": { "scrolled": true }, @@ -297,7 +297,7 @@ " 'http://upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg']" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -333,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -397,7 +397,7 @@ " 'https://www.mediawiki.org/']" ] }, - "execution_count": 56, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -423,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -435,13 +435,13 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['[Title 2 - The Congress',\n", + "['Title 2 - The Congress',\n", " ' Title 6 - Domestic Security',\n", " ' Title 7 - Agriculture',\n", " ' Title 15 - Commerce and Trade',\n", @@ -455,10 +455,10 @@ " ' Title 43 - Public Lands',\n", " ' Title 48 - Territories and Insular Possessions',\n", " ' Title 49 - Transportation',\n", - " ' Title 50 - War and National Defense]']" + " ' Title 50 - War and National Defense']" ] }, - "execution_count": 158, + "execution_count": 86, "metadata": {}, "output_type": "execute_result" } @@ -471,6 +471,7 @@ "\n", "titles = titles.replace('\\n', '').replace(' ', '').replace(' ','')\n", "titles_final = titles.split(',')\n", + "titles_final = [i.replace('[','').replace(']','') for i in titles_final]\n", "titles_final" ] }, @@ -483,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -495,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -505,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -514,31 +515,31 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['[ALEJANDRO ROSALES CASTILLO',\n", - " ' YASER ABDEL SAID',\n", - " ' JASON DEREK BROWN',\n", - " ' RAFAEL CARO-QUINTERO',\n", - " ' ALEXIS FLORES',\n", - " ' EUGENE PALMER',\n", - " ' SANTIAGO VILLALBA MEDEROS',\n", - " ' ROBERT WILLIAM FISHER',\n", - " ' BHADRESHKUMAR CHETANBHAI PATEL',\n", - " ' ARNOLDO JIMENEZ]']" + "['Alejandro Rosales Castillo',\n", + " ' Yaser Abdel Said',\n", + " ' Jason Derek Brown',\n", + " ' Rafael Caro-Quintero',\n", + " ' Alexis Flores',\n", + " ' Eugene Palmer',\n", + " ' Santiago Villalba Mederos',\n", + " ' Robert William Fisher',\n", + " ' Bhadreshkumar Chetanbhai Patel',\n", + " ' Arnoldo Jimenez']" ] }, - "execution_count": 157, + "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "namesw = namesw.replace('\\n','')\n", + "namesw = namesw.replace('\\n','').replace('[','').replace(']','').title()\n", "namesw2 = namesw.split(',')\n", "namesw2" ] @@ -552,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -564,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 199, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -579,69 +580,9 @@ }, { "cell_type": "code", - "execution_count": 269, + "execution_count": 67, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\"['2019-07-16 21:42:41.7'\",\n", - " \" '2019-07-16 21:07:37.5'\",\n", - " \" '2019-07-16 21:06:21.5'\",\n", - " \" '2019-07-16 21:03:36.7'\",\n", - " \" '2019-07-16 20:52:18.6'\",\n", - " \" '2019-07-16 20:49:09.1'\",\n", - " \" '2019-07-16 20:41:11.0'\",\n", - " \" '2019-07-16 20:33:52.9'\",\n", - " \" '2019-07-16 20:31:33.3'\",\n", - " \" '2019-07-16 20:29:07.7'\",\n", - " \" '2019-07-16 20:24:16.9'\",\n", - " \" '2019-07-16 20:19:00.1'\",\n", - " \" '2019-07-16 20:17:51.6'\",\n", - " \" '2019-07-16 20:15:59.0'\",\n", - " \" '2019-07-16 20:11:01.5'\",\n", - " \" '2019-07-16 19:51:06.5'\",\n", - " \" '2019-07-16 19:42:25.9'\",\n", - " \" '2019-07-16 19:35:57.0'\",\n", - " \" '2019-07-16 19:23:50.1'\",\n", - " \" '2019-07-16 19:20:21.4'\",\n", - " \" '2019-07-16 19:16:53.8'\",\n", - " \" '2019-07-16 19:16:15.9'\",\n", - " \" '2019-07-16 19:11:48.9'\",\n", - " \" '2019-07-16 19:04:00.2'\",\n", - " \" '2019-07-16 19:01:48.0'\",\n", - " \" '2019-07-16 19:01:00.8'\",\n", - " \" '2019-07-16 18:53:32.0'\",\n", - " \" '2019-07-16 18:50:16.2'\",\n", - " \" '2019-07-16 18:47:48.9'\",\n", - " \" '2019-07-16 18:36:26.8'\",\n", - " \" '2019-07-16 18:22:31.9'\",\n", - " \" '2019-07-16 18:15:26.5'\",\n", - " \" '2019-07-16 18:10:01.0'\",\n", - " \" '2019-07-16 17:48:24.0'\",\n", - " \" '2019-07-16 17:42:29.9'\",\n", - " \" '2019-07-16 17:39:43.0'\",\n", - " \" '2019-07-16 17:31:56.0'\",\n", - " \" '2019-07-16 17:05:45.0'\",\n", - " \" '2019-07-16 17:05:08.0'\",\n", - " \" '2019-07-16 17:01:30.8'\",\n", - " \" '2019-07-16 16:45:56.5'\",\n", - " \" '2019-07-16 16:45:55.0'\",\n", - " \" '2019-07-16 16:43:40.9'\",\n", - " \" '2019-07-16 16:43:21.7'\",\n", - " \" '2019-07-16 16:36:41.5'\",\n", - " \" '2019-07-16 16:28:38.1'\",\n", - " \" '2019-07-16 16:27:59.0'\",\n", - " \" '2019-07-16 16:26:00.5'\",\n", - " \" '2019-07-16 16:21:05.1'\",\n", - " \" '2019-07-16 16:01:04.3']\"]" - ] - }, - "execution_count": 269, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "date_l = [i.select('a') for i in date]\n", "date2 = str(date_l)\n", @@ -650,115 +591,591 @@ "date4 = str(date3)\n", "date4 = date4.replace('\\xa0','').replace('\\\\xa0',' ')\n", "date_final = date4.split(',')\n", - "date_final\n" + "date_final = [i.replace('[\\'','').replace(' \\'','').replace('\\']','').replace('\\'','') for i in date_final]" ] }, { "cell_type": "code", - "execution_count": 251, + "execution_count": 73, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'[69.27, 144.73, 40.25, 19.76, 35.88, 117.69, 37.81, 121.76, 36.07, 117.84, 36.07, 117.65, 16.85, 100.25, 40.09, 19.91, 23.45, 66.86, 35.86, 117.69, 37.82, 121.77, 33.10, 12.42, 35.55, 117.43, 35.68, 117.52, 37.82, 121.77, 6.26, 148.65, 35.61, 117.47, 35.62, 117.45, 36.19, 117.89, 38.39, 16.94, 38.45, 16.91, 61.27, 152.44, 36.03, 117.87, 35.96, 117.71, 39.56, 67.17, 35.68, 117.54, 0.68, 126.36, 43.62, 75.40, 35.59, 117.42, 35.74, 117.56, 35.65, 117.52, 28.47, 56.76, 34.41, 150.73, 9.93, 118.23, 35.67, 117.54, 0.54, 127.86, 35.67, 117.47, 36.20, 117.90, 15.40, 94.64, 36.10, 117.90, 0.68, 127.58, 18.99, 70.09, 37.23, 28.27, 36.03, 117.79, 35.96, 117.30, 35.92, 117.68, 18.51, 120.55, 30.57, 141.98, 28.45, 56.70, 62.22, 150.00]'" - ] - }, - "execution_count": 251, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "laton2 = str(laton)\n", "laton3 = re.sub(r'<(.*?)>','', laton2)\n", "laton4 = laton3.replace('\\xa0','')\n", - "laton4" + "laton_final = laton4.split(',')\n", + "laton_final = [i.replace(' ','').replace('[','').replace(']','') for i in laton_final]" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "latitude = [i for i in laton_final if laton_final.index(i) % 2 == 0]\n", + "longitude = [i for i in laton_final if laton_final.index(i) % 2 != 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "reg1 = str(reg)\n", + "reg2 = re.sub(r'<(.*?)>','', reg1)\n", + "reg3 = reg2.replace('\\xa0','')\n", + "reg_final = reg3.split(',')\n", + "reg_final = [i.replace('[','').replace(']','') for i in reg_final]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "date_df = pd.DataFrame(date_final)\n", + "latitude_df = pd.DataFrame(latitude)\n", + "longitude_df = pd.DataFrame(longitude)\n", + "reg_df = pd.DataFrame(reg_final)" ] }, { "cell_type": "code", - "execution_count": 271, + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "columns = ['Date and time','Latitude and longitude', 'Region']\n", + "\n", + "earthquake_df = pd.concat([date_df, latitude_df, longitude_df, reg_df], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Date and timeLatitudeLongitudeRegion
02019-07-17 12:11:09.035.67117.42SOUTHERN CALIFORNIA
12019-07-17 12:10:41.046.122.11FRANCE
22019-07-17 11:44:28.136.12117.84CENTRAL CALIFORNIA
32019-07-17 11:32:47.636.11117.88CENTRAL CALIFORNIA
42019-07-17 11:29:31.036.11117.89CENTRAL CALIFORNIA
52019-07-17 11:18:39.219.40155.28ISLAND OF HAWAII
62019-07-17 11:12:28.535.93117.68HAWAII
72019-07-17 10:48:30.236.14117.95CENTRAL CALIFORNIA
82019-07-17 10:44:10.018.19120.25CENTRAL CALIFORNIA
92019-07-17 10:41:30.366.32157.08WESTERN AUSTRALIA
102019-07-17 10:20:09.918.44120.24NORTHERN ALASKA
112019-07-17 10:19:17.240.4241.75WESTERN AUSTRALIA
122019-07-17 10:09:19.515.23173.40EASTERN TURKEY
132019-07-17 10:00:55.935.87117.74TONGA
142019-07-17 09:57:38.039.7241.37CENTRAL CALIFORNIA
152019-07-17 09:53:41.919.14155.53EASTERN TURKEY
162019-07-17 09:46:06.635.65117.45ISLAND OF HAWAII
172019-07-17 09:44:17.93.3083.06HAWAII
182019-07-17 09:43:40.218.0066.86SOUTHERN CALIFORNIA
192019-07-17 09:41:09.039.6338.60OFF COAST OF CENTRAL AMERICA
202019-07-17 09:37:56.819.16155.44PUERTO RICO
212019-07-17 09:27:04.235.59117.36EASTERN TURKEY
222019-07-17 09:21:21.018.6569.48ISLAND OF HAWAII
232019-07-17 09:16:53.636.20117.89HAWAII
242019-07-17 09:13:30.236.61112.39SOUTHERN CALIFORNIA
252019-07-17 09:00:20.539.01141.68DOMINICAN REPUBLIC
262019-07-17 08:54:30.09.5584.17CENTRAL CALIFORNIA
272019-07-17 08:16:20.337.1755.76ARIZONA
282019-07-17 08:15:20.015.5395.08EASTERN HONSHU
292019-07-17 08:09:01.139.8898.33JAPAN
302019-07-17 08:02:48.025.95112.90COSTA RICA
312019-07-17 07:41:21.436.52121.11NORTHERN IRAN
322019-07-17 07:34:44.638.8220.58OFFSHORE OAXACA
332019-07-17 07:22:09.135.67117.52MEXICO
342019-07-17 07:16:44.016.94119.68KANSAS
352019-07-17 07:08:57.643.37127.10WESTERN AUSTRALIA
362019-07-17 07:02:38.936.07117.84CENTRAL CALIFORNIA
372019-07-17 06:48:54.035.65117.51GREECE
382019-07-17 06:44:26.635.86117.68SOUTHERN CALIFORNIA
392019-07-17 06:37:55.119.4965.40NORTHWEST OF AUSTRALIA
402019-07-17 06:29:38.934.7624.58OFF COAST OF OREGON
412019-07-17 06:28:33.737.4026.97CENTRAL CALIFORNIA
422019-07-17 06:25:50.115.8194.86SOUTHERN CALIFORNIA
432019-07-17 06:24:40.333.08115.78CENTRAL CALIFORNIA
442019-07-17 06:10:46.417.08176.92PUERTO RICO REGION
452019-07-17 06:08:17.535.82117.64CRETE
462019-07-17 06:04:20.735.54117.44GREECE
472019-07-17 05:58:08.73.76151.35DODECANESE ISLANDS
482019-07-17 05:49:52.335.86117.69GREECE
492019-07-17 05:24:23.736.3289.50OFFSHORE OAXACA
50NaNNaNNaNMEXICO
51NaNNaNNaNSOUTHERN CALIFORNIA
52NaNNaNNaNFIJI REGION
53NaNNaNNaNCENTRAL CALIFORNIA
54NaNNaNNaNSOUTHERN CALIFORNIA
55NaNNaNNaNNEW IRELAND REGION
56NaNNaNNaNP.N.G.
57NaNNaNNaNCENTRAL CALIFORNIA
58NaNNaNNaNTENNESSEE
\n", + "
" + ], "text/plain": [ - "['[NORTHERN ALASKA',\n", - " ' ALBANIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' SAN FRANCISCO BAY AREA',\n", - " ' CALIF.',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' OFFSHORE GUERRERO',\n", - " ' MEXICO',\n", - " ' ALBANIA',\n", - " ' JUJUY',\n", - " ' ARGENTINA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' SAN FRANCISCO BAY AREA',\n", - " ' CALIF.',\n", - " ' MADEIRA ISLANDS',\n", - " ' PORTUGAL REGION',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SAN FRANCISCO BAY AREA',\n", - " ' CALIF.',\n", - " ' NEW BRITAIN REGION',\n", - " ' P.N.G.',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' SOUTHERN ITALY',\n", - " ' SOUTHERN ITALY',\n", - " ' SOUTHERN ALASKA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' EASTERN UZBEKISTAN',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' MOLUCCA SEA',\n", - " ' EASTERN KAZAKHSTAN',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' SOUTHERN IRAN',\n", - " ' NEAR S.E. COAST OF AUSTRALIA',\n", - " ' SUMBAWA REGION',\n", - " ' INDONESIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' HALMAHERA',\n", - " ' INDONESIA',\n", - " ' SOUTHERN CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' OFFSHORE OAXACA',\n", - " ' MEXICO',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' HALMAHERA',\n", - " ' INDONESIA',\n", - " ' DOMINICAN REPUBLIC',\n", - " ' WESTERN TURKEY',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' CENTRAL CALIFORNIA',\n", - " ' WESTERN AUSTRALIA',\n", - " ' IZU ISLANDS',\n", - " ' JAPAN REGION',\n", - " ' SOUTHERN IRAN',\n", - " ' CENTRAL ALASKA]']" + " Date and time Latitude Longitude Region\n", + "0 2019-07-17 12:11:09.0 35.67 117.42 SOUTHERN CALIFORNIA\n", + "1 2019-07-17 12:10:41.0 46.12 2.11 FRANCE\n", + "2 2019-07-17 11:44:28.1 36.12 117.84 CENTRAL CALIFORNIA\n", + "3 2019-07-17 11:32:47.6 36.11 117.88 CENTRAL CALIFORNIA\n", + "4 2019-07-17 11:29:31.0 36.11 117.89 CENTRAL CALIFORNIA\n", + "5 2019-07-17 11:18:39.2 19.40 155.28 ISLAND OF HAWAII\n", + "6 2019-07-17 11:12:28.5 35.93 117.68 HAWAII\n", + "7 2019-07-17 10:48:30.2 36.14 117.95 CENTRAL CALIFORNIA\n", + "8 2019-07-17 10:44:10.0 18.19 120.25 CENTRAL CALIFORNIA\n", + "9 2019-07-17 10:41:30.3 66.32 157.08 WESTERN AUSTRALIA\n", + "10 2019-07-17 10:20:09.9 18.44 120.24 NORTHERN ALASKA\n", + "11 2019-07-17 10:19:17.2 40.42 41.75 WESTERN AUSTRALIA\n", + "12 2019-07-17 10:09:19.5 15.23 173.40 EASTERN TURKEY\n", + "13 2019-07-17 10:00:55.9 35.87 117.74 TONGA\n", + "14 2019-07-17 09:57:38.0 39.72 41.37 CENTRAL CALIFORNIA\n", + "15 2019-07-17 09:53:41.9 19.14 155.53 EASTERN TURKEY\n", + "16 2019-07-17 09:46:06.6 35.65 117.45 ISLAND OF HAWAII\n", + "17 2019-07-17 09:44:17.9 3.30 83.06 HAWAII\n", + "18 2019-07-17 09:43:40.2 18.00 66.86 SOUTHERN CALIFORNIA\n", + "19 2019-07-17 09:41:09.0 39.63 38.60 OFF COAST OF CENTRAL AMERICA\n", + "20 2019-07-17 09:37:56.8 19.16 155.44 PUERTO RICO\n", + "21 2019-07-17 09:27:04.2 35.59 117.36 EASTERN TURKEY\n", + "22 2019-07-17 09:21:21.0 18.65 69.48 ISLAND OF HAWAII\n", + "23 2019-07-17 09:16:53.6 36.20 117.89 HAWAII\n", + "24 2019-07-17 09:13:30.2 36.61 112.39 SOUTHERN CALIFORNIA\n", + "25 2019-07-17 09:00:20.5 39.01 141.68 DOMINICAN REPUBLIC\n", + "26 2019-07-17 08:54:30.0 9.55 84.17 CENTRAL CALIFORNIA\n", + "27 2019-07-17 08:16:20.3 37.17 55.76 ARIZONA\n", + "28 2019-07-17 08:15:20.0 15.53 95.08 EASTERN HONSHU\n", + "29 2019-07-17 08:09:01.1 39.88 98.33 JAPAN\n", + "30 2019-07-17 08:02:48.0 25.95 112.90 COSTA RICA\n", + "31 2019-07-17 07:41:21.4 36.52 121.11 NORTHERN IRAN\n", + "32 2019-07-17 07:34:44.6 38.82 20.58 OFFSHORE OAXACA\n", + "33 2019-07-17 07:22:09.1 35.67 117.52 MEXICO\n", + "34 2019-07-17 07:16:44.0 16.94 119.68 KANSAS\n", + "35 2019-07-17 07:08:57.6 43.37 127.10 WESTERN AUSTRALIA\n", + "36 2019-07-17 07:02:38.9 36.07 117.84 CENTRAL CALIFORNIA\n", + "37 2019-07-17 06:48:54.0 35.65 117.51 GREECE\n", + "38 2019-07-17 06:44:26.6 35.86 117.68 SOUTHERN CALIFORNIA\n", + "39 2019-07-17 06:37:55.1 19.49 65.40 NORTHWEST OF AUSTRALIA\n", + "40 2019-07-17 06:29:38.9 34.76 24.58 OFF COAST OF OREGON\n", + "41 2019-07-17 06:28:33.7 37.40 26.97 CENTRAL CALIFORNIA\n", + "42 2019-07-17 06:25:50.1 15.81 94.86 SOUTHERN CALIFORNIA\n", + "43 2019-07-17 06:24:40.3 33.08 115.78 CENTRAL CALIFORNIA\n", + "44 2019-07-17 06:10:46.4 17.08 176.92 PUERTO RICO REGION\n", + "45 2019-07-17 06:08:17.5 35.82 117.64 CRETE\n", + "46 2019-07-17 06:04:20.7 35.54 117.44 GREECE\n", + "47 2019-07-17 05:58:08.7 3.76 151.35 DODECANESE ISLANDS\n", + "48 2019-07-17 05:49:52.3 35.86 117.69 GREECE\n", + "49 2019-07-17 05:24:23.7 36.32 89.50 OFFSHORE OAXACA\n", + "50 NaN NaN NaN MEXICO\n", + "51 NaN NaN NaN SOUTHERN CALIFORNIA\n", + "52 NaN NaN NaN FIJI REGION\n", + "53 NaN NaN NaN CENTRAL CALIFORNIA\n", + "54 NaN NaN NaN SOUTHERN CALIFORNIA\n", + "55 NaN NaN NaN NEW IRELAND REGION\n", + "56 NaN NaN NaN P.N.G.\n", + "57 NaN NaN NaN CENTRAL CALIFORNIA\n", + "58 NaN NaN NaN TENNESSEE" ] }, - "execution_count": 271, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "reg1 = str(reg)\n", - "reg2 = re.sub(r'<(.*?)>','', reg1)\n", - "reg3 = reg2.replace('\\xa0','')\n", - "reg4 = reg3.split(',')\n", - "reg4" + "columns = ['Date and time','Latitude','Longitude','Region']\n", + "earthquake_df.columns = columns\n", + "\n", + "earthquake_df" ] }, { @@ -770,7 +1187,7 @@ }, { "cell_type": "code", - "execution_count": 297, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -782,7 +1199,7 @@ }, { "cell_type": "code", - "execution_count": 298, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -843,7 +1260,7 @@ "3 1/31/2019 Prague, Czech Republic" ] }, - "execution_count": 298, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -941,7 +1358,7 @@ }, { "cell_type": "code", - "execution_count": 299, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -953,57 +1370,57 @@ }, { "cell_type": "code", - "execution_count": 308, + "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[\"['Italiano'\",\n", - " ' لۊری شومالی',\n", - " ' كشميري',\n", - " \" 'Polski'\",\n", - " \" 'Русский'\",\n", - " ' 2123000+',\n", - " \" '中文'\",\n", - " ' ދިވެހިބަސް',\n", - " ' اردو',\n", - " ' مصرى',\n", - " ' לאדינו',\n", - " ' 1010000+',\n", - " \" 'Português'\",\n", - " ' العربية',\n", - " ' 5892000+',\n", - " ' 2323000+',\n", - " ' ייִדיש',\n", - " ' گیلکی',\n", - " ' 1346000+',\n", - " \" 'English'\",\n", - " ' 1541000+',\n", - " ' فارسی',\n", - " ' کوردیی ناوەندی',\n", - " ' كوردی',\n", - " ' עברית',\n", - " ' پنجابی (شاہ مکھی)',\n", - " ' ܐܬܘܪܝܐ',\n", - " ' مازِرونی',\n", - " ' 1532000+',\n", - " ' تۆرکجه',\n", - " ' پښتو',\n", - " \" '日本語'\",\n", - " ' 1159000+',\n", - " \" 'Français'\",\n", - " ' قازاقشا',\n", - " \" 'Español'\",\n", - " ' هَوُسَا',\n", - " ' 1556000+',\n", - " ' 1065000+',\n", - " ' ئۇيغۇرچه',\n", - " \" 'Deutsch'\",\n", - " ' سنڌي]']" + "['Português',\n", + " '5892000+',\n", + " '2123000+',\n", + " 'العربية',\n", + " 'ܐܬܘܪܝܐ',\n", + " 'كوردی',\n", + " 'مازِرونی',\n", + " '1010000+',\n", + " 'Русский',\n", + " 'هَوُسَا',\n", + " 'ދިވެހިބަސް',\n", + " 'مصرى',\n", + " 'سنڌي',\n", + " 'ייִדיש',\n", + " 'ئۇيغۇرچه',\n", + " '1159000+',\n", + " 'Deutsch',\n", + " 'پنجابی(شاہمکھی)',\n", + " 'עברית',\n", + " '1556000+',\n", + " '中文',\n", + " '1532000+',\n", + " '2323000+',\n", + " '日本語',\n", + " 'فارسی',\n", + " '1346000+',\n", + " 'کوردییناوەندی',\n", + " 'Français',\n", + " 'لۊریشومالی',\n", + " '1541000+',\n", + " 'تۆرکجه',\n", + " 'Italiano',\n", + " '1065000+',\n", + " 'לאדינו',\n", + " 'Español',\n", + " 'پښتو',\n", + " 'كشميري',\n", + " 'اردو',\n", + " 'گیلکی',\n", + " 'قازاقشا',\n", + " 'English',\n", + " 'Polski']" ] }, - "execution_count": 308, + "execution_count": 105, "metadata": {}, "output_type": "execute_result" } @@ -1026,7 +1443,7 @@ "#Falta quitar los numeros del str\n", "langstr = str(Language)\n", "langstr1 = re.sub(r'<(.*?)>','', langstr)\n", - "langstr1 = langstr1.replace('\\xa0', '')\n", + "langstr1 = langstr1.replace('\\xa0', '').replace('[\\'','').replace('\\']','').replace(' ','').replace('\\'','')\n", "langstr2 = langstr1.split(',')\n", "langstr2" ] @@ -1040,7 +1457,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 87, "metadata": {}, "outputs": [], "source": [ @@ -1052,11 +1469,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 103, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Business and economy',\n", + " ' Crime and justice',\n", + " ' Defence',\n", + " ' Education',\n", + " ' Environment',\n", + " ' Government',\n", + " ' Government spending',\n", + " ' Health',\n", + " ' Mapping',\n", + " ' Society',\n", + " ' Towns and cities',\n", + " ' Transport']" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code " + "links12 = link12.find_all('a', {'href':True})\n", + "links12 = list(links12)\n", + "links12 = links12[11:23]\n", + "links_list = str(links12) \n", + "links_list = re.sub(r'<(.*?)>','', links_list)\n", + "links_list = links_list.replace('[','').replace(']','')\n", + "links_list = links_list.split(',')\n", + "links_list" ] }, { @@ -1068,7 +1514,7 @@ }, { "cell_type": "code", - "execution_count": 310, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -1080,7 +1526,7 @@ }, { "cell_type": "code", - "execution_count": 314, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1108,7 +1554,7 @@ " 'India']" ] }, - "execution_count": 314, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" }