From c6ea97141d5a4fc5fa62211e97a285731a3d96cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?lponce=C2=B4?= Date: Tue, 16 Jul 2019 19:31:35 -0500 Subject: [PATCH] hasta donde se pudo --- .../.ipynb_checkpoints/main-checkpoint.ipynb | 771 +++++++++++++++++- your-code/main.ipynb | 771 +++++++++++++++++- 2 files changed, 1468 insertions(+), 74 deletions(-) diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb index 812f7a4..a856e02 100644 --- a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -76,11 +76,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Eric Ma',\n", + " 'Federico Brigante',\n", + " 'Kyle Roach',\n", + " 'Olle Jonsson',\n", + " 'Nikita Sobolev',\n", + " 'Frank S. Thomas',\n", + " 'syuilo',\n", + " 'Ives van Hoorne',\n", + " 'Paulus Schoutsen',\n", + " 'Sarah Drasner',\n", + " 'Stefanos Kornilios Mitsis Poiitidis',\n", + " 'Jan Hovancik',\n", + " 'Andreas Mueller',\n", + " 'Guillaume Gomez',\n", + " 'Matt Holt',\n", + " 'Clifford Wolf',\n", + " 'Franck Nijhof',\n", + " 'Joe Block',\n", + " 'Andrei Neagoie',\n", + " 'Jack Lloyd',\n", + " 'Guillermo Rauch',\n", + " 'Tim Griesser',\n", + " 'Jameson Nash',\n", + " 'Anderson Banihirwe',\n", + " 'Danny Ryan']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "soup_git = BeautifulSoup(content, 'html')\n", + "soup_git\n" ] }, { @@ -134,11 +172,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Eric Ma',\n", + " 'Federico Brigante',\n", + " 'Kyle Roach',\n", + " 'Olle Jonsson',\n", + " 'Nikita Sobolev',\n", + " 'Frank S. Thomas',\n", + " 'syuilo',\n", + " 'Ives van Hoorne',\n", + " 'Paulus Schoutsen',\n", + " 'Sarah Drasner',\n", + " 'Stefanos Kornilios Mitsis Poiitidis',\n", + " 'Jan Hovancik',\n", + " 'Andreas Mueller',\n", + " 'Guillaume Gomez',\n", + " 'Matt Holt',\n", + " 'Clifford Wolf',\n", + " 'Franck Nijhof',\n", + " 'Joe Block',\n", + " 'Andrei Neagoie',\n", + " 'Jack Lloyd',\n", + " 'Guillermo Rauch',\n", + " 'Tim Griesser',\n", + " 'Jameson Nash',\n", + " 'Anderson Banihirwe',\n", + " 'Danny Ryan']" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "dev_names = soup_git.find_all('h1',{'class': 'h3 lh-condensed'})\n", + "dev_names = [element.text for element in dev_names]\n", + "dev_names" ] }, { @@ -152,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -162,11 +238,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['sherlock-project / sherlock ',\n", + " 'j3ssie / Osmedeus ',\n", + " 'ytdl-org / youtube-dl ',\n", + " 'uber / ludwig ',\n", + " 'tkat0 / PyTorch_BlazeFace ',\n", + " 'Kyubyong / dc_tts ',\n", + " 'gto76 / python-cheatsheet ',\n", + " 'bbfamily / abu ',\n", + " 'DrDonk / unlocker ',\n", + " 'BlackHC / tfpyth ',\n", + " 'sundowndev / PhoneInfoga ',\n", + " 'public-apis / public-apis ',\n", + " 'twintproject / twint ',\n", + " 'liuhuanyong / QASystemOnMedicalKG ',\n", + " 'vaexio / vaex ',\n", + " 'MrS0m30n3 / youtube-dl-gui ',\n", + " 'MozillaSecurity / grizzly ',\n", + " 'scikit-learn / scikit-learn ',\n", + " 'bitcoin / bips ',\n", + " 'xinshuoweng / AB3DMOT ',\n", + " 'smartHomeHub / SmartIR ',\n", + " 'sfyc23 / EverydayWechat ',\n", + " 'home-assistant / home-assistant ',\n", + " 'msgi / nlp-journey ',\n", + " 'gunthercox / ChatterBot ']" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "soup_git = BeautifulSoup(content, 'html')\n", + "\n", + "dev_rep = soup_git.find_all('h1',{'class': 'h3 lh-condensed'})\n", + "dev_rep = [element.text.replace('\\n', '') for element in dev_rep]\n", + "dev_rep" ] }, { @@ -178,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -188,11 +305,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Cscr-featured.svg/20px-Cscr-featured.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/d/df/Walt_Disney_1946.JPG/220px-Walt_Disney_1946.JPG',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/8/87/Walt_Disney_1942_signature.svg/150px-Walt_Disney_1942_signature.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/d/da/Animation_disc.svg/30px-Animation_disc.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/6/69/P_vip.svg/29px-P_vip.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Magic_Kingdom_castle.jpg/24px-Magic_Kingdom_castle.jpg',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Video-x-generic.svg/30px-Video-x-generic.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/Flag_of_Los_Angeles_County%2C_California.svg/30px-Flag_of_Los_Angeles_County%2C_California.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/USA_flag_on_television.svg/30px-USA_flag_on_television.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/22px-Commons-logo.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/25px-Wikiquote-logo.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/30px-Wikidata-logo.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n", + " '//en.wikipedia.org/wiki/Special:CentralAutoLogin/start?type=1x1',\n", + " '/static/images/wikimedia-button.png',\n", + " '/static/images/poweredby_mediawiki_88x31.png']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "disney = BeautifulSoup(content, 'html')\n", + "#print(disney)\n", + "\n", + "\n", + "disney_img = disney.find_all('img', src = True)\n", + "disney_img = [element['src'] for element in disney_img]\n", + "\n", + "disney_img" ] }, { @@ -204,21 +373,211 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://en.wikipedia.org/wiki/Python' " + "url = 'https://en.wikipedia.org/wiki/Python_(mythology)' " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['/wiki/Pythia',\n", + " '/wiki/File:Apollo_dan_Pithon.jpg',\n", + " '/wiki/Apollo',\n", + " '/wiki/Virgil_Solis',\n", + " '/wiki/Ovid',\n", + " '/wiki/Metamorphoses',\n", + " '/wiki/Greek_mythology',\n", + " '/wiki/Greek_language',\n", + " '/wiki/Genitive_case',\n", + " '/wiki/Serpent_(symbolism)',\n", + " '/wiki/Medieval',\n", + " '/wiki/Dragon',\n", + " '/wiki/Earth',\n", + " '/wiki/Delphi',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=1',\n", + " '/wiki/Oracle',\n", + " '/wiki/Gaia_(mythology)',\n", + " '/wiki/Axis_mundi#Places',\n", + " '/wiki/Omphalos',\n", + " '/wiki/Chthonic',\n", + " '/wiki/Apollo',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=2',\n", + " '/wiki/File:Pietro_Francavilla_-_Apollo_Victorious_over_the_Python_-_Walters_27302.jpg',\n", + " '/wiki/Homeric_Hymn',\n", + " '/wiki/Drakaina_(mythology)',\n", + " '/wiki/Gaius_Julius_Hyginus',\n", + " '/wiki/Zeus',\n", + " '/wiki/Leto',\n", + " '/wiki/Artemis',\n", + " '/wiki/Apollo',\n", + " '/wiki/Hera',\n", + " '/wiki/Mount_Parnassus',\n", + " '/wiki/Gaia_(mythology)',\n", + " '/wiki/Robert_Graves',\n", + " '/wiki/Greeks',\n", + " '/wiki/Sacrilege',\n", + " '/wiki/Pythian_Games',\n", + " '/wiki/Erwin_Rohde',\n", + " '/wiki/Omphalos',\n", + " '/wiki/Oracle',\n", + " '/wiki/Delphi',\n", + " '/wiki/Hyperion_(mythology)',\n", + " '/wiki/Helios',\n", + " '/wiki/Karl_Kerenyi',\n", + " '/wiki/Delphyne',\n", + " '/wiki/Typhon',\n", + " '/wiki/Typhoeus',\n", + " '/wiki/Pythia',\n", + " '/wiki/Omphalos',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=3',\n", + " '/wiki/Python_(genus)',\n", + " '/wiki/Pythonidae',\n", + " '/wiki/File:Barcelona_221.JPG',\n", + " '/wiki/Apollo_Belvedere',\n", + " '/wiki/Delphi',\n", + " '/wiki/Dragons_in_Greek_mythology',\n", + " '/wiki/Pythia',\n", + " '/wiki/Serpent_(symbolism)',\n", + " '/wiki/Saint_George_and_the_Dragon',\n", + " '/wiki/Analogy_of_the_sun',\n", + " '/wiki/Yamata_no_Orochi',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=4',\n", + " '/wiki/Telphousa',\n", + " '/wiki/Parnassus',\n", + " '/wiki/Dodona',\n", + " '/wiki/The_Walters_Art_Museum',\n", + " '/wiki/Walter_Burkert',\n", + " '/wiki/Encyclopedia_Americana',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=5',\n", + " 'https://commons.wikimedia.org/wiki/Category:Python_(mythology)',\n", + " '/wiki/Wikisource',\n", + " '/wiki/Encyclop%C3%A6dia_Britannica_Eleventh_Edition',\n", + " 'https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Python_(mythology)',\n", + " '/wiki/Walter_Burkert',\n", + " '/wiki/Joseph_Fontenrose',\n", + " '/wiki/Manly_Palmer_Hall',\n", + " '/wiki/Jane_Ellen_Harrison',\n", + " '/wiki/Karl_Kerenyi',\n", + " '/wiki/Erwin_Rohde',\n", + " '/wiki/William_Smith_(lexicographer)',\n", + " '/wiki/Dictionary_of_Greek_and_Roman_Biography_and_Mythology',\n", + " '/wiki/Help:Authority_control',\n", + " 'https://www.wikidata.org/wiki/Q15721',\n", + " '/wiki/Integrated_Authority_File',\n", + " '/wiki/Syst%C3%A8me_universitaire_de_documentation',\n", + " '/wiki/Virtual_International_Authority_File',\n", + " '/wiki/WorldCat_Identities',\n", + " '/wiki/Help:Category',\n", + " '/wiki/Category:Ancient_Delphi',\n", + " '/wiki/Category:Offspring_of_Gaia',\n", + " '/wiki/Category:Greek_dragons',\n", + " '/wiki/Category:Wikipedia_articles_incorporating_a_citation_from_the_Encyclopedia_Americana_with_a_Wikisource_reference',\n", + " '/wiki/Category:Articles_containing_Greek-language_text',\n", + " '/wiki/Category:Commons_category_link_is_on_Wikidata',\n", + " '/wiki/Category:Wikipedia_articles_with_GND_identifiers',\n", + " '/wiki/Category:Wikipedia_articles_with_SUDOC_identifiers',\n", + " '/wiki/Category:Wikipedia_articles_with_VIAF_identifiers',\n", + " '/wiki/Category:Wikipedia_articles_with_WorldCat-VIAF_identifiers',\n", + " '/wiki/Special:MyTalk',\n", + " '/wiki/Special:MyContributions',\n", + " '/w/index.php?title=Special:CreateAccount&returnto=Python+%28mythology%29',\n", + " '/w/index.php?title=Special:UserLogin&returnto=Python+%28mythology%29',\n", + " '/wiki/Python_(mythology)',\n", + " '/wiki/Talk:Python_(mythology)',\n", + " '/w/index.php?title=Python_(mythology)&action=edit',\n", + " '/w/index.php?title=Python_(mythology)&action=history',\n", + " '/wiki/Main_Page',\n", + " '/wiki/Main_Page',\n", + " '/wiki/Portal:Contents',\n", + " '/wiki/Portal:Featured_content',\n", + " '/wiki/Portal:Current_events',\n", + " '/wiki/Special:Random',\n", + " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n", + " '//shop.wikimedia.org',\n", + " '/wiki/Help:Contents',\n", + " '/wiki/Wikipedia:About',\n", + " '/wiki/Wikipedia:Community_portal',\n", + " '/wiki/Special:RecentChanges',\n", + " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n", + " '/wiki/Special:WhatLinksHere/Python_(mythology)',\n", + " '/wiki/Special:RecentChangesLinked/Python_(mythology)',\n", + " '/wiki/Wikipedia:File_Upload_Wizard',\n", + " '/wiki/Special:SpecialPages',\n", + " '/w/index.php?title=Python_(mythology)&oldid=900434819',\n", + " '/w/index.php?title=Python_(mythology)&action=info',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721',\n", + " '/w/index.php?title=Special:CiteThisPage&page=Python_%28mythology%29&id=900434819',\n", + " '/w/index.php?title=Python_(mythology)&printable=yes',\n", + " 'https://af.wikipedia.org/wiki/Piton',\n", + " 'https://be.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n", + " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%8F)',\n", + " 'https://br.wikipedia.org/wiki/Python_(aerouant)',\n", + " 'https://ca.wikipedia.org/wiki/Pit%C3%B3_(mitologia)',\n", + " 'https://cs.wikipedia.org/wiki/P%C3%BDth%C3%B3n',\n", + " 'https://da.wikipedia.org/wiki/Python_(mytologi)',\n", + " 'https://de.wikipedia.org/wiki/Python_(Mythologie)',\n", + " 'https://et.wikipedia.org/wiki/Python',\n", + " 'https://el.wikipedia.org/wiki/%CE%A0%CF%8D%CE%B8%CF%89%CE%BD_(%CE%BC%CF%85%CE%B8%CE%BF%CE%BB%CE%BF%CE%B3%CE%AF%CE%B1)',\n", + " 'https://es.wikipedia.org/wiki/Pit%C3%B3n_(mitolog%C3%ADa)',\n", + " 'https://eo.wikipedia.org/wiki/Pitono_(mitologio)',\n", + " 'https://eu.wikipedia.org/wiki/Piton',\n", + " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86_(%D8%A7%D8%B3%D8%B7%D9%88%D8%B1%D9%87)',\n", + " 'https://fr.wikipedia.org/wiki/Python_(mythologie)',\n", + " 'https://gl.wikipedia.org/wiki/Pit%C3%B3n_(mitolox%C3%ADa)',\n", + " 'https://ko.wikipedia.org/wiki/%ED%94%BC%ED%86%A4',\n", + " 'https://hr.wikipedia.org/wiki/Piton_(mitologija)',\n", + " 'https://id.wikipedia.org/wiki/Pithon_(mitologi)',\n", + " 'https://it.wikipedia.org/wiki/Pitone_(mitologia)',\n", + " 'https://la.wikipedia.org/wiki/Python_(mythologia)',\n", + " 'https://lb.wikipedia.org/wiki/Python_(Mythologie)',\n", + " 'https://lt.wikipedia.org/wiki/Pitonas_(mitologija)',\n", + " 'https://hu.wikipedia.org/wiki/P%C3%BCth%C3%B3n',\n", + " 'https://nl.wikipedia.org/wiki/Python_(mythologie)',\n", + " 'https://ja.wikipedia.org/wiki/%E3%83%94%E3%83%A5%E3%83%BC%E3%83%88%E3%83%BC%E3%83%B3',\n", + " 'https://no.wikipedia.org/wiki/Python_(mytologi)',\n", + " 'https://pl.wikipedia.org/wiki/Pyton_(mitologia)',\n", + " 'https://pt.wikipedia.org/wiki/P%C3%ADton_(mitologia)',\n", + " 'https://ro.wikipedia.org/wiki/Python_(mitologie)',\n", + " 'https://ru.wikipedia.org/wiki/%D0%9F%D0%B8%D1%84%D0%BE%D0%BD',\n", + " 'https://sco.wikipedia.org/wiki/Python_(meethology)',\n", + " 'https://sq.wikipedia.org/wiki/Pithoni',\n", + " 'https://sl.wikipedia.org/wiki/Piton_(mitologija)',\n", + " 'https://sr.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%98%D0%B0)',\n", + " 'https://fi.wikipedia.org/wiki/Python_(hirvi%C3%B6)',\n", + " 'https://sv.wikipedia.org/wiki/Python_(mytologi)',\n", + " 'https://tr.wikipedia.org/wiki/Pithon',\n", + " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD_(%D0%BC%D1%96%D1%84%D0%BE%D0%BB%D0%BE%D0%B3%D1%96%D1%8F)',\n", + " 'https://vi.wikipedia.org/wiki/Python_(th%E1%BA%A7n_tho%E1%BA%A1i)',\n", + " 'https://zh.wikipedia.org/wiki/%E7%9A%AE%E5%90%8C',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721#sitelinks-wikipedia',\n", + " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n", + " '/wiki/Wikipedia:About',\n", + " '/wiki/Wikipedia:General_disclaimer']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "python = BeautifulSoup(content, 'html')\n", + "\n", + "python_links = python.find_all('a', href = True, title = True)\n", + "python_links = [link['href'] for link in python_links]\n", + "\n", + "python_links" ] }, { @@ -230,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -240,11 +599,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of titles that have change in the 15\n" + ] + } + ], "source": [ - "#your code" + "#your code\n", + "\n", + "content = requests.get(url).content\n", + "code = BeautifulSoup(content, 'html')\n", + "\n", + "code_change = code.find_all('div',{'class': 'usctitlechanged'})\n", + "code_change = [element.text.replace('\\n', '') for element in code_change]\n", + "\n", + "print(f\"The number of titles that have change in the {len(code_change)}\")" ] }, { @@ -256,21 +631,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.fbi.gov/wanted/topten'" + "url = 'https://www.fbi.gov/wanted/topten'\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 116, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['ALEJANDRO ROSALES CASTILLO',\n", + " 'YASER ABDEL SAID',\n", + " 'JASON DEREK BROWN',\n", + " 'RAFAEL CARO-QUINTERO',\n", + " 'ALEXIS FLORES',\n", + " 'EUGENE PALMER',\n", + " 'SANTIAGO VILLALBA MEDEROS',\n", + " 'ROBERT WILLIAM FISHER',\n", + " 'BHADRESHKUMAR CHETANBHAI PATEL',\n", + " 'ARNOLDO JIMENEZ']" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code " + "#your code \n", + "content = requests.get(url).content\n", + "fbi_ten = BeautifulSoup(content, 'html')\n", + "\n", + "top_ten = fbi_ten.find_all('h3',{'class': 'title'})\n", + "top_ten = [element.text.replace('\\n', '') for element in top_ten]\n", + "top_ten" ] }, { @@ -282,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -290,13 +691,231 @@ "url = 'https://www.emsc-csem.org/Earthquake/'" ] }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['2019-07-16', '23:18:22.016 '],\n", + " ['2019-07-16', '23:11:27.223 '],\n", + " ['2019-07-16', '22:50:40.744 '],\n", + " ['2019-07-16', '22:42:26.952 '],\n", + " ['2019-07-16', '22:40:21.754 '],\n", + " ['2019-07-16', '22:38:43.255 '],\n", + " ['2019-07-16', '22:31:33.21 03 '],\n", + " ['2019-07-16', '22:11:10.01 23 '],\n", + " ['2019-07-16', '22:10:21.01 24 '],\n", + " ['2019-07-16', '22:05:16.91 29 '],\n", + " ['2019-07-16', '22:03:53.01 30 '],\n", + " ['2019-07-16', '21:56:18.01 38 '],\n", + " ['2019-07-16', '21:56:14.01 38 '],\n", + " ['2019-07-16', '21:42:41.71 52 '],\n", + " ['2019-07-16', '21:13:33.82 21 '],\n", + " ['2019-07-16', '21:07:37.52 27 '],\n", + " ['2019-07-16', '21:06:21.52 28 '],\n", + " ['2019-07-16', '21:03:36.72 31 '],\n", + " ['2019-07-16', '20:52:18.62 42 '],\n", + " ['2019-07-16', '20:49:09.12 45 '],\n", + " ['2019-07-16', '20:41:11.02 53 '],\n", + " ['2019-07-16', '20:33:52.93 00 '],\n", + " ['2019-07-16', '20:31:33.33 03 '],\n", + " ['2019-07-16', '20:29:07.73 05 '],\n", + " ['2019-07-16', '20:24:16.93 10 '],\n", + " ['2019-07-16', '20:19:00.13 15 '],\n", + " ['2019-07-16', '20:17:51.63 16 '],\n", + " ['2019-07-16', '20:15:59.03 18 '],\n", + " ['2019-07-16', '20:11:01.53 23 '],\n", + " ['2019-07-16', '19:51:06.53 43 '],\n", + " ['2019-07-16', '19:42:25.93 52 '],\n", + " ['2019-07-16', '19:35:57.03 58 '],\n", + " ['2019-07-16', '19:23:50.14 10 '],\n", + " ['2019-07-16', '19:20:21.44 14 '],\n", + " ['2019-07-16', '19:16:53.84 17 '],\n", + " ['2019-07-16', '19:16:15.94 18 '],\n", + " ['2019-07-16', '19:11:48.94 22 '],\n", + " ['2019-07-16', '19:04:00.24 30 '],\n", + " ['2019-07-16', '19:01:48.04 32 '],\n", + " ['2019-07-16', '19:01:00.84 33 '],\n", + " ['2019-07-16', '18:53:32.04 41 '],\n", + " ['2019-07-16', '18:50:16.24 44 '],\n", + " ['2019-07-16', '18:47:48.94 46 '],\n", + " ['2019-07-16', '18:36:26.84 58 '],\n", + " ['2019-07-16', '18:22:31.95 12 '],\n", + " ['2019-07-16', '18:15:28.75 19 '],\n", + " ['2019-07-16', '18:10:01.05 24 '],\n", + " ['2019-07-16', '17:48:24.05 46 '],\n", + " ['2019-07-16', '17:42:29.95 52 '],\n", + " ['2019-07-16', '17:39:43.05 54 ']]" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import re\n", + "#your code\n", + "content = requests.get(url).content\n", + "earthquake = BeautifulSoup(content, 'html')\n", + " \n", + "earthquake_20 = earthquake.select('tr > td:nth-child(4)')\n", + "earthquake_date = [re.sub(r'[a-zA-Z]', '', element.text.replace('\\xa0', \" \")).split(' ') for element in earthquake_20]\n", + "earthquake_date\n" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['2.84 S', '35.57 N', '36.06 N', '35.66 N', '35.64 N', '35.78 N', '35.65 N', '17.32 N', '18.43 S', '34.07 N', '12.40 N', '39.27 N', '16.62 N', '69.27 N', '49.45 N', '40.25 N', '35.88 N', '37.81 N', '36.07 N', '36.07 N', '16.85 N', '40.09 N', '23.45 S', '35.86 N', '37.82 N', '33.10 N', '35.55 N', '35.68 N', '37.82 N', '6.26 S', '35.61 N', '35.62 N', '36.19 N', '38.39 N', '38.45 N', '61.27 N', '36.03 N', '35.96 N', '39.56 N', '35.68 N', '0.68 S', '43.62 N', '35.59 N', '35.74 N', '35.65 N', '28.44 N', '34.41 S', '9.93 S', '35.67 N', '0.54 S']\n" + ] + } + ], + "source": [ + "earthquake_latitude = earthquake.select('tr > td:nth-child(5)')\n", + "earthquake_latitude = [element.text.replace('\\xa0', \"\") for element in earthquake_latitude]\n", + "\n", + "earthquake_cardinal = earthquake.select('tr > td:nth-child(6)')\n", + "earthquake_cardinal = [element.text.replace('\\xa0', \"\") for element in earthquake_cardinal]\n", + "\n", + "latitude_comp = []\n", + "for a, b in zip(earthquake_latitude, earthquake_cardinal):\n", + " latitude_comp.append(f\"{a} {b}\")\n", + "print(latitude_comp)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['77.62 W', '117.39 W', '117.85 W', '117.44 W', '117.48 W', '117.59 W', '117.46 W', '101.46 W', '120.58 E', '11.30 W', '144.54 E', '29.92 W', '94.75 W', '144.73 W', '120.50 W', '19.76 E', '117.69 W', '121.76 W', '117.84 W', '117.65 W', '100.25 W', '19.91 E', '66.86 W', '117.69 W', '121.77 W', '12.42 W', '117.43 W', '117.52 W', '121.77 W', '148.65 E', '117.47 W', '117.45 W', '117.89 W', '16.94 E', '16.91 E', '152.44 W', '117.87 W', '117.71 W', '67.17 E', '117.54 W', '126.36 E', '75.40 E', '117.42 W', '117.56 W', '117.52 W', '56.67 E', '150.73 E', '118.23 E', '117.54 W', '127.86 E']\n" + ] + } + ], + "source": [ + "earthquake_longitude = earthquake.select('tr > td:nth-child(7)')\n", + "earthquake_longitude = [element.text.replace('\\xa0', \"\") for element in earthquake_longitude]\n", + "\n", + "earthquake_cardinal = earthquake.select('tr > td:nth-child(8)')\n", + "earthquake_cardinal = [element.text.replace('\\xa0', \"\") for element in earthquake_cardinal]\n", + "\n", + "longitude_comp = []\n", + "for a, b in zip(earthquake_longitude, earthquake_cardinal):\n", + " longitude_comp.append(f\"{a} {b}\")\n", + "print(longitude_comp)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'OFFSHORE GUERRERO, MEXICO',\n", + " 'WESTERN AUSTRALIA',\n", + " 'MADEIRA ISLANDS, PORTUGAL REGION',\n", + " 'GUAM REGION',\n", + " 'AZORES ISLANDS, PORTUGAL',\n", + " 'OAXACA, MEXICO',\n", + " 'NORTHERN ALASKA',\n", + " 'ALBANIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'SAN FRANCISCO BAY AREA, CALIF.',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'OFFSHORE GUERRERO, MEXICO',\n", + " 'ALBANIA',\n", + " 'JUJUY, ARGENTINA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'SAN FRANCISCO BAY AREA, CALIF.',\n", + " 'MADEIRA ISLANDS, PORTUGAL REGION',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SAN FRANCISCO BAY AREA, CALIF.',\n", + " 'NEW BRITAIN REGION, P.N.G.',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'SOUTHERN ITALY',\n", + " 'SOUTHERN ITALY',\n", + " 'SOUTHERN ALASKA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'EASTERN UZBEKISTAN',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'MOLUCCA SEA',\n", + " 'EASTERN KAZAKHSTAN',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN IRAN',\n", + " 'NEAR S.E. COAST OF AUSTRALIA',\n", + " 'SUMBAWA REGION, INDONESIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'HALMAHERA, INDONESIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'OFFSHORE OAXACA, MEXICO',\n", + " 'CENTRAL CALIFORNIA']" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "region = earthquake.find_all('td', {'class': 'tb_region'})\n", + "region_name = [element.text.replace('\\xa0', \"\") for element in region]\n", + "region_name\n", + "\n" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#your code" + "df_earthquakes = pd.DataFrame(earthquake_date, columns = ['year', 'time'])\n", + "df_earthquakes\n", + "\n", + "df_latitude = pd.DataFrame({'latitude':latitude_comp})\n", + "df_latitude\n", + "\n", + "df_longitude = pd.DataFrame({'longitude':longitude_comp})\n", + "df_longitude\n", + "\n", + "df_region = pd.DataFrame({'region':region_name})\n", + "df_region\n", + "\n", + "to_concat = [df_latitude, df_longitude, df_region]\n", + "\n", + "table_earthquakes = df_earthquakes.join(to_concat)\n", + "table_earthquakes" ] }, { @@ -308,21 +927,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://hackevents.co/hackathons'" + "url ='https://hackevents.co/'" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 150, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EventDate
0Code Geist Hackathon by SefrWahed7/29/2019
1The Code Factor5/21/2019
2TECHFEST MUNICH9/6/2019
3Galileo App Competition1/31/2019
\n", + "
" + ], + "text/plain": [ + " Event Date\n", + "0 Code Geist Hackathon by SefrWahed 7/29/2019\n", + "1 The Code Factor 5/21/2019\n", + "2 TECHFEST MUNICH 9/6/2019\n", + "3 Galileo App Competition 1/31/2019" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "hack = BeautifulSoup(content, 'html5')\n", + "#print(hack)\n", + "\n", + "hack_event = hack.find_all('h5', {'class': 'card-title'})\n", + "hack_event = [element.text for element in hack_event]\n", + "hack_event\n", + "\n", + "hack_event_date = hack.find_all('p', {'class': 'card-text'})\n", + "hack_event_date = [\"\".join(re.findall(r'[\\d\\/\\d{2}\\d{4}]', element.text)) for element in hack_event_date]\n", + "hack_event_date\n", + "\n", + "\n", + "result = pd.DataFrame({'Event': hack_event, 'Date': hack_event_date})\n", + "result" ] }, { diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 812f7a4..a856e02 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -76,11 +76,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Eric Ma',\n", + " 'Federico Brigante',\n", + " 'Kyle Roach',\n", + " 'Olle Jonsson',\n", + " 'Nikita Sobolev',\n", + " 'Frank S. Thomas',\n", + " 'syuilo',\n", + " 'Ives van Hoorne',\n", + " 'Paulus Schoutsen',\n", + " 'Sarah Drasner',\n", + " 'Stefanos Kornilios Mitsis Poiitidis',\n", + " 'Jan Hovancik',\n", + " 'Andreas Mueller',\n", + " 'Guillaume Gomez',\n", + " 'Matt Holt',\n", + " 'Clifford Wolf',\n", + " 'Franck Nijhof',\n", + " 'Joe Block',\n", + " 'Andrei Neagoie',\n", + " 'Jack Lloyd',\n", + " 'Guillermo Rauch',\n", + " 'Tim Griesser',\n", + " 'Jameson Nash',\n", + " 'Anderson Banihirwe',\n", + " 'Danny Ryan']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "soup_git = BeautifulSoup(content, 'html')\n", + "soup_git\n" ] }, { @@ -134,11 +172,49 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Eric Ma',\n", + " 'Federico Brigante',\n", + " 'Kyle Roach',\n", + " 'Olle Jonsson',\n", + " 'Nikita Sobolev',\n", + " 'Frank S. Thomas',\n", + " 'syuilo',\n", + " 'Ives van Hoorne',\n", + " 'Paulus Schoutsen',\n", + " 'Sarah Drasner',\n", + " 'Stefanos Kornilios Mitsis Poiitidis',\n", + " 'Jan Hovancik',\n", + " 'Andreas Mueller',\n", + " 'Guillaume Gomez',\n", + " 'Matt Holt',\n", + " 'Clifford Wolf',\n", + " 'Franck Nijhof',\n", + " 'Joe Block',\n", + " 'Andrei Neagoie',\n", + " 'Jack Lloyd',\n", + " 'Guillermo Rauch',\n", + " 'Tim Griesser',\n", + " 'Jameson Nash',\n", + " 'Anderson Banihirwe',\n", + " 'Danny Ryan']" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "dev_names = soup_git.find_all('h1',{'class': 'h3 lh-condensed'})\n", + "dev_names = [element.text for element in dev_names]\n", + "dev_names" ] }, { @@ -152,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -162,11 +238,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['sherlock-project / sherlock ',\n", + " 'j3ssie / Osmedeus ',\n", + " 'ytdl-org / youtube-dl ',\n", + " 'uber / ludwig ',\n", + " 'tkat0 / PyTorch_BlazeFace ',\n", + " 'Kyubyong / dc_tts ',\n", + " 'gto76 / python-cheatsheet ',\n", + " 'bbfamily / abu ',\n", + " 'DrDonk / unlocker ',\n", + " 'BlackHC / tfpyth ',\n", + " 'sundowndev / PhoneInfoga ',\n", + " 'public-apis / public-apis ',\n", + " 'twintproject / twint ',\n", + " 'liuhuanyong / QASystemOnMedicalKG ',\n", + " 'vaexio / vaex ',\n", + " 'MrS0m30n3 / youtube-dl-gui ',\n", + " 'MozillaSecurity / grizzly ',\n", + " 'scikit-learn / scikit-learn ',\n", + " 'bitcoin / bips ',\n", + " 'xinshuoweng / AB3DMOT ',\n", + " 'smartHomeHub / SmartIR ',\n", + " 'sfyc23 / EverydayWechat ',\n", + " 'home-assistant / home-assistant ',\n", + " 'msgi / nlp-journey ',\n", + " 'gunthercox / ChatterBot ']" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "soup_git = BeautifulSoup(content, 'html')\n", + "\n", + "dev_rep = soup_git.find_all('h1',{'class': 'h3 lh-condensed'})\n", + "dev_rep = [element.text.replace('\\n', '') for element in dev_rep]\n", + "dev_rep" ] }, { @@ -178,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -188,11 +305,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Cscr-featured.svg/20px-Cscr-featured.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/d/df/Walt_Disney_1946.JPG/220px-Walt_Disney_1946.JPG',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/8/87/Walt_Disney_1942_signature.svg/150px-Walt_Disney_1942_signature.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/d/da/Animation_disc.svg/30px-Animation_disc.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/6/69/P_vip.svg/29px-P_vip.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Magic_Kingdom_castle.jpg/24px-Magic_Kingdom_castle.jpg',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Video-x-generic.svg/30px-Video-x-generic.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/Flag_of_Los_Angeles_County%2C_California.svg/30px-Flag_of_Los_Angeles_County%2C_California.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/USA_flag_on_television.svg/30px-USA_flag_on_television.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/22px-Commons-logo.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/25px-Wikiquote-logo.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/30px-Wikidata-logo.svg.png',\n", + " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n", + " '//en.wikipedia.org/wiki/Special:CentralAutoLogin/start?type=1x1',\n", + " '/static/images/wikimedia-button.png',\n", + " '/static/images/poweredby_mediawiki_88x31.png']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "disney = BeautifulSoup(content, 'html')\n", + "#print(disney)\n", + "\n", + "\n", + "disney_img = disney.find_all('img', src = True)\n", + "disney_img = [element['src'] for element in disney_img]\n", + "\n", + "disney_img" ] }, { @@ -204,21 +373,211 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://en.wikipedia.org/wiki/Python' " + "url = 'https://en.wikipedia.org/wiki/Python_(mythology)' " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['/wiki/Pythia',\n", + " '/wiki/File:Apollo_dan_Pithon.jpg',\n", + " '/wiki/Apollo',\n", + " '/wiki/Virgil_Solis',\n", + " '/wiki/Ovid',\n", + " '/wiki/Metamorphoses',\n", + " '/wiki/Greek_mythology',\n", + " '/wiki/Greek_language',\n", + " '/wiki/Genitive_case',\n", + " '/wiki/Serpent_(symbolism)',\n", + " '/wiki/Medieval',\n", + " '/wiki/Dragon',\n", + " '/wiki/Earth',\n", + " '/wiki/Delphi',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=1',\n", + " '/wiki/Oracle',\n", + " '/wiki/Gaia_(mythology)',\n", + " '/wiki/Axis_mundi#Places',\n", + " '/wiki/Omphalos',\n", + " '/wiki/Chthonic',\n", + " '/wiki/Apollo',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=2',\n", + " '/wiki/File:Pietro_Francavilla_-_Apollo_Victorious_over_the_Python_-_Walters_27302.jpg',\n", + " '/wiki/Homeric_Hymn',\n", + " '/wiki/Drakaina_(mythology)',\n", + " '/wiki/Gaius_Julius_Hyginus',\n", + " '/wiki/Zeus',\n", + " '/wiki/Leto',\n", + " '/wiki/Artemis',\n", + " '/wiki/Apollo',\n", + " '/wiki/Hera',\n", + " '/wiki/Mount_Parnassus',\n", + " '/wiki/Gaia_(mythology)',\n", + " '/wiki/Robert_Graves',\n", + " '/wiki/Greeks',\n", + " '/wiki/Sacrilege',\n", + " '/wiki/Pythian_Games',\n", + " '/wiki/Erwin_Rohde',\n", + " '/wiki/Omphalos',\n", + " '/wiki/Oracle',\n", + " '/wiki/Delphi',\n", + " '/wiki/Hyperion_(mythology)',\n", + " '/wiki/Helios',\n", + " '/wiki/Karl_Kerenyi',\n", + " '/wiki/Delphyne',\n", + " '/wiki/Typhon',\n", + " '/wiki/Typhoeus',\n", + " '/wiki/Pythia',\n", + " '/wiki/Omphalos',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=3',\n", + " '/wiki/Python_(genus)',\n", + " '/wiki/Pythonidae',\n", + " '/wiki/File:Barcelona_221.JPG',\n", + " '/wiki/Apollo_Belvedere',\n", + " '/wiki/Delphi',\n", + " '/wiki/Dragons_in_Greek_mythology',\n", + " '/wiki/Pythia',\n", + " '/wiki/Serpent_(symbolism)',\n", + " '/wiki/Saint_George_and_the_Dragon',\n", + " '/wiki/Analogy_of_the_sun',\n", + " '/wiki/Yamata_no_Orochi',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=4',\n", + " '/wiki/Telphousa',\n", + " '/wiki/Parnassus',\n", + " '/wiki/Dodona',\n", + " '/wiki/The_Walters_Art_Museum',\n", + " '/wiki/Walter_Burkert',\n", + " '/wiki/Encyclopedia_Americana',\n", + " '/w/index.php?title=Python_(mythology)&action=edit§ion=5',\n", + " 'https://commons.wikimedia.org/wiki/Category:Python_(mythology)',\n", + " '/wiki/Wikisource',\n", + " '/wiki/Encyclop%C3%A6dia_Britannica_Eleventh_Edition',\n", + " 'https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Python_(mythology)',\n", + " '/wiki/Walter_Burkert',\n", + " '/wiki/Joseph_Fontenrose',\n", + " '/wiki/Manly_Palmer_Hall',\n", + " '/wiki/Jane_Ellen_Harrison',\n", + " '/wiki/Karl_Kerenyi',\n", + " '/wiki/Erwin_Rohde',\n", + " '/wiki/William_Smith_(lexicographer)',\n", + " '/wiki/Dictionary_of_Greek_and_Roman_Biography_and_Mythology',\n", + " '/wiki/Help:Authority_control',\n", + " 'https://www.wikidata.org/wiki/Q15721',\n", + " '/wiki/Integrated_Authority_File',\n", + " '/wiki/Syst%C3%A8me_universitaire_de_documentation',\n", + " '/wiki/Virtual_International_Authority_File',\n", + " '/wiki/WorldCat_Identities',\n", + " '/wiki/Help:Category',\n", + " '/wiki/Category:Ancient_Delphi',\n", + " '/wiki/Category:Offspring_of_Gaia',\n", + " '/wiki/Category:Greek_dragons',\n", + " '/wiki/Category:Wikipedia_articles_incorporating_a_citation_from_the_Encyclopedia_Americana_with_a_Wikisource_reference',\n", + " '/wiki/Category:Articles_containing_Greek-language_text',\n", + " '/wiki/Category:Commons_category_link_is_on_Wikidata',\n", + " '/wiki/Category:Wikipedia_articles_with_GND_identifiers',\n", + " '/wiki/Category:Wikipedia_articles_with_SUDOC_identifiers',\n", + " '/wiki/Category:Wikipedia_articles_with_VIAF_identifiers',\n", + " '/wiki/Category:Wikipedia_articles_with_WorldCat-VIAF_identifiers',\n", + " '/wiki/Special:MyTalk',\n", + " '/wiki/Special:MyContributions',\n", + " '/w/index.php?title=Special:CreateAccount&returnto=Python+%28mythology%29',\n", + " '/w/index.php?title=Special:UserLogin&returnto=Python+%28mythology%29',\n", + " '/wiki/Python_(mythology)',\n", + " '/wiki/Talk:Python_(mythology)',\n", + " '/w/index.php?title=Python_(mythology)&action=edit',\n", + " '/w/index.php?title=Python_(mythology)&action=history',\n", + " '/wiki/Main_Page',\n", + " '/wiki/Main_Page',\n", + " '/wiki/Portal:Contents',\n", + " '/wiki/Portal:Featured_content',\n", + " '/wiki/Portal:Current_events',\n", + " '/wiki/Special:Random',\n", + " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n", + " '//shop.wikimedia.org',\n", + " '/wiki/Help:Contents',\n", + " '/wiki/Wikipedia:About',\n", + " '/wiki/Wikipedia:Community_portal',\n", + " '/wiki/Special:RecentChanges',\n", + " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n", + " '/wiki/Special:WhatLinksHere/Python_(mythology)',\n", + " '/wiki/Special:RecentChangesLinked/Python_(mythology)',\n", + " '/wiki/Wikipedia:File_Upload_Wizard',\n", + " '/wiki/Special:SpecialPages',\n", + " '/w/index.php?title=Python_(mythology)&oldid=900434819',\n", + " '/w/index.php?title=Python_(mythology)&action=info',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721',\n", + " '/w/index.php?title=Special:CiteThisPage&page=Python_%28mythology%29&id=900434819',\n", + " '/w/index.php?title=Python_(mythology)&printable=yes',\n", + " 'https://af.wikipedia.org/wiki/Piton',\n", + " 'https://be.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n", + " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%8F)',\n", + " 'https://br.wikipedia.org/wiki/Python_(aerouant)',\n", + " 'https://ca.wikipedia.org/wiki/Pit%C3%B3_(mitologia)',\n", + " 'https://cs.wikipedia.org/wiki/P%C3%BDth%C3%B3n',\n", + " 'https://da.wikipedia.org/wiki/Python_(mytologi)',\n", + " 'https://de.wikipedia.org/wiki/Python_(Mythologie)',\n", + " 'https://et.wikipedia.org/wiki/Python',\n", + " 'https://el.wikipedia.org/wiki/%CE%A0%CF%8D%CE%B8%CF%89%CE%BD_(%CE%BC%CF%85%CE%B8%CE%BF%CE%BB%CE%BF%CE%B3%CE%AF%CE%B1)',\n", + " 'https://es.wikipedia.org/wiki/Pit%C3%B3n_(mitolog%C3%ADa)',\n", + " 'https://eo.wikipedia.org/wiki/Pitono_(mitologio)',\n", + " 'https://eu.wikipedia.org/wiki/Piton',\n", + " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86_(%D8%A7%D8%B3%D8%B7%D9%88%D8%B1%D9%87)',\n", + " 'https://fr.wikipedia.org/wiki/Python_(mythologie)',\n", + " 'https://gl.wikipedia.org/wiki/Pit%C3%B3n_(mitolox%C3%ADa)',\n", + " 'https://ko.wikipedia.org/wiki/%ED%94%BC%ED%86%A4',\n", + " 'https://hr.wikipedia.org/wiki/Piton_(mitologija)',\n", + " 'https://id.wikipedia.org/wiki/Pithon_(mitologi)',\n", + " 'https://it.wikipedia.org/wiki/Pitone_(mitologia)',\n", + " 'https://la.wikipedia.org/wiki/Python_(mythologia)',\n", + " 'https://lb.wikipedia.org/wiki/Python_(Mythologie)',\n", + " 'https://lt.wikipedia.org/wiki/Pitonas_(mitologija)',\n", + " 'https://hu.wikipedia.org/wiki/P%C3%BCth%C3%B3n',\n", + " 'https://nl.wikipedia.org/wiki/Python_(mythologie)',\n", + " 'https://ja.wikipedia.org/wiki/%E3%83%94%E3%83%A5%E3%83%BC%E3%83%88%E3%83%BC%E3%83%B3',\n", + " 'https://no.wikipedia.org/wiki/Python_(mytologi)',\n", + " 'https://pl.wikipedia.org/wiki/Pyton_(mitologia)',\n", + " 'https://pt.wikipedia.org/wiki/P%C3%ADton_(mitologia)',\n", + " 'https://ro.wikipedia.org/wiki/Python_(mitologie)',\n", + " 'https://ru.wikipedia.org/wiki/%D0%9F%D0%B8%D1%84%D0%BE%D0%BD',\n", + " 'https://sco.wikipedia.org/wiki/Python_(meethology)',\n", + " 'https://sq.wikipedia.org/wiki/Pithoni',\n", + " 'https://sl.wikipedia.org/wiki/Piton_(mitologija)',\n", + " 'https://sr.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%98%D0%B0)',\n", + " 'https://fi.wikipedia.org/wiki/Python_(hirvi%C3%B6)',\n", + " 'https://sv.wikipedia.org/wiki/Python_(mytologi)',\n", + " 'https://tr.wikipedia.org/wiki/Pithon',\n", + " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD_(%D0%BC%D1%96%D1%84%D0%BE%D0%BB%D0%BE%D0%B3%D1%96%D1%8F)',\n", + " 'https://vi.wikipedia.org/wiki/Python_(th%E1%BA%A7n_tho%E1%BA%A1i)',\n", + " 'https://zh.wikipedia.org/wiki/%E7%9A%AE%E5%90%8C',\n", + " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721#sitelinks-wikipedia',\n", + " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n", + " '/wiki/Wikipedia:About',\n", + " '/wiki/Wikipedia:General_disclaimer']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "python = BeautifulSoup(content, 'html')\n", + "\n", + "python_links = python.find_all('a', href = True, title = True)\n", + "python_links = [link['href'] for link in python_links]\n", + "\n", + "python_links" ] }, { @@ -230,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -240,11 +599,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of titles that have change in the 15\n" + ] + } + ], "source": [ - "#your code" + "#your code\n", + "\n", + "content = requests.get(url).content\n", + "code = BeautifulSoup(content, 'html')\n", + "\n", + "code_change = code.find_all('div',{'class': 'usctitlechanged'})\n", + "code_change = [element.text.replace('\\n', '') for element in code_change]\n", + "\n", + "print(f\"The number of titles that have change in the {len(code_change)}\")" ] }, { @@ -256,21 +631,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.fbi.gov/wanted/topten'" + "url = 'https://www.fbi.gov/wanted/topten'\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 116, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['ALEJANDRO ROSALES CASTILLO',\n", + " 'YASER ABDEL SAID',\n", + " 'JASON DEREK BROWN',\n", + " 'RAFAEL CARO-QUINTERO',\n", + " 'ALEXIS FLORES',\n", + " 'EUGENE PALMER',\n", + " 'SANTIAGO VILLALBA MEDEROS',\n", + " 'ROBERT WILLIAM FISHER',\n", + " 'BHADRESHKUMAR CHETANBHAI PATEL',\n", + " 'ARNOLDO JIMENEZ']" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code " + "#your code \n", + "content = requests.get(url).content\n", + "fbi_ten = BeautifulSoup(content, 'html')\n", + "\n", + "top_ten = fbi_ten.find_all('h3',{'class': 'title'})\n", + "top_ten = [element.text.replace('\\n', '') for element in top_ten]\n", + "top_ten" ] }, { @@ -282,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -290,13 +691,231 @@ "url = 'https://www.emsc-csem.org/Earthquake/'" ] }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['2019-07-16', '23:18:22.016 '],\n", + " ['2019-07-16', '23:11:27.223 '],\n", + " ['2019-07-16', '22:50:40.744 '],\n", + " ['2019-07-16', '22:42:26.952 '],\n", + " ['2019-07-16', '22:40:21.754 '],\n", + " ['2019-07-16', '22:38:43.255 '],\n", + " ['2019-07-16', '22:31:33.21 03 '],\n", + " ['2019-07-16', '22:11:10.01 23 '],\n", + " ['2019-07-16', '22:10:21.01 24 '],\n", + " ['2019-07-16', '22:05:16.91 29 '],\n", + " ['2019-07-16', '22:03:53.01 30 '],\n", + " ['2019-07-16', '21:56:18.01 38 '],\n", + " ['2019-07-16', '21:56:14.01 38 '],\n", + " ['2019-07-16', '21:42:41.71 52 '],\n", + " ['2019-07-16', '21:13:33.82 21 '],\n", + " ['2019-07-16', '21:07:37.52 27 '],\n", + " ['2019-07-16', '21:06:21.52 28 '],\n", + " ['2019-07-16', '21:03:36.72 31 '],\n", + " ['2019-07-16', '20:52:18.62 42 '],\n", + " ['2019-07-16', '20:49:09.12 45 '],\n", + " ['2019-07-16', '20:41:11.02 53 '],\n", + " ['2019-07-16', '20:33:52.93 00 '],\n", + " ['2019-07-16', '20:31:33.33 03 '],\n", + " ['2019-07-16', '20:29:07.73 05 '],\n", + " ['2019-07-16', '20:24:16.93 10 '],\n", + " ['2019-07-16', '20:19:00.13 15 '],\n", + " ['2019-07-16', '20:17:51.63 16 '],\n", + " ['2019-07-16', '20:15:59.03 18 '],\n", + " ['2019-07-16', '20:11:01.53 23 '],\n", + " ['2019-07-16', '19:51:06.53 43 '],\n", + " ['2019-07-16', '19:42:25.93 52 '],\n", + " ['2019-07-16', '19:35:57.03 58 '],\n", + " ['2019-07-16', '19:23:50.14 10 '],\n", + " ['2019-07-16', '19:20:21.44 14 '],\n", + " ['2019-07-16', '19:16:53.84 17 '],\n", + " ['2019-07-16', '19:16:15.94 18 '],\n", + " ['2019-07-16', '19:11:48.94 22 '],\n", + " ['2019-07-16', '19:04:00.24 30 '],\n", + " ['2019-07-16', '19:01:48.04 32 '],\n", + " ['2019-07-16', '19:01:00.84 33 '],\n", + " ['2019-07-16', '18:53:32.04 41 '],\n", + " ['2019-07-16', '18:50:16.24 44 '],\n", + " ['2019-07-16', '18:47:48.94 46 '],\n", + " ['2019-07-16', '18:36:26.84 58 '],\n", + " ['2019-07-16', '18:22:31.95 12 '],\n", + " ['2019-07-16', '18:15:28.75 19 '],\n", + " ['2019-07-16', '18:10:01.05 24 '],\n", + " ['2019-07-16', '17:48:24.05 46 '],\n", + " ['2019-07-16', '17:42:29.95 52 '],\n", + " ['2019-07-16', '17:39:43.05 54 ']]" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import re\n", + "#your code\n", + "content = requests.get(url).content\n", + "earthquake = BeautifulSoup(content, 'html')\n", + " \n", + "earthquake_20 = earthquake.select('tr > td:nth-child(4)')\n", + "earthquake_date = [re.sub(r'[a-zA-Z]', '', element.text.replace('\\xa0', \" \")).split(' ') for element in earthquake_20]\n", + "earthquake_date\n" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['2.84 S', '35.57 N', '36.06 N', '35.66 N', '35.64 N', '35.78 N', '35.65 N', '17.32 N', '18.43 S', '34.07 N', '12.40 N', '39.27 N', '16.62 N', '69.27 N', '49.45 N', '40.25 N', '35.88 N', '37.81 N', '36.07 N', '36.07 N', '16.85 N', '40.09 N', '23.45 S', '35.86 N', '37.82 N', '33.10 N', '35.55 N', '35.68 N', '37.82 N', '6.26 S', '35.61 N', '35.62 N', '36.19 N', '38.39 N', '38.45 N', '61.27 N', '36.03 N', '35.96 N', '39.56 N', '35.68 N', '0.68 S', '43.62 N', '35.59 N', '35.74 N', '35.65 N', '28.44 N', '34.41 S', '9.93 S', '35.67 N', '0.54 S']\n" + ] + } + ], + "source": [ + "earthquake_latitude = earthquake.select('tr > td:nth-child(5)')\n", + "earthquake_latitude = [element.text.replace('\\xa0', \"\") for element in earthquake_latitude]\n", + "\n", + "earthquake_cardinal = earthquake.select('tr > td:nth-child(6)')\n", + "earthquake_cardinal = [element.text.replace('\\xa0', \"\") for element in earthquake_cardinal]\n", + "\n", + "latitude_comp = []\n", + "for a, b in zip(earthquake_latitude, earthquake_cardinal):\n", + " latitude_comp.append(f\"{a} {b}\")\n", + "print(latitude_comp)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['77.62 W', '117.39 W', '117.85 W', '117.44 W', '117.48 W', '117.59 W', '117.46 W', '101.46 W', '120.58 E', '11.30 W', '144.54 E', '29.92 W', '94.75 W', '144.73 W', '120.50 W', '19.76 E', '117.69 W', '121.76 W', '117.84 W', '117.65 W', '100.25 W', '19.91 E', '66.86 W', '117.69 W', '121.77 W', '12.42 W', '117.43 W', '117.52 W', '121.77 W', '148.65 E', '117.47 W', '117.45 W', '117.89 W', '16.94 E', '16.91 E', '152.44 W', '117.87 W', '117.71 W', '67.17 E', '117.54 W', '126.36 E', '75.40 E', '117.42 W', '117.56 W', '117.52 W', '56.67 E', '150.73 E', '118.23 E', '117.54 W', '127.86 E']\n" + ] + } + ], + "source": [ + "earthquake_longitude = earthquake.select('tr > td:nth-child(7)')\n", + "earthquake_longitude = [element.text.replace('\\xa0', \"\") for element in earthquake_longitude]\n", + "\n", + "earthquake_cardinal = earthquake.select('tr > td:nth-child(8)')\n", + "earthquake_cardinal = [element.text.replace('\\xa0', \"\") for element in earthquake_cardinal]\n", + "\n", + "longitude_comp = []\n", + "for a, b in zip(earthquake_longitude, earthquake_cardinal):\n", + " longitude_comp.append(f\"{a} {b}\")\n", + "print(longitude_comp)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'OFFSHORE GUERRERO, MEXICO',\n", + " 'WESTERN AUSTRALIA',\n", + " 'MADEIRA ISLANDS, PORTUGAL REGION',\n", + " 'GUAM REGION',\n", + " 'AZORES ISLANDS, PORTUGAL',\n", + " 'OAXACA, MEXICO',\n", + " 'NORTHERN ALASKA',\n", + " 'ALBANIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'SAN FRANCISCO BAY AREA, CALIF.',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'OFFSHORE GUERRERO, MEXICO',\n", + " 'ALBANIA',\n", + " 'JUJUY, ARGENTINA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'SAN FRANCISCO BAY AREA, CALIF.',\n", + " 'MADEIRA ISLANDS, PORTUGAL REGION',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SAN FRANCISCO BAY AREA, CALIF.',\n", + " 'NEW BRITAIN REGION, P.N.G.',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'SOUTHERN ITALY',\n", + " 'SOUTHERN ITALY',\n", + " 'SOUTHERN ALASKA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'EASTERN UZBEKISTAN',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'MOLUCCA SEA',\n", + " 'EASTERN KAZAKHSTAN',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'SOUTHERN IRAN',\n", + " 'NEAR S.E. COAST OF AUSTRALIA',\n", + " 'SUMBAWA REGION, INDONESIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'HALMAHERA, INDONESIA',\n", + " 'SOUTHERN CALIFORNIA',\n", + " 'CENTRAL CALIFORNIA',\n", + " 'OFFSHORE OAXACA, MEXICO',\n", + " 'CENTRAL CALIFORNIA']" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "region = earthquake.find_all('td', {'class': 'tb_region'})\n", + "region_name = [element.text.replace('\\xa0', \"\") for element in region]\n", + "region_name\n", + "\n" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#your code" + "df_earthquakes = pd.DataFrame(earthquake_date, columns = ['year', 'time'])\n", + "df_earthquakes\n", + "\n", + "df_latitude = pd.DataFrame({'latitude':latitude_comp})\n", + "df_latitude\n", + "\n", + "df_longitude = pd.DataFrame({'longitude':longitude_comp})\n", + "df_longitude\n", + "\n", + "df_region = pd.DataFrame({'region':region_name})\n", + "df_region\n", + "\n", + "to_concat = [df_latitude, df_longitude, df_region]\n", + "\n", + "table_earthquakes = df_earthquakes.join(to_concat)\n", + "table_earthquakes" ] }, { @@ -308,21 +927,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url ='https://hackevents.co/hackathons'" + "url ='https://hackevents.co/'" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 150, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EventDate
0Code Geist Hackathon by SefrWahed7/29/2019
1The Code Factor5/21/2019
2TECHFEST MUNICH9/6/2019
3Galileo App Competition1/31/2019
\n", + "
" + ], + "text/plain": [ + " Event Date\n", + "0 Code Geist Hackathon by SefrWahed 7/29/2019\n", + "1 The Code Factor 5/21/2019\n", + "2 TECHFEST MUNICH 9/6/2019\n", + "3 Galileo App Competition 1/31/2019" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code" + "#your code\n", + "content = requests.get(url).content\n", + "hack = BeautifulSoup(content, 'html5')\n", + "#print(hack)\n", + "\n", + "hack_event = hack.find_all('h5', {'class': 'card-title'})\n", + "hack_event = [element.text for element in hack_event]\n", + "hack_event\n", + "\n", + "hack_event_date = hack.find_all('p', {'class': 'card-text'})\n", + "hack_event_date = [\"\".join(re.findall(r'[\\d\\/\\d{2}\\d{4}]', element.text)) for element in hack_event_date]\n", + "hack_event_date\n", + "\n", + "\n", + "result = pd.DataFrame({'Event': hack_event, 'Date': hack_event_date})\n", + "result" ] }, {