diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
index 812f7a4..a856e02 100644
--- a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -76,11 +76,49 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Eric Ma',\n",
+ " 'Federico Brigante',\n",
+ " 'Kyle Roach',\n",
+ " 'Olle Jonsson',\n",
+ " 'Nikita Sobolev',\n",
+ " 'Frank S. Thomas',\n",
+ " 'syuilo',\n",
+ " 'Ives van Hoorne',\n",
+ " 'Paulus Schoutsen',\n",
+ " 'Sarah Drasner',\n",
+ " 'Stefanos Kornilios Mitsis Poiitidis',\n",
+ " 'Jan Hovancik',\n",
+ " 'Andreas Mueller',\n",
+ " 'Guillaume Gomez',\n",
+ " 'Matt Holt',\n",
+ " 'Clifford Wolf',\n",
+ " 'Franck Nijhof',\n",
+ " 'Joe Block',\n",
+ " 'Andrei Neagoie',\n",
+ " 'Jack Lloyd',\n",
+ " 'Guillermo Rauch',\n",
+ " 'Tim Griesser',\n",
+ " 'Jameson Nash',\n",
+ " 'Anderson Banihirwe',\n",
+ " 'Danny Ryan']"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Download the page and parse it with an explicitly named parser, so the result does not depend on which optional parsers are installed.\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "soup_git = BeautifulSoup(content, 'html.parser')\n",
+ "soup_git\n"
]
},
{
@@ -134,11 +172,49 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Eric Ma',\n",
+ " 'Federico Brigante',\n",
+ " 'Kyle Roach',\n",
+ " 'Olle Jonsson',\n",
+ " 'Nikita Sobolev',\n",
+ " 'Frank S. Thomas',\n",
+ " 'syuilo',\n",
+ " 'Ives van Hoorne',\n",
+ " 'Paulus Schoutsen',\n",
+ " 'Sarah Drasner',\n",
+ " 'Stefanos Kornilios Mitsis Poiitidis',\n",
+ " 'Jan Hovancik',\n",
+ " 'Andreas Mueller',\n",
+ " 'Guillaume Gomez',\n",
+ " 'Matt Holt',\n",
+ " 'Clifford Wolf',\n",
+ " 'Franck Nijhof',\n",
+ " 'Joe Block',\n",
+ " 'Andrei Neagoie',\n",
+ " 'Jack Lloyd',\n",
+ " 'Guillermo Rauch',\n",
+ " 'Tim Griesser',\n",
+ " 'Jameson Nash',\n",
+ " 'Anderson Banihirwe',\n",
+ " 'Danny Ryan']"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Developer names are the text of the <h1 class='h3 lh-condensed'> headings in soup_git.\n",
+ "name_headings = soup_git.find_all('h1', attrs={'class': 'h3 lh-condensed'})\n",
+ "dev_names = [heading.text for heading in name_headings]\n",
+ "dev_names"
]
},
{
@@ -152,7 +228,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
@@ -162,11 +238,52 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['sherlock-project / sherlock ',\n",
+ " 'j3ssie / Osmedeus ',\n",
+ " 'ytdl-org / youtube-dl ',\n",
+ " 'uber / ludwig ',\n",
+ " 'tkat0 / PyTorch_BlazeFace ',\n",
+ " 'Kyubyong / dc_tts ',\n",
+ " 'gto76 / python-cheatsheet ',\n",
+ " 'bbfamily / abu ',\n",
+ " 'DrDonk / unlocker ',\n",
+ " 'BlackHC / tfpyth ',\n",
+ " 'sundowndev / PhoneInfoga ',\n",
+ " 'public-apis / public-apis ',\n",
+ " 'twintproject / twint ',\n",
+ " 'liuhuanyong / QASystemOnMedicalKG ',\n",
+ " 'vaexio / vaex ',\n",
+ " 'MrS0m30n3 / youtube-dl-gui ',\n",
+ " 'MozillaSecurity / grizzly ',\n",
+ " 'scikit-learn / scikit-learn ',\n",
+ " 'bitcoin / bips ',\n",
+ " 'xinshuoweng / AB3DMOT ',\n",
+ " 'smartHomeHub / SmartIR ',\n",
+ " 'sfyc23 / EverydayWechat ',\n",
+ " 'home-assistant / home-assistant ',\n",
+ " 'msgi / nlp-journey ',\n",
+ " 'gunthercox / ChatterBot ']"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Re-fetch for the current url, then list the 'owner / repo' text of each <h1 class='h3 lh-condensed'> heading.\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "soup_git = BeautifulSoup(content, 'html.parser')  # explicit parser: same result on every machine\n",
+ "\n",
+ "repo_headings = soup_git.find_all('h1', {'class': 'h3 lh-condensed'})\n",
+ "dev_rep = [' '.join(heading.text.split()) for heading in repo_headings]  # collapse newlines and padding, no trailing space\n",
+ "dev_rep"
]
},
{
@@ -178,7 +295,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -188,11 +305,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Cscr-featured.svg/20px-Cscr-featured.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/df/Walt_Disney_1946.JPG/220px-Walt_Disney_1946.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/87/Walt_Disney_1942_signature.svg/150px-Walt_Disney_1942_signature.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/da/Animation_disc.svg/30px-Animation_disc.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/6/69/P_vip.svg/29px-P_vip.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Magic_Kingdom_castle.jpg/24px-Magic_Kingdom_castle.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Video-x-generic.svg/30px-Video-x-generic.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/Flag_of_Los_Angeles_County%2C_California.svg/30px-Flag_of_Los_Angeles_County%2C_California.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/USA_flag_on_television.svg/30px-USA_flag_on_television.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/22px-Commons-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/25px-Wikiquote-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/30px-Wikidata-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//en.wikipedia.org/wiki/Special:CentralAutoLogin/start?type=1x1',\n",
+ " '/static/images/wikimedia-button.png',\n",
+ " '/static/images/poweredby_mediawiki_88x31.png']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Collect the src of every <img> on the page.\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "disney = BeautifulSoup(content, 'html.parser')  # explicit parser: same result on every machine\n",
+ "\n",
+ "# src=True skips <img> tags that have no src attribute.\n",
+ "image_tags = disney.find_all('img', src=True)\n",
+ "# Values are kept exactly as written in the HTML, so most are protocol-relative ('//upload.wikimedia.org/...').\n",
+ "disney_img = [tag['src'] for tag in image_tags]\n",
+ "\n",
+ "disney_img"
]
},
{
@@ -204,21 +373,211 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://en.wikipedia.org/wiki/Python' "
+ "url = 'https://en.wikipedia.org/wiki/Python_(mythology)' "
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['/wiki/Pythia',\n",
+ " '/wiki/File:Apollo_dan_Pithon.jpg',\n",
+ " '/wiki/Apollo',\n",
+ " '/wiki/Virgil_Solis',\n",
+ " '/wiki/Ovid',\n",
+ " '/wiki/Metamorphoses',\n",
+ " '/wiki/Greek_mythology',\n",
+ " '/wiki/Greek_language',\n",
+ " '/wiki/Genitive_case',\n",
+ " '/wiki/Serpent_(symbolism)',\n",
+ " '/wiki/Medieval',\n",
+ " '/wiki/Dragon',\n",
+ " '/wiki/Earth',\n",
+ " '/wiki/Delphi',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=1',\n",
+ " '/wiki/Oracle',\n",
+ " '/wiki/Gaia_(mythology)',\n",
+ " '/wiki/Axis_mundi#Places',\n",
+ " '/wiki/Omphalos',\n",
+ " '/wiki/Chthonic',\n",
+ " '/wiki/Apollo',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=2',\n",
+ " '/wiki/File:Pietro_Francavilla_-_Apollo_Victorious_over_the_Python_-_Walters_27302.jpg',\n",
+ " '/wiki/Homeric_Hymn',\n",
+ " '/wiki/Drakaina_(mythology)',\n",
+ " '/wiki/Gaius_Julius_Hyginus',\n",
+ " '/wiki/Zeus',\n",
+ " '/wiki/Leto',\n",
+ " '/wiki/Artemis',\n",
+ " '/wiki/Apollo',\n",
+ " '/wiki/Hera',\n",
+ " '/wiki/Mount_Parnassus',\n",
+ " '/wiki/Gaia_(mythology)',\n",
+ " '/wiki/Robert_Graves',\n",
+ " '/wiki/Greeks',\n",
+ " '/wiki/Sacrilege',\n",
+ " '/wiki/Pythian_Games',\n",
+ " '/wiki/Erwin_Rohde',\n",
+ " '/wiki/Omphalos',\n",
+ " '/wiki/Oracle',\n",
+ " '/wiki/Delphi',\n",
+ " '/wiki/Hyperion_(mythology)',\n",
+ " '/wiki/Helios',\n",
+ " '/wiki/Karl_Kerenyi',\n",
+ " '/wiki/Delphyne',\n",
+ " '/wiki/Typhon',\n",
+ " '/wiki/Typhoeus',\n",
+ " '/wiki/Pythia',\n",
+ " '/wiki/Omphalos',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=3',\n",
+ " '/wiki/Python_(genus)',\n",
+ " '/wiki/Pythonidae',\n",
+ " '/wiki/File:Barcelona_221.JPG',\n",
+ " '/wiki/Apollo_Belvedere',\n",
+ " '/wiki/Delphi',\n",
+ " '/wiki/Dragons_in_Greek_mythology',\n",
+ " '/wiki/Pythia',\n",
+ " '/wiki/Serpent_(symbolism)',\n",
+ " '/wiki/Saint_George_and_the_Dragon',\n",
+ " '/wiki/Analogy_of_the_sun',\n",
+ " '/wiki/Yamata_no_Orochi',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=4',\n",
+ " '/wiki/Telphousa',\n",
+ " '/wiki/Parnassus',\n",
+ " '/wiki/Dodona',\n",
+ " '/wiki/The_Walters_Art_Museum',\n",
+ " '/wiki/Walter_Burkert',\n",
+ " '/wiki/Encyclopedia_Americana',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=5',\n",
+ " 'https://commons.wikimedia.org/wiki/Category:Python_(mythology)',\n",
+ " '/wiki/Wikisource',\n",
+ " '/wiki/Encyclop%C3%A6dia_Britannica_Eleventh_Edition',\n",
+ " 'https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Python_(mythology)',\n",
+ " '/wiki/Walter_Burkert',\n",
+ " '/wiki/Joseph_Fontenrose',\n",
+ " '/wiki/Manly_Palmer_Hall',\n",
+ " '/wiki/Jane_Ellen_Harrison',\n",
+ " '/wiki/Karl_Kerenyi',\n",
+ " '/wiki/Erwin_Rohde',\n",
+ " '/wiki/William_Smith_(lexicographer)',\n",
+ " '/wiki/Dictionary_of_Greek_and_Roman_Biography_and_Mythology',\n",
+ " '/wiki/Help:Authority_control',\n",
+ " 'https://www.wikidata.org/wiki/Q15721',\n",
+ " '/wiki/Integrated_Authority_File',\n",
+ " '/wiki/Syst%C3%A8me_universitaire_de_documentation',\n",
+ " '/wiki/Virtual_International_Authority_File',\n",
+ " '/wiki/WorldCat_Identities',\n",
+ " '/wiki/Help:Category',\n",
+ " '/wiki/Category:Ancient_Delphi',\n",
+ " '/wiki/Category:Offspring_of_Gaia',\n",
+ " '/wiki/Category:Greek_dragons',\n",
+ " '/wiki/Category:Wikipedia_articles_incorporating_a_citation_from_the_Encyclopedia_Americana_with_a_Wikisource_reference',\n",
+ " '/wiki/Category:Articles_containing_Greek-language_text',\n",
+ " '/wiki/Category:Commons_category_link_is_on_Wikidata',\n",
+ " '/wiki/Category:Wikipedia_articles_with_GND_identifiers',\n",
+ " '/wiki/Category:Wikipedia_articles_with_SUDOC_identifiers',\n",
+ " '/wiki/Category:Wikipedia_articles_with_VIAF_identifiers',\n",
+ " '/wiki/Category:Wikipedia_articles_with_WorldCat-VIAF_identifiers',\n",
+ " '/wiki/Special:MyTalk',\n",
+ " '/wiki/Special:MyContributions',\n",
+ " '/w/index.php?title=Special:CreateAccount&returnto=Python+%28mythology%29',\n",
+ " '/w/index.php?title=Special:UserLogin&returnto=Python+%28mythology%29',\n",
+ " '/wiki/Python_(mythology)',\n",
+ " '/wiki/Talk:Python_(mythology)',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit',\n",
+ " '/w/index.php?title=Python_(mythology)&action=history',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Portal:Contents',\n",
+ " '/wiki/Portal:Featured_content',\n",
+ " '/wiki/Portal:Current_events',\n",
+ " '/wiki/Special:Random',\n",
+ " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n",
+ " '//shop.wikimedia.org',\n",
+ " '/wiki/Help:Contents',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:Community_portal',\n",
+ " '/wiki/Special:RecentChanges',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n",
+ " '/wiki/Special:WhatLinksHere/Python_(mythology)',\n",
+ " '/wiki/Special:RecentChangesLinked/Python_(mythology)',\n",
+ " '/wiki/Wikipedia:File_Upload_Wizard',\n",
+ " '/wiki/Special:SpecialPages',\n",
+ " '/w/index.php?title=Python_(mythology)&oldid=900434819',\n",
+ " '/w/index.php?title=Python_(mythology)&action=info',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721',\n",
+ " '/w/index.php?title=Special:CiteThisPage&page=Python_%28mythology%29&id=900434819',\n",
+ " '/w/index.php?title=Python_(mythology)&printable=yes',\n",
+ " 'https://af.wikipedia.org/wiki/Piton',\n",
+ " 'https://be.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n",
+ " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%8F)',\n",
+ " 'https://br.wikipedia.org/wiki/Python_(aerouant)',\n",
+ " 'https://ca.wikipedia.org/wiki/Pit%C3%B3_(mitologia)',\n",
+ " 'https://cs.wikipedia.org/wiki/P%C3%BDth%C3%B3n',\n",
+ " 'https://da.wikipedia.org/wiki/Python_(mytologi)',\n",
+ " 'https://de.wikipedia.org/wiki/Python_(Mythologie)',\n",
+ " 'https://et.wikipedia.org/wiki/Python',\n",
+ " 'https://el.wikipedia.org/wiki/%CE%A0%CF%8D%CE%B8%CF%89%CE%BD_(%CE%BC%CF%85%CE%B8%CE%BF%CE%BB%CE%BF%CE%B3%CE%AF%CE%B1)',\n",
+ " 'https://es.wikipedia.org/wiki/Pit%C3%B3n_(mitolog%C3%ADa)',\n",
+ " 'https://eo.wikipedia.org/wiki/Pitono_(mitologio)',\n",
+ " 'https://eu.wikipedia.org/wiki/Piton',\n",
+ " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86_(%D8%A7%D8%B3%D8%B7%D9%88%D8%B1%D9%87)',\n",
+ " 'https://fr.wikipedia.org/wiki/Python_(mythologie)',\n",
+ " 'https://gl.wikipedia.org/wiki/Pit%C3%B3n_(mitolox%C3%ADa)',\n",
+ " 'https://ko.wikipedia.org/wiki/%ED%94%BC%ED%86%A4',\n",
+ " 'https://hr.wikipedia.org/wiki/Piton_(mitologija)',\n",
+ " 'https://id.wikipedia.org/wiki/Pithon_(mitologi)',\n",
+ " 'https://it.wikipedia.org/wiki/Pitone_(mitologia)',\n",
+ " 'https://la.wikipedia.org/wiki/Python_(mythologia)',\n",
+ " 'https://lb.wikipedia.org/wiki/Python_(Mythologie)',\n",
+ " 'https://lt.wikipedia.org/wiki/Pitonas_(mitologija)',\n",
+ " 'https://hu.wikipedia.org/wiki/P%C3%BCth%C3%B3n',\n",
+ " 'https://nl.wikipedia.org/wiki/Python_(mythologie)',\n",
+ " 'https://ja.wikipedia.org/wiki/%E3%83%94%E3%83%A5%E3%83%BC%E3%83%88%E3%83%BC%E3%83%B3',\n",
+ " 'https://no.wikipedia.org/wiki/Python_(mytologi)',\n",
+ " 'https://pl.wikipedia.org/wiki/Pyton_(mitologia)',\n",
+ " 'https://pt.wikipedia.org/wiki/P%C3%ADton_(mitologia)',\n",
+ " 'https://ro.wikipedia.org/wiki/Python_(mitologie)',\n",
+ " 'https://ru.wikipedia.org/wiki/%D0%9F%D0%B8%D1%84%D0%BE%D0%BD',\n",
+ " 'https://sco.wikipedia.org/wiki/Python_(meethology)',\n",
+ " 'https://sq.wikipedia.org/wiki/Pithoni',\n",
+ " 'https://sl.wikipedia.org/wiki/Piton_(mitologija)',\n",
+ " 'https://sr.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%98%D0%B0)',\n",
+ " 'https://fi.wikipedia.org/wiki/Python_(hirvi%C3%B6)',\n",
+ " 'https://sv.wikipedia.org/wiki/Python_(mytologi)',\n",
+ " 'https://tr.wikipedia.org/wiki/Pithon',\n",
+ " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD_(%D0%BC%D1%96%D1%84%D0%BE%D0%BB%D0%BE%D0%B3%D1%96%D1%8F)',\n",
+ " 'https://vi.wikipedia.org/wiki/Python_(th%E1%BA%A7n_tho%E1%BA%A1i)',\n",
+ " 'https://zh.wikipedia.org/wiki/%E7%9A%AE%E5%90%8C',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721#sitelinks-wikipedia',\n",
+ " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:General_disclaimer']"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Gather the href of every <a> that carries both an href and a title attribute.\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "python = BeautifulSoup(content, 'html.parser')  # explicit parser: same result on every machine\n",
+ "\n",
+ "# NOTE(review): title=True drops anchors that have no title attribute; confirm that filter is intended.\n",
+ "link_tags = python.find_all('a', href=True, title=True)\n",
+ "python_links = [tag['href'] for tag in link_tags]\n",
+ "python_links"
]
},
{
@@ -230,7 +589,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -240,11 +599,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The number of titles that have change in the 15\n"
+ ]
+ }
+ ],
"source": [
- "#your code"
+ "# Count the titles flagged as changed: they are the <div class='usctitlechanged'> elements.\n",
+ "\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "code = BeautifulSoup(content, 'html.parser')  # explicit parser: same result on every machine\n",
+ "\n",
+ "changed_title_divs = code.find_all('div', {'class': 'usctitlechanged'})\n",
+ "code_change = [div.text.replace('\\n', '') for div in changed_title_divs]\n",
+ "\n",
+ "print(f\"The number of titles that have changed is {len(code_change)}\")"
]
},
{
@@ -256,21 +631,47 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.fbi.gov/wanted/topten'"
+ "url = 'https://www.fbi.gov/wanted/topten'\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 116,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ALEJANDRO ROSALES CASTILLO',\n",
+ " 'YASER ABDEL SAID',\n",
+ " 'JASON DEREK BROWN',\n",
+ " 'RAFAEL CARO-QUINTERO',\n",
+ " 'ALEXIS FLORES',\n",
+ " 'EUGENE PALMER',\n",
+ " 'SANTIAGO VILLALBA MEDEROS',\n",
+ " 'ROBERT WILLIAM FISHER',\n",
+ " 'BHADRESHKUMAR CHETANBHAI PATEL',\n",
+ " 'ARNOLDO JIMENEZ']"
+ ]
+ },
+ "execution_count": 116,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code "
+ "# List the text of every <h3 class='title'> heading on the page, with newlines removed.\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "fbi_ten = BeautifulSoup(content, 'html.parser')  # explicit parser: same result on every machine\n",
+ "\n",
+ "name_headings = fbi_ten.find_all('h3', {'class': 'title'})\n",
+ "top_ten = [heading.text.replace('\\n', '') for heading in name_headings]\n",
+ "top_ten"
]
},
{
@@ -282,7 +683,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
@@ -290,13 +691,231 @@
"url = 'https://www.emsc-csem.org/Earthquake/'"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[['2019-07-16', '23:18:22.016 '],\n",
+ " ['2019-07-16', '23:11:27.223 '],\n",
+ " ['2019-07-16', '22:50:40.744 '],\n",
+ " ['2019-07-16', '22:42:26.952 '],\n",
+ " ['2019-07-16', '22:40:21.754 '],\n",
+ " ['2019-07-16', '22:38:43.255 '],\n",
+ " ['2019-07-16', '22:31:33.21 03 '],\n",
+ " ['2019-07-16', '22:11:10.01 23 '],\n",
+ " ['2019-07-16', '22:10:21.01 24 '],\n",
+ " ['2019-07-16', '22:05:16.91 29 '],\n",
+ " ['2019-07-16', '22:03:53.01 30 '],\n",
+ " ['2019-07-16', '21:56:18.01 38 '],\n",
+ " ['2019-07-16', '21:56:14.01 38 '],\n",
+ " ['2019-07-16', '21:42:41.71 52 '],\n",
+ " ['2019-07-16', '21:13:33.82 21 '],\n",
+ " ['2019-07-16', '21:07:37.52 27 '],\n",
+ " ['2019-07-16', '21:06:21.52 28 '],\n",
+ " ['2019-07-16', '21:03:36.72 31 '],\n",
+ " ['2019-07-16', '20:52:18.62 42 '],\n",
+ " ['2019-07-16', '20:49:09.12 45 '],\n",
+ " ['2019-07-16', '20:41:11.02 53 '],\n",
+ " ['2019-07-16', '20:33:52.93 00 '],\n",
+ " ['2019-07-16', '20:31:33.33 03 '],\n",
+ " ['2019-07-16', '20:29:07.73 05 '],\n",
+ " ['2019-07-16', '20:24:16.93 10 '],\n",
+ " ['2019-07-16', '20:19:00.13 15 '],\n",
+ " ['2019-07-16', '20:17:51.63 16 '],\n",
+ " ['2019-07-16', '20:15:59.03 18 '],\n",
+ " ['2019-07-16', '20:11:01.53 23 '],\n",
+ " ['2019-07-16', '19:51:06.53 43 '],\n",
+ " ['2019-07-16', '19:42:25.93 52 '],\n",
+ " ['2019-07-16', '19:35:57.03 58 '],\n",
+ " ['2019-07-16', '19:23:50.14 10 '],\n",
+ " ['2019-07-16', '19:20:21.44 14 '],\n",
+ " ['2019-07-16', '19:16:53.84 17 '],\n",
+ " ['2019-07-16', '19:16:15.94 18 '],\n",
+ " ['2019-07-16', '19:11:48.94 22 '],\n",
+ " ['2019-07-16', '19:04:00.24 30 '],\n",
+ " ['2019-07-16', '19:01:48.04 32 '],\n",
+ " ['2019-07-16', '19:01:00.84 33 '],\n",
+ " ['2019-07-16', '18:53:32.04 41 '],\n",
+ " ['2019-07-16', '18:50:16.24 44 '],\n",
+ " ['2019-07-16', '18:47:48.94 46 '],\n",
+ " ['2019-07-16', '18:36:26.84 58 '],\n",
+ " ['2019-07-16', '18:22:31.95 12 '],\n",
+ " ['2019-07-16', '18:15:28.75 19 '],\n",
+ " ['2019-07-16', '18:10:01.05 24 '],\n",
+ " ['2019-07-16', '17:48:24.05 46 '],\n",
+ " ['2019-07-16', '17:42:29.95 52 '],\n",
+ " ['2019-07-16', '17:39:43.05 54 ']]"
+ ]
+ },
+ "execution_count": 84,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import re\n",
+ "# Column 4 holds 'date time' followed directly by other text, so capture the two fields explicitly; assumes seconds carry one decimal digit (TODO confirm against the live page).\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "earthquake = BeautifulSoup(content, 'html.parser')  # explicit parser: same result on every machine\n",
+ "date_time_re = re.compile(r'(\\d{4}-\\d{2}-\\d{2})\\s+(\\d{2}:\\d{2}:\\d{2}\\.\\d)')  # \\s also matches the non-breaking spaces\n",
+ "earthquake_20 = earthquake.select('tr > td:nth-child(4)')\n",
+ "earthquake_date = [list(date_time_re.search(element.text).groups()) for element in earthquake_20]  # [date, time] per row\n",
+ "earthquake_date\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['2.84 S', '35.57 N', '36.06 N', '35.66 N', '35.64 N', '35.78 N', '35.65 N', '17.32 N', '18.43 S', '34.07 N', '12.40 N', '39.27 N', '16.62 N', '69.27 N', '49.45 N', '40.25 N', '35.88 N', '37.81 N', '36.07 N', '36.07 N', '16.85 N', '40.09 N', '23.45 S', '35.86 N', '37.82 N', '33.10 N', '35.55 N', '35.68 N', '37.82 N', '6.26 S', '35.61 N', '35.62 N', '36.19 N', '38.39 N', '38.45 N', '61.27 N', '36.03 N', '35.96 N', '39.56 N', '35.68 N', '0.68 S', '43.62 N', '35.59 N', '35.74 N', '35.65 N', '28.44 N', '34.41 S', '9.93 S', '35.67 N', '0.54 S']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Latitude sits in column 5 and its hemisphere letter (N/S) in column 6; strip the non-breaking spaces from both.\n",
+ "earthquake_latitude = [cell.text.replace('\\xa0', \"\") for cell in earthquake.select('tr > td:nth-child(5)')]\n",
+ "earthquake_cardinal = [cell.text.replace('\\xa0', \"\") for cell in earthquake.select('tr > td:nth-child(6)')]\n",
+ "\n",
+ "# Pair every value with its hemisphere letter, e.g. '35.57 N'.\n",
+ "latitude_comp = [\n",
+ "    f\"{value} {hemisphere}\"\n",
+ "    for value, hemisphere in zip(earthquake_latitude, earthquake_cardinal)\n",
+ "]\n",
+ "print(latitude_comp)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['77.62 W', '117.39 W', '117.85 W', '117.44 W', '117.48 W', '117.59 W', '117.46 W', '101.46 W', '120.58 E', '11.30 W', '144.54 E', '29.92 W', '94.75 W', '144.73 W', '120.50 W', '19.76 E', '117.69 W', '121.76 W', '117.84 W', '117.65 W', '100.25 W', '19.91 E', '66.86 W', '117.69 W', '121.77 W', '12.42 W', '117.43 W', '117.52 W', '121.77 W', '148.65 E', '117.47 W', '117.45 W', '117.89 W', '16.94 E', '16.91 E', '152.44 W', '117.87 W', '117.71 W', '67.17 E', '117.54 W', '126.36 E', '75.40 E', '117.42 W', '117.56 W', '117.52 W', '56.67 E', '150.73 E', '118.23 E', '117.54 W', '127.86 E']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Longitude sits in column 7 and its hemisphere letter (E/W) in column 8; strip the non-breaking spaces from both.\n",
+ "earthquake_longitude = [cell.text.replace('\\xa0', \"\") for cell in earthquake.select('tr > td:nth-child(7)')]\n",
+ "earthquake_cardinal = [cell.text.replace('\\xa0', \"\") for cell in earthquake.select('tr > td:nth-child(8)')]\n",
+ "\n",
+ "# Pair every value with its hemisphere letter, e.g. '117.39 W'.\n",
+ "longitude_comp = [\n",
+ "    f\"{value} {hemisphere}\"\n",
+ "    for value, hemisphere in zip(earthquake_longitude, earthquake_cardinal)\n",
+ "]\n",
+ "print(longitude_comp)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'OFFSHORE GUERRERO, MEXICO',\n",
+ " 'WESTERN AUSTRALIA',\n",
+ " 'MADEIRA ISLANDS, PORTUGAL REGION',\n",
+ " 'GUAM REGION',\n",
+ " 'AZORES ISLANDS, PORTUGAL',\n",
+ " 'OAXACA, MEXICO',\n",
+ " 'NORTHERN ALASKA',\n",
+ " 'ALBANIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'SAN FRANCISCO BAY AREA, CALIF.',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'OFFSHORE GUERRERO, MEXICO',\n",
+ " 'ALBANIA',\n",
+ " 'JUJUY, ARGENTINA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'SAN FRANCISCO BAY AREA, CALIF.',\n",
+ " 'MADEIRA ISLANDS, PORTUGAL REGION',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SAN FRANCISCO BAY AREA, CALIF.',\n",
+ " 'NEW BRITAIN REGION, P.N.G.',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'SOUTHERN ITALY',\n",
+ " 'SOUTHERN ITALY',\n",
+ " 'SOUTHERN ALASKA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'EASTERN UZBEKISTAN',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'MOLUCCA SEA',\n",
+ " 'EASTERN KAZAKHSTAN',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN IRAN',\n",
+ " 'NEAR S.E. COAST OF AUSTRALIA',\n",
+ " 'SUMBAWA REGION, INDONESIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'HALMAHERA, INDONESIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'OFFSHORE OAXACA, MEXICO',\n",
+ " 'CENTRAL CALIFORNIA']"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Region names are the text of the <td class='tb_region'> cells, minus non-breaking spaces.\n",
+ "region = earthquake.find_all('td', attrs={'class': 'tb_region'})\n",
+ "region_name = [cell.text.replace('\\xa0', \"\") for cell in region]\n",
+ "region_name\n"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "# Assemble the scraped columns into one table; each frame gets the default 0..n-1 index, so join() lines rows up by position.\n",
+ "df_earthquakes = pd.DataFrame(earthquake_date, columns=['year', 'time'])\n",
+ "# NOTE: the 'year' column holds the full date string (e.g. '2019-07-16'), not just the year.\n",
+ "\n",
+ "df_latitude = pd.DataFrame({'latitude': latitude_comp})\n",
+ "df_longitude = pd.DataFrame({'longitude': longitude_comp})\n",
+ "df_region = pd.DataFrame({'region': region_name})\n",
+ "\n",
+ "to_concat = [\n",
+ "    df_latitude,\n",
+ "    df_longitude,\n",
+ "    df_region,\n",
+ "]\n",
+ "\n",
+ "table_earthquakes = df_earthquakes.join(to_concat)\n",
+ "table_earthquakes"
]
},
{
@@ -308,21 +927,99 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://hackevents.co/hackathons'"
+ "url ='https://hackevents.co/'"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 150,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Event | \n",
+ " Date | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Code Geist Hackathon by SefrWahed | \n",
+ " 7/29/2019 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " The Code Factor | \n",
+ " 5/21/2019 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " TECHFEST MUNICH | \n",
+ " 9/6/2019 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Galileo App Competition | \n",
+ " 1/31/2019 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Event Date\n",
+ "0 Code Geist Hackathon by SefrWahed 7/29/2019\n",
+ "1 The Code Factor 5/21/2019\n",
+ "2 TECHFEST MUNICH 9/6/2019\n",
+ "3 Galileo App Competition 1/31/2019"
+ ]
+ },
+ "execution_count": 150,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Build a table of event names and dates from the cards on the page.\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "hack = BeautifulSoup(content, 'html5lib')  # documented name of the parser that the 'html5' feature resolves to\n",
+ "\n",
+ "# Event names are the <h5 class='card-title'> headings.\n",
+ "title_tags = hack.find_all('h5', {'class': 'card-title'})\n",
+ "hack_event = [tag.text for tag in title_tags]\n",
+ "\n",
+ "# Match one whole M/D/YYYY date per <p class='card-text'>; a bracketed character class would instead\n",
+ "# collect every digit and slash in the paragraph and glue them together.\n",
+ "date_re = re.compile(r'\\d{1,2}/\\d{1,2}/\\d{4}')\n",
+ "date_matches = [date_re.search(tag.text) for tag in hack.find_all('p', {'class': 'card-text'})]\n",
+ "hack_event_date = [match.group() if match else '' for match in date_matches]  # '' keeps the list one entry per paragraph\n",
+ "\n",
+ "result = pd.DataFrame({'Event': hack_event, 'Date': hack_event_date})\n",
+ "result"
]
},
{
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 812f7a4..a856e02 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -76,11 +76,49 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Eric Ma',\n",
+ " 'Federico Brigante',\n",
+ " 'Kyle Roach',\n",
+ " 'Olle Jonsson',\n",
+ " 'Nikita Sobolev',\n",
+ " 'Frank S. Thomas',\n",
+ " 'syuilo',\n",
+ " 'Ives van Hoorne',\n",
+ " 'Paulus Schoutsen',\n",
+ " 'Sarah Drasner',\n",
+ " 'Stefanos Kornilios Mitsis Poiitidis',\n",
+ " 'Jan Hovancik',\n",
+ " 'Andreas Mueller',\n",
+ " 'Guillaume Gomez',\n",
+ " 'Matt Holt',\n",
+ " 'Clifford Wolf',\n",
+ " 'Franck Nijhof',\n",
+ " 'Joe Block',\n",
+ " 'Andrei Neagoie',\n",
+ " 'Jack Lloyd',\n",
+ " 'Guillermo Rauch',\n",
+ " 'Tim Griesser',\n",
+ " 'Jameson Nash',\n",
+ " 'Anderson Banihirwe',\n",
+ " 'Danny Ryan']"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Download the page and parse it with an explicitly named parser, so the result does not depend on which optional parsers are installed.\n",
+ "content = requests.get(url, timeout=30).content  # bounded wait instead of hanging indefinitely\n",
+ "soup_git = BeautifulSoup(content, 'html.parser')\n",
+ "soup_git\n"
]
},
{
@@ -134,11 +172,49 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Eric Ma',\n",
+ " 'Federico Brigante',\n",
+ " 'Kyle Roach',\n",
+ " 'Olle Jonsson',\n",
+ " 'Nikita Sobolev',\n",
+ " 'Frank S. Thomas',\n",
+ " 'syuilo',\n",
+ " 'Ives van Hoorne',\n",
+ " 'Paulus Schoutsen',\n",
+ " 'Sarah Drasner',\n",
+ " 'Stefanos Kornilios Mitsis Poiitidis',\n",
+ " 'Jan Hovancik',\n",
+ " 'Andreas Mueller',\n",
+ " 'Guillaume Gomez',\n",
+ " 'Matt Holt',\n",
+ " 'Clifford Wolf',\n",
+ " 'Franck Nijhof',\n",
+ " 'Joe Block',\n",
+ " 'Andrei Neagoie',\n",
+ " 'Jack Lloyd',\n",
+ " 'Guillermo Rauch',\n",
+ " 'Tim Griesser',\n",
+ " 'Jameson Nash',\n",
+ " 'Anderson Banihirwe',\n",
+ " 'Danny Ryan']"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Developer names are read from the h1 headings whose class is 'h3 lh-condensed'.\n",
+ "dev_name_tags = soup_git.find_all('h1', attrs={'class': 'h3 lh-condensed'})\n",
+ "dev_names = [heading.get_text() for heading in dev_name_tags]\n",
+ "dev_names"
]
},
{
@@ -152,7 +228,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
@@ -162,11 +238,52 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['sherlock-project / sherlock ',\n",
+ " 'j3ssie / Osmedeus ',\n",
+ " 'ytdl-org / youtube-dl ',\n",
+ " 'uber / ludwig ',\n",
+ " 'tkat0 / PyTorch_BlazeFace ',\n",
+ " 'Kyubyong / dc_tts ',\n",
+ " 'gto76 / python-cheatsheet ',\n",
+ " 'bbfamily / abu ',\n",
+ " 'DrDonk / unlocker ',\n",
+ " 'BlackHC / tfpyth ',\n",
+ " 'sundowndev / PhoneInfoga ',\n",
+ " 'public-apis / public-apis ',\n",
+ " 'twintproject / twint ',\n",
+ " 'liuhuanyong / QASystemOnMedicalKG ',\n",
+ " 'vaexio / vaex ',\n",
+ " 'MrS0m30n3 / youtube-dl-gui ',\n",
+ " 'MozillaSecurity / grizzly ',\n",
+ " 'scikit-learn / scikit-learn ',\n",
+ " 'bitcoin / bips ',\n",
+ " 'xinshuoweng / AB3DMOT ',\n",
+ " 'smartHomeHub / SmartIR ',\n",
+ " 'sfyc23 / EverydayWechat ',\n",
+ " 'home-assistant / home-assistant ',\n",
+ " 'msgi / nlp-journey ',\n",
+ " 'gunthercox / ChatterBot ']"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Trending repositories: fetch `url` and parse it (note: this rebinds soup_git).\n",
+ "content = requests.get(url).content\n",
+ "soup_git = BeautifulSoup(content, 'html')\n",
+ "# str.split() with no argument drops newlines and padding; join leaves single spaces only.\n",
+ "dev_rep = soup_git.find_all('h1',{'class': 'h3 lh-condensed'})\n",
+ "dev_rep = [' '.join(element.text.split()) for element in dev_rep]\n",
+ "dev_rep"
]
},
{
@@ -178,7 +295,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -188,11 +305,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Cscr-featured.svg/20px-Cscr-featured.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/df/Walt_Disney_1946.JPG/220px-Walt_Disney_1946.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/87/Walt_Disney_1942_signature.svg/150px-Walt_Disney_1942_signature.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Newman_Laugh-O-Gram_%281921%29.webm/220px-seek%3D2-Newman_Laugh-O-Gram_%281921%29.webm.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Trolley_Troubles_poster.jpg/170px-Trolley_Troubles_poster.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg/170px-Walt_Disney_and_his_cartoon_creation_%22Mickey_Mouse%22_-_National_Board_of_Review_Magazine.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/170px-Steamboat-willie.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/5/57/Walt_Disney_1935.jpg/170px-Walt_Disney_1935.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Roy_O._Disney_with_Company_at_Press_Conference.jpg/170px-Roy_O._Disney_with_Company_at_Press_Conference.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Disney_Display_Case.JPG/170px-Disney_Display_Case.JPG',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/d/da/Animation_disc.svg/30px-Animation_disc.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/6/69/P_vip.svg/29px-P_vip.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Magic_Kingdom_castle.jpg/24px-Magic_Kingdom_castle.jpg',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Video-x-generic.svg/30px-Video-x-generic.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/Flag_of_Los_Angeles_County%2C_California.svg/30px-Flag_of_Los_Angeles_County%2C_California.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/USA_flag_on_television.svg/30px-USA_flag_on_television.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/22px-Commons-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/25px-Wikiquote-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/30px-Wikidata-logo.svg.png',\n",
+ " '//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png',\n",
+ " '//en.wikipedia.org/wiki/Special:CentralAutoLogin/start?type=1x1',\n",
+ " '/static/images/wikimedia-button.png',\n",
+ " '/static/images/poweredby_mediawiki_88x31.png']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Fetch the page stored in `url` and parse it.\n",
+ "content = requests.get(url).content\n",
+ "disney = BeautifulSoup(markup=content, features='html')\n",
+ "\n",
+ "# Keep only <img> tags that actually carry a src attribute,\n",
+ "# then read that attribute from each of them.\n",
+ "disney_img_tags = disney.find_all('img', src=True)\n",
+ "disney_img = [img_tag['src'] for img_tag in disney_img_tags]\n",
+ "\n",
+ "disney_img"
]
},
{
@@ -204,21 +373,211 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://en.wikipedia.org/wiki/Python' "
+ "url = 'https://en.wikipedia.org/wiki/Python_(mythology)' # replaces the exercise's original /wiki/Python URL"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['/wiki/Pythia',\n",
+ " '/wiki/File:Apollo_dan_Pithon.jpg',\n",
+ " '/wiki/Apollo',\n",
+ " '/wiki/Virgil_Solis',\n",
+ " '/wiki/Ovid',\n",
+ " '/wiki/Metamorphoses',\n",
+ " '/wiki/Greek_mythology',\n",
+ " '/wiki/Greek_language',\n",
+ " '/wiki/Genitive_case',\n",
+ " '/wiki/Serpent_(symbolism)',\n",
+ " '/wiki/Medieval',\n",
+ " '/wiki/Dragon',\n",
+ " '/wiki/Earth',\n",
+ " '/wiki/Delphi',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=1',\n",
+ " '/wiki/Oracle',\n",
+ " '/wiki/Gaia_(mythology)',\n",
+ " '/wiki/Axis_mundi#Places',\n",
+ " '/wiki/Omphalos',\n",
+ " '/wiki/Chthonic',\n",
+ " '/wiki/Apollo',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=2',\n",
+ " '/wiki/File:Pietro_Francavilla_-_Apollo_Victorious_over_the_Python_-_Walters_27302.jpg',\n",
+ " '/wiki/Homeric_Hymn',\n",
+ " '/wiki/Drakaina_(mythology)',\n",
+ " '/wiki/Gaius_Julius_Hyginus',\n",
+ " '/wiki/Zeus',\n",
+ " '/wiki/Leto',\n",
+ " '/wiki/Artemis',\n",
+ " '/wiki/Apollo',\n",
+ " '/wiki/Hera',\n",
+ " '/wiki/Mount_Parnassus',\n",
+ " '/wiki/Gaia_(mythology)',\n",
+ " '/wiki/Robert_Graves',\n",
+ " '/wiki/Greeks',\n",
+ " '/wiki/Sacrilege',\n",
+ " '/wiki/Pythian_Games',\n",
+ " '/wiki/Erwin_Rohde',\n",
+ " '/wiki/Omphalos',\n",
+ " '/wiki/Oracle',\n",
+ " '/wiki/Delphi',\n",
+ " '/wiki/Hyperion_(mythology)',\n",
+ " '/wiki/Helios',\n",
+ " '/wiki/Karl_Kerenyi',\n",
+ " '/wiki/Delphyne',\n",
+ " '/wiki/Typhon',\n",
+ " '/wiki/Typhoeus',\n",
+ " '/wiki/Pythia',\n",
+ " '/wiki/Omphalos',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=3',\n",
+ " '/wiki/Python_(genus)',\n",
+ " '/wiki/Pythonidae',\n",
+ " '/wiki/File:Barcelona_221.JPG',\n",
+ " '/wiki/Apollo_Belvedere',\n",
+ " '/wiki/Delphi',\n",
+ " '/wiki/Dragons_in_Greek_mythology',\n",
+ " '/wiki/Pythia',\n",
+ " '/wiki/Serpent_(symbolism)',\n",
+ " '/wiki/Saint_George_and_the_Dragon',\n",
+ " '/wiki/Analogy_of_the_sun',\n",
+ " '/wiki/Yamata_no_Orochi',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=4',\n",
+ " '/wiki/Telphousa',\n",
+ " '/wiki/Parnassus',\n",
+ " '/wiki/Dodona',\n",
+ " '/wiki/The_Walters_Art_Museum',\n",
+ " '/wiki/Walter_Burkert',\n",
+ " '/wiki/Encyclopedia_Americana',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit§ion=5',\n",
+ " 'https://commons.wikimedia.org/wiki/Category:Python_(mythology)',\n",
+ " '/wiki/Wikisource',\n",
+ " '/wiki/Encyclop%C3%A6dia_Britannica_Eleventh_Edition',\n",
+ " 'https://en.wikisource.org/wiki/1911_Encyclop%C3%A6dia_Britannica/Python_(mythology)',\n",
+ " '/wiki/Walter_Burkert',\n",
+ " '/wiki/Joseph_Fontenrose',\n",
+ " '/wiki/Manly_Palmer_Hall',\n",
+ " '/wiki/Jane_Ellen_Harrison',\n",
+ " '/wiki/Karl_Kerenyi',\n",
+ " '/wiki/Erwin_Rohde',\n",
+ " '/wiki/William_Smith_(lexicographer)',\n",
+ " '/wiki/Dictionary_of_Greek_and_Roman_Biography_and_Mythology',\n",
+ " '/wiki/Help:Authority_control',\n",
+ " 'https://www.wikidata.org/wiki/Q15721',\n",
+ " '/wiki/Integrated_Authority_File',\n",
+ " '/wiki/Syst%C3%A8me_universitaire_de_documentation',\n",
+ " '/wiki/Virtual_International_Authority_File',\n",
+ " '/wiki/WorldCat_Identities',\n",
+ " '/wiki/Help:Category',\n",
+ " '/wiki/Category:Ancient_Delphi',\n",
+ " '/wiki/Category:Offspring_of_Gaia',\n",
+ " '/wiki/Category:Greek_dragons',\n",
+ " '/wiki/Category:Wikipedia_articles_incorporating_a_citation_from_the_Encyclopedia_Americana_with_a_Wikisource_reference',\n",
+ " '/wiki/Category:Articles_containing_Greek-language_text',\n",
+ " '/wiki/Category:Commons_category_link_is_on_Wikidata',\n",
+ " '/wiki/Category:Wikipedia_articles_with_GND_identifiers',\n",
+ " '/wiki/Category:Wikipedia_articles_with_SUDOC_identifiers',\n",
+ " '/wiki/Category:Wikipedia_articles_with_VIAF_identifiers',\n",
+ " '/wiki/Category:Wikipedia_articles_with_WorldCat-VIAF_identifiers',\n",
+ " '/wiki/Special:MyTalk',\n",
+ " '/wiki/Special:MyContributions',\n",
+ " '/w/index.php?title=Special:CreateAccount&returnto=Python+%28mythology%29',\n",
+ " '/w/index.php?title=Special:UserLogin&returnto=Python+%28mythology%29',\n",
+ " '/wiki/Python_(mythology)',\n",
+ " '/wiki/Talk:Python_(mythology)',\n",
+ " '/w/index.php?title=Python_(mythology)&action=edit',\n",
+ " '/w/index.php?title=Python_(mythology)&action=history',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Main_Page',\n",
+ " '/wiki/Portal:Contents',\n",
+ " '/wiki/Portal:Featured_content',\n",
+ " '/wiki/Portal:Current_events',\n",
+ " '/wiki/Special:Random',\n",
+ " 'https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en',\n",
+ " '//shop.wikimedia.org',\n",
+ " '/wiki/Help:Contents',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:Community_portal',\n",
+ " '/wiki/Special:RecentChanges',\n",
+ " '//en.wikipedia.org/wiki/Wikipedia:Contact_us',\n",
+ " '/wiki/Special:WhatLinksHere/Python_(mythology)',\n",
+ " '/wiki/Special:RecentChangesLinked/Python_(mythology)',\n",
+ " '/wiki/Wikipedia:File_Upload_Wizard',\n",
+ " '/wiki/Special:SpecialPages',\n",
+ " '/w/index.php?title=Python_(mythology)&oldid=900434819',\n",
+ " '/w/index.php?title=Python_(mythology)&action=info',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721',\n",
+ " '/w/index.php?title=Special:CiteThisPage&page=Python_%28mythology%29&id=900434819',\n",
+ " '/w/index.php?title=Python_(mythology)&printable=yes',\n",
+ " 'https://af.wikipedia.org/wiki/Piton',\n",
+ " 'https://be.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD',\n",
+ " 'https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%8F)',\n",
+ " 'https://br.wikipedia.org/wiki/Python_(aerouant)',\n",
+ " 'https://ca.wikipedia.org/wiki/Pit%C3%B3_(mitologia)',\n",
+ " 'https://cs.wikipedia.org/wiki/P%C3%BDth%C3%B3n',\n",
+ " 'https://da.wikipedia.org/wiki/Python_(mytologi)',\n",
+ " 'https://de.wikipedia.org/wiki/Python_(Mythologie)',\n",
+ " 'https://et.wikipedia.org/wiki/Python',\n",
+ " 'https://el.wikipedia.org/wiki/%CE%A0%CF%8D%CE%B8%CF%89%CE%BD_(%CE%BC%CF%85%CE%B8%CE%BF%CE%BB%CE%BF%CE%B3%CE%AF%CE%B1)',\n",
+ " 'https://es.wikipedia.org/wiki/Pit%C3%B3n_(mitolog%C3%ADa)',\n",
+ " 'https://eo.wikipedia.org/wiki/Pitono_(mitologio)',\n",
+ " 'https://eu.wikipedia.org/wiki/Piton',\n",
+ " 'https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86_(%D8%A7%D8%B3%D8%B7%D9%88%D8%B1%D9%87)',\n",
+ " 'https://fr.wikipedia.org/wiki/Python_(mythologie)',\n",
+ " 'https://gl.wikipedia.org/wiki/Pit%C3%B3n_(mitolox%C3%ADa)',\n",
+ " 'https://ko.wikipedia.org/wiki/%ED%94%BC%ED%86%A4',\n",
+ " 'https://hr.wikipedia.org/wiki/Piton_(mitologija)',\n",
+ " 'https://id.wikipedia.org/wiki/Pithon_(mitologi)',\n",
+ " 'https://it.wikipedia.org/wiki/Pitone_(mitologia)',\n",
+ " 'https://la.wikipedia.org/wiki/Python_(mythologia)',\n",
+ " 'https://lb.wikipedia.org/wiki/Python_(Mythologie)',\n",
+ " 'https://lt.wikipedia.org/wiki/Pitonas_(mitologija)',\n",
+ " 'https://hu.wikipedia.org/wiki/P%C3%BCth%C3%B3n',\n",
+ " 'https://nl.wikipedia.org/wiki/Python_(mythologie)',\n",
+ " 'https://ja.wikipedia.org/wiki/%E3%83%94%E3%83%A5%E3%83%BC%E3%83%88%E3%83%BC%E3%83%B3',\n",
+ " 'https://no.wikipedia.org/wiki/Python_(mytologi)',\n",
+ " 'https://pl.wikipedia.org/wiki/Pyton_(mitologia)',\n",
+ " 'https://pt.wikipedia.org/wiki/P%C3%ADton_(mitologia)',\n",
+ " 'https://ro.wikipedia.org/wiki/Python_(mitologie)',\n",
+ " 'https://ru.wikipedia.org/wiki/%D0%9F%D0%B8%D1%84%D0%BE%D0%BD',\n",
+ " 'https://sco.wikipedia.org/wiki/Python_(meethology)',\n",
+ " 'https://sq.wikipedia.org/wiki/Pithoni',\n",
+ " 'https://sl.wikipedia.org/wiki/Piton_(mitologija)',\n",
+ " 'https://sr.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BC%D0%B8%D1%82%D0%BE%D0%BB%D0%BE%D0%B3%D0%B8%D1%98%D0%B0)',\n",
+ " 'https://fi.wikipedia.org/wiki/Python_(hirvi%C3%B6)',\n",
+ " 'https://sv.wikipedia.org/wiki/Python_(mytologi)',\n",
+ " 'https://tr.wikipedia.org/wiki/Pithon',\n",
+ " 'https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD_(%D0%BC%D1%96%D1%84%D0%BE%D0%BB%D0%BE%D0%B3%D1%96%D1%8F)',\n",
+ " 'https://vi.wikipedia.org/wiki/Python_(th%E1%BA%A7n_tho%E1%BA%A1i)',\n",
+ " 'https://zh.wikipedia.org/wiki/%E7%9A%AE%E5%90%8C',\n",
+ " 'https://www.wikidata.org/wiki/Special:EntityPage/Q15721#sitelinks-wikipedia',\n",
+ " 'https://foundation.wikimedia.org/wiki/Privacy_policy',\n",
+ " '/wiki/Wikipedia:About',\n",
+ " '/wiki/Wikipedia:General_disclaimer']"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Download the article at `url` and build the soup.\n",
+ "content = requests.get(url).content\n",
+ "python = BeautifulSoup(markup=content, features='html')\n",
+ "\n",
+ "# Only anchors that carry both an href and a title attribute are kept.\n",
+ "python_anchor_tags = python.find_all('a', attrs={'href': True, 'title': True})\n",
+ "python_links = [anchor['href'] for anchor in python_anchor_tags]\n",
+ "python_links"
]
},
{
@@ -230,7 +589,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -240,11 +599,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The number of titles that have change in the 15\n"
+ ]
+ }
+ ],
"source": [
- "#your code"
+ "# Count the div elements carrying the 'usctitlechanged' class on the page at `url`.\n",
+ "\n",
+ "content = requests.get(url).content\n",
+ "code = BeautifulSoup(content, 'html')\n",
+ "\n",
+ "code_change = code.find_all('div',{'class': 'usctitlechanged'})\n",
+ "code_change = [element.text.replace('\\n', '') for element in code_change]\n",
+ "# Report how many matching titles were found.\n",
+ "print(f\"The number of titles that have changed is {len(code_change)}\")"
]
},
{
@@ -256,21 +631,47 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url = 'https://www.fbi.gov/wanted/topten'"
+ "url = 'https://www.fbi.gov/wanted/topten'  # fbi.gov wanted/topten listing; read by the scraping cell below\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 116,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ALEJANDRO ROSALES CASTILLO',\n",
+ " 'YASER ABDEL SAID',\n",
+ " 'JASON DEREK BROWN',\n",
+ " 'RAFAEL CARO-QUINTERO',\n",
+ " 'ALEXIS FLORES',\n",
+ " 'EUGENE PALMER',\n",
+ " 'SANTIAGO VILLALBA MEDEROS',\n",
+ " 'ROBERT WILLIAM FISHER',\n",
+ " 'BHADRESHKUMAR CHETANBHAI PATEL',\n",
+ " 'ARNOLDO JIMENEZ']"
+ ]
+ },
+ "execution_count": 116,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code "
+ "# Request the page behind `url` and parse the returned bytes.\n",
+ "content = requests.get(url).content\n",
+ "fbi_ten = BeautifulSoup(markup=content, features='html')\n",
+ "# Every h3 element with class 'title' supplies one entry; newlines inside its text are removed.\n",
+ "top_ten_headings = fbi_ten.find_all('h3', attrs={'class': 'title'})\n",
+ "top_ten = [heading.get_text().replace('\\n', '') for heading in top_ten_headings]\n",
+ "top_ten"
]
},
{
@@ -282,7 +683,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
@@ -290,13 +691,231 @@
"url = 'https://www.emsc-csem.org/Earthquake/'"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[['2019-07-16', '23:18:22.016 '],\n",
+ " ['2019-07-16', '23:11:27.223 '],\n",
+ " ['2019-07-16', '22:50:40.744 '],\n",
+ " ['2019-07-16', '22:42:26.952 '],\n",
+ " ['2019-07-16', '22:40:21.754 '],\n",
+ " ['2019-07-16', '22:38:43.255 '],\n",
+ " ['2019-07-16', '22:31:33.21 03 '],\n",
+ " ['2019-07-16', '22:11:10.01 23 '],\n",
+ " ['2019-07-16', '22:10:21.01 24 '],\n",
+ " ['2019-07-16', '22:05:16.91 29 '],\n",
+ " ['2019-07-16', '22:03:53.01 30 '],\n",
+ " ['2019-07-16', '21:56:18.01 38 '],\n",
+ " ['2019-07-16', '21:56:14.01 38 '],\n",
+ " ['2019-07-16', '21:42:41.71 52 '],\n",
+ " ['2019-07-16', '21:13:33.82 21 '],\n",
+ " ['2019-07-16', '21:07:37.52 27 '],\n",
+ " ['2019-07-16', '21:06:21.52 28 '],\n",
+ " ['2019-07-16', '21:03:36.72 31 '],\n",
+ " ['2019-07-16', '20:52:18.62 42 '],\n",
+ " ['2019-07-16', '20:49:09.12 45 '],\n",
+ " ['2019-07-16', '20:41:11.02 53 '],\n",
+ " ['2019-07-16', '20:33:52.93 00 '],\n",
+ " ['2019-07-16', '20:31:33.33 03 '],\n",
+ " ['2019-07-16', '20:29:07.73 05 '],\n",
+ " ['2019-07-16', '20:24:16.93 10 '],\n",
+ " ['2019-07-16', '20:19:00.13 15 '],\n",
+ " ['2019-07-16', '20:17:51.63 16 '],\n",
+ " ['2019-07-16', '20:15:59.03 18 '],\n",
+ " ['2019-07-16', '20:11:01.53 23 '],\n",
+ " ['2019-07-16', '19:51:06.53 43 '],\n",
+ " ['2019-07-16', '19:42:25.93 52 '],\n",
+ " ['2019-07-16', '19:35:57.03 58 '],\n",
+ " ['2019-07-16', '19:23:50.14 10 '],\n",
+ " ['2019-07-16', '19:20:21.44 14 '],\n",
+ " ['2019-07-16', '19:16:53.84 17 '],\n",
+ " ['2019-07-16', '19:16:15.94 18 '],\n",
+ " ['2019-07-16', '19:11:48.94 22 '],\n",
+ " ['2019-07-16', '19:04:00.24 30 '],\n",
+ " ['2019-07-16', '19:01:48.04 32 '],\n",
+ " ['2019-07-16', '19:01:00.84 33 '],\n",
+ " ['2019-07-16', '18:53:32.04 41 '],\n",
+ " ['2019-07-16', '18:50:16.24 44 '],\n",
+ " ['2019-07-16', '18:47:48.94 46 '],\n",
+ " ['2019-07-16', '18:36:26.84 58 '],\n",
+ " ['2019-07-16', '18:22:31.95 12 '],\n",
+ " ['2019-07-16', '18:15:28.75 19 '],\n",
+ " ['2019-07-16', '18:10:01.05 24 '],\n",
+ " ['2019-07-16', '17:48:24.05 46 '],\n",
+ " ['2019-07-16', '17:42:29.95 52 '],\n",
+ " ['2019-07-16', '17:39:43.05 54 ']]"
+ ]
+ },
+ "execution_count": 84,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import re\n",
+ "# Fetch the listing at `url`; the 4th cell of each row carries the date, the time and extra text (presumably an 'N min ago' note).\n",
+ "content = requests.get(url).content\n",
+ "earthquake = BeautifulSoup(content, 'html')\n",
+ "# Match 'YYYY-MM-DD' and 'HH:MM:SS.s' explicitly so digits from the extra text cannot leak into the time; cells without a match give [None, None] to keep rows aligned.\n",
+ "earthquake_20 = earthquake.select('tr > td:nth-child(4)')\n",
+ "earthquake_date = [list(m.groups()) if m else [None, None] for m in (re.search(r'(\\d{4}-\\d{2}-\\d{2})\\s+(\\d{2}:\\d{2}:\\d{2}(?:\\.\\d)?)', element.text) for element in earthquake_20)]\n",
+ "earthquake_date\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['2.84 S', '35.57 N', '36.06 N', '35.66 N', '35.64 N', '35.78 N', '35.65 N', '17.32 N', '18.43 S', '34.07 N', '12.40 N', '39.27 N', '16.62 N', '69.27 N', '49.45 N', '40.25 N', '35.88 N', '37.81 N', '36.07 N', '36.07 N', '16.85 N', '40.09 N', '23.45 S', '35.86 N', '37.82 N', '33.10 N', '35.55 N', '35.68 N', '37.82 N', '6.26 S', '35.61 N', '35.62 N', '36.19 N', '38.39 N', '38.45 N', '61.27 N', '36.03 N', '35.96 N', '39.56 N', '35.68 N', '0.68 S', '43.62 N', '35.59 N', '35.74 N', '35.65 N', '28.44 N', '34.41 S', '9.93 S', '35.67 N', '0.54 S']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 5th cell of every table row: latitude value, with non-breaking spaces removed.\n",
+ "latitude_cells = earthquake.select('tr > td:nth-child(5)')\n",
+ "earthquake_latitude = [cell.text.replace('\\xa0', \"\") for cell in latitude_cells]\n",
+ "\n",
+ "# 6th cell: the matching direction letter (N or S in the recorded output).\n",
+ "latitude_direction_cells = earthquake.select('tr > td:nth-child(6)')\n",
+ "earthquake_cardinal = [cell.text.replace('\\xa0', \"\") for cell in latitude_direction_cells]\n",
+ "\n",
+ "latitude_comp = [f\"{a} {b}\" for a, b in zip(earthquake_latitude, earthquake_cardinal)]\n",
+ "print(latitude_comp)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['77.62 W', '117.39 W', '117.85 W', '117.44 W', '117.48 W', '117.59 W', '117.46 W', '101.46 W', '120.58 E', '11.30 W', '144.54 E', '29.92 W', '94.75 W', '144.73 W', '120.50 W', '19.76 E', '117.69 W', '121.76 W', '117.84 W', '117.65 W', '100.25 W', '19.91 E', '66.86 W', '117.69 W', '121.77 W', '12.42 W', '117.43 W', '117.52 W', '121.77 W', '148.65 E', '117.47 W', '117.45 W', '117.89 W', '16.94 E', '16.91 E', '152.44 W', '117.87 W', '117.71 W', '67.17 E', '117.54 W', '126.36 E', '75.40 E', '117.42 W', '117.56 W', '117.52 W', '56.67 E', '150.73 E', '118.23 E', '117.54 W', '127.86 E']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 7th cell of every table row: longitude value, with non-breaking spaces removed.\n",
+ "longitude_cells = earthquake.select('tr > td:nth-child(7)')\n",
+ "earthquake_longitude = [cell.text.replace('\\xa0', \"\") for cell in longitude_cells]\n",
+ "\n",
+ "# 8th cell: the matching direction letter (E or W in the recorded output).\n",
+ "longitude_direction_cells = earthquake.select('tr > td:nth-child(8)')\n",
+ "earthquake_cardinal = [cell.text.replace('\\xa0', \"\") for cell in longitude_direction_cells]\n",
+ "\n",
+ "longitude_comp = [f\"{a} {b}\" for a, b in zip(earthquake_longitude, earthquake_cardinal)]\n",
+ "print(longitude_comp)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'OFFSHORE GUERRERO, MEXICO',\n",
+ " 'WESTERN AUSTRALIA',\n",
+ " 'MADEIRA ISLANDS, PORTUGAL REGION',\n",
+ " 'GUAM REGION',\n",
+ " 'AZORES ISLANDS, PORTUGAL',\n",
+ " 'OAXACA, MEXICO',\n",
+ " 'NORTHERN ALASKA',\n",
+ " 'ALBANIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'SAN FRANCISCO BAY AREA, CALIF.',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'OFFSHORE GUERRERO, MEXICO',\n",
+ " 'ALBANIA',\n",
+ " 'JUJUY, ARGENTINA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'SAN FRANCISCO BAY AREA, CALIF.',\n",
+ " 'MADEIRA ISLANDS, PORTUGAL REGION',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SAN FRANCISCO BAY AREA, CALIF.',\n",
+ " 'NEW BRITAIN REGION, P.N.G.',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'SOUTHERN ITALY',\n",
+ " 'SOUTHERN ITALY',\n",
+ " 'SOUTHERN ALASKA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'EASTERN UZBEKISTAN',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'MOLUCCA SEA',\n",
+ " 'EASTERN KAZAKHSTAN',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'SOUTHERN IRAN',\n",
+ " 'NEAR S.E. COAST OF AUSTRALIA',\n",
+ " 'SUMBAWA REGION, INDONESIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'HALMAHERA, INDONESIA',\n",
+ " 'SOUTHERN CALIFORNIA',\n",
+ " 'CENTRAL CALIFORNIA',\n",
+ " 'OFFSHORE OAXACA, MEXICO',\n",
+ " 'CENTRAL CALIFORNIA']"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Region names come from the td cells with class 'tb_region'; non-breaking spaces are stripped.\n",
+ "region = earthquake.find_all('td', attrs={'class': 'tb_region'})\n",
+ "region_name = [cell.get_text().replace('\\xa0', \"\") for cell in region]\n",
+ "region_name"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#your code"
+ "# Assemble the scraped columns into one table. Each frame gets the default integer index,\n",
+ "# so DataFrame.join lines the rows up by position.\n",
+ "df_earthquakes = pd.DataFrame(earthquake_date, columns=['year', 'time'])\n",
+ "\n",
+ "df_latitude = pd.DataFrame({'latitude': latitude_comp})\n",
+ "\n",
+ "df_longitude = pd.DataFrame({'longitude': longitude_comp})\n",
+ "\n",
+ "df_region = pd.DataFrame({'region': region_name})\n",
+ "\n",
+ "# join() accepts a list of frames and attaches all of them in a single call.\n",
+ "to_concat = [df_latitude, df_longitude, df_region]\n",
+ "\n",
+ "# NOTE(review): the 'year' column receives the first item of each earthquake_date pair (full dates in the recorded output) -- confirm the name.\n",
+ "table_earthquakes = df_earthquakes.join(to_concat)\n",
+ "table_earthquakes"
]
},
{
@@ -308,21 +927,99 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"# This is the url you will scrape in this exercise\n",
- "url ='https://hackevents.co/hackathons'"
+ "url ='https://hackevents.co/'  # site root; the exercise originally pointed at /hackathons"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 150,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Event | \n",
+ " Date | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Code Geist Hackathon by SefrWahed | \n",
+ " 7/29/2019 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " The Code Factor | \n",
+ " 5/21/2019 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " TECHFEST MUNICH | \n",
+ " 9/6/2019 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Galileo App Competition | \n",
+ " 1/31/2019 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Event Date\n",
+ "0 Code Geist Hackathon by SefrWahed 7/29/2019\n",
+ "1 The Code Factor 5/21/2019\n",
+ "2 TECHFEST MUNICH 9/6/2019\n",
+ "3 Galileo App Competition 1/31/2019"
+ ]
+ },
+ "execution_count": 150,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code"
+ "# Scrape event titles and dates from the cards on the page at `url`.\n",
+ "content = requests.get(url).content\n",
+ "hack = BeautifulSoup(content, 'html5')\n",
+ "\n",
+ "# Event titles: text of every h5 element with class 'card-title'.\n",
+ "hack_event = hack.find_all('h5', {'class': 'card-title'})\n",
+ "hack_event = [element.text for element in hack_event]\n",
+ "\n",
+ "# Event dates: the first M/D/YYYY token in each p element with class 'card-text' ('' when none is found).\n",
+ "# Matching the whole date shape keeps unrelated digits or slashes in the paragraph out of the result.\n",
+ "hack_date_tags = hack.find_all('p', {'class': 'card-text'})\n",
+ "hack_event_date = [m.group() if m else '' for m in (re.search(r'\\d{1,2}/\\d{1,2}/\\d{4}', tag.text) for tag in hack_date_tags)]\n",
+ "\n",
+ "# NOTE(review): assumes titles and dates pair up one-to-one; pd.DataFrame raises if the two lists differ in length.\n",
+ "result = pd.DataFrame({'Event': hack_event, 'Date': hack_event_date})\n",
+ "result"
]
},
{