From ed5308ae93b06635cc90ad9a1518c66f925e9d5e Mon Sep 17 00:00:00 2001 From: Peter Waters Date: Tue, 29 Aug 2023 22:39:11 +0100 Subject: [PATCH] Add my Jupyter Notebook --- your-code/main.ipynb | 2083 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2050 insertions(+), 33 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 1fe9046..eb95e98 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -64,11 +64,1284 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[
\n", + " \n", + " 1\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Azure SDK Bot\n", + "

\n", + "

\n", + " \n", + " azure-sdk\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 2\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Pedro Cuenca\n", + "

\n", + "

\n", + " \n", + " pcuenca\n", + "

\n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Spain

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 3\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " oobabooga\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 4\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Jerry Liu\n", + "

\n", + "

\n", + " \n", + " jerryjliu\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 5\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " kenjis\n", + "

\n", + "

\n", + " \n", + " kenjis\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 6\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Ismail Pelaseyed\n", + "

\n", + "

\n", + " \n", + " homanp\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 7\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Joe Bell\n", + "

\n", + "

\n", + " \n", + " joe-bell\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 8\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Laurent Mazare\n", + "

\n", + "

\n", + " \n", + " LaurentMazare\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 9\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Rick Anderson\n", + "

\n", + "

\n", + " \n", + " Rick-Anderson\n", + "

\n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Microsoft

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 10\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Brad Fitzpatrick\n", + "

\n", + "

\n", + " \n", + " bradfitz\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 11\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Stephen Celis\n", + "

\n", + "

\n", + " \n", + " stephencelis\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 12\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Henrik Rydgård\n", + "

\n", + "

\n", + " \n", + " hrydgard\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 13\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Matt Brophy\n", + "

\n", + "

\n", + " \n", + " brophdawg11\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 14\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Clement Tsang\n", + "

\n", + "

\n", + " \n", + " ClementTsang\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 15\n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "

\n", + " \n", + " @n8n-io

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 16\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Yagiz Nizipli\n", + "

\n", + "

\n", + " \n", + " anonrig\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 17\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Willi Ballenthin\n", + "

\n", + "

\n", + " \n", + " williballenthin\n", + "

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " Popular repo
\n", + "

\n", + " \n", + " \n", + " \n", + " \n", + " EVTXtract\n", + "

\n", + "
\n", + " EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images.\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 18\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Xinwei Xiong\n", + "

\n", + "

\n", + " \n", + " cubxxw\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 19\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " NachoSoto\n", + "

\n", + "

\n", + " \n", + " NachoSoto\n", + "

\n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " RevenueCat

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 20\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Aman Gupta Karmani\n", + "

\n", + "

\n", + " \n", + " tmm1\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 21\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Ariel Mashraki\n", + "

\n", + "

\n", + " \n", + " a8m\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 22\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Rick\n", + "

\n", + "

\n", + " \n", + " LinuxSuRen\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 23\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Tim Besard\n", + "

\n", + "

\n", + " \n", + " maleadt\n", + "

\n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " @JuliaComputing

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 24\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " Aaron Pham\n", + "

\n", + "

\n", + " \n", + " aarnphm\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
,\n", + "
\n", + " \n", + " 25\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " \n", + " C. T. Lin\n", + "

\n", + "

\n", + " \n", + " chentsulin\n", + "

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " Follow\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "# your code here\n", + "response = requests.get(url)\n", + "soup=BeautifulSoup(response.content)\n", + "table= soup.find_all(\"div\", attrs={\"class\":\"position-relative container-lg p-responsive pt-6\"})\n", + "table_row = table[0].find_all(\"article\",attrs= {\"class\":\"Box-row d-flex\"})\n", + "table_row" ] }, { @@ -126,13 +1399,163 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[\n", + " azure-sdk\n", + " ,\n", + " \n", + " pcuenca\n", + " ,\n", + " \n", + " jerryjliu\n", + " ,\n", + " \n", + " kenjis\n", + " ,\n", + " \n", + " homanp\n", + " ,\n", + " \n", + " joe-bell\n", + " ,\n", + " \n", + " LaurentMazare\n", + " ,\n", + " \n", + " Rick-Anderson\n", + " ,\n", + " \n", + " bradfitz\n", + " ,\n", + " \n", + " stephencelis\n", + " ,\n", + " \n", + " hrydgard\n", + " ,\n", + " \n", + " brophdawg11\n", + " ,\n", + " \n", + " ClementTsang\n", + " ,\n", + " \n", + " netroy\n", + " ,\n", + " \n", + " anonrig\n", + " ,\n", + " \n", + " williballenthin\n", + " ,\n", + " \n", + " cubxxw\n", + " ,\n", + " \n", + " NachoSoto\n", + " ,\n", + " \n", + " tmm1\n", + " ,\n", + " \n", + " a8m\n", + " ,\n", + " \n", + " LinuxSuRen\n", + " ,\n", + " \n", + " maleadt\n", + " ,\n", + " \n", + " aarnphm\n", + " ,\n", + " \n", + " chentsulin\n", + " ]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "table[0].find_all(\"a\",attrs={\"class\":\"Link--secondary Link\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Azure SDK Bot (Azure SDK Bot)',\n", + " 'Pedro Cuenca (Pedro Cuenca)',\n", + " 'oobabooga',\n", + " 'Jerry Liu (Jerry Liu)',\n", + " 'kenjis (kenjis)',\n", + " 'Ismail Pelaseyed (Ismail Pelaseyed)',\n", + " 'Joe Bell (Joe Bell)',\n", + " 'Laurent Mazare (Laurent Mazare)',\n", + " 'Rick Anderson (Rick Anderson)',\n", + " 'Brad Fitzpatrick (Brad Fitzpatrick)',\n", + " 'Stephen Celis (Stephen Celis)',\n", + " 'Henrik Rydgård (Henrik Rydgård)',\n", + " 'Matt Brophy (Matt Brophy)',\n", + " 'Clement Tsang (Clement Tsang)',\n", + " 'कारतोफ्फेलस्क्रिप्ट™ (कारतोफ्फेलस्क्रिप्ट™)',\n", + " 'Yagiz Nizipli (Yagiz Nizipli)',\n", + " 'Willi Ballenthin (Willi Ballenthin)',\n", + " 'Xinwei Xiong (Xinwei Xiong)',\n", + " 'NachoSoto (NachoSoto)',\n", + " 'Aman Gupta Karmani (Aman Gupta Karmani)',\n", + " 'Ariel Mashraki (Ariel Mashraki)',\n", + " 'Rick (Rick)',\n", + " 'Tim Besard (Tim Besard)',\n", + " 'Aaron Pham (Aaron Pham)',\n", + " 'C. T. Lin (C. T. Lin)']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_list= []\n", + "for info in table_row:\n", + " name_dev = info.find_all(\"h1\", attrs = {\"class\": \"h3 lh-condensed\"})#[0].get_text().strip()\n", + " user_dev = info.find_all(\"p\", attrs = {\"class\": \"f4 text-normal mb-1\"})#[0].get_text().strip()\n", + " if len(user_dev)>0:\n", + " my_list.append(info.find_all(\"h1\", attrs = {\"class\": \"h3 lh-condensed\"})[0].get_text().strip()+\" (\"+info.find_all(\"h1\", attrs = {\"class\": \"h3 lh-condensed\"})[0].get_text().strip()+\")\")\n", + " else:\n", + " my_list.append(info.find_all(\"h1\", attrs = {\"class\": \"h3 lh-condensed\"})[0].get_text().strip())\n", + " # developers_dict[counter] = my_list.append(info.find_all(\"p\", attrs = {\"class\": \"f4 text-normal mb-1\"}))#[0].get_text().strip())\n", + "\n", + "my_list" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -144,7 +1567,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -154,13 +1577,72 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "r = requests.get(url)\n", + "soup=BeautifulSoup(r.content)\n", + "table=soup.find_all(\"div\", attrs={\"class\":\"position-relative container-lg p-responsive pt-6\"})\n", + "table_row = table[0].find_all(\"h2\",attrs={\"class\":\"h3 lh-condensed\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['facefusion', 'facefusion'],\n", + " ['facebookresearch', 'codellama'],\n", + " ['Pythagora-io', 'gpt-pilot'],\n", + " ['lllyasviel', 'Fooocus'],\n", + " ['facebookresearch', 'llama'],\n", + " ['geekan', 'MetaGPT'],\n", + " ['nlpxucan', 'WizardLM'],\n", + " ['spcl', 'graph-of-thoughts'],\n", + " ['QwenLM', 'Qwen-VL'],\n", + " ['s0md3v', 'roop'],\n", + " ['AUTOMATIC1111', 'stable-diffusion-webui'],\n", + " ['yt-dlp', 'yt-dlp'],\n", + " ['neulab', 'prompt2model'],\n", + " ['zhayujie', 'chatgpt-on-wechat'],\n", + " ['Plachtaa', 'VALL-E-X'],\n", + " ['public-apis', 'public-apis'],\n", + " ['paul-gauthier', 'aider'],\n", + " ['w-okada', 'voice-changer'],\n", + " ['donnemartin', 'system-design-primer'],\n", + " ['embedchain', 'embedchain'],\n", + " ['khoj-ai', 'khoj'],\n", + " ['voicepaw', 'so-vits-svc-fork'],\n", + " ['Significant-Gravitas', 'Auto-GPT'],\n", + " ['fastapi-users', 'fastapi-users'],\n", + " ['ddbourgin', 'numpy-ml']]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "repo = table_row[0].find_all(\"a\",href=True)[0].get_text().split()\n", + "my_list=[]\n", + "for i in table_row:\n", + " repo = i.find_all(\"a\",href=True)[0]\n", + " my_list.append(i.find_all(\"a\",href=True)[0].get_text().replace('/', '').replace('\\n', '').strip().split())\n", + "my_list" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -171,7 +1653,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -181,13 +1663,74 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "r = requests.get(url)\n", + "soup=BeautifulSoup(r.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Cscr-featured.svg/20px-Cscr-featured.svg.png\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/8/8c/Extended-protection-shackle.svg/20px-Extended-protection-shackle.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/d/df/Walt_Disney_1946.JPG/220px-Walt_Disney_1946.JPG\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/8/87/Walt_Disney_1942_signature.svg/150px-Walt_Disney_1942_signature.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Walt_Disney_Birthplace_Exterior_Hermosa_Chicago_Illinois.jpg/220px-Walt_Disney_Birthplace_Exterior_Hermosa_Chicago_Illinois.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/c/c4/Walt_Disney_envelope_ca._1921.jpg/220px-Walt_Disney_envelope_ca._1921.jpg\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/4/4e/Steamboat-willie.jpg/220px-Steamboat-willie.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/c/cd/Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg/220px-Walt_Disney_Snow_white_1937_trailer_screenshot_%2813%29.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/1/15/Disney_drawing_goofy.jpg/170px-Disney_drawing_goofy.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/WaltDisneyplansDisneylandDec1954.jpg/220px-WaltDisneyplansDisneylandDec1954.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Walt_disney_portrait_right.jpg/170px-Walt_disney_portrait_right.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Walt_Disney_Grave.JPG/170px-Walt_Disney_Grave.JPG\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/1/13/DisneySchiphol1951.jpg/220px-DisneySchiphol1951.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/6/6c/Disney1968.jpg/170px-Disney1968.jpg\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Disney_Oscar_1953_%28cropped%29.jpg/170px-Disney_Oscar_1953_%28cropped%29.jpg\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/30px-Commons-logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/4/4c/Wikisource-logo.svg/38px-Wikisource-logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/34px-Wikiquote-logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/e/e3/Disneyland_Resort_logo.svg/135px-Disneyland_Resort_logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/d/da/Animation_disc.svg/20px-Animation_disc.svg.png\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/6/69/P_vip.svg/19px-P_vip.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Magic_Kingdom_castle.jpg/15px-Magic_Kingdom_castle.jpg\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/e/e7/Video-x-generic.svg/19px-Video-x-generic.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/a/a3/Flag_of_Los_Angeles_County%2C_California.svg/21px-Flag_of_Los_Angeles_County%2C_California.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/8/8c/Blank_television_set.svg/21px-Blank_television_set.svg.png\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/a/a4/Flag_of_the_United_States.svg/21px-Flag_of_the_United_States.svg.png\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/14px-Commons-logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/16px-Wikiquote-logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/4/4c/Wikisource-logo.svg/18px-Wikisource-logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/21px-Wikidata-logo.svg.png\n", + "https//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png\n" + ] + } + ], + "source": [ + "# your code here\n", + "img = soup.find_all(\"img\", {\"class\":\"mw-file-element\"})\n", + "for image in img:\n", + " # Print image source\n", + " print(f\"https{image['src']}\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -197,21 +1740,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise\n", - "url = 'https://www.wikipedia.org/'" + "url = 'https://www.wikipedia.org/'\n", + "r = requests.get(url)\n", + "soup=BeautifulSoup(r.content)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "English: 6701000\n", + "日本語: 1384000\n", + "Español: 1886000\n", + "Русский: 1933000\n", + "Deutsch: 2828000\n", + "Français: 2546000\n", + "Italiano: 1823000\n", + "中文: 1373000\n", + "Português: 1107000\n", + "فارسی: فارسی\n" + ] + } + ], "source": [ - "# your code here" + "# your code here\n", + "language=soup.find_all(\"div\",attrs={\"class\",\"central-featured-lang\"})\n", + "\n", + "for lan in language:\n", + " language_name= lan.find(\"strong\").text\n", + " num_article = lan.find(\"bdi\").get_text().strip('+').replace('\\xa0', '')\n", + " print(language_name + ': ' + num_article) " ] }, { @@ -224,7 +1792,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -234,11 +1802,118 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LanguageNative Speakers
0Mandarin Chinese (incl. Standard Chinese, but ...939.0
1Spanish485.0
2English380.0
3Hindi (excl. Urdu, and other languages)345.0
4Portuguese236.0
5Bengali234.0
6Russian147.0
7Japanese123.0
8Yue Chinese (incl. Cantonese)86.1
9Vietnamese85.0
\n", + "
" + ], + "text/plain": [ + " Language Native Speakers\n", + "0 Mandarin Chinese (incl. Standard Chinese, but ... 939.0\n", + "1 Spanish 485.0\n", + "2 English 380.0\n", + "3 Hindi (excl. Urdu, and other languages) 345.0\n", + "4 Portuguese 236.0\n", + "5 Bengali 234.0\n", + "6 Russian 147.0\n", + "7 Japanese 123.0\n", + "8 Yue Chinese (incl. Cantonese) 86.1\n", + "9 Vietnamese 85.0" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "\n", + "# your code here\n", + "tables = pd.read_html(url)\n", + "\n", + "df = tables[0]\n", + "df.columns = ['Language','Native Speakers','Language family', 'Branch']\n", + "\n", + "top_10 = df.iloc[:10, 0:2]\n", + "top_10" ] }, { @@ -251,21 +1926,149 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# This is the url you will scrape in this exercise \n", - "url = 'https://www.imdb.com/chart/top'" + "url = 'https://www.imdb.com/chart/top'\n", + "r= requests.get(url, headers={'User-Agent':'Slurp'})\n", + "soup=BeautifulSoup(r.content)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "row_director = soup.find_all('body', attrs={'class':'ipc-promptable-base--body-locked'})\n", + "# irector_name = row_director[1].find_all(\"a\", attrs={\"class\": \"ipc-metadata-list-item__list-content-item ipc-metadata-list-item__list-content-item--link\"})\n", + "row_director" + ] + }, + { + "cell_type": "code", + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "\n", + "[]\n", + "# your code here\n", + "rows = soup.find_all('li',attrs={'class': 'ipc-metadata-list-summary-item sc-bca49391-0 eypSaE cli-parent'})\n", + "title = rows[0].find_all(\"h3\", attrs={\"class\": \"ipc-title__text\"})\n", + "year = rows[0].find_all('span')[1]\n", + "rate = rows[0].find_all('span')[5]\n", + "movie_name = []\n", + "years = []\n", + "rates = []\n", + "for i, row in enumerate(rows):\n", + " title = row.find_all('h3', attrs={'class': 'ipc-title__text'})[0].get_text().strip()\n", + " year = row.find_all('span')[1].get_text().strip()\n", + " rate = row.find_all('span')[5].get_text().strip()\n", + " movie_name.append(title)\n", + " years.append(year)\n", + " rates.append(rate)\n", + "data = {'title': movie_name, \n", + " 'year': years, \n", + " 'rate': rates}" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleyearrate
01. Os Condenados de Shawshank19949.3 (2.8M)
12. O Padrinho19729.2 (1.9M)
23. O Cavaleiro das Trevas20089.0 (2.8M)
34. O Padrinho: Parte II19749.0 (1.3M)
45. Doze Homens em Fúria19579.0 (829K)
\n", + "
" + ], + "text/plain": [ + " title year rate\n", + "0 1. Os Condenados de Shawshank 1994 9.3 (2.8M)\n", + "1 2. O Padrinho 1972 9.2 (1.9M)\n", + "2 3. O Cavaleiro das Trevas 2008 9.0 (2.8M)\n", + "3 4. O Padrinho: Parte II 1974 9.0 (1.3M)\n", + "4 5. Doze Homens em Fúria 1957 9.0 (829K)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data)\n", + "df.head()" ] }, { @@ -277,21 +2080,235 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "#This is the url you will scrape in this exercise\n", - "url = 'https://www.imdb.com/list/ls009796553/'" + "url = 'https://www.imdb.com/list/ls009796553/'\n", + "r= requests.get(url, headers={'User-Agent':'Slurp'})\n", + "soup=BeautifulSoup(r.content)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.Pesadelo em Elm Street(1984)'" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rows = soup.find_all('h3', attrs={'class': 'lister-item-header'})\n", + "name = rows[0].get_text().strip().replace('\\n', '')\n", + "name" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'(1984)'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "year = soup.find_all('span', attrs={'class': 'lister-item-year text-muted unbold'})\n", + "year[0].get_text().strip().replace('\\n', '')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Teenager Nancy Thompson must uncover the dark truth concealed by her parents after she and her friends become targets of the spirit of a serial killer with a bladed glove in their dreams, in which if they die, it kills them in real life.'" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary_rows = soup.find_all('p',{'class':''})\n", + "summary_rows[0].get_text().strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "\n", + "names = []\n", + "years = []\n", + "summarys = []\n", + "for i, row in enumerate(rows):\n", + " name = row.get_text().strip().replace('\\n', '')\n", + " year = row.find_all('span', attrs={'class': 'lister-item-year text-muted unbold'})[0].get_text().strip().replace('\\n', '')\n", + " names.append(name[2:-6])\n", + " years.append(year)\n", + "for i, row in enumerate(summary_rows):\n", + " summary = row.get_text().strip().replace('\\n', '')\n", + " summarys.append(summary)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namesyearssummary
0Pesadelo em Elm Street(1984)Teenager Nancy Thompson must uncover the dark ...
1Despertares(1990)The victims of an encephalitis epidemic many y...
2Liga de Mulheres(1992)Two sisters join the first female professional...
3Um Bairro em Nova Iorque(1993)Robert De Niro and Chazz Palminteri give capti...
4Anjos em Campo(1994)When a boy prays for a chance to have a family...
5Tempo de Matar(1996)In Canton, Mississippi, a fearless young lawye...
6Amistad(1997)In 1839, the revolt of Mende captives aboard a...
7Anaconda(1997)A \"National Geographic\" film crew is taken hos...
8A Cool, Dry Place(1998)Russell, single father balances his work as a ...
9.América Proibida(1998)Living a life marked by violence, neo-Nazi Der...
\n", + "
" + ], + "text/plain": [ + " names years \\\n", + "0 Pesadelo em Elm Street (1984) \n", + "1 Despertares (1990) \n", + "2 Liga de Mulheres (1992) \n", + "3 Um Bairro em Nova Iorque (1993) \n", + "4 Anjos em Campo (1994) \n", + "5 Tempo de Matar (1996) \n", + "6 Amistad (1997) \n", + "7 Anaconda (1997) \n", + "8 A Cool, Dry Place (1998) \n", + "9 .América Proibida (1998) \n", + "\n", + " summary \n", + "0 Teenager Nancy Thompson must uncover the dark ... \n", + "1 The victims of an encephalitis epidemic many y... \n", + "2 Two sisters join the first female professional... \n", + "3 Robert De Niro and Chazz Palminteri give capti... \n", + "4 When a boy prays for a chance to have a family... \n", + "5 In Canton, Mississippi, a fearless young lawye... \n", + "6 In 1839, the revolt of Mende captives aboard a... \n", + "7 A \"National Geographic\" film crew is taken hos... \n", + "8 Russell, single father balances his work as a ... \n", + "9 Living a life marked by violence, neo-Nazi Der... " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "data = {'names': names, \n", + " 'years': years, \n", + " 'summary': summarys}\n", + "data=pd.DataFrame(data)\n", + "data.head(10)" ] }, { @@ -395,7 +2412,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -409,7 +2426,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.10.9" } }, "nbformat": 4,