diff --git a/your-code/Proyecto_WebScrapping_APIs.ipynb b/your-code/Proyecto_WebScrapping_APIs.ipynb new file mode 100644 index 0000000..df583be --- /dev/null +++ b/your-code/Proyecto_WebScrapping_APIs.ipynb @@ -0,0 +1,3419 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "id": "20d32fff", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd\n", + "import numpy as np\n", + "from lxml import html\n", + "from lxml.html import fromstring\n", + "import random\n", + "import re\n", + "import csv" + ] + }, + { + "cell_type": "markdown", + "id": "681843a0", + "metadata": {}, + "source": [ + "# Proyecto Web Scraping & API's" + ] + }, + { + "cell_type": "markdown", + "id": "94fc04f5", + "metadata": {}, + "source": [ + "## Ranking ATP" + ] + }, + { + "cell_type": "markdown", + "id": "f9eb366c", + "metadata": {}, + "source": [ + "### Lo que haremos será obtener el ranking de los 50 mejores tenistas masculinos.\n" + ] + }, + { + "cell_type": "markdown", + "id": "767663fa", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "d82786be", + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://www.marca.com/tenis/clasificacion-atp.html'" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "7a089c92", + "metadata": {}, + "outputs": [], + "source": [ + "html = requests.get(url).content\n", + "soup = BeautifulSoup(html, 'lxml')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ce3def68", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "sopa = soup.select('span.ue-table-ranking__name')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "da1bc580", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Novak Djokovic',\n", + " 'Daniil Medvedev',\n", + " 'Alexander Zverev',\n", + " 'Stefanos Tsitsipas',\n", + " 'Rafael Nadal',\n", 
+ " 'Andrey Rublev',\n", + " 'Matteo Berrettini',\n", + " 'Casper Ruud',\n", + " 'Felix Auger Aliassime',\n", + " 'Jannik Sinner',\n", + " 'Hubert Hurkacz',\n", + " 'Cameron Norrie',\n", + " 'Diego Sebastian Schwartzman',\n", + " 'Denis Shapovalov',\n", + " 'Aslan Karatsev',\n", + " 'Dominic Thiem',\n", + " 'Roger Federer',\n", + " 'Roberto Bautista Agut',\n", + " 'Cristian Garin',\n", + " 'Gael Monfils',\n", + " 'Pablo Carreño-Busta',\n", + " 'Taylor Harry Fritz',\n", + " 'Nikoloz Basilashvili',\n", + " 'Daniel Evans',\n", + " 'John Isner',\n", + " 'Lorenzo Sonego',\n", + " 'Marin Cilic',\n", + " 'Grigor Dimitrov',\n", + " 'Reilly Opelka',\n", + " 'Karen Khachanov',\n", + " 'Carlos Alcaraz',\n", + " 'Fabio Fognini',\n", + " 'Lloyd George Muirhead Harris',\n", + " 'Frances Tiafoe',\n", + " 'Marton Fucsovics',\n", + " 'Filip Krajinovic',\n", + " 'Alexander Bublik',\n", + " 'Federico Delbonis',\n", + " 'Dusan Lajovic',\n", + " 'Ugo Humbert',\n", + " 'Tommy Paul',\n", + " 'Álex de Miñaur',\n", + " 'Sebastian Korda',\n", + " 'Albert Ramos Viñolas',\n", + " 'David Goffin',\n", + " 'Kei Nishikori',\n", + " 'Ilya Ivashka',\n", + " 'Arthur Rinderknech',\n", + " 'James Duckworth',\n", + " 'Alejandro Davidovich Fokina',\n", + " 'Laslo Djere',\n", + " 'Jan-Lennard Struff',\n", + " 'Dominik Koepfer',\n", + " 'Soon Woo Kwon',\n", + " 'Mackenzie McDonald',\n", + " 'Benoit Paire',\n", + " 'Botic Van de Zandschulp',\n", + " 'Jenson Brooksby',\n", + " 'Alexei Popyrin',\n", + " 'Lorenzo Musetti',\n", + " 'Pedro Martínez Portero',\n", + " 'Tallon Griekspoor',\n", + " 'Benjamin Bonzi',\n", + " 'Federico Coria',\n", + " 'Gianluca Mager',\n", + " 'Marcos Giron',\n", + " 'Hugo Gaston',\n", + " 'Brandon Nakashima',\n", + " 'Adrian Mannarino',\n", + " 'Maxime Cressy',\n", + " 'Jaume Munar',\n", + " 'Facundo Bagnis',\n", + " 'Jordan Thompson',\n", + " 'Alex Molcan',\n", + " 'Roberto Carballes Baena',\n", + " 'Guido Pella',\n", + " 'Miomir Kecmanovic',\n", + " 'Jiri Vesely',\n", + " 'Thiago 
Monteiro',\n", + " 'Juan Manuel Cerundolo',\n", + " 'Richard Gasquet',\n", + " 'Peter Gojowczyk',\n", + " 'Pablo Andújar',\n", + " 'Borna Coric',\n", + " 'Milos Raonic',\n", + " 'Mikael Ymer',\n", + " 'Daniel Altmaier',\n", + " 'Sebastian Baez',\n", + " 'John Millman',\n", + " 'Emil Ruusuvuori',\n", + " 'Henri Laaksonen',\n", + " 'Pablo Cuevas',\n", + " 'Ricardas Berankis',\n", + " 'Tennys Sandgren',\n", + " 'Marco Cecchinato',\n", + " 'Oscar Otte',\n", + " 'Kevin Anderson',\n", + " 'Stefano Travaglia',\n", + " 'Holger Rune',\n", + " 'Corentin Moutet']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Nombres = [element.text for element in sopa]\n", + "Nombres" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "19cd7f65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[11015,\n", + " 8935,\n", + " 7970,\n", + " 6540,\n", + " 4875,\n", + " 4785,\n", + " 4568,\n", + " 4155,\n", + " 3608,\n", + " 3390,\n", + " 3336,\n", + " 2900,\n", + " 2730,\n", + " 2593,\n", + " 2553,\n", + " 2410,\n", + " 2385,\n", + " 2385,\n", + " 2375,\n", + " 2373,\n", + " 2305,\n", + " 2175,\n", + " 2051,\n", + " 1957,\n", + " 1881,\n", + " 1860,\n", + " 1840,\n", + " 1821,\n", + " 1776,\n", + " 1748,\n", + " 1609,\n", + " 1494,\n", + " 1473,\n", + " 1467,\n", + " 1457,\n", + " 1427,\n", + " 1411,\n", + " 1347,\n", + " 1346,\n", + " 1318,\n", + " 1317,\n", + " 1316,\n", + " 1286,\n", + " 1259,\n", + " 1216,\n", + " 1210,\n", + " 1194,\n", + " 1178,\n", + " 1166,\n", + " 1160,\n", + " 1156,\n", + " 1149,\n", + " 1096,\n", + " 1085,\n", + " 1084,\n", + " 1075,\n", + " 1069,\n", + " 1063,\n", + " 1010,\n", + " 1004,\n", + " 1001,\n", + " 1001,\n", + " 979,\n", + " 976,\n", + " 957,\n", + " 920,\n", + " 919,\n", + " 917,\n", + " 879,\n", + " 875,\n", + " 875,\n", + " 868,\n", + " 860,\n", + " 839,\n", + " 837,\n", + " 836,\n", + " 836,\n", + " 824,\n", + " 824,\n", + " 818,\n", + " 807,\n", + 
" 802,\n", + " 800,\n", + " 799,\n", + " 794,\n", + " 793,\n", + " 792,\n", + " 791,\n", + " 788,\n", + " 783,\n", + " 781,\n", + " 770,\n", + " 761,\n", + " 759,\n", + " 759,\n", + " 748,\n", + " 744,\n", + " 744,\n", + " 742,\n", + " 737]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sopa_puntos = soup.select('td.ue-table-ranking__td.is-marked')\n", + "sopa_puntos" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a88c71b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['11015',\n", + " '8935',\n", + " '7970',\n", + " '6540',\n", + " '4875',\n", + " '4785',\n", + " '4568',\n", + " '4155',\n", + " '3608',\n", + " '3390',\n", + " '3336',\n", + " '2900',\n", + " '2730',\n", + " '2593',\n", + " '2553',\n", + " '2410',\n", + " '2385',\n", + " '2385',\n", + " '2375',\n", + " '2373',\n", + " '2305',\n", + " '2175',\n", + " '2051',\n", + " '1957',\n", + " '1881',\n", + " '1860',\n", + " '1840',\n", + " '1821',\n", + " '1776',\n", + " '1748',\n", + " '1609',\n", + " '1494',\n", + " '1473',\n", + " '1467',\n", + " '1457',\n", + " '1427',\n", + " '1411',\n", + " '1347',\n", + " '1346',\n", + " '1318',\n", + " '1317',\n", + " '1316',\n", + " '1286',\n", + " '1259',\n", + " '1216',\n", + " '1210',\n", + " '1194',\n", + " '1178',\n", + " '1166',\n", + " '1160',\n", + " '1156',\n", + " '1149',\n", + " '1096',\n", + " '1085',\n", + " '1084',\n", + " '1075',\n", + " '1069',\n", + " '1063',\n", + " '1010',\n", + " '1004',\n", + " '1001',\n", + " '1001',\n", + " '979',\n", + " '976',\n", + " '957',\n", + " '920',\n", + " '919',\n", + " '917',\n", + " '879',\n", + " '875',\n", + " '875',\n", + " '868',\n", + " '860',\n", + " '839',\n", + " '837',\n", + " '836',\n", + " '836',\n", + " '824',\n", + " '824',\n", + " '818',\n", + " '807',\n", + " '802',\n", + " '800',\n", + " '799',\n", + " '794',\n", + " '793',\n", + " '792',\n", + " '791',\n", + " '788',\n", + " '783',\n", + 
" '781',\n", + " '770',\n", + " '761',\n", + " '759',\n", + " '759',\n", + " '748',\n", + " '744',\n", + " '744',\n", + " '742',\n", + " '737']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Puntos = [element.text for element in sopa_puntos]\n", + "Puntos" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2c6dc1cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamesPoints
0Novak Djokovic11015
1Daniil Medvedev8935
2Alexander Zverev7970
3Stefanos Tsitsipas6540
4Rafael Nadal4875
.........
95Oscar Otte748
96Kevin Anderson744
97Stefano Travaglia744
98Holger Rune742
99Corentin Moutet737
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Names Points\n", + "0 Novak Djokovic 11015\n", + "1 Daniil Medvedev 8935\n", + "2 Alexander Zverev 7970\n", + "3 Stefanos Tsitsipas 6540\n", + "4 Rafael Nadal 4875\n", + ".. ... ...\n", + "95 Oscar Otte 748\n", + "96 Kevin Anderson 744\n", + "97 Stefano Travaglia 744\n", + "98 Holger Rune 742\n", + "99 Corentin Moutet 737\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Ranking = pd.DataFrame(zip(Nombres, Puntos), columns=['Names', 'Points'])\n", + "Ranking" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c96f691d", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamesPoints
0Novak Djokovic11015
1Daniil Medvedev8935
2Alexander Zverev7970
3Stefanos Tsitsipas6540
4Rafael Nadal4875
5Andrey Rublev4785
6Matteo Berrettini4568
7Casper Ruud4155
8Felix Auger Aliassime3608
9Jannik Sinner3390
10Hubert Hurkacz3336
11Cameron Norrie2900
12Diego Sebastian Schwartzman2730
13Denis Shapovalov2593
14Aslan Karatsev2553
15Dominic Thiem2410
16Roger Federer2385
17Roberto Bautista Agut2385
18Cristian Garin2375
19Gael Monfils2373
20Pablo Carreño-Busta2305
21Taylor Harry Fritz2175
22Nikoloz Basilashvili2051
23Daniel Evans1957
24John Isner1881
25Lorenzo Sonego1860
26Marin Cilic1840
27Grigor Dimitrov1821
28Reilly Opelka1776
29Karen Khachanov1748
30Carlos Alcaraz1609
31Fabio Fognini1494
32Lloyd George Muirhead Harris1473
33Frances Tiafoe1467
34Marton Fucsovics1457
35Filip Krajinovic1427
36Alexander Bublik1411
37Federico Delbonis1347
38Dusan Lajovic1346
39Ugo Humbert1318
40Tommy Paul1317
41Álex de Miñaur1316
42Sebastian Korda1286
43Albert Ramos Viñolas1259
44David Goffin1216
45Kei Nishikori1210
46Ilya Ivashka1194
47Arthur Rinderknech1178
48James Duckworth1166
49Alejandro Davidovich Fokina1160
50Laslo Djere1156
\n", + "
" + ], + "text/plain": [ + " Names Points\n", + "0 Novak Djokovic 11015\n", + "1 Daniil Medvedev 8935\n", + "2 Alexander Zverev 7970\n", + "3 Stefanos Tsitsipas 6540\n", + "4 Rafael Nadal 4875\n", + "5 Andrey Rublev 4785\n", + "6 Matteo Berrettini 4568\n", + "7 Casper Ruud 4155\n", + "8 Felix Auger Aliassime 3608\n", + "9 Jannik Sinner 3390\n", + "10 Hubert Hurkacz 3336\n", + "11 Cameron Norrie 2900\n", + "12 Diego Sebastian Schwartzman 2730\n", + "13 Denis Shapovalov 2593\n", + "14 Aslan Karatsev 2553\n", + "15 Dominic Thiem 2410\n", + "16 Roger Federer 2385\n", + "17 Roberto Bautista Agut 2385\n", + "18 Cristian Garin 2375\n", + "19 Gael Monfils 2373\n", + "20 Pablo Carreño-Busta 2305\n", + "21 Taylor Harry Fritz 2175\n", + "22 Nikoloz Basilashvili 2051\n", + "23 Daniel Evans 1957\n", + "24 John Isner 1881\n", + "25 Lorenzo Sonego 1860\n", + "26 Marin Cilic 1840\n", + "27 Grigor Dimitrov 1821\n", + "28 Reilly Opelka 1776\n", + "29 Karen Khachanov 1748\n", + "30 Carlos Alcaraz 1609\n", + "31 Fabio Fognini 1494\n", + "32 Lloyd George Muirhead Harris 1473\n", + "33 Frances Tiafoe 1467\n", + "34 Marton Fucsovics 1457\n", + "35 Filip Krajinovic 1427\n", + "36 Alexander Bublik 1411\n", + "37 Federico Delbonis 1347\n", + "38 Dusan Lajovic 1346\n", + "39 Ugo Humbert 1318\n", + "40 Tommy Paul 1317\n", + "41 Álex de Miñaur 1316\n", + "42 Sebastian Korda 1286\n", + "43 Albert Ramos Viñolas 1259\n", + "44 David Goffin 1216\n", + "45 Kei Nishikori 1210\n", + "46 Ilya Ivashka 1194\n", + "47 Arthur Rinderknech 1178\n", + "48 James Duckworth 1166\n", + "49 Alejandro Davidovich Fokina 1160\n", + "50 Laslo Djere 1156" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Ranking.head(51)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "9aae7eb7", + "metadata": {}, + "outputs": [], + "source": [ + "Ranking.head(51).to_csv('Ranking.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "7dc4548e", + 
"metadata": { + "scrolled": true + }, + "source": [ + "## Ranking WTA" + ] + }, + { + "cell_type": "markdown", + "id": "85d5372a", + "metadata": {}, + "source": [ + "### Lo que haremos será obtener el ranking de las 50 mejores tenistas femeninas.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "e349d333", + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://www.marca.com/tenis/clasificacion-wta.html'" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c8dc223d", + "metadata": {}, + "outputs": [], + "source": [ + "html = requests.get(url).content\n", + "soup = BeautifulSoup(html, 'lxml')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "2e3b0890", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Ashleigh Barty,\n", + " Aryna Sabalenka,\n", + " Barbora Krejcikova,\n", + " Karolina Pliskova,\n", + " Paula Badosa Gibert,\n", + " Garbiñe Muguruza Blanco,\n", + " Maria Sakkari,\n", + " Anett Kontaveit,\n", + " Iga Swiatek,\n", + " Ons Jabeur,\n", + " Elena Rybakina,\n", + " Emma Raducanu,\n", + " Anastasia Pavlyuchenkova,\n", + " Elina Svitolina,\n", + " Cori Gauff,\n", + " Belinda Bencic,\n", + " Daria Kasatkina,\n", + " Angelique Kerber,\n", + " Leylah Annie Fernandez,\n", + " Victoria Azarenka,\n", + " Jessica Pegula,\n", + " Petra Kvitova,\n", + " Jelena Ostapenko,\n", + " Tamara Zidansek,\n", + " Simona Halep,\n", + " Elise Mertens,\n", + " Danielle Collins,\n", + " Jil Belen Teichmann,\n", + " Veronika Kudermetova,\n", + " Camila Giorgi,\n", + " Sara Sorribes Tormo,\n", + " Ludmila Samsonova,\n", + " Viktorija Golubic,\n", + " Clara Tauson,\n", + " Sorana Cirstea,\n", + " Shelby Rogers,\n", + " Ajla Tomljanovic,\n", + " Bianca Andreescu,\n", + " Yulia Putintseva,\n", + " Tereza Martincova,\n", + " Ekaterina Alexandrova,\n", + " Katerina Siniakova,\n", + " Marketa Vondrousova,\n", + " Anhelina Kalinina,\n", + " Jasmine Paolini,\n", + " Maria Camila Osorio 
Serrano,\n", + " Alison Van Uytvanck,\n", + " Magda Linette,\n", + " Amanda Anisimova,\n", + " Alison Riske,\n", + " Nuria Parrizas Diaz,\n", + " Marta Kostyuk,\n", + " Ana Konjuh,\n", + " Petra Martic,\n", + " Madison Keys,\n", + " Irina-Camelia Begu,\n", + " Sloane Stephens,\n", + " Madison Brengle,\n", + " Ann Li,\n", + " Elena-Gabriela Ruse,\n", + " Alize Cornet,\n", + " Arantxa Rus,\n", + " Anastasija Sevastova,\n", + " Karolina Muchova,\n", + " Jaqueline Cristian,\n", + " Andrea Petkovic,\n", + " Mayar Sherif,\n", + " Maryna Zanevska,\n", + " Caroline Garcia,\n", + " Misaki Doi,\n", + " Aliaksandra Sasnovich,\n", + " Anastasia Potapova,\n", + " Clara Burel,\n", + " Marie Bouzkova,\n", + " Beatriz Haddad Maia,\n", + " Rebecca Peterson,\n", + " Varvara Gracheva,\n", + " Vera Zvonareva,\n", + " Oceane Dodin,\n", + " Anna Bondar,\n", + " Shuai Zhang,\n", + " Claire Liu,\n", + " Danka Kovinic,\n", + " Saisai Zheng,\n", + " Astra Sharma,\n", + " Panna Udvardy,\n", + " Kristina Kucova,\n", + " Lauren Davis,\n", + " Qinwen Zheng,\n", + " Sofia Kenin,\n", + " Anna Karolina Schmiedlova,\n", + " Greet Minnen,\n", + " Naomi Osaka,\n", + " Xinyu Wang,\n", + " Heather Watson,\n", + " Kristina Mladenovic,\n", + " Magdalena Frech,\n", + " Bernarda Pera,\n", + " Martina Trevisan,\n", + " Kaja Juvan]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sopa = soup.select('span.ue-table-ranking__name')\n", + "sopa" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "2c7e3428", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Ashleigh Barty',\n", + " 'Aryna Sabalenka',\n", + " 'Barbora Krejcikova',\n", + " 'Karolina Pliskova',\n", + " 'Paula Badosa Gibert',\n", + " 'Garbiñe Muguruza Blanco',\n", + " 'Maria Sakkari',\n", + " 'Anett Kontaveit',\n", + " 'Iga Swiatek',\n", + " 'Ons Jabeur',\n", + " 'Elena Rybakina',\n", + " 'Emma Raducanu',\n", + " 'Anastasia Pavlyuchenkova',\n", + " 
'Elina Svitolina',\n", + " 'Cori Gauff',\n", + " 'Belinda Bencic',\n", + " 'Daria Kasatkina',\n", + " 'Angelique Kerber',\n", + " 'Leylah Annie Fernandez',\n", + " 'Victoria Azarenka',\n", + " 'Jessica Pegula',\n", + " 'Petra Kvitova',\n", + " 'Jelena Ostapenko',\n", + " 'Tamara Zidansek',\n", + " 'Simona Halep',\n", + " 'Elise Mertens',\n", + " 'Danielle Collins',\n", + " 'Jil Belen Teichmann',\n", + " 'Veronika Kudermetova',\n", + " 'Camila Giorgi',\n", + " 'Sara Sorribes Tormo',\n", + " 'Ludmila Samsonova',\n", + " 'Viktorija Golubic',\n", + " 'Clara Tauson',\n", + " 'Sorana Cirstea',\n", + " 'Shelby Rogers',\n", + " 'Ajla Tomljanovic',\n", + " 'Bianca Andreescu',\n", + " 'Yulia Putintseva',\n", + " 'Tereza Martincova',\n", + " 'Ekaterina Alexandrova',\n", + " 'Katerina Siniakova',\n", + " 'Marketa Vondrousova',\n", + " 'Anhelina Kalinina',\n", + " 'Jasmine Paolini',\n", + " 'Maria Camila Osorio Serrano',\n", + " 'Alison Van Uytvanck',\n", + " 'Magda Linette',\n", + " 'Amanda Anisimova',\n", + " 'Alison Riske',\n", + " 'Nuria Parrizas Diaz',\n", + " 'Marta Kostyuk',\n", + " 'Ana Konjuh',\n", + " 'Petra Martic',\n", + " 'Madison Keys',\n", + " 'Irina-Camelia Begu',\n", + " 'Sloane Stephens',\n", + " 'Madison Brengle',\n", + " 'Ann Li',\n", + " 'Elena-Gabriela Ruse',\n", + " 'Alize Cornet',\n", + " 'Arantxa Rus',\n", + " 'Anastasija Sevastova',\n", + " 'Karolina Muchova',\n", + " 'Jaqueline Cristian',\n", + " 'Andrea Petkovic',\n", + " 'Mayar Sherif',\n", + " 'Maryna Zanevska',\n", + " 'Caroline Garcia',\n", + " 'Misaki Doi',\n", + " 'Aliaksandra Sasnovich',\n", + " 'Anastasia Potapova',\n", + " 'Clara Burel',\n", + " 'Marie Bouzkova',\n", + " 'Beatriz Haddad Maia',\n", + " 'Rebecca Peterson',\n", + " 'Varvara Gracheva',\n", + " 'Vera Zvonareva',\n", + " 'Oceane Dodin',\n", + " 'Anna Bondar',\n", + " 'Shuai Zhang',\n", + " 'Claire Liu',\n", + " 'Danka Kovinic',\n", + " 'Saisai Zheng',\n", + " 'Astra Sharma',\n", + " 'Panna Udvardy',\n", + " 'Kristina Kucova',\n", 
+ " 'Lauren Davis',\n", + " 'Qinwen Zheng',\n", + " 'Sofia Kenin',\n", + " 'Anna Karolina Schmiedlova',\n", + " 'Greet Minnen',\n", + " 'Naomi Osaka',\n", + " 'Xinyu Wang',\n", + " 'Heather Watson',\n", + " 'Kristina Mladenovic',\n", + " 'Magdalena Frech',\n", + " 'Bernarda Pera',\n", + " 'Martina Trevisan',\n", + " 'Kaja Juvan']" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Nombres = [element.text for element in sopa]\n", + "Nombres" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "2750f1ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[6341,\n", + " 5468,\n", + " 5173,\n", + " 4452,\n", + " 4199,\n", + " 4136,\n", + " 3901,\n", + " 3811,\n", + " 3686,\n", + " 3125,\n", + " 2645,\n", + " 2604,\n", + " 2548,\n", + " 2471,\n", + " 2425,\n", + " 2355,\n", + " 2330,\n", + " 2287,\n", + " 2249,\n", + " 2226,\n", + " 2114,\n", + " 2110,\n", + " 2035,\n", + " 1890,\n", + " 1887,\n", + " 1861,\n", + " 1851,\n", + " 1645,\n", + " 1635,\n", + " 1632,\n", + " 1588,\n", + " 1477,\n", + " 1442,\n", + " 1419,\n", + " 1382,\n", + " 1353,\n", + " 1335,\n", + " 1308,\n", + " 1280,\n", + " 1248,\n", + " 1236,\n", + " 1220,\n", + " 1217,\n", + " 1168,\n", + " 1153,\n", + " 1147,\n", + " 1143,\n", + " 1142,\n", + " 1120,\n", + " 1101,\n", + " 1083,\n", + " 1075,\n", + " 1043,\n", + " 1037,\n", + " 1033,\n", + " 1033,\n", + " 1023,\n", + " 1002,\n", + " 999,\n", + " 991,\n", + " 990,\n", + " 958,\n", + " 956,\n", + " 954,\n", + " 953,\n", + " 949,\n", + " 949,\n", + " 932,\n", + " 930,\n", + " 926,\n", + " 923,\n", + " 895,\n", + " 884,\n", + " 863,\n", + " 853,\n", + " 851,\n", + " 847,\n", + " 846,\n", + " 846,\n", + " 840,\n", + " 826,\n", + " 818,\n", + " 814,\n", + " 808,\n", + " 806,\n", + " 801,\n", + " 799,\n", + " 792,\n", + " 778,\n", + " 772,\n", + " 772,\n", + " 767,\n", + " 766,\n", + " 766,\n", + " 759,\n", + " 740,\n", + " 734,\n", + " 728,\n", + " 713,\n", 
+ " 698]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sopa2 = soup.select('td.ue-table-ranking__td.is-marked')\n", + "sopa2" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "306e92a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['6341',\n", + " '5468',\n", + " '5173',\n", + " '4452',\n", + " '4199',\n", + " '4136',\n", + " '3901',\n", + " '3811',\n", + " '3686',\n", + " '3125',\n", + " '2645',\n", + " '2604',\n", + " '2548',\n", + " '2471',\n", + " '2425',\n", + " '2355',\n", + " '2330',\n", + " '2287',\n", + " '2249',\n", + " '2226',\n", + " '2114',\n", + " '2110',\n", + " '2035',\n", + " '1890',\n", + " '1887',\n", + " '1861',\n", + " '1851',\n", + " '1645',\n", + " '1635',\n", + " '1632',\n", + " '1588',\n", + " '1477',\n", + " '1442',\n", + " '1419',\n", + " '1382',\n", + " '1353',\n", + " '1335',\n", + " '1308',\n", + " '1280',\n", + " '1248',\n", + " '1236',\n", + " '1220',\n", + " '1217',\n", + " '1168',\n", + " '1153',\n", + " '1147',\n", + " '1143',\n", + " '1142',\n", + " '1120',\n", + " '1101',\n", + " '1083',\n", + " '1075',\n", + " '1043',\n", + " '1037',\n", + " '1033',\n", + " '1033',\n", + " '1023',\n", + " '1002',\n", + " '999',\n", + " '991',\n", + " '990',\n", + " '958',\n", + " '956',\n", + " '954',\n", + " '953',\n", + " '949',\n", + " '949',\n", + " '932',\n", + " '930',\n", + " '926',\n", + " '923',\n", + " '895',\n", + " '884',\n", + " '863',\n", + " '853',\n", + " '851',\n", + " '847',\n", + " '846',\n", + " '846',\n", + " '840',\n", + " '826',\n", + " '818',\n", + " '814',\n", + " '808',\n", + " '806',\n", + " '801',\n", + " '799',\n", + " '792',\n", + " '778',\n", + " '772',\n", + " '772',\n", + " '767',\n", + " '766',\n", + " '766',\n", + " '759',\n", + " '740',\n", + " '734',\n", + " '728',\n", + " '713',\n", + " '698']" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "Points = [element.text for element in sopa2]\n", + "Points" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "ee0b7234", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamesPoints
0Ashleigh Barty6341
1Aryna Sabalenka5468
2Barbora Krejcikova5173
3Karolina Pliskova4452
4Paula Badosa Gibert4199
.........
95Kristina Mladenovic740
96Magdalena Frech734
97Bernarda Pera728
98Martina Trevisan713
99Kaja Juvan698
\n", + "

100 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Names Points\n", + "0 Ashleigh Barty 6341\n", + "1 Aryna Sabalenka 5468\n", + "2 Barbora Krejcikova 5173\n", + "3 Karolina Pliskova 4452\n", + "4 Paula Badosa Gibert 4199\n", + ".. ... ...\n", + "95 Kristina Mladenovic 740\n", + "96 Magdalena Frech 734\n", + "97 Bernarda Pera 728\n", + "98 Martina Trevisan 713\n", + "99 Kaja Juvan 698\n", + "\n", + "[100 rows x 2 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Ranking_WTA = pd.DataFrame(zip(Nombres, Points), columns=['Names', 'Points'])\n", + "Ranking_WTA" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "979f503f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NamesPoints
0Ashleigh Barty6341
1Aryna Sabalenka5468
2Barbora Krejcikova5173
3Karolina Pliskova4452
4Paula Badosa Gibert4199
5Garbiñe Muguruza Blanco4136
6Maria Sakkari3901
7Anett Kontaveit3811
8Iga Swiatek3686
9Ons Jabeur3125
10Elena Rybakina2645
11Emma Raducanu2604
12Anastasia Pavlyuchenkova2548
13Elina Svitolina2471
14Cori Gauff2425
15Belinda Bencic2355
16Daria Kasatkina2330
17Angelique Kerber2287
18Leylah Annie Fernandez2249
19Victoria Azarenka2226
20Jessica Pegula2114
21Petra Kvitova2110
22Jelena Ostapenko2035
23Tamara Zidansek1890
24Simona Halep1887
25Elise Mertens1861
26Danielle Collins1851
27Jil Belen Teichmann1645
28Veronika Kudermetova1635
29Camila Giorgi1632
30Sara Sorribes Tormo1588
31Ludmila Samsonova1477
32Viktorija Golubic1442
33Clara Tauson1419
34Sorana Cirstea1382
35Shelby Rogers1353
36Ajla Tomljanovic1335
37Bianca Andreescu1308
38Yulia Putintseva1280
39Tereza Martincova1248
40Ekaterina Alexandrova1236
41Katerina Siniakova1220
42Marketa Vondrousova1217
43Anhelina Kalinina1168
44Jasmine Paolini1153
45Maria Camila Osorio Serrano1147
46Alison Van Uytvanck1143
47Magda Linette1142
48Amanda Anisimova1120
49Alison Riske1101
50Nuria Parrizas Diaz1083
\n", + "
" + ], + "text/plain": [ + " Names Points\n", + "0 Ashleigh Barty 6341\n", + "1 Aryna Sabalenka 5468\n", + "2 Barbora Krejcikova 5173\n", + "3 Karolina Pliskova 4452\n", + "4 Paula Badosa Gibert 4199\n", + "5 Garbiñe Muguruza Blanco 4136\n", + "6 Maria Sakkari 3901\n", + "7 Anett Kontaveit 3811\n", + "8 Iga Swiatek 3686\n", + "9 Ons Jabeur 3125\n", + "10 Elena Rybakina 2645\n", + "11 Emma Raducanu 2604\n", + "12 Anastasia Pavlyuchenkova 2548\n", + "13 Elina Svitolina 2471\n", + "14 Cori Gauff 2425\n", + "15 Belinda Bencic 2355\n", + "16 Daria Kasatkina 2330\n", + "17 Angelique Kerber 2287\n", + "18 Leylah Annie Fernandez 2249\n", + "19 Victoria Azarenka 2226\n", + "20 Jessica Pegula 2114\n", + "21 Petra Kvitova 2110\n", + "22 Jelena Ostapenko 2035\n", + "23 Tamara Zidansek 1890\n", + "24 Simona Halep 1887\n", + "25 Elise Mertens 1861\n", + "26 Danielle Collins 1851\n", + "27 Jil Belen Teichmann 1645\n", + "28 Veronika Kudermetova 1635\n", + "29 Camila Giorgi 1632\n", + "30 Sara Sorribes Tormo 1588\n", + "31 Ludmila Samsonova 1477\n", + "32 Viktorija Golubic 1442\n", + "33 Clara Tauson 1419\n", + "34 Sorana Cirstea 1382\n", + "35 Shelby Rogers 1353\n", + "36 Ajla Tomljanovic 1335\n", + "37 Bianca Andreescu 1308\n", + "38 Yulia Putintseva 1280\n", + "39 Tereza Martincova 1248\n", + "40 Ekaterina Alexandrova 1236\n", + "41 Katerina Siniakova 1220\n", + "42 Marketa Vondrousova 1217\n", + "43 Anhelina Kalinina 1168\n", + "44 Jasmine Paolini 1153\n", + "45 Maria Camila Osorio Serrano 1147\n", + "46 Alison Van Uytvanck 1143\n", + "47 Magda Linette 1142\n", + "48 Amanda Anisimova 1120\n", + "49 Alison Riske 1101\n", + "50 Nuria Parrizas Diaz 1083" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Ranking_WTA.head(51)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ec4856c", + "metadata": {}, + "outputs": [], + "source": [ + "Ranking_WTA.head(51).to_csv('Ranking_WTA.csv')" + ] + }, + { + 
"cell_type": "markdown", + "id": "01ac6923", + "metadata": {}, + "source": [ + "# Proyecto API's" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "d2bd7c88", + "metadata": {}, + "outputs": [], + "source": [ + "response = requests.get('https://officeapi.dev/api/characters')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "16fee34c", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': [{'_id': '5e93b4a43af44260882e33b0',\n", + " 'firstname': 'Michael',\n", + " 'lastname': 'Scott',\n", + " '__v': 0},\n", + " {'_id': '5e93b4f03af44260882e33b1',\n", + " 'firstname': 'Jim',\n", + " 'lastname': 'Halpert',\n", + " '__v': 0},\n", + " {'_id': '5e93b4fa3af44260882e33b2',\n", + " 'firstname': 'Dwight',\n", + " 'lastname': 'Schrute',\n", + " '__v': 0},\n", + " {'_id': '5e93b50a3af44260882e33b3',\n", + " 'firstname': 'Pam',\n", + " 'lastname': 'Beesly',\n", + " '__v': 0},\n", + " {'_id': '5e93b5183af44260882e33b4',\n", + " 'firstname': 'Ryan',\n", + " 'lastname': 'Howard',\n", + " '__v': 0},\n", + " {'_id': '5e93b51e3af44260882e33b5',\n", + " 'firstname': 'Kelly',\n", + " 'lastname': 'Kapoor',\n", + " '__v': 0},\n", + " {'_id': '5e93b52b3af44260882e33b6',\n", + " 'firstname': 'Angela',\n", + " 'lastname': 'Martin',\n", + " '__v': 0},\n", + " {'_id': '5e93b5323af44260882e33b7',\n", + " 'firstname': 'Kevin',\n", + " 'lastname': 'Malone',\n", + " '__v': 0},\n", + " {'_id': '5e93b53b3af44260882e33b8',\n", + " 'firstname': 'Oscar',\n", + " 'lastname': 'Martinez',\n", + " '__v': 0},\n", + " {'_id': '5e93b5453af44260882e33b9',\n", + " 'firstname': 'Andy',\n", + " 'lastname': 'Bernard',\n", + " '__v': 0},\n", + " {'_id': '5e93b54d3af44260882e33ba',\n", + " 'firstname': 'Stanley',\n", + " 'lastname': 'Hudson',\n", + " '__v': 0},\n", + " {'_id': '5e93b5583af44260882e33bb',\n", + " 'firstname': 'Phyllis',\n", + " 'lastname': 'Lapin',\n", + " '__v': 0},\n", + " {'_id': 
'5e93b55f3af44260882e33bc',\n", + " 'firstname': 'Toby',\n", + " 'lastname': 'Flenderson',\n", + " '__v': 0},\n", + " {'_id': '5e93b5653af44260882e33bd',\n", + " 'firstname': 'Erin',\n", + " 'lastname': 'Hannon',\n", + " '__v': 0},\n", + " {'_id': '5e93b56d3af44260882e33be',\n", + " 'firstname': 'Gabe',\n", + " 'lastname': 'Lewis',\n", + " '__v': 0},\n", + " {'_id': '5e93b5783af44260882e33bf',\n", + " 'firstname': 'Darryl',\n", + " 'lastname': 'Philbin',\n", + " '__v': 0},\n", + " {'_id': '5e93b5813af44260882e33c0',\n", + " 'firstname': 'Creed',\n", + " 'lastname': 'Bratton',\n", + " '__v': 0},\n", + " {'_id': '5e93b58b3af44260882e33c1',\n", + " 'firstname': 'Jo',\n", + " 'lastname': 'Bennett',\n", + " '__v': 0},\n", + " {'_id': '5e93b5913af44260882e33c2',\n", + " 'firstname': 'Holly',\n", + " 'lastname': 'Flax',\n", + " '__v': 0},\n", + " {'_id': '5e93b5983af44260882e33c3',\n", + " 'firstname': 'Jan',\n", + " 'lastname': 'Levinson',\n", + " '__v': 0},\n", + " {'_id': '5e93b5a13af44260882e33c4',\n", + " 'firstname': 'Todd',\n", + " 'lastname': 'Packer',\n", + " '__v': 0},\n", + " {'_id': '5e93b5a73af44260882e33c5',\n", + " 'firstname': 'Charles',\n", + " 'lastname': 'Minor',\n", + " '__v': 0},\n", + " {'_id': '5e93b5ad3af44260882e33c6',\n", + " 'firstname': 'Deangelo',\n", + " 'lastname': 'Vickers',\n", + " '__v': 0},\n", + " {'_id': '5e93b5b63af44260882e33c7',\n", + " 'firstname': 'Josh',\n", + " 'lastname': 'Porter',\n", + " '__v': 0},\n", + " {'_id': '5e93b5bb3af44260882e33c8',\n", + " 'firstname': 'Ed',\n", + " 'lastname': 'Truck',\n", + " '__v': 0},\n", + " {'_id': '5e93b5c53af44260882e33c9',\n", + " 'firstname': 'Hunter',\n", + " 'lastname': 'null',\n", + " '__v': 0},\n", + " {'_id': '5e93b5d13af44260882e33ca',\n", + " 'firstname': 'David',\n", + " 'lastname': 'Wallace',\n", + " '__v': 0}]}" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "response.json()\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 55, + "id": "379af13e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
data
0{'_id': '5e93b4a43af44260882e33b0', 'firstname...
1{'_id': '5e93b4f03af44260882e33b1', 'firstname...
2{'_id': '5e93b4fa3af44260882e33b2', 'firstname...
3{'_id': '5e93b50a3af44260882e33b3', 'firstname...
4{'_id': '5e93b5183af44260882e33b4', 'firstname...
\n", + "
" + ], + "text/plain": [ + " data\n", + "0 {'_id': '5e93b4a43af44260882e33b0', 'firstname...\n", + "1 {'_id': '5e93b4f03af44260882e33b1', 'firstname...\n", + "2 {'_id': '5e93b4fa3af44260882e33b2', 'firstname...\n", + "3 {'_id': '5e93b50a3af44260882e33b3', 'firstname...\n", + "4 {'_id': '5e93b5183af44260882e33b4', 'firstname..." + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "the_office = pd.DataFrame(response.json())\n", + "the_office.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "60525968", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idfirstnamelastname__v
05e93b4a43af44260882e33b0MichaelScott0
15e93b4f03af44260882e33b1JimHalpert0
25e93b4fa3af44260882e33b2DwightSchrute0
35e93b50a3af44260882e33b3PamBeesly0
45e93b5183af44260882e33b4RyanHoward0
\n", + "
" + ], + "text/plain": [ + " _id firstname lastname __v\n", + "0 5e93b4a43af44260882e33b0 Michael Scott 0\n", + "1 5e93b4f03af44260882e33b1 Jim Halpert 0\n", + "2 5e93b4fa3af44260882e33b2 Dwight Schrute 0\n", + "3 5e93b50a3af44260882e33b3 Pam Beesly 0\n", + "4 5e93b5183af44260882e33b4 Ryan Howard 0" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "the_office1 = pd.json_normalize(the_office['data'])\n", + "the_office1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "c2b5a3b0", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
firstnamelastname
0MichaelScott
1JimHalpert
2DwightSchrute
3PamBeesly
4RyanHoward
5KellyKapoor
6AngelaMartin
7KevinMalone
8OscarMartinez
9AndyBernard
10StanleyHudson
11PhyllisLapin
12TobyFlenderson
13ErinHannon
14GabeLewis
15DarrylPhilbin
16CreedBratton
17JoBennett
18HollyFlax
19JanLevinson
20ToddPacker
21CharlesMinor
22DeangeloVickers
23JoshPorter
24EdTruck
25Hunternull
26DavidWallace
\n", + "
" + ], + "text/plain": [ + " firstname lastname\n", + "0 Michael Scott\n", + "1 Jim Halpert\n", + "2 Dwight Schrute\n", + "3 Pam Beesly\n", + "4 Ryan Howard\n", + "5 Kelly Kapoor\n", + "6 Angela Martin\n", + "7 Kevin Malone\n", + "8 Oscar Martinez\n", + "9 Andy Bernard\n", + "10 Stanley Hudson\n", + "11 Phyllis Lapin\n", + "12 Toby Flenderson\n", + "13 Erin Hannon\n", + "14 Gabe Lewis\n", + "15 Darryl Philbin\n", + "16 Creed Bratton\n", + "17 Jo Bennett\n", + "18 Holly Flax\n", + "19 Jan Levinson\n", + "20 Todd Packer\n", + "21 Charles Minor\n", + "22 Deangelo Vickers\n", + "23 Josh Porter\n", + "24 Ed Truck\n", + "25 Hunter null\n", + "26 David Wallace" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Characters = the_office1[['firstname', 'lastname']]\n", + "Characters" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "6b553ff5", + "metadata": {}, + "outputs": [], + "source": [ + "Characters.to_csv('Characters.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "1c31f18e", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Michael',\n", + " 'Jim',\n", + " 'Dwight',\n", + " 'Pam',\n", + " 'Ryan',\n", + " 'Kelly',\n", + " 'Angela',\n", + " 'Kevin',\n", + " 'Oscar',\n", + " 'Andy',\n", + " 'Stanley',\n", + " 'Phyllis',\n", + " 'Toby',\n", + " 'Erin',\n", + " 'Gabe',\n", + " 'Darryl',\n", + " 'Creed',\n", + " 'Jo',\n", + " 'Holly',\n", + " 'Jan',\n", + " 'Todd',\n", + " 'Charles',\n", + " 'Deangelo',\n", + " 'Josh',\n", + " 'Ed',\n", + " 'Hunter',\n", + " 'David']" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Nombres = list(the_office1['firstname'])\n", + "Nombres" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "53fd690d", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Scott',\n", + " 
'Halpert',\n", + " 'Schrute',\n", + " 'Beesly',\n", + " 'Howard',\n", + " 'Kapoor',\n", + " 'Martin',\n", + " 'Malone',\n", + " 'Martinez',\n", + " 'Bernard',\n", + " 'Hudson',\n", + " 'Lapin',\n", + " 'Flenderson',\n", + " 'Hannon',\n", + " 'Lewis',\n", + " 'Philbin',\n", + " 'Bratton',\n", + " 'Bennett',\n", + " 'Flax',\n", + " 'Levinson',\n", + " 'Packer',\n", + " 'Minor',\n", + " 'Vickers',\n", + " 'Porter',\n", + " 'Truck',\n", + " 'null',\n", + " 'Wallace']" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Apellidos = list(the_office1['lastname'])\n", + "Apellidos" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "682bf33b", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Michael', 'Scott'),\n", + " ('Jim', 'Halpert'),\n", + " ('Dwight', 'Schrute'),\n", + " ('Pam', 'Beesly'),\n", + " ('Ryan', 'Howard'),\n", + " ('Kelly', 'Kapoor'),\n", + " ('Angela', 'Martin'),\n", + " ('Kevin', 'Malone'),\n", + " ('Oscar', 'Martinez'),\n", + " ('Andy', 'Bernard'),\n", + " ('Stanley', 'Hudson'),\n", + " ('Phyllis', 'Lapin'),\n", + " ('Toby', 'Flenderson'),\n", + " ('Erin', 'Hannon'),\n", + " ('Gabe', 'Lewis'),\n", + " ('Darryl', 'Philbin'),\n", + " ('Creed', 'Bratton'),\n", + " ('Jo', 'Bennett'),\n", + " ('Holly', 'Flax'),\n", + " ('Jan', 'Levinson'),\n", + " ('Todd', 'Packer'),\n", + " ('Charles', 'Minor'),\n", + " ('Deangelo', 'Vickers'),\n", + " ('Josh', 'Porter'),\n", + " ('Ed', 'Truck'),\n", + " ('Hunter', 'null'),\n", + " ('David', 'Wallace')]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Nombres_completos = list(zip(Nombres, Apellidos))\n", + "Nombres_completos" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "e23dc07c", + "metadata": {}, + "outputs": [], + "source": [ + "np.savetxt(\"Nombres_completos.csv\", Nombres_completos, fmt='% s')" + ] + }, + { + 
"cell_type": "code", + "execution_count": 58, + "id": "31322cd1", + "metadata": {}, + "outputs": [], + "source": [ + "episodios = requests.get('https://officeapi.dev/api/episodes')" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "5865fd15", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': [{'_id': '5e94d646f733a1332868e1dc',\n", + " 'title': 'Pilot',\n", + " 'description': 'A documentary crew gives a firsthand introduction to the staff of the Scranton branch of the Dunder Mifflin Paper Company, managed by Michael Scott.',\n", + " 'writer': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523649511994a07f9a313',\n", + " 'name': 'Ken Kwapis',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-03-24T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5e94d6a0f733a1332868e1dd',\n", + " 'title': 'Diversity Day',\n", + " 'description': \"After the corporate office discovers that Michael has been making insensitive remarks to his co-workers, they send a sensitivity trainer to the Scranton branch, but when Michael doesn't agree with everything the trainer says, he sets up his own sensitivity workshop.\",\n", + " 'writer': {'_id': '5e9523bc9511994a07f9a314',\n", + " 'name': 'B.J. 
Novak',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523649511994a07f9a313',\n", + " 'name': 'Ken Kwapis',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-03-29T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8f70a88792d4b2bee2ead',\n", + " 'title': 'Health Care',\n", + " 'description': 'Dwight ends up in charge of picking a new health care plan.',\n", + " 'writer': {'_id': '5e9523d69511994a07f9a315',\n", + " 'name': 'Paul Lieberstein',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523ef9511994a07f9a316',\n", + " 'name': 'Ken Whittingham',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-04-05T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8f75188792d4b2bee2eae',\n", + " 'title': 'The Alliance',\n", + " 'description': 'With Dwight worried about downsizing, Jim agrees to an alliance; Michael tries to increase moral be throwing a birthday party for Meredith.',\n", + " 'writer': {'_id': '5e9524139511994a07f9a317',\n", + " 'name': 'Michael Schur',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95241e9511994a07f9a318',\n", + " 'name': 'Bryan Gordon',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-04-12T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8f76d88792d4b2bee2eaf',\n", + " 'title': 'Basketball',\n", + " 'description': 'Michael challenges the warehouse staff to a basketball game versus the office staff.',\n", + " 'writer': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-04-19T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8f8a1e9f68d4b6f809c48',\n", + " 'title': 'Hot Girl',\n", + " 'description': 'Several male staff 
members vie for the attention of an attractive purse sales woman.',\n", + " 'writer': {'_id': '5e95245e9511994a07f9a31a',\n", + " 'name': 'Mindy Kaling',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95246d9511994a07f9a31b',\n", + " 'name': 'Amy Heckerling',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-04-26T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fb2cddecdf4bb7e9181f',\n", + " 'title': 'The Dundies',\n", + " 'description': 'Michael hosts the annual Dundies awards.',\n", + " 'writer': {'_id': '5e95245e9511994a07f9a31a',\n", + " 'name': 'Mindy Kaling',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-09-20T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fbf9ddecdf4bb7e91820',\n", + " 'title': 'Sexual Harassment',\n", + " 'description': \"Michael's continual sending of explicit e-mail forwards forces Dunder Mifflin to review it's sexual harassment policy; Pam's mom makes a visit to the office.\",\n", + " 'writer': {'_id': '5e9523bc9511994a07f9a314',\n", + " 'name': 'B.J. 
Novak',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523649511994a07f9a313',\n", + " 'name': 'Ken Kwapis',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-09-27T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fc28ddecdf4bb7e91821',\n", + " 'title': 'Office Olympics',\n", + " 'description': 'With Michael and Dwight out of the house, Jim organizes an office Olympics.',\n", + " 'writer': {'_id': '5e9524139511994a07f9a317',\n", + " 'name': 'Michael Schur',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9524809511994a07f9a31c',\n", + " 'name': 'Paul Feig',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-10-04T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fc87ddecdf4bb7e91822',\n", + " 'title': 'The Fire',\n", + " 'description': 'A fire starts in the office kitchen.',\n", + " 'writer': {'_id': '5e9523bc9511994a07f9a314',\n", + " 'name': 'B.J. Novak',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523649511994a07f9a313',\n", + " 'name': 'Ken Kwapis',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-10-11T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fcf4ddecdf4bb7e91823',\n", + " 'title': 'Halloween',\n", + " 'description': 'Michael is pressured by corporate to fire someone, which puts a damper on the office Halloween party.',\n", + " 'writer': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9524809511994a07f9a31c',\n", + " 'name': 'Paul Feig',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-10-18T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fd39ddecdf4bb7e91824',\n", + " 'title': 'The Fight',\n", + " 'description': \"Jim rallies the staff together to witness a showdown between Michael and Dwight at Dwight's martial arts 
school.\",\n", + " 'writer': {'_id': '5e9524949511994a07f9a31d',\n", + " 'name': 'Gene Stupnitsky',\n", + " 'role': 'Writer',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523649511994a07f9a313',\n", + " 'name': 'Ken Kwapis',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-11-01T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fd85ddecdf4bb7e91825',\n", + " 'title': 'The Client',\n", + " 'description': \"Jan helps Michael with an important pitch for a client; The staff discovers a screenplay in Michael's office.\",\n", + " 'writer': {'_id': '5e9523d69511994a07f9a315',\n", + " 'name': 'Paul Lieberstein',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-11-08T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fdc6ddecdf4bb7e91826',\n", + " 'title': 'Performance Review',\n", + " 'description': 'Instead of talking to his employees about their work, Michael uses their annual review time to gather feedback on his \"relationship\" with Jan.',\n", + " 'writer': {'_id': '5e9524d09511994a07f9a31f',\n", + " 'name': 'Larry Wilmore',\n", + " 'role': 'Writer',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9524809511994a07f9a31c',\n", + " 'name': 'Paul Feig',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-11-15T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fe0addecdf4bb7e91827',\n", + " 'title': 'E-Mail Surveillance',\n", + " 'description': 'Michael decides to set up e-mail surveillance for all company e-mail, and discovers that Jim has invited everyone over for a barbeque except him.',\n", + " 'writer': {'_id': '5e9524df9511994a07f9a320',\n", + " 'name': 'Jennifer Celotta',\n", + " 'role': 'Writer',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9524809511994a07f9a31c',\n", + " 'name': 'Paul Feig',\n", + " 'role': 'Director',\n", + 
" '__v': 0},\n", + " 'airDate': '2005-11-22T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fe4bddecdf4bb7e91828',\n", + " 'title': 'Christmas Party',\n", + " 'description': 'At the office Christmas party, Michael changes secret santa into a gift exchange.',\n", + " 'writer': {'_id': '5e9524139511994a07f9a317',\n", + " 'name': 'Michael Schur',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9524f49511994a07f9a321',\n", + " 'name': 'Charles McDougall',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2005-12-06T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fe89ddecdf4bb7e91829',\n", + " 'title': 'Booze Cruise',\n", + " 'description': 'The Dunder Mifflin staff go on a mandatory \"booze cruise\" to hear Michael give a motivational talk.',\n", + " 'writer': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523649511994a07f9a313',\n", + " 'name': 'Ken Kwapis',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-01-05T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8fef0ddecdf4bb7e9182a',\n", + " 'title': 'The Injury',\n", + " 'description': 'Michael\\'s \"injury\" from a George Foreman Grill distracts the staff from Dwight, the one with the real injury.',\n", + " 'writer': {'_id': '5e95245e9511994a07f9a31a',\n", + " 'name': 'Mindy Kaling',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95241e9511994a07f9a318',\n", + " 'name': 'Bryan Gordon',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-01-12T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8ff34ddecdf4bb7e9182b',\n", + " 'title': 'The Secret',\n", + " 'description': 'Jim is forced into spending time with Michael so that he will not reveal Jim\\'s feelings for Pam. 
Meanwhile, Oscar takes a \"sick day\" and Dwight investigates whether he is actually sick.',\n", + " 'writer': {'_id': '5e9524949511994a07f9a31d',\n", + " 'name': 'Gene Stupnitsky',\n", + " 'role': 'Writer',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9525109511994a07f9a322',\n", + " 'name': 'Dennie Gordon',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-01-19T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8ff73ddecdf4bb7e9182c',\n", + " 'title': 'The Carpet',\n", + " 'description': \"Michael's office becomes the victim of an odorous prank which subjects the office to his punishment.\",\n", + " 'writer': {'_id': '5e9523d69511994a07f9a315',\n", + " 'name': 'Paul Lieberstein',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9525459511994a07f9a323',\n", + " 'name': 'Victor Nelli, Jr',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-01-26T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8ffa6ddecdf4bb7e9182d',\n", + " 'title': 'Boys and Girls',\n", + " 'description': \"When Jan comes to the office for a women in the office session, Michael stages a men's only session.\",\n", + " 'writer': {'_id': '5e9523bc9511994a07f9a314',\n", + " 'name': 'B.J. 
Novak',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9525109511994a07f9a322',\n", + " 'name': 'Dennie Gordon',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-02-02T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea8ffd7ddecdf4bb7e9182e',\n", + " 'title': \"Valentine's Day\",\n", + " 'description': \"Michael has a big meeting in New York with Jan and the companies new CEO; Pam deals with all the Valentine's Day deliveries hoping Roy will remember to send her something.\",\n", + " 'writer': {'_id': '5e9524139511994a07f9a317',\n", + " 'name': 'Michael Schur',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-02-09T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea9000eddecdf4bb7e9182f',\n", + " 'title': \"Dwight's Speech\",\n", + " 'description': 'Michael coaches pompous Dwight on the finer art of public speaking after being named as Salesman of the Year.',\n", + " 'writer': {'_id': '5e9523d69511994a07f9a315',\n", + " 'name': 'Paul Lieberstein',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9524f49511994a07f9a321',\n", + " 'name': 'Charles McDougall',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-03-02T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea9005fddecdf4bb7e91830',\n", + " 'title': 'Take Your Daughter to Work Day',\n", + " 'description': 'Staff members bring their children to work. 
Michael is surprised when he strikes up a friendship with the five-year old daughter of his sworn enemy, Toby.',\n", + " 'writer': {'_id': '5e95245e9511994a07f9a31a',\n", + " 'name': 'Mindy Kaling',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9525459511994a07f9a323',\n", + " 'name': 'Victor Nelli, Jr',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-03-16T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea90087ddecdf4bb7e91831',\n", + " 'title': \"Michael's Birthday\",\n", + " 'description': \"Michael's birthday coincides with Kevin awaiting the news to see if he has skin cancer.\",\n", + " 'writer': {'_id': '5e9524949511994a07f9a31d',\n", + " 'name': 'Gene Stupnitsky',\n", + " 'role': 'Writer',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523ef9511994a07f9a316',\n", + " 'name': 'Ken Whittingham',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-03-30T06:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea900b4ddecdf4bb7e91832',\n", + " 'title': 'Drug Testing',\n", + " 'description': 'After Dwight finds a joint in the office park, he goes overboard with an investigation.',\n", + " 'writer': {'_id': '5e9524df9511994a07f9a320',\n", + " 'name': 'Jennifer Celotta',\n", + " 'role': 'Writer',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-04-27T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea900edddecdf4bb7e91833',\n", + " 'title': 'Conflict Resolution',\n", + " 'description': 'When Michael takes over conflict resolution duties from HR, chaos ensues at Dunder Mifflin.',\n", + " 'writer': {'_id': '5e95242f9511994a07f9a319',\n", + " 'name': 'Greg Daniels',\n", + " 'role': 'Writer/Director',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9524f49511994a07f9a321',\n", + " 'name': 'Charles McDougall',\n", + " 'role': 'Director',\n", + " 
'__v': 0},\n", + " 'airDate': '2006-05-04T05:00:00.000Z',\n", + " '__v': 0},\n", + " {'_id': '5ea90119ddecdf4bb7e91834',\n", + " 'title': 'Casino Night',\n", + " 'description': 'The Dunder Mifflin crew holds a casino party at the warehouse and Michael has two dates.',\n", + " 'writer': {'_id': '5e95258e9511994a07f9a324',\n", + " 'name': 'Steve Carell',\n", + " 'role': 'Writer/Actor',\n", + " '__v': 0},\n", + " 'director': {'_id': '5e9523649511994a07f9a313',\n", + " 'name': 'Ken Kwapis',\n", + " 'role': 'Director',\n", + " '__v': 0},\n", + " 'airDate': '2006-05-11T05:00:00.000Z',\n", + " '__v': 0}]}" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "episodios.json()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "297fd973", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
data
0{'_id': '5e94d646f733a1332868e1dc', 'title': '...
1{'_id': '5e94d6a0f733a1332868e1dd', 'title': '...
2{'_id': '5ea8f70a88792d4b2bee2ead', 'title': '...
3{'_id': '5ea8f75188792d4b2bee2eae', 'title': '...
4{'_id': '5ea8f76d88792d4b2bee2eaf', 'title': '...
\n", + "
" + ], + "text/plain": [ + " data\n", + "0 {'_id': '5e94d646f733a1332868e1dc', 'title': '...\n", + "1 {'_id': '5e94d6a0f733a1332868e1dd', 'title': '...\n", + "2 {'_id': '5ea8f70a88792d4b2bee2ead', 'title': '...\n", + "3 {'_id': '5ea8f75188792d4b2bee2eae', 'title': '...\n", + "4 {'_id': '5ea8f76d88792d4b2bee2eaf', 'title': '..." + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_episodios = pd.DataFrame(episodios.json())\n", + "data_episodios.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "9f6caf76", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idtitledescriptionairDate__vwriter._idwriter.namewriter.rolewriter.__vdirector._iddirector.namedirector.roledirector.__v
05e94d646f733a1332868e1dcPilotA documentary crew gives a firsthand introduct...2005-03-24T06:00:00.000Z05e95242f9511994a07f9a319Greg DanielsWriter/Director05e9523649511994a07f9a313Ken KwapisDirector0
15e94d6a0f733a1332868e1ddDiversity DayAfter the corporate office discovers that Mich...2005-03-29T06:00:00.000Z05e9523bc9511994a07f9a314B.J. NovakWriter/Actor05e9523649511994a07f9a313Ken KwapisDirector0
25ea8f70a88792d4b2bee2eadHealth CareDwight ends up in charge of picking a new heal...2005-04-05T05:00:00.000Z05e9523d69511994a07f9a315Paul LiebersteinWriter/Actor05e9523ef9511994a07f9a316Ken WhittinghamDirector0
35ea8f75188792d4b2bee2eaeThe AllianceWith Dwight worried about downsizing, Jim agre...2005-04-12T05:00:00.000Z05e9524139511994a07f9a317Michael SchurWriter/Actor05e95241e9511994a07f9a318Bryan GordonDirector0
45ea8f76d88792d4b2bee2eafBasketballMichael challenges the warehouse staff to a ba...2005-04-19T05:00:00.000Z05e95242f9511994a07f9a319Greg DanielsWriter/Director05e95242f9511994a07f9a319Greg DanielsWriter/Director0
\n", + "
" + ], + "text/plain": [ + " _id title \\\n", + "0 5e94d646f733a1332868e1dc Pilot \n", + "1 5e94d6a0f733a1332868e1dd Diversity Day \n", + "2 5ea8f70a88792d4b2bee2ead Health Care \n", + "3 5ea8f75188792d4b2bee2eae The Alliance \n", + "4 5ea8f76d88792d4b2bee2eaf Basketball \n", + "\n", + " description \\\n", + "0 A documentary crew gives a firsthand introduct... \n", + "1 After the corporate office discovers that Mich... \n", + "2 Dwight ends up in charge of picking a new heal... \n", + "3 With Dwight worried about downsizing, Jim agre... \n", + "4 Michael challenges the warehouse staff to a ba... \n", + "\n", + " airDate __v writer._id writer.name \\\n", + "0 2005-03-24T06:00:00.000Z 0 5e95242f9511994a07f9a319 Greg Daniels \n", + "1 2005-03-29T06:00:00.000Z 0 5e9523bc9511994a07f9a314 B.J. Novak \n", + "2 2005-04-05T05:00:00.000Z 0 5e9523d69511994a07f9a315 Paul Lieberstein \n", + "3 2005-04-12T05:00:00.000Z 0 5e9524139511994a07f9a317 Michael Schur \n", + "4 2005-04-19T05:00:00.000Z 0 5e95242f9511994a07f9a319 Greg Daniels \n", + "\n", + " writer.role writer.__v director._id director.name \\\n", + "0 Writer/Director 0 5e9523649511994a07f9a313 Ken Kwapis \n", + "1 Writer/Actor 0 5e9523649511994a07f9a313 Ken Kwapis \n", + "2 Writer/Actor 0 5e9523ef9511994a07f9a316 Ken Whittingham \n", + "3 Writer/Actor 0 5e95241e9511994a07f9a318 Bryan Gordon \n", + "4 Writer/Director 0 5e95242f9511994a07f9a319 Greg Daniels \n", + "\n", + " director.role director.__v \n", + "0 Director 0 \n", + "1 Director 0 \n", + "2 Director 0 \n", + "3 Director 0 \n", + "4 Writer/Director 0 " + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_episodios1 = pd.json_normalize(data_episodios['data'])\n", + "data_episodios1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "62698127", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Pilot\n", + "1 Diversity Day\n", + "2 
Health Care\n", + "3 The Alliance\n", + "4 Basketball\n", + "5 Hot Girl\n", + "6 The Dundies\n", + "7 Sexual Harassment\n", + "8 Office Olympics\n", + "9 The Fire\n", + "10 Halloween\n", + "11 The Fight\n", + "12 The Client\n", + "13 Performance Review\n", + "14 E-Mail Surveillance\n", + "15 Christmas Party\n", + "16 Booze Cruise\n", + "17 The Injury\n", + "18 The Secret\n", + "19 The Carpet\n", + "20 Boys and Girls\n", + "21 Valentine's Day\n", + "22 Dwight's Speech\n", + "23 Take Your Daughter to Work Day\n", + "24 Michael's Birthday\n", + "25 Drug Testing\n", + "26 Conflict Resolution\n", + "27 Casino Night\n", + "Name: title, dtype: object" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titulos = data_episodios1['title']\n", + "titulos" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "2474ca19", + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Basketball',\n", + " 'Booze Cruise',\n", + " 'Boys and Girls',\n", + " 'Casino Night',\n", + " 'Christmas Party',\n", + " 'Conflict Resolution',\n", + " 'Diversity Day',\n", + " 'Drug Testing',\n", + " \"Dwight's Speech\",\n", + " 'E-Mail Surveillance',\n", + " 'Halloween',\n", + " 'Health Care',\n", + " 'Hot Girl',\n", + " \"Michael's Birthday\",\n", + " 'Office Olympics',\n", + " 'Performance Review',\n", + " 'Pilot',\n", + " 'Sexual Harassment',\n", + " 'Take Your Daughter to Work Day',\n", + " 'The Alliance',\n", + " 'The Carpet',\n", + " 'The Client',\n", + " 'The Dundies',\n", + " 'The Fight',\n", + " 'The Fire',\n", + " 'The Injury',\n", + " 'The Secret',\n", + " \"Valentine's Day\"]" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titulos_alfabeticamente = sorted(titulos)\n", + "titulos_alfabeticamente" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "148dc885", + "metadata": {}, + "outputs": [], + 
"source": [ + "np.savetxt(\"Titulos.csv\", titulos_alfabeticamente, fmt='% s')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/your-code/README.md b/your-code/README.md new file mode 100644 index 0000000..d3837e5 --- /dev/null +++ b/your-code/README.md @@ -0,0 +1,30 @@ + +# Proyecto Web Scrapping & API´s + +## Introducción + + +El "scraping" web (también llamado "web harvesting", "web data extraction" o incluso "web data mining") puede definirse como "la construcción de un agente para descargar, analizar y organizar datos de la web de forma automatizada". + +De igual manera, hemos aprendido a utilizar las solicitudes de Python a las API y el análisis de las respuestas JSON para extraer la información que se necesita. + +Lo que realizamos para el proyecto de Web Scrapping fue extraer información de un periódico para ubicar a l@s mejores 50 tenistas masculinos y femeninas. + +Para el proyecto de API´s obtuvimos los nombres de todos los personajes de una serie; de igual manera, obtuvimos los nombres de los episodios ordenados de manera alfabética. + + +## Desarrollo + +Para el proyecto de WebScrapping se ingresó a la página del periódico Marca, de la cual se extrajo la tabla del ranking de la ATP y la WTA. +De esta manera se obtuvieron los datos buscados sobre l@s 50 mejores tenistas de la actualidad. 
+ +Para el proyecto de API´s se ubicó una página API de la serie The Office, de la cual, utilizando un DataFrame, se obtuvieron los nombres de los personajes y los nombres de los episodios. + + + + +## Recursos + +https://www.officeapi.dev/ +https://www.sport.es/es/noticias/tenis/ranking-wta-tenis-2022-actualizado-13100681 +https://www.marca.com/tenis/clasificacion-atp.html