diff --git a/your-code/transfermarkt-scrapp.ipynb b/your-code/transfermarkt-scrapp.ipynb new file mode 100644 index 0000000..0b15183 --- /dev/null +++ b/your-code/transfermarkt-scrapp.ipynb @@ -0,0 +1,4251 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: bs4 in c:\\users\\dell\\anaconda3\\lib\\site-packages (0.0.1)\n", + "Requirement already satisfied: requests in c:\\users\\dell\\anaconda3\\lib\\site-packages (2.21.0)\n", + "Requirement already satisfied: pandas in c:\\users\\dell\\anaconda3\\lib\\site-packages (0.24.2)\n", + "Requirement already satisfied: lxml in c:\\users\\dell\\anaconda3\\lib\\site-packages (4.3.2)\n", + "Requirement already satisfied: html5lib in c:\\users\\dell\\anaconda3\\lib\\site-packages (1.0.1)\n", + "Requirement already satisfied: beautifulsoup4 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from bs4) (4.7.1)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from requests) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from requests) (2019.3.9)\n", + "Requirement already satisfied: urllib3<1.25,>=1.21.1 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from requests) (1.24.1)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from requests) (2.8)\n", + "Requirement already satisfied: python-dateutil>=2.5.0 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas) (2.8.0)\n", + "Requirement already satisfied: pytz>=2011k in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas) (2018.9)\n", + "Requirement already satisfied: numpy>=1.12.0 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas) (1.16.2)\n", + "Requirement already satisfied: webencodings in 
c:\\users\\dell\\anaconda3\\lib\\site-packages (from html5lib) (0.5.1)\n", + "Requirement already satisfied: six>=1.9 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from html5lib) (1.12.0)\n", + "Requirement already satisfied: soupsieve>=1.2 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from beautifulsoup4->bs4) (1.8)\n" + ] + } + ], + "source": [ + "import sys\n", + "!{sys.executable} -m pip install bs4 requests pandas lxml html5lib\n", + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import re\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 354, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Liga MX Clausura - Valores más altos 19/20 (Vista en detalle) | Transfermarkt\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Elfmeter\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Facebook-Blau\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Facebook-Messenger-Blau\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Facebook-Messenger-Weiss\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Groundhopping\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Laender\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Platzverweis\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Spiele\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Spieler\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Tore\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Torschuetzen\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Trainer\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Twitter-Blau\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Vereine\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Wettbewerbe\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " WhatsApp-gruen\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " alles_gelesen\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " bc-land\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " bc-spieler\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " bc-verein\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " bc-wettbewerb\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " facebook\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " fanticker\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " fanticker_white\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " favoriten\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " google+\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " home\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " instagram\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " kalender_blue\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " kommentare_beitraege_sprechblase\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " linkedin\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " liveticker_white\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " mail-schwarz\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " notification\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " offizielle-homepage\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " passwort\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " pinterest\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " rss\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " schliessen\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " share-blauaufweiss\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " share-weissaufblau\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " snapchat\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " suche-grau\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " suche\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " thread_erstellen\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " twitter\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
vimeo\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " whatsapp\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " xing\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " youtube\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"https://www.transfermarkt.es/liga-mx-clausura/marktwerte/wettbewerb/MEX1/pos//detailpos/0/altersklasse/alle/plus/1\"\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " |\n", + " \n", + " \n", + " \n", + " \n", + " |\n", + " \n", + " \n", + " \n", + " \n", + " |\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \"\"\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \"Inicio\"\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \"País\"\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \"Competición\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \"Club\"\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \"Jugadores\"\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

\n", + " Liga MX Clausura\n", + "

\n", + "
\n", + "
\n", + " \n", + " \"México\"\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \"Liga\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Liga:\n", + " \n", + " Primera División -\n", + " \"México\"\n", + " México\n", + "
\n", + " Cantidad de clubes:\n", + " \n", + " 19 Equipos\n", + "
\n", + " Numero de jugadores:\n", + " \n", + " 541\n", + "
\n", + " Legionarios:\n", + " \n", + " \n", + " 219 Jugadores\n", + " \n", + " \n", + " 40,5%\n", + " \n", + "
\n", + " ø-Valor de mercado:\n", + " \n", + " 1,39 mill. €\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Más títulos:\n", + " \n", + " \n", + " Santos Laguna\n", + " \n", + " \n", + " 5\n", + " \n", + " \n", + " veces\n", + " \n", + "
\n", + " ø-Edad:\n", + " \n", + " 26,8\n", + " \n", + " Años\n", + " \n", + "
\n", + " Campeón actual:\n", + " \n", + " \n", + " Tigres UANL\n", + " \n", + "
\n", + " Jugador más valioso:\n", + " \n", + " \n", + " Maximiliano Meza\n", + " \n", + " \n", + " 14,00 mill. €\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " Valor de mercado total:\n", + "
\n", + " \n", + " \n", + " 753,90\n", + " \n", + " mill. €\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " Valores más altos Liga MX Clausura\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "

\n", + " Competencia de los mejores valores de mercado\n", + "

\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Posición:\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + " Posiciones detalladas:\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + " Grupo de edad:\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " #\n", + " \n", + " Jugadores\n", + " \n", + " Nac.\n", + " \n", + " \n", + " Edad\n", + " \n", + " \n", + " Club\n", + " \n", + " \n", + " Valor más alto de carrera\n", + " \n", + " \n", + " \n", + " Última revisión\n", + " \n", + " \n", + " \n", + " Valor de mercado\n", + " \n", + "
\n", + " 1\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Maximiliano\n", + " \n", + " \n", + " Maximiliano Meza\n", + " \n", + "
\n", + " Medio centro ofensivo\n", + "
\n", + "
\n", + " \"Argentina\"\n", + " \n", + " 26\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 14,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 14,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 2\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Guido\n", + " \n", + " \n", + " Guido Rodríguez\n", + " \n", + "
\n", + " Pivote\n", + "
\n", + "
\n", + " \"Argentina\"\n", + " \n", + " 25\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 10,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 10,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 3\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Rodolfo\n", + " \n", + " \n", + " Rodolfo Pizarro\n", + " \n", + "
\n", + " Medio centro ofensivo\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 25\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 10,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 10,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 4\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Carlos\n", + " \n", + " \n", + " Carlos Salcedo\n", + " \n", + "
\n", + " Defensa central\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 25\n", + " \n", + " \n", + " \"Tigres\n", + " \n", + " \n", + " \n", + " 10,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 10,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 5\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Mateus\n", + " \n", + " \n", + " Mateus Uribe\n", + " \n", + "
\n", + " Mediocentro\n", + "
\n", + "
\n", + " \"Colombia\"\n", + " \n", + " 28\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 6\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Víctor\n", + " \n", + " \n", + " Víctor Guzmán\n", + " \n", + "
\n", + " Medio centro ofensivo\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 24\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 7\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Guido\n", + " \n", + " \n", + " Guido Pizarro\n", + " \n", + "
\n", + " Pivote\n", + "
\n", + "
\n", + " \"Argentina\"\n", + "
\n", + " \"México\"\n", + "
\n", + " 29\n", + " \n", + " \n", + " \"Tigres\n", + " \n", + " \n", + " \n", + " 15,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 8\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Orbelín\n", + " \n", + " \n", + " Orbelín Pineda\n", + " \n", + "
\n", + " Mediocentro\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 23\n", + " \n", + " \n", + " \"CD\n", + " \n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 9\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Edson\n", + " \n", + " \n", + " Edson Álvarez\n", + " \n", + "
\n", + " Defensa central\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 21\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 7,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 10\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Jonathan\n", + " \n", + " \n", + " Jonathan González\n", + " \n", + "
\n", + " Pivote\n", + "
\n", + "
\n", + " \"México\"\n", + "
\n", + " \"Estados\n", + "
\n", + " 20\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 6,50 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 6,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 11\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Guillermo\n", + " \n", + " \n", + " Guillermo Fernández\n", + " \n", + "
\n", + " Medio centro ofensivo\n", + "
\n", + "
\n", + " \"Argentina\"\n", + " \n", + " 27\n", + " \n", + " \n", + " \"CD\n", + " \n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " 18/12/2018\n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 12\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Timothée\n", + " \n", + " \n", + " Timothée Kolodziejczak\n", + " \n", + "
\n", + " Defensa central\n", + "
\n", + "
\n", + " \"Francia\"\n", + "
\n", + " \"Polonia\"\n", + "
\n", + " 27\n", + " \n", + " \n", + " \"Tigres\n", + " \n", + " \n", + " \n", + " 7,00 mill. €\n", + " \n", + " \n", + " 03/06/2019\n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 13\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Rogelio\n", + " \n", + " \n", + " Rogelio Funes Mori\n", + " \n", + "
\n", + " Delantero centro\n", + "
\n", + "
\n", + " \"Argentina\"\n", + " \n", + " 28\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 14\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Nicolás\n", + " \n", + " \n", + " Nicolás Castillo\n", + " \n", + "
\n", + " Delantero centro\n", + "
\n", + "
\n", + " \"Chile\"\n", + " \n", + " 26\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " 11/01/2019\n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 15\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Julio\n", + " \n", + " \n", + " Julio Furch\n", + " \n", + "
\n", + " Delantero centro\n", + "
\n", + "
\n", + " \"Argentina\"\n", + " \n", + " 29\n", + " \n", + " \n", + " \"Santos\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 16\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Bruno\n", + " \n", + " \n", + " Bruno Valdez\n", + " \n", + "
\n", + " Defensa central\n", + "
\n", + "
\n", + " \"Paraguay\"\n", + " \n", + " 26\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 17\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Roger\n", + " \n", + " \n", + " Roger Martínez\n", + " \n", + "
\n", + " Delantero centro\n", + "
\n", + "
\n", + " \"Colombia\"\n", + " \n", + " 25\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 18\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Jesús\n", + " \n", + " \n", + " Jesús Gallardo\n", + " \n", + "
\n", + " Extremo izquierdo\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 24\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 19\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Víctor\n", + " \n", + " \n", + " Víctor Dávila\n", + " \n", + "
\n", + " Delantero centro\n", + "
\n", + "
\n", + " \"Chile\"\n", + " \n", + " 21\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 20\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"César\n", + " \n", + " \n", + " César Montes\n", + " \n", + "
\n", + " Defensa central\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 22\n", + " \n", + " \n", + " \"CF\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 21\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Javier\n", + " \n", + " \n", + " Javier Aquino\n", + " \n", + "
\n", + " Extremo izquierdo\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 29\n", + " \n", + " \n", + " \"Tigres\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 22\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Roberto\n", + " \n", + " \n", + " Roberto Alvarado\n", + " \n", + "
\n", + " Medio centro ofensivo\n", + "
\n", + "
\n", + " \"México\"\n", + " \n", + " 20\n", + " \n", + " \n", + " \"CD\n", + " \n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 5,00 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 23\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Diego\n", + " \n", + " \n", + " Diego Valdés\n", + " \n", + "
\n", + " Medio centro ofensivo\n", + "
\n", + "
\n", + " \"Chile\"\n", + " \n", + " 25\n", + " \n", + " \n", + " \"Santos\n", + " \n", + " \n", + " \n", + " 4,50 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 4,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 24\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Rafael\n", + " \n", + " \n", + " Rafael Carioca\n", + " \n", + "
\n", + " Pivote\n", + "
\n", + "
\n", + " \"Brasil\"\n", + " \n", + " 30\n", + " \n", + " \n", + " \"Tigres\n", + " \n", + " \n", + " \n", + " 6,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 4,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + " 25\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \"Eduardo\n", + " \n", + " \n", + " Eduardo Vargas\n", + " \n", + "
\n", + " Delantero centro\n", + "
\n", + "
\n", + " \"Chile\"\n", + " \n", + " 29\n", + " \n", + " \n", + " \"Tigres\n", + " \n", + " \n", + " \n", + " 11,00 mill. €\n", + " \n", + " \n", + " 08/02/2019\n", + " \n", + " \n", + " 4,50 mill. €\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + " 14000000\n", + " \n", + " \n", + " 10000000\n", + " \n", + " \n", + " 10000000\n", + " \n", + " \n", + " 10000000\n", + " \n", + " \n", + " 7500000\n", + " \n", + " \n", + " 7500000\n", + " \n", + " \n", + " 7500000\n", + " \n", + " \n", + " 7500000\n", + " \n", + " \n", + " 7500000\n", + " \n", + " \n", + " 6500000\n", + " \n", + " \n", + " 6000000\n", + " \n", + " \n", + " 6000000\n", + " \n", + " \n", + " 6000000\n", + " \n", + " \n", + " 6000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 5000000\n", + " \n", + " \n", + " 4500000\n", + " \n", + " \n", + " 4500000\n", + " \n", + " \n", + " 4500000\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \"Transfermarkt\"\n", + " \n", + " \n", + " Aviso legal\n", + " \n", + " |\n", + " \n", + " Política de privacidad\n", + " \n", + " |\n", + " \n", + " Información corporativa\n", + " \n", + " |\n", + " \n", + " Equipo de TM\n", + " \n", + " |\n", + " \n", + " Redes sociales\n", + " \n", + " |\n", + " \n", + " Ayuda\n", + " \n", + " |\n", + " \n", + " Boletín informativo\n", + " \n", + " |\n", + " \n", + " Enviar reporte de errores\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n" + ] + } + ], + "source": [ + "# Step 1: Load the webpage containing the data.\n", + "url = 'https://www.transfermarkt.es/liga-mx-clausura/marktwerte/wettbewerb/MEX1/pos//detailpos/0/altersklasse/alle/plus/1'\n", + "html = requests.get(url).content\n", + "headers = {\"User-Agent\":\"Mozilla/5.0\"}\n", + "response = requests.get(url, headers=headers).content\n", + "soup = BeautifulSoup(response, 'html')\n", + "print(soup.prettify())" + ] + }, + { + "cell_type": "code", + "execution_count": 355, + "metadata": {}, + "outputs": [], + "source": [ + "players_table = soup.select('table.items tr')" + ] + }, + { + "cell_type": "code", + "execution_count": 356, + "metadata": {}, + "outputs": [], + "source": [ + "players_table = [[e for e in p if e!= '\\n'] for p in players_table]" + ] + }, + { + "cell_type": "code", + "execution_count": 357, + "metadata": {}, + "outputs": [], + "source": [ + "players_table1 = [[e.text if e.text else e.img['alt'] for e in p] for p in players_table]" + ] + }, + { + "cell_type": "code", + "execution_count": 358, + "metadata": {}, + "outputs": [], + "source": [ + "players_table_final = [e for e in players_table1[:] if len(e) > 2]" + ] + }, + { + "cell_type": "code", + "execution_count": 359, + "metadata": {}, + "outputs": [], + "source": [ + "name_position = soup.find_all(\"table\", {\"class\": \"inline-table\"})\n", + "name_position_list = [i.text.replace('\\n', '') for i in name_position]" + ] + }, + { + "cell_type": "code", + "execution_count": 360, + "metadata": {}, + "outputs": [], + "source": [ + "nested_position = [i.split()[2:] for i in name_position_list]" + ] + }, + { + "cell_type": "code", + "execution_count": 361, + "metadata": {}, + "outputs": [], + "source": [ + "position_final = [\" \".join(nested_position[i]) for i in range(len(nested_position))]" + ] + }, + { + "cell_type": "code", + "execution_count": 362, + "metadata": {}, + 
"outputs": [], + "source": [ + "players = soup.find_all(\"a\", {\"class\": \"spielprofil_tooltip\"})\n", + "players_list = [i.text for i in players]\n", + "players_list_f = players_list[1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 363, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(players_table_final[1:], columns=players_table_final[0])\n", + "df.insert(1, 'Name', players_list_f)\n", + "df.insert(2, 'Posición', position_final)" + ] + }, + { + "cell_type": "code", + "execution_count": 365, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NamePosiciónNac.EdadClubValor más alto de carreraÚltima revisiónValor de mercado
01Maximiliano MezaMedio centro ofensivoArgentina26CF Monterrey14,00 mill. €08/02/201914,00 mill. €
12Guido RodríguezPivoteArgentina25CF América10,00 mill. €08/02/201910,00 mill. €
23Rodolfo PizarroMedio centro ofensivoMéxico25CF Monterrey10,00 mill. €08/02/201910,00 mill. €
34Carlos SalcedoDefensa centralMéxico25Tigres UANL10,00 mill. €08/02/201910,00 mill. €
45Mateus UribeMediocentroColombia28CF América7,50 mill. €08/02/20197,50 mill. €
\n", + "
" + ], + "text/plain": [ + " # Name Posición Nac. Edad Club \\\n", + "0 1 Maximiliano Meza Medio centro ofensivo Argentina 26 CF Monterrey \n", + "1 2 Guido Rodríguez Pivote Argentina 25 CF América \n", + "2 3 Rodolfo Pizarro Medio centro ofensivo México 25 CF Monterrey \n", + "3 4 Carlos Salcedo Defensa central México 25 Tigres UANL \n", + "4 5 Mateus Uribe Mediocentro Colombia 28 CF América \n", + "\n", + " Valor más alto de carrera Última revisión Valor de mercado \n", + "0 14,00 mill. € 08/02/2019 14,00 mill. €  \n", + "1 10,00 mill. € 08/02/2019 10,00 mill. €  \n", + "2 10,00 mill. € 08/02/2019 10,00 mill. €  \n", + "3 10,00 mill. € 08/02/2019 10,00 mill. €  \n", + "4 7,50 mill. € 08/02/2019 7,50 mill. €  " + ] + }, + "execution_count": 365, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_dataframe = df.drop(\"Jugadores\", axis=1)\n", + "final_dataframe.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 366, + "metadata": {}, + "outputs": [], + "source": [ + "final_dataframe.to_excel('transfermkt_ligamx.xlsx', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#we can apply a for loop to the whole program above, changing the initial webpage name slightly\n", + "#to scrape the next year." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# regex pattern for the urls to scrape... " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df.groupby('xxx').count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Los valores de mercado que Transfermkt calcula para los jugadores generalmente\n", + "# equivalen al 80% del valor real al que se compran-venden, entonces... 
\n", + "# x = alumnos_df['name'].apply(lambda n:n.upper())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/your-code/transfermkt_ligamx.xlsx b/your-code/transfermkt_ligamx.xlsx new file mode 100644 index 0000000..9903319 Binary files /dev/null and b/your-code/transfermkt_ligamx.xlsx differ diff --git a/your-code/web-project README.txt b/your-code/web-project README.txt new file mode 100644 index 0000000..74c86b2 --- /dev/null +++ b/your-code/web-project README.txt @@ -0,0 +1,30 @@ +Process for extracting data: + + -Load the webpage containing the data. + -Locate the data within the page and extract it. + -Organise the data into a dataframe + -Export the data into a csv file + -Clean, Manipulate and Filter the csv file. + +Load the webpage containing the data: 1) create a variable called ‘headers’ and assign it a string that will tell the website that we are a browser, and not a scraping tool. 2) assign the address that we want to scrape to a variable called ‘page’. 3) use the requests library to grab the code of the page and assign it to ‘pageTree’. 4) parse the website code into html. + +--------------------------------------------------------------------------------------------------------- +Valor de Mercado (todas las posiciones y grupos de edad de la liga): +https://www.transfermarkt.es/liga-mx-clausura/marktwerte/wettbewerb/MEX1/pos//detailpos/0/altersklasse/alle/plus/1 +*mismo URL para las 4 páginas (100 jugadores). 
+ +Menores de 23 años: +https://www.transfermarkt.es/liga-mx-clausura/marktwerte/wettbewerb/MEX1/plus/1/galerie/0?pos=&detailpos=&altersklasse=u23 +*mismo URL para las 4 páginas (100 jugadores). + +Menores de 21 años: +https://www.transfermarkt.es/liga-mx-clausura/marktwerte/wettbewerb/MEX1/plus/1/galerie/0?pos=&detailpos=&altersklasse=u21 +*mismo URL para las 2 páginas (42 jugadores). + +Cuando juntemos estos DataFrames, muchos jugadores se van a repetir (incluso habrá jugadores que aparezcan tres veces), por lo que habrá que eliminar los duplicados en el dataframe final. + +Grupos de edad: Todos, u19, u20, u23, 23-30, o30, o32 y o34 + +¿Qué queremos de este primer paso? Un DataFrame con los siguientes datos de los jugadores: índice (ránking), nombre, posición, nacionalidad, edad, club, valor más alto de carrera y valor de mercado. Es decir, básicamente todas las columnas de la tabla EXCEPTO 'última revisión'. + + diff --git a/your-code/wyscout-api-scrapping.ipynb b/your-code/wyscout-api-scrapping.ipynb new file mode 100644 index 0000000..27b7b06 --- /dev/null +++ b/your-code/wyscout-api-scrapping.ipynb @@ -0,0 +1,294 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import requests\n", + "from pandas.io.json import json_normalize\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexagebirth_country_codebirth_country_namebirth_datebirth_daycontract_expirescurrent_team_colorcurrent_team_logocurrent_team_name...positionsprimary_positionprimary_position_percentsecondary_positionsecondary_position_percentthird_positionthird_position_percenttotal_matchesweightxg_shot
0019MEXMexico2000-06-092000-06-092024-06-3014ad58https://cdn5.wyscout.com/photos/team/public/45...Real Betis...[LW, RW, AMF]LW35RW31AMF1615581.37
1126ARGArgentina1992-12-151992-12-152020-06-30000000https://cdn5.wyscout.com/photos/team/public/59...Monterrey...[AMF, RW, LW]AMF37RW10LW1019771.09
2225MEXMexico1993-09-291993-09-292022-06-30000000https://cdn5.wyscout.com/photos/team/public/92...Tigres UANL...[LCB, CB]LCB71CB14None015770.49
3325MEXMexico1994-02-151994-02-152023-06-30000000https://cdn5.wyscout.com/photos/team/public/59...Monterrey...[AMF, LW]AMF52LW10None030645.09
4425ARGArgentina1994-04-121994-04-12None000000https://cdn5.wyscout.com/photos/team/public/46...América...[LCMF, LDMF, DMF]LCMF41LDMF23DMF1844803.40
\n", + "

5 rows × 34 columns

\n", + "
" + ], + "text/plain": [ + " index age birth_country_code birth_country_name birth_date birth_day \\\n", + "0 0 19 MEX Mexico 2000-06-09 2000-06-09 \n", + "1 1 26 ARG Argentina 1992-12-15 1992-12-15 \n", + "2 2 25 MEX Mexico 1993-09-29 1993-09-29 \n", + "3 3 25 MEX Mexico 1994-02-15 1994-02-15 \n", + "4 4 25 ARG Argentina 1994-04-12 1994-04-12 \n", + "\n", + " contract_expires current_team_color \\\n", + "0 2024-06-30 14ad58 \n", + "1 2020-06-30 000000 \n", + "2 2022-06-30 000000 \n", + "3 2023-06-30 000000 \n", + "4 None 000000 \n", + "\n", + " current_team_logo current_team_name ... \\\n", + "0 https://cdn5.wyscout.com/photos/team/public/45... Real Betis ... \n", + "1 https://cdn5.wyscout.com/photos/team/public/59... Monterrey ... \n", + "2 https://cdn5.wyscout.com/photos/team/public/92... Tigres UANL ... \n", + "3 https://cdn5.wyscout.com/photos/team/public/59... Monterrey ... \n", + "4 https://cdn5.wyscout.com/photos/team/public/46... América ... \n", + "\n", + " positions primary_position primary_position_percent \\\n", + "0 [LW, RW, AMF] LW 35 \n", + "1 [AMF, RW, LW] AMF 37 \n", + "2 [LCB, CB] LCB 71 \n", + "3 [AMF, LW] AMF 52 \n", + "4 [LCMF, LDMF, DMF] LCMF 41 \n", + "\n", + " secondary_position secondary_position_percent third_position \\\n", + "0 RW 31 AMF \n", + "1 RW 10 LW \n", + "2 CB 14 None \n", + "3 LW 10 None \n", + "4 LDMF 23 DMF \n", + "\n", + " third_position_percent total_matches weight xg_shot \n", + "0 16 15 58 1.37 \n", + "1 10 19 77 1.09 \n", + "2 0 15 77 0.49 \n", + "3 0 30 64 5.09 \n", + "4 18 44 80 3.40 \n", + "\n", + "[5 rows x 34 columns]" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l = []\n", + "for i in range(22): \n", + " url= 
f'https://searchapi.wyscout.com/api/v1/search/results.json?search%5Btime_frame%5D=default&search%5Bcompetition%5D=155&search%5Byouth_stats%5D=false&count=30&page={i}&sort=market_value+desc&language=es&columns=name%2Cid%2Cimage%2Ccurrent_team_logo%2Ccurrent_team_color%2Cbirth_country_name%2Cpassport_country_names%2Ccurrent_team_name%2Cmarket_value%2Ctotal_matches%2Cminutes_on_field%2Cpositions%2Cage%2Ccontract_expires%2Cgoals%2Cgoals_tagged%2Cxg_shot%2Cfoot%2Cheight%2Cweight%2Con_loan&token=98de50b5ada2adab5da2b5796031f14751c9c6a6&groupId=1110432&subgroupId=163958'\n", + " headers = {\"User-Agent\":\"Mozilla/5.0\"}\n", + " response = requests.get(url, headers=headers).json() \n", + " flattened_data = json_normalize(response['players'])\n", + " l.append(flattened_data)\n", + "dataframe = pd.concat(l)\n", + "dataframe = dataframe.reset_index()\n", + "dataframe.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "league_id = {'México':'155', 'Venezuela':'163', 'Argentina': '87'} " + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "dataframe.to_excel('wyscout_ligamx.xlsx', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/your-code/wyscout_ligamx.xlsx b/your-code/wyscout_ligamx.xlsx new file mode 100644 index 0000000..68c4426 Binary files /dev/null and b/your-code/wyscout_ligamx.xlsx differ