diff --git a/src/scraper/functions/federations.py b/src/scraper/functions/federations.py
new file mode 100644
index 0000000..3c7d733
--- /dev/null
+++ b/src/scraper/functions/federations.py
@@ -0,0 +1,207 @@
+"""FIDE federation name to abbreviation mapping."""
+
+# get_player_info() looks names up with an exact-match dict.get(), so every key
+# must equal the scraped federation string character for character.
+FEDERATION_ABBREV: dict[str, str] = {
+    "United States of America": "USA",
+    "India": "IND",
+    "China": "CHN",
+    "Russia": "RUS",
+    "Germany": "GER",
+    "France": "FRA",
+    "Azerbaijan": "AZE",
+    "Ukraine": "UKR",
+    "Hungary": "HUN",
+    "Netherlands": "NED",
+    "Spain": "ESP",
+    "Uzbekistan": "UZB",
+    "Armenia": "ARM",
+    "Norway": "NOR",
+    "Poland": "POL",
+    "England": "ENG",
+    "Israel": "ISR",
+    "Iran": "IRI",
+    "Serbia": "SRB",
+    "Turkiye": "TUR",
+    "Greece": "GRE",
+    "Denmark": "DEN",
+    "Romania": "ROU",
+    "Croatia": "CRO",
+    "Czech Republic": "CZE",
+    "Argentina": "ARG",
+    "Austria": "AUT",
+    "Brazil": "BRA",
+    "Georgia": "GEO",
+    "Italy": "ITA",
+    "Slovenia": "SLO",
+    "Bulgaria": "BUL",
+    "Sweden": "SWE",
+    "Switzerland": "SUI",
+    "Kazakhstan": "KAZ",
+    "Cuba": "CUB",
+    "Slovakia": "SVK",
+    "Vietnam": "VIE",
+    "Peru": "PER",
+    "Moldova": "MDA",
+    "Lithuania": "LTU",
+    "Mexico": "MEX",
+    "Montenegro": "MNE",
+    "Colombia": "COL",
+    "Belgium": "BEL",
+    "Bosnia and Herzegovina": "BIH",
+    "Australia": "AUS",
+    "Canada": "CAN",
+    "Iceland": "ISL",
+    "Mongolia": "MGL",
+    "Finland": "FIN",
+    "Latvia": "LAT",
+    "Indonesia": "INA",
+    "Egypt": "EGY",
+    "North Macedonia": "MKD",
+    "Belarus": "BLR",
+    "Portugal": "POR",
+    "Chile": "CHI",
+    "Philippines": "PHI",
+    "Estonia": "EST",
+    "Venezuela": "VEN",
+    "Paraguay": "PAR",
+    "Malaysia": "MAS",
+    "Ireland": "IRL",
+    "Singapore": "SGP",
+    "Scotland": "SCO",
+    "Bangladesh": "BAN",
+    "Turkmenistan": "TKM",
+    "Uruguay": "URU",
+    "Kyrgyzstan": "KGZ",
+    "Morocco": "MAR",
+    "South Africa": "RSA",
+    "Albania": "ALB",
+    "New Zealand": "NZL",
+    "Faroe Islands": "FAI",
+    "Myanmar": "MYA",
+    "Zambia": "ZAM",
+    "Algeria": "ALG",
+    "Kosovo": "KOS",
+    "Costa Rica": "CRC",
+    "Japan": "JPN",
+    "Tunisia": "TUN",
+    "Bolivia": "BOL",
+    "United Arab Emirates": "UAE",
+    "Nigeria": "NGR",
+    "Tajikistan": "TJK",
+    "Dominican Republic": "DOM",
+    "Syria": "SYR",
+    "Luxembourg": "LUX",
+    "Ecuador": "ECU",
+    "South Korea": "KOR",
+    "Andorra": "AND",
+    "Monaco": "MNC",
+    "Iraq": "IRQ",
+    "Panama": "PAN",
+    "Wales": "WLS",
+    "Lebanon": "LBN",
+    "El Salvador": "ESA",
+    "Sri Lanka": "SRI",
+    "Uganda": "UGA",
+    "Trinidad and Tobago": "TTO",
+    "Angola": "ANG",
+    "Nicaragua": "NCA",
+    "Jordan": "JOR",
+    "Zimbabwe": "ZIM",
+    "Thailand": "THA",
+    "Guatemala": "GUA",
+    "Madagascar": "MAD",
+    "Libya": "LBA",
+    "Yemen": "YEM",
+    "Malta": "MLT",
+    "Puerto Rico": "PUR",
+    "Hong Kong, China": "HKG",
+    "Chinese Taipei": "TPE",
+    "Jamaica": "JAM",
+    "Honduras": "HON",
+    "Cyprus": "CYP",
+    "Nepal": "NEP",
+    "South Sudan": "SSD",
+    "Palestine": "PLE",
+    "Sudan": "SUD",
+    "Barbados": "BAR",
+    "Malawi": "MAW",
+    "Ethiopia": "ETH",
+    "Botswana": "BOT",
+    "Kenya": "KEN",
+    "Netherlands Antilles": "AHO",
+    "Mozambique": "MOZ",
+    "Pakistan": "PAK",
+    "Liechtenstein": "LIE",
+    "Aruba": "ARU",
+    "Cote d'Ivoire": "CIV",
+    "Saudi Arabia": "KSA",
+    "Namibia": "NAM",
+    "Cape Verde": "CPV",
+    "Afghanistan": "AFG",
+    "Mauritania": "MTN",
+    "Mauritius": "MRI",
+    "Oman": "OMA",
+    "Liberia": "LBR",
+    "Guam": "GUM",
+    "Somalia": "SOM",
+    "Suriname": "SUR",
+    "Bermuda": "BER",
+    "Ghana": "GHA",
+    "Jersey": "JCI",
+    "Haiti": "HAI",
+    "Kuwait": "KUW",
+    "Bahrain": "BRN",
+    "Guernsey": "GCI",
+    "Saint Lucia": "LCA",
+    "Palau": "PLW",
+    "Senegal": "SEN",
+    "Bahamas": "BAH",
+    "Burundi": "BDI",
+    "Lesotho": "LES",
+    "Guyana": "GUY",
+    "San Marino": "SMR",
+    "Cayman Islands": "CAY",
+    "Togo": "TOG",
+    "Nauru": "NRU",
+    "Maldives": "MDV",
+    "Tanzania": "TAN",
+    "Qatar": "QAT",
+    "Eswatini": "SWZ",
+    "Sierra Leone": "SLE",
+    "Gabon": "GAB",
+    "Cameroon": "CMR",
+    "Central African Republic": "CAF",
+    "Macau, China": "MAC",
+    "Democratic Republic of the Congo": "COD",
+    "Laos": "LAO",
+    "Vanuatu": "VAN",
+    "US Virgin Islands": "ISV",
+    "Brunei Darussalam": "BRU",
+    "Seychelles": "SEY",
+    "Papua New Guinea": "PNG",
+    "Mali": "MLI",
+    "Dominica": "DMA",
+    "Equatorial Guinea": "GEQ",
+    "Sao Tome and Principe": "STP",
+    "Rwanda": "RWA",
+    "Eritrea": "ERI",
+    "Comoros Islands": "COM",
+    "Belize": "BIZ",
+    "Saint Vincent and the Grenadines": "VIN",
+    "Grenada": "GRN",
+    "Cambodia": "CAM",
+    "Bhutan": "BHU",
+    "Antigua and Barbuda": "ANT",
+    "Burkina Faso": "BUR",
+    "Djibouti": "DJI",
+    "Chad": "CHA",
+    "Fiji": "FIJ",
+    "Gambia": "GAM",
+    "British Virgin Islands": "IVB",
+    "Timor-Leste": "TLS",
+    "Saint Kitts and Nevis": "SKN",
+    "Niger": "NIG",
+    "Solomon Islands": "SOL",
+    "Tonga": "TGA",
+}
diff --git a/src/scraper/functions/player_info.py b/src/scraper/functions/player_info.py
index 4d2b75c..09956b3 100644
--- a/src/scraper/functions/player_info.py
+++ b/src/scraper/functions/player_info.py
@@ -1,5 +1,12 @@
+import logging
+
 from bs4 import BeautifulSoup, Tag
 
+from src.scraper.functions.federations import FEDERATION_ABBREV
+
+logger = logging.getLogger(__name__)
+
+
 def get_player_info(html_doc: str):
     soup = BeautifulSoup(html_doc, "html.parser")
 
@@ -21,10 +28,19 @@ def get_player_info(html_doc: str):
         "blitz_rating": soup.select_one(".profile-blitz > p:nth-child(2)"),
     }
 
+    federation = safely_get_string(player_info_raw["federation"])
+    # Map the name to its code; names missing from FEDERATION_ABBREV are logged.
+    federation_abbrev = ""
+    if federation:
+        federation_abbrev = FEDERATION_ABBREV.get(federation, "")
+        if not federation_abbrev:
+            logger.error("Unknown federation: %s", federation)
+
     player_info = {
         "fide_id": safely_get_string(player_info_raw["fide_id"]),
         "fide_title": safely_get_string(player_info_raw["fide_title"]),
-        "federation": safely_get_string(player_info_raw["federation"]),
+        "federation": federation,
+        "federation_abbrev": federation_abbrev,
         "birth_year": safely_get_int(player_info_raw["birth_year"]),
         "sex": safely_get_string(player_info_raw["sex"]),
         "name": safely_get_string(player_info_raw["name"]),