From 84a85e0424c35234adc9a050d014a2f7c8274c48 Mon Sep 17 00:00:00 2001 From: awesomellj <133604911+awesomellj@users.noreply.github.com> Date: Wed, 31 May 2023 02:27:13 +0100 Subject: [PATCH 1/2] Update scrape_arknights_gamepress.py Currently, the Tulip page lacks 'profession-title / rarity-cell' elements. So that the scraper keeps working, this operator is temporarily skipped during scraping. --- arknights-data-science/ingestion/scrape_arknights_gamepress.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arknights-data-science/ingestion/scrape_arknights_gamepress.py b/arknights-data-science/ingestion/scrape_arknights_gamepress.py index 3eadbac..4efc70c 100644 --- a/arknights-data-science/ingestion/scrape_arknights_gamepress.py +++ b/arknights-data-science/ingestion/scrape_arknights_gamepress.py @@ -26,6 +26,9 @@ for op in op_list: # Get the name and their personal page from these HTML elements name = op.find("div", class_="operator-title").a.text + # If the name is 'tulip', skip this iteration + if name.lower() == 'tulip': + continue page = "https://gamepress.gg" + \ op.find("div", class_="operator-title").a["href"] # Add the new information to the dictionary From 5d7653a7d511d5cccc0b56abecea088240b965a6 Mon Sep 17 00:00:00 2001 From: awesomellj <133604911+awesomellj@users.noreply.github.com> Date: Wed, 31 May 2023 15:54:22 +0100 Subject: [PATCH 2/2] Update scrape_arknights_gamepress.py Currently, the Tulip and Friston-3 pages are missing elements that the scraper expects. So that the scraper keeps working, these two operators are temporarily skipped during scraping. 
--- .../ingestion/scrape_arknights_gamepress.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arknights-data-science/ingestion/scrape_arknights_gamepress.py b/arknights-data-science/ingestion/scrape_arknights_gamepress.py index 4efc70c..8ecc668 100644 --- a/arknights-data-science/ingestion/scrape_arknights_gamepress.py +++ b/arknights-data-science/ingestion/scrape_arknights_gamepress.py @@ -26,8 +26,8 @@ for op in op_list: # Get the name and their personal page from these HTML elements name = op.find("div", class_="operator-title").a.text - # If the name is 'tulip', skip this iteration - if name.lower() == 'tulip': + # If the name is 'tulip' or 'friston-3', skip this iteration + if name.lower() in ['tulip', 'friston-3']: continue page = "https://gamepress.gg" + \ op.find("div", class_="operator-title").a["href"]