From a997bff877bade8ebdd97d6a5f220fe24b7f30df Mon Sep 17 00:00:00 2001 From: Neelima J <78197684+neelima-j@users.noreply.github.com> Date: Mon, 22 Dec 2025 13:44:26 +0000 Subject: [PATCH] - Closes https://github.com/OpenDataServices/notebooks-oc4ids/issues/70 - Adds an if condition to download logic to build URLs for each year, download zip files, convert to JSON, extract package information and combine to one package. - Refactoring for similar logic in Uganda will be done in a separate PR --- OC4IDS_Database_Data_Import.ipynb | 65 +++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/OC4IDS_Database_Data_Import.ipynb b/OC4IDS_Database_Data_Import.ipynb index 29119b6..86e0c6e 100644 --- a/OC4IDS_Database_Data_Import.ipynb +++ b/OC4IDS_Database_Data_Import.ipynb @@ -243,11 +243,11 @@ "\n", "# @markdown After running this cell manually, it will auto-run when you change the source_id.\n", "\n", - "source_id = 'ukraine_cost_ukraine' #@param [ 'ghana_cost_sekondi_takoradi', 'indonesia_cost_ntb', 'indonesia_cost_west_lombok', 'malawi_cost_malawi', 'mexico_cost_jalisco', 'mexico_nuevo_leon', 'uganda_gpp', 'ukraine_cost_ukraine']\n", + "source_id = 'ecuador_cost_ecuador' #@param [ 'ecuador_cost_ecuador','ghana_cost_sekondi_takoradi', 'indonesia_cost_ntb', 'indonesia_cost_west_lombok', 'malawi_cost_malawi', 'mexico_cost_jalisco', 'mexico_nuevo_leon', 'uganda_gpp', 'ukraine_cost_ukraine']\n", "\n", "print('Source selected:', source_id)" ], - "execution_count": 9, + "execution_count": null, "outputs": [ { "output_type": "stream", @@ -292,6 +292,7 @@ }, "source": [ "sources = {\n", + " 'ecuador_cost_ecuador':'https://costecuador.org/storage/json/json_',\n", " 'mexico_cost_jalisco': 'http://www.costjalisco.org.mx/jsonprojects',\n", " 'ghana_cost_sekondi_takoradi': 'https://costsekondi-takoradigh.org/uploads/projectJson.json',\n", " 'mexico_nuevo_leon': 'http://si.nl.gob.mx/siasi_ws/api/edcapi/DescargarProjectPackage',\n", @@ -313,6 +314,63 @@ " with open('project_package.json', 'wb') as f:\n", " f.write(response.content)\n", "\n", + "elif source_id == 'ecuador_cost_ecuador':\n", + " packages = []\n", + " start_year = 2020\n", + " current_year = datetime.now().year\n", + " years = list(range(start_year, current_year + 1))\n", + "\n", + " for year in years:\n", + " url = f\"{sources[source_id]}{year}.zip\"\n", + " try:\n", + " response = requests.get(url, verify=False, stream=True)\n", + " if response.status_code != 200:\n", + " print(f\"Could not download {url}: Status {response.status_code}\"\n", + " )\n", + " continue\n", + " zip_data = io.BytesIO(response.content)\n", + " with zipfile.ZipFile(zip_data, \"r\") as zip_ref:\n", + " for filename in zip_ref.namelist():\n", + " if filename.endswith(\".json\"):\n", + " with zip_ref.open(filename) as f:\n", + " package = json.load(f)\n", + " packages.append(package)\n", + " except (zipfile.BadZipFile, requests.RequestException) as e:\n", + " print(f\"Error processing {url}: {e}\")\n", + " continue\n", + "\n", + " if not packages:\n", + " raise RuntimeError(f\"No valid packages found at {sources[source_id]}\")\n", + "\n", + " # Arguments to combine packages\n", + " combine_args = {}\n", + "\n", + " if packages[-1].get(\"uri\"):\n", + " combine_args[\"uri\"] = packages[-1].get(\"uri\")\n", + "\n", + " versions = {\n", + " package.get(\"version\") for package in packages if package.get(\"version\")\n", + " }\n", + " if len(versions) > 1:\n", + " print(f\"Packages declare more than one version: {versions}\")\n", + " if versions:\n", + " combine_args[\"version\"] = list(versions)[0]\n", + "\n", + " published_dates = {\n", + " package.get(\"publishedDate\")\n", + " for package in packages\n", + " if package.get(\"publishedDate\")\n", + " }\n", + " if published_dates:\n", + " combine_args[\"published_date\"] = max(published_dates)\n", + "\n", + " print(f\"Combining {len(packages)} packages for Ecuador\")\n", + " combined_package = combine_project_packages(packages, **combine_args)\n", + "\n", + " with open('project_package.json', 'w') as f:\n", + " json.dump(combined_package, f, indent=2) # Use json.dump() to write the dictionary as JSON\n", + "\n", + "\n", "elif source_id == 'uganda_gpp':\n", " packages = []\n", " start_year = 2022\n", @@ -687,8 +745,7 @@ "metadata": { "colab": { "name": " OC4IDS Database - Data Import", - "provenance": [], - "toc_visible": true + "provenance": [] }, "kernelspec": { "display_name": "Python 3",