Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 61 additions & 4 deletions OC4IDS_Database_Data_Import.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,11 @@
"\n",
"# @markdown After running this cell manually, it will auto-run when you change the source_id.\n",
"\n",
"source_id = 'ukraine_cost_ukraine' #@param [ 'ghana_cost_sekondi_takoradi', 'indonesia_cost_ntb', 'indonesia_cost_west_lombok', 'malawi_cost_malawi', 'mexico_cost_jalisco', 'mexico_nuevo_leon', 'uganda_gpp', 'ukraine_cost_ukraine']\n",
"source_id = 'ecuador_cost_ecuador' #@param [ 'ecuador_cost_ecuador','ghana_cost_sekondi_takoradi', 'indonesia_cost_ntb', 'indonesia_cost_west_lombok', 'malawi_cost_malawi', 'mexico_cost_jalisco', 'mexico_nuevo_leon', 'uganda_gpp', 'ukraine_cost_ukraine']\n",
"\n",
"print('Source selected:', source_id)"
],
"execution_count": 9,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -292,6 +292,7 @@
},
"source": [
"sources = {\n",
" 'ecuador_cost_ecuador':'https://costecuador.org/storage/json/json_',\n",
" 'mexico_cost_jalisco': 'http://www.costjalisco.org.mx/jsonprojects',\n",
" 'ghana_cost_sekondi_takoradi': 'https://costsekondi-takoradigh.org/uploads/projectJson.json',\n",
" 'mexico_nuevo_leon': 'http://si.nl.gob.mx/siasi_ws/api/edcapi/DescargarProjectPackage',\n",
Expand All @@ -313,6 +314,63 @@
" with open('project_package.json', 'wb') as f:\n",
" f.write(response.content)\n",
"\n",
"elif source_id == 'ecuador_cost_ecuador':\n",
"    # Ecuador: one zip archive per year is requested from <base URL><year>.zip,\n",
"    # for every year from start_year through the current year. Each .json member\n",
"    # is loaded as a project package, and all packages are then merged into a\n",
"    # single project_package.json.\n",
"    packages = []\n",
"    start_year = 2020\n",
"    current_year = datetime.now().year\n",
"\n",
"    for year in range(start_year, current_year + 1):\n",
"        url = f\"{sources[source_id]}{year}.zip\"\n",
"        try:\n",
"            # NOTE(review): verify=False disables TLS certificate checks. It is kept\n",
"            # from the original code - confirm the server still requires it.\n",
"            # The timeout stops an unresponsive server from hanging the cell, and\n",
"            # the body is read in full below, so streaming is not used.\n",
"            response = requests.get(url, verify=False, timeout=60)\n",
"            if response.status_code != 200:\n",
"                print(f\"Could not download {url}: Status {response.status_code}\")\n",
"                continue\n",
"            with zipfile.ZipFile(io.BytesIO(response.content), \"r\") as zip_ref:\n",
"                for filename in zip_ref.namelist():\n",
"                    if not filename.endswith(\".json\"):\n",
"                        continue\n",
"                    try:\n",
"                        with zip_ref.open(filename) as f:\n",
"                            packages.append(json.load(f))\n",
"                    except ValueError as e:\n",
"                        # Covers json.JSONDecodeError and UnicodeDecodeError: one\n",
"                        # malformed member must not discard the remaining files.\n",
"                        print(f\"Error parsing {filename} in {url}: {e}\")\n",
"        except (zipfile.BadZipFile, requests.RequestException) as e:\n",
"            print(f\"Error processing {url}: {e}\")\n",
"            continue\n",
"\n",
"    if not packages:\n",
"        raise RuntimeError(f\"No valid packages found at {sources[source_id]}\")\n",
"\n",
"    # Keyword arguments forwarded to combine_project_packages\n",
"    combine_args = {}\n",
"\n",
"    # The uri is taken from the most recently loaded package, when it has one\n",
"    last_uri = packages[-1].get(\"uri\")\n",
"    if last_uri:\n",
"        combine_args[\"uri\"] = last_uri\n",
"\n",
"    versions = {\n",
"        package.get(\"version\") for package in packages if package.get(\"version\")\n",
"    }\n",
"    if len(versions) > 1:\n",
"        print(f\"Packages declare more than one version: {versions}\")\n",
"    if versions:\n",
"        # Sets are unordered, so choose deterministically instead of taking an\n",
"        # arbitrary element; key=str keeps the comparison safe for mixed types.\n",
"        combine_args[\"version\"] = max(versions, key=str)\n",
"\n",
"    published_dates = {\n",
"        package.get(\"publishedDate\")\n",
"        for package in packages\n",
"        if package.get(\"publishedDate\")\n",
"    }\n",
"    if published_dates:\n",
"        # The largest publishedDate value is used for the combined package\n",
"        combine_args[\"published_date\"] = max(published_dates)\n",
"\n",
"    print(f\"Combining {len(packages)} packages for Ecuador\")\n",
"    combined_package = combine_project_packages(packages, **combine_args)\n",
"\n",
"    # json.dump() serialises the combined dictionary to the output file\n",
"    with open('project_package.json', 'w') as f:\n",
"        json.dump(combined_package, f, indent=2)\n",
"\n",
"\n",
"elif source_id == 'uganda_gpp':\n",
" packages = []\n",
" start_year = 2022\n",
Expand Down Expand Up @@ -687,8 +745,7 @@
"metadata": {
"colab": {
"name": " OC4IDS Database - Data Import",
"provenance": [],
"toc_visible": true
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
Expand Down
Loading