From 332905a09e2e93ca5e4ebc76f6c0a978e71be987 Mon Sep 17 00:00:00 2001 From: Abena Boatemaa Asare-Ansah Date: Mon, 8 Jan 2024 18:32:22 -0500 Subject: [PATCH 1/7] Add corrective labels_2022 --- data/raw.dvc | 6 +++--- datasets.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/data/raw.dvc b/data/raw.dvc index f27b2414..1af2f597 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: c033b32e0901a197d9f743c8329b24ae.dir - size: 443888649 - nfiles: 387 +- md5: 4e3d5d0013c41fca834ac18be36710fc.dir + size: 443967181 + nfiles: 388 path: raw hash: md5 diff --git a/datasets.py b/datasets.py index 36b5e828..b800acf9 100644 --- a/datasets.py +++ b/datasets.py @@ -448,6 +448,16 @@ def load_labels(self) -> pd.DataFrame: return df +class UgandaNorthCorrective2022(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + df = pd.read_csv(raw_dir / "Uganda_North_Corrective_2022.csv") + df.rename(columns={"latitude": LAT, "longitude": LON}, inplace=True) + df[CLASS_PROB] = (df["Wrong value"] == 0).astype(int) + df[START], df[END] = date(2022, 1, 1), date(2023, 7, 31) + df[SUBSET] = "training" + return df + + datasets: List[LabeledDataset] = [ CustomLabeledDataset( dataset="geowiki_landcover_2017", @@ -1232,6 +1242,7 @@ def load_labels(self) -> pd.DataFrame: Namibia_field_samples_22_23(), SudanGedarefDarfurAlJazirah2022(), Uganda_NorthCEO2022(), + UgandaNorthCorrective2022(), ] if __name__ == "__main__": From 85f85cb5714124f91a6e99920364a52935ad9132 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Jan 2024 23:33:35 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datasets.py b/datasets.py index b800acf9..546dc0af 100644 --- a/datasets.py +++ b/datasets.py @@ -456,8 +456,8 @@ def load_labels(self) -> pd.DataFrame: df[START], df[END] = date(2022, 1, 1), date(2023, 7, 31) df[SUBSET] = "training" return df - - + + datasets: List[LabeledDataset] = [ CustomLabeledDataset( dataset="geowiki_landcover_2017", From 99ff7da124dd5579fcdb2f3b256985ae83f22cd1 Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Mon, 8 Jan 2024 23:43:42 +0000 Subject: [PATCH 3/7] Automated dataset updates --- data/datasets.dvc | 6 +++--- data/report.txt | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 95f44e6e..ba1c8518 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 63c6f22459e60941aa459671fbff891b.dir - size: 671350235 - nfiles: 48 +- md5: 36e581b53fc4bd0b961382ee8c5c686c.dir + size: 671463296 + nfiles: 49 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index 35af2f70..95800526 100644 --- a/data/report.txt +++ b/data/report.txt @@ -422,3 +422,10 @@ eo_data_complete 1000 ✔ training amount: 387, positive class: 20.9% ✔ validation amount: 294, positive class: 23.1% ✔ testing amount: 319, positive class: 17.6% + + + +UgandaNorthCorrective2022 (Timesteps: 19) +---------------------------------------------------------------------------- +eo_data_exporting 615 +✖ training: 615 labels, but 0 features From d296a0bb9bcc5f0eb6a5b7eebfce2e6a72191f8a Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Tue, 9 Jan 2024 01:53:20 +0000 Subject: [PATCH 4/7] Automated dataset updates --- data/datasets.dvc | 4 ++-- data/report.txt | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index ba1c8518..edff34b4 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 36e581b53fc4bd0b961382ee8c5c686c.dir - size: 671463296 +- md5: dd2217dd68b7a30d1dec1222e15a2f7f.dir + size: 673116830 nfiles: 49 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index 95800526..c606bb41 100644 --- a/data/report.txt +++ b/data/report.txt @@ -427,5 +427,6 @@ eo_data_complete 1000 UgandaNorthCorrective2022 (Timesteps: 19) ---------------------------------------------------------------------------- -eo_data_exporting 615 -✖ training: 615 labels, but 0 features +eo_data_complete 387 +eo_data_exporting 228 +✖ training: 615 labels, but 387 features From 0ad9b9f2f1807d57c986ac71037cd86deca58fa2 Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Tue, 9 Jan 2024 04:26:14 +0000 Subject: [PATCH 5/7] Automated dataset updates --- data/datasets.dvc | 4 ++-- data/report.txt | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index edff34b4..d2c85aa0 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: dd2217dd68b7a30d1dec1222e15a2f7f.dir - size: 673116830 +- md5: 9041fef59c732efe6fa78ba3ac8a53fd.dir + size: 674094424 nfiles: 49 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index c606bb41..843232c7 100644 --- a/data/report.txt +++ b/data/report.txt @@ -427,6 +427,5 @@ eo_data_complete 1000 UgandaNorthCorrective2022 (Timesteps: 19) ---------------------------------------------------------------------------- -eo_data_complete 387 -eo_data_exporting 228 -✖ training: 615 labels, but 387 features +eo_data_complete 615 +✔ training amount: 615, positive class: 24.4% From b37e3081e5f06bf973169a9c76edff5cf433ed4c Mon Sep 17 00:00:00 2001 From: Abena Boatemaa Asare-Ansah Date: Mon, 8 Jan 2024 23:28:39 -0500 Subject: [PATCH 6/7] Trigger Build From 97bdbe7785ba5d8be49ad35aa45c99527ac8f20e Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 9 Jan 2024 04:56:55 +0000 Subject: [PATCH 7/7] Trained new: Uganda_North_2022_V2 --- data/all_dataset_params.json | 50 ++++++++++++++++++++++++++++++++++++ data/models.dvc | 6 ++--- data/models.json | 17 ++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/data/all_dataset_params.json b/data/all_dataset_params.json index fe410c2a..4c5e1ad8 100644 --- a/data/all_dataset_params.json +++ b/data/all_dataset_params.json @@ -808,6 +808,56 @@ 12 ] }, + "geowiki_landcover_2017,Kenya,Mali,Mali_lower_CEO_2019,Mali_upper_CEO_2019,Togo,Rwanda,Uganda,open_buildings,digitalearthafrica_eastern,digitalearthafrica_sahel,Ethiopia,Ethiopia_Tigray_2020,Ethiopia_Tigray_2021,Ethiopia_Bure_Jimma_2019,Ethiopia_Bure_Jimma_2020,Malawi_CEO_2020,Malawi_CEO_2019,Malawi_FAO,Malawi_FAO_corrected,Zambia_CEO_2019,Tanzania_CEO_2019,Namibia_corrective_labels_2020,Malawi_corrected,Namibia_CEO_2020,Namibia_WFP,Sudan_Blue_Nile_CEO_2019,Hawaii_CEO_2020,Senegal_CEO_2022,HawaiiAgriculturalLandUse2020,KenyaCEO2019,HawaiiCorrective2020,HawaiiCorrectiveGuided2020,MalawiCorrectiveLabels2020,SudanBlueNileCEO2020,SudanBlueNileCorrectiveLabels2019,EthiopiaTigrayCorrective2020,SudanAlGadarefCEO2019,MaliStratifiedCEO2019,SudanAlGadarefCEO2020,NamibiaNorthStratified2020,Namibia_field_samples_22_23,SudanGedarefDarfurAlJazirah2022,Uganda_NorthCEO2022,UgandaNorthCorrective2022_February_2022": { + "normalizing_dict": { + "mean": [ + -11.255516637559671, + -18.33725418405467, + 1436.4352778557773, + 1402.4272840659528, + 1426.5960525466035, + 1661.7485119083951, + 2419.643972667638, + 2797.8451262347635, + 2686.9292412186446, + 3059.8382332649467, + 839.7753738647574, + 2495.1758697170035, + 1659.2587797822239, + 290.3950519378234, + 0.004524519648932057, + 863.1242659143134, + 5.64862939589125, + 0.3389100980227688 + ], + "std": [ + 4.15337809763524, + 5.063815336282717, + 995.2868060874501, + 966.5722368300801, + 1161.837450348849, + 1109.9708883027095, + 1050.9170893461524, + 1116.5020691579969, + 1073.7932665528149, + 1145.8993288368395, + 654.7737839623924, + 1168.4268896644307, + 1057.7198269424493, + 35.65957791987571, + 0.013440213523815855, + 646.8263409336159, + 7.10792465117609, + 0.22962240493696942 + ] + }, + "train_num_timesteps": [ + 12 + ], + "val_num_timesteps": [ + 12 + ] + }, "geowiki_landcover_2017,Kenya,Mali,Mali_lower_CEO_2019,Mali_upper_CEO_2019,Togo,Rwanda,Uganda,open_buildings,digitalearthafrica_eastern,digitalearthafrica_sahel,Ethiopia,Ethiopia_Tigray_2020,Ethiopia_Tigray_2021,Ethiopia_Bure_Jimma_2019,Ethiopia_Bure_Jimma_2020,Malawi_CEO_2020,Malawi_CEO_2019,Malawi_FAO,Malawi_FAO_corrected,Zambia_CEO_2019,Tanzania_CEO_2019,Namibia_corrective_labels_2020,Malawi_corrected,Namibia_CEO_2020,Namibia_WFP,Sudan_Blue_Nile_CEO_2019,Hawaii_CEO_2020,Senegal_CEO_2022,HawaiiAgriculturalLandUse2020,KenyaCEO2019,HawaiiCorrective2020,HawaiiCorrectiveGuided2020,MalawiCorrectiveLabels2020,SudanBlueNileCEO2020,SudanBlueNileCorrectiveLabels2019,EthiopiaTigrayCorrective2020,SudanAlGadarefCEO2019,MaliStratifiedCEO2019,SudanAlGadarefCEO2020,NamibiaNorthStratified2020,Namibia_field_samples_22_23,SudanGedarefDarfurAlJazirah2022_February_2022": { "normalizing_dict": { "mean": [ diff --git a/data/models.dvc b/data/models.dvc index 4cba6db6..e558af3e 100644 --- a/data/models.dvc +++ b/data/models.dvc @@ -1,6 +1,6 @@ outs: -- md5: f6d9020a5f9aa7b02a6982af93c571f1.dir - size: 77720213 - nfiles: 60 +- md5: d2b001db28f37466ed4a787d483319a2.dir + size: 79241780 + nfiles: 62 path: models hash: md5 diff --git a/data/models.json b/data/models.json index 1d71e9a9..d2dbacc9 100644 --- a/data/models.json +++ b/data/models.json @@ -322,6 +322,23 @@ "roc_auc_score": 0.8647 } }, + "Uganda_North_2022_V2": { + "params": "https://wandb.ai/nasa-harvest/crop-mask/runs/0ngyleht", + "test_metrics": { + "accuracy": 0.6388, + "f1_score": 0.5341, + "precision_score": 0.3917, + "recall_score": 0.8393, + "roc_auc_score": 0.8004 + }, + "val_metrics": { + "accuracy": 0.6886, + "f1_score": 0.6321, + "precision_score": 0.488, + "recall_score": 0.8971, + "roc_auc_score": 0.8259 + } + }, "Zambia_2019": { "params": "https://wandb.ai/nasa-harvest/crop-mask/runs/rnhjdn44", "test_metrics": {