diff --git a/data/all_dataset_params.json b/data/all_dataset_params.json index fe410c2a..4c5e1ad8 100644 --- a/data/all_dataset_params.json +++ b/data/all_dataset_params.json @@ -808,6 +808,56 @@ 12 ] }, + "geowiki_landcover_2017,Kenya,Mali,Mali_lower_CEO_2019,Mali_upper_CEO_2019,Togo,Rwanda,Uganda,open_buildings,digitalearthafrica_eastern,digitalearthafrica_sahel,Ethiopia,Ethiopia_Tigray_2020,Ethiopia_Tigray_2021,Ethiopia_Bure_Jimma_2019,Ethiopia_Bure_Jimma_2020,Malawi_CEO_2020,Malawi_CEO_2019,Malawi_FAO,Malawi_FAO_corrected,Zambia_CEO_2019,Tanzania_CEO_2019,Namibia_corrective_labels_2020,Malawi_corrected,Namibia_CEO_2020,Namibia_WFP,Sudan_Blue_Nile_CEO_2019,Hawaii_CEO_2020,Senegal_CEO_2022,HawaiiAgriculturalLandUse2020,KenyaCEO2019,HawaiiCorrective2020,HawaiiCorrectiveGuided2020,MalawiCorrectiveLabels2020,SudanBlueNileCEO2020,SudanBlueNileCorrectiveLabels2019,EthiopiaTigrayCorrective2020,SudanAlGadarefCEO2019,MaliStratifiedCEO2019,SudanAlGadarefCEO2020,NamibiaNorthStratified2020,Namibia_field_samples_22_23,SudanGedarefDarfurAlJazirah2022,Uganda_NorthCEO2022,UgandaNorthCorrective2022_February_2022": { + "normalizing_dict": { + "mean": [ + -11.255516637559671, + -18.33725418405467, + 1436.4352778557773, + 1402.4272840659528, + 1426.5960525466035, + 1661.7485119083951, + 2419.643972667638, + 2797.8451262347635, + 2686.9292412186446, + 3059.8382332649467, + 839.7753738647574, + 2495.1758697170035, + 1659.2587797822239, + 290.3950519378234, + 0.004524519648932057, + 863.1242659143134, + 5.64862939589125, + 0.3389100980227688 + ], + "std": [ + 4.15337809763524, + 5.063815336282717, + 995.2868060874501, + 966.5722368300801, + 1161.837450348849, + 1109.9708883027095, + 1050.9170893461524, + 1116.5020691579969, + 1073.7932665528149, + 1145.8993288368395, + 654.7737839623924, + 1168.4268896644307, + 1057.7198269424493, + 35.65957791987571, + 0.013440213523815855, + 646.8263409336159, + 7.10792465117609, + 0.22962240493696942 + ] + }, + "train_num_timesteps": [ + 12 + ], + "val_num_timesteps": [ + 12 + ] + }, "geowiki_landcover_2017,Kenya,Mali,Mali_lower_CEO_2019,Mali_upper_CEO_2019,Togo,Rwanda,Uganda,open_buildings,digitalearthafrica_eastern,digitalearthafrica_sahel,Ethiopia,Ethiopia_Tigray_2020,Ethiopia_Tigray_2021,Ethiopia_Bure_Jimma_2019,Ethiopia_Bure_Jimma_2020,Malawi_CEO_2020,Malawi_CEO_2019,Malawi_FAO,Malawi_FAO_corrected,Zambia_CEO_2019,Tanzania_CEO_2019,Namibia_corrective_labels_2020,Malawi_corrected,Namibia_CEO_2020,Namibia_WFP,Sudan_Blue_Nile_CEO_2019,Hawaii_CEO_2020,Senegal_CEO_2022,HawaiiAgriculturalLandUse2020,KenyaCEO2019,HawaiiCorrective2020,HawaiiCorrectiveGuided2020,MalawiCorrectiveLabels2020,SudanBlueNileCEO2020,SudanBlueNileCorrectiveLabels2019,EthiopiaTigrayCorrective2020,SudanAlGadarefCEO2019,MaliStratifiedCEO2019,SudanAlGadarefCEO2020,NamibiaNorthStratified2020,Namibia_field_samples_22_23,SudanGedarefDarfurAlJazirah2022_February_2022": { "normalizing_dict": { "mean": [ diff --git a/data/datasets.dvc b/data/datasets.dvc index 95f44e6e..d2c85aa0 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 63c6f22459e60941aa459671fbff891b.dir - size: 671350235 - nfiles: 48 +- md5: 9041fef59c732efe6fa78ba3ac8a53fd.dir + size: 674094424 + nfiles: 49 path: datasets hash: md5 diff --git a/data/models.dvc b/data/models.dvc index 4cba6db6..e558af3e 100644 --- a/data/models.dvc +++ b/data/models.dvc @@ -1,6 +1,6 @@ outs: -- md5: f6d9020a5f9aa7b02a6982af93c571f1.dir - size: 77720213 - nfiles: 60 +- md5: d2b001db28f37466ed4a787d483319a2.dir + size: 79241780 + nfiles: 62 path: models hash: md5 diff --git a/data/models.json b/data/models.json index 1d71e9a9..d2dbacc9 100644 --- a/data/models.json +++ b/data/models.json @@ -322,6 +322,23 @@ "roc_auc_score": 0.8647 } }, + "Uganda_North_2022_V2": { + "params": "https://wandb.ai/nasa-harvest/crop-mask/runs/0ngyleht", + "test_metrics": { + "accuracy": 0.6388, + "f1_score": 0.5341, + "precision_score": 0.3917, + "recall_score": 0.8393, + "roc_auc_score": 0.8004 + }, + "val_metrics": { + "accuracy": 0.6886, + "f1_score": 0.6321, + "precision_score": 0.488, + "recall_score": 0.8971, + "roc_auc_score": 0.8259 + } + }, "Zambia_2019": { "params": "https://wandb.ai/nasa-harvest/crop-mask/runs/rnhjdn44", "test_metrics": { diff --git a/data/raw.dvc b/data/raw.dvc index f27b2414..1af2f597 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: c033b32e0901a197d9f743c8329b24ae.dir - size: 443888649 - nfiles: 387 +- md5: 4e3d5d0013c41fca834ac18be36710fc.dir + size: 443967181 + nfiles: 388 path: raw hash: md5 diff --git a/data/report.txt b/data/report.txt index 35af2f70..843232c7 100644 --- a/data/report.txt +++ b/data/report.txt @@ -422,3 +422,10 @@ eo_data_complete 1000 ✔ training amount: 387, positive class: 20.9% ✔ validation amount: 294, positive class: 23.1% ✔ testing amount: 319, positive class: 17.6% + + + +UgandaNorthCorrective2022 (Timesteps: 19) +---------------------------------------------------------------------------- +eo_data_complete 615 +✔ training amount: 615, positive class: 24.4% diff --git a/datasets.py b/datasets.py index 36b5e828..546dc0af 100644 --- a/datasets.py +++ b/datasets.py @@ -448,6 +448,16 @@ def load_labels(self) -> pd.DataFrame: return df +class UgandaNorthCorrective2022(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + df = pd.read_csv(raw_dir / "Uganda_North_Corrective_2022.csv") + df.rename(columns={"latitude": LAT, "longitude": LON}, inplace=True) + df[CLASS_PROB] = (df["Wrong value"] == 0).astype(int) + df[START], df[END] = date(2022, 1, 1), date(2023, 7, 31) + df[SUBSET] = "training" + return df + + datasets: List[LabeledDataset] = [ CustomLabeledDataset( dataset="geowiki_landcover_2017", @@ -1232,6 +1242,7 @@ def load_labels(self) -> pd.DataFrame: Namibia_field_samples_22_23(), SudanGedarefDarfurAlJazirah2022(), Uganda_NorthCEO2022(), + UgandaNorthCorrective2022(), ] if __name__ == "__main__":