From 9d071f72c9d224e3b8fe4c8e31dc70774f8386dc Mon Sep 17 00:00:00 2001 From: yashgadhiya10 Date: Wed, 20 Aug 2025 12:11:48 -0400 Subject: [PATCH 1/2] Created new dataset Uganda 2024 --- datasets.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/datasets.py b/datasets.py index 05ccec15..980bae59 100644 --- a/datasets.py +++ b/datasets.py @@ -635,6 +635,37 @@ def load_labels(self) -> pd.DataFrame: df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df +class Uganda_NorthCEO2024(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + raw_folder = raw_dir / "Uganda_North_2024" + df1 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2024---Feb-2025-(Set-1)-sample-data-2025-08-19.csv" + ) + df2 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2024---Feb-2025-(Set-2)-sample-data-2025-08-19.csv" + ) + df = pd.concat([df1, df2]) + + # Discard rows with no label + df = df[~df["Does this pixel contain active cropland?"].isna()].copy() + df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop" + df[CLASS_PROB] = df[CLASS_PROB].astype(int) + df["num_labelers"] = 1 + df = df.groupby([LON, LAT], as_index=False, sort=False).agg( + { + CLASS_PROB: "mean", + "num_labelers": "sum", + "plotid": join_unique, + "sampleid": join_unique, + "email": join_unique, + } + ) + df[START], df[END] = date(2024, 1, 1), date(2025, 4, 30) + df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) + return df + class Uganda_NorthCEO2021(LabeledDataset): def load_labels(self) -> pd.DataFrame: @@ -1683,6 +1714,7 @@ def load_labels(self) -> pd.DataFrame: Uganda_NorthCEO2018(), UgandaNorthCorLabel2019(), Uganda_NorthCEO2023(), + Uganda_NorthCEO2024(), ] if __name__ == "__main__": From ecd29e975a736c0008642b0f60abb5c50667613e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:15:02 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets.py b/datasets.py index 980bae59..3ace9c41 100644 --- a/datasets.py +++ b/datasets.py @@ -635,6 +635,7 @@ def load_labels(self) -> pd.DataFrame: df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df + class Uganda_NorthCEO2024(LabeledDataset): def load_labels(self) -> pd.DataFrame: raw_folder = raw_dir / "Uganda_North_2024"