From d083629e615e928bf021b7573147a2e17a6c4307 Mon Sep 17 00:00:00 2001 From: yashgadhiya10 Date: Wed, 20 Aug 2025 12:34:41 -0400 Subject: [PATCH 1/3] Added Uganda North 2024 dataset --- data/raw.dvc | 6 +++--- datasets.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/data/raw.dvc b/data/raw.dvc index 1c473283..d34c5cbc 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: ac9574651c4522bf6fb2b9b80cd3ac88.dir - size: 447837070 - nfiles: 415 +- md5: 2c38b4cf61ebe96c70d6a88e5d75708f.dir + size: 448189015 + nfiles: 417 path: raw hash: md5 diff --git a/datasets.py b/datasets.py index 05ccec15..33310cb6 100644 --- a/datasets.py +++ b/datasets.py @@ -634,6 +634,37 @@ def load_labels(self) -> pd.DataFrame: df[START], df[END] = date(2023, 1, 1), date(2024, 12, 31) df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df + +class Uganda_NorthCEO2024(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + raw_folder = raw_dir / "Uganda_North_2024" + df1 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2024---Feb-2025-(Set-1)-sample-data-2025-08-19.csv" + ) + df2 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2024---Feb-2025-(Set-2)-sample-data-2025-08-19.csv" + ) + df = pd.concat([df1, df2]) + + # Discard rows with no label + df = df[~df["Does this pixel contain active cropland?"].isna()].copy() + df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop" + df[CLASS_PROB] = df[CLASS_PROB].astype(int) + df["num_labelers"] = 1 + df = df.groupby([LON, LAT], as_index=False, sort=False).agg( + { + CLASS_PROB: "mean", + "num_labelers": "sum", + "plotid": join_unique, + "sampleid": join_unique, + "email": join_unique, + } + ) + df[START], df[END] = date(2024, 1, 1), date(2025, 4, 30) + df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) + return df class Uganda_NorthCEO2021(LabeledDataset): @@ -1683,6 +1714,7 @@ def load_labels(self) -> pd.DataFrame: Uganda_NorthCEO2018(), UgandaNorthCorLabel2019(), Uganda_NorthCEO2023(), + Uganda_NorthCEO2024(), ] if __name__ == "__main__": From 4036b2ae30b32490ad8934285b9fbb1c3b15cffe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:37:23 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datasets.py b/datasets.py index 33310cb6..3ace9c41 100644 --- a/datasets.py +++ b/datasets.py @@ -634,7 +634,8 @@ def load_labels(self) -> pd.DataFrame: df[START], df[END] = date(2023, 1, 1), date(2024, 12, 31) df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df - + + class Uganda_NorthCEO2024(LabeledDataset): def load_labels(self) -> pd.DataFrame: raw_folder = raw_dir / "Uganda_North_2024" From f40022be3f245ba0e6ff7aa3403be99f773f4305 Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Wed, 20 Aug 2025 16:38:23 +0000 Subject: [PATCH 3/3] Automated dataset updates --- data/datasets.dvc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index d8577b78..05adc98b 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: f970b4180d74216cbbd2b58d15ccc45d.dir - size: 775527775 - nfiles: 66 +- md5: a56f9d5d558d11227f5f46a9bcf8104f.dir + size: 775656745 + nfiles: 67 path: datasets hash: md5