class Uganda_NorthCEO2024(LabeledDataset):
    """Labeled dataset built from the two "Uganda_North_2024" CEO sample-data exports."""

    def load_labels(self) -> pd.DataFrame:
        """Load both CEO exports and collapse them to one labeled row per location.

        Rows without an answer to the cropland question are dropped. The
        remaining answers are encoded as 1 for "Crop" and 0 otherwise, then
        rows sharing the same (LON, LAT) are merged: the encoded answers are
        averaged and the number of contributing rows is summed.

        Returns:
            A DataFrame with one row per (LON, LAT) pair holding the
            aggregated label, labeler count, combined plot/sample/email
            identifiers, a fixed START/END date window, and a SUBSET column.
        """
        question = "Does this pixel contain active cropland?"
        source_dir = raw_dir / "Uganda_North_2024"
        csv_names = (
            "ceo-UNHCR-North-Uganda-Feb-2024---Feb-2025-(Set-1)-sample-data-2025-08-19.csv",
            "ceo-UNHCR-North-Uganda-Feb-2024---Feb-2025-(Set-2)-sample-data-2025-08-19.csv",
        )
        samples = pd.concat([pd.read_csv(source_dir / name) for name in csv_names])

        # Keep only the rows where the cropland question was answered.
        answered = samples[samples[question].notna()].copy()
        answered[CLASS_PROB] = (answered[question] == "Crop").astype(int)
        # Each raw row counts as one labeler; summed per location below.
        answered["num_labelers"] = 1

        aggregations = {
            CLASS_PROB: "mean",
            "num_labelers": "sum",
            # join_unique is defined elsewhere in this module; presumably it
            # concatenates the distinct values of each group - confirm there.
            "plotid": join_unique,
            "sampleid": join_unique,
            "email": join_unique,
        }
        labels = answered.groupby([LON, LAT], as_index=False, sort=False).agg(aggregations)

        labels[START] = date(2024, 1, 1)
        labels[END] = date(2025, 4, 30)
        # NOTE(review): the two 0.3 arguments look like validation/test
        # fractions - confirm against train_val_test_split.
        labels[SUBSET] = train_val_test_split(labels.index, 0.3, 0.3)
        return labels
Uganda_NorthCEO2018(), UgandaNorthCorLabel2019(), Uganda_NorthCEO2023(), + Uganda_NorthCEO2024(), ] if __name__ == "__main__":