From bd34f48d69083223529321985cf3b8b353113718 Mon Sep 17 00:00:00 2001
From: Ooberaj <yuvrajvirk@campus-028-163.ucdavis.edu>
Date: Tue, 29 Mar 2022 00:04:48 -0700
Subject: [PATCH] Add sugarbeet_weed_segmentation_europe dataset

---
 agml/_assets/public_datasources.json | 20 ++++++++
 agml/_assets/source_citations.json   |  4 ++
 agml/_internal/preprocess.py         | 70 ++++++++++++++++++++++++++--
 agml/_internal/process_utils.py      | 28 +++++++++++
 4 files changed, 118 insertions(+), 4 deletions(-)

diff --git a/agml/_assets/public_datasources.json b/agml/_assets/public_datasources.json
index fa31ef876..4dd965bb3 100644
--- a/agml/_assets/public_datasources.json
+++ b/agml/_assets/public_datasources.json
@@ -919,5 +919,25 @@
                 0.22038120031356812
             ]
         }
+    },
+    "sugarbeet_weed_segmentation_europe": {
+        "classes": {
+            "1": "background",
+            "2": "sugarbeet",
+            "3": "weed"
+        },
+        "ml_task": "semantic_segmentation",
+        "ag_task": "weed_segmentation",
+        "location": {
+            "continent": "Europe",
+            "country": "Switzerland and Germany"
+        },
+        "sensor_modality": "multispectral",
+        "platform": "uav",
+        "input_data_format": "png",
+        "annotation_format": "image",
+        "n_images": "11971",
+        "docs_url": "https://projects.asl.ethz.ch/datasets/doku.php?id=weedmap:remotesensing2018weedmap#dataset_summary",
+        "external_image_sources": ["rgb-images", "g-images", "b-images", "cir-images", "ndvi-images", "nir-images", "re-images", "binary_masks-images"]
     }
 }
\ No newline at end of file
diff --git a/agml/_assets/source_citations.json b/agml/_assets/source_citations.json
index d13ba9433..89083ca57 100644
--- a/agml/_assets/source_citations.json
+++ b/agml/_assets/source_citations.json
@@ -102,5 +102,9 @@
     "plant_doc_detection": {
         "license": "CC BY-SA 4.0",
         "citation": "@inproceedings{10.1145/3371158.3371196,\n  author = {Singh, Davinder and Jain, Naman and Jain, Pranjali and Kayal, Pratik and Kumawat, Sudhakar and Batra, Nipun},\n  title = {PlantDoc: A Dataset for Visual Plant Disease Detection},\n  year = {2020},\n  isbn = {9781450377386},\n  publisher = {Association for Computing Machinery},\n  address = {New York, NY, USA},\n  url = {https://doi.org/10.1145/3371158.3371196},\n  doi = {10.1145/3371158.3371196},\n  booktitle = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD},\n  pages = {249–253},\n  numpages = {5},\n  keywords = {Deep Learning, Object Detection, Image Classification},\n  location = {Hyderabad, India},\n  series = {CoDS COMAD 2020}\n  }"
+    },
+    "sugarbeet_weed_segmentation_europe": {
+        "license": "CC BY-SA 4.0",
+        "citation": "@ARTICLE{weedMap-2018, \n  author={I. Sa and M. Popovic and R. Khanna and Z. Chen and P. Lottes and F. Liebisch and J. Nieto and C. Stachniss and A. Walter and R. Siegwart}, \n  journal={MDPI Remote Sensing}, \n  title={WeedMap: A large-scale semantic weed mapping framework using aerial multispectral imaging and deep neural network for precision farming}, \n  year={2018}, \n  volume={10}, \n  number={9}, \n  doi={10.3390/rs10091423}, \n  month={Aug}}"
     }
 }
\ No newline at end of file
diff --git a/agml/_internal/preprocess.py b/agml/_internal/preprocess.py
index 91b6e291a..611ffae19 100644
--- a/agml/_internal/preprocess.py
+++ b/agml/_internal/preprocess.py
@@ -41,7 +41,7 @@
     read_txt_file, get_image_info, get_label2id,
     convert_bbox_to_coco, get_coco_annotation_from_obj, convert_xmls_to_cocojson,
     mask_annotation_per_bbox, move_segmentation_dataset,
-    create_sub_masks, create_sub_mask_annotation_per_bbox
+    create_sub_masks, create_sub_mask_annotation_per_bbox, rgb2mask
 )
 
 
@@ -815,10 +815,72 @@ def plant_doc_detection(self, dataset_name):
             output_imgpath = output_img_path,
             extract_num_from_imgid=False
         )
+        
+    def sugarbeet_weed_segmentation_europe(self, dataset_name):
+        """Copy the per-channel tiles and build index masks for this dataset.
+
+        Walks `<data_original_dir>/<dataset_name>/Tiles`, copies the files of
+        every channel folder into one output folder per channel, and converts
+        each `*_color.png` ground truth image into an index mask that is
+        written to the `annotations` folder.
+        """
+        tiles_dir = os.path.join(self.data_original_dir, dataset_name, 'Tiles')
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        processed_annotation_dir = os.path.join(processed_dir, 'annotations')
+
+        # Source folder name -> output folder name (`R` is stored as `images`).
+        output_dirs = {
+            'RGB': 'rgb-images', 'R': 'images', 'G': 'g-images',
+            'B': 'b-images', 'CIR': 'cir-images', 'NDVI': 'ndvi-images',
+            'NIR': 'nir-images', 'RE': 're-images',
+            'mask': 'binary_masks-images',
+        }
+        copies = {out_dir: [] for out_dir in output_dirs.values()}
+        rgb_masks = []
+
+        # Collect (source path, output file name) pairs for every folder type.
+        for root, _, files in os.walk(tiles_dir):
+            # Use `os.path` rather than splitting on '/' so any separator works.
+            parent_dir, dir_ = os.path.split(root)
+            if dir_ == 'groundtruth':
+                # Only the color-coded ground truth images are converted.
+                rgb_masks.extend(
+                    (os.path.join(root, file), file)
+                    for file in sorted(files)
+                    if file.split('_')[-1] == 'color.png')
+                continue
+            if dir_ not in output_dirs:
+                continue
+            # Prefix names with an enclosing folder name to keep them unique:
+            # the parent folder for masks, the grandparent for channels.
+            if dir_ != 'mask':
+                parent_dir = os.path.dirname(parent_dir)
+            prefix = os.path.basename(parent_dir)
+            copies[output_dirs[dir_]].extend(
+                (os.path.join(root, file), prefix + file)
+                for file in sorted(files))
 
+        os.makedirs(processed_dir, exist_ok=True)
+        os.makedirs(processed_annotation_dir, exist_ok=True)
+        for out_dir, items in copies.items():
+            processed_image_dir = os.path.join(processed_dir, out_dir)
+            os.makedirs(processed_image_dir, exist_ok=True)
+            for src_path, unique_name in items:
+                shutil.copyfile(
+                    src_path, os.path.join(processed_image_dir, unique_name))
 
+        # Keys follow the (b, g, r) channel order returned by `cv2.imread`.
+        # NOTE(review): the datasource JSON numbers classes 1-3, these are 0-2.
+        color2index = {
+            (0, 0, 0): 0,  # black is background
+            (0, 255, 0): 1,  # green is sugarbeet
+            (0, 0, 255): 2,  # red is weed
+        }
 
-
-
-
+        for mask_path, mask_file in rgb_masks:
+            index_mask = rgb2mask(cv2.imread(mask_path), color2index)
+            name_parts = mask_file.split('_')
+            mask_name = name_parts[0] + name_parts[1] + ".png"
+            anno_out = os.path.join(processed_annotation_dir, mask_name)
+            cv2.imwrite(anno_out, index_mask)
 
diff --git a/agml/_internal/process_utils.py b/agml/_internal/process_utils.py
index 3dc0154f5..db6653a42 100644
--- a/agml/_internal/process_utils.py
+++ b/agml/_internal/process_utils.py
@@ -568,3 +568,31 @@ def move_segmentation_dataset(
             shutil.copyfile(orig_annotation_path, out_label_path)
         else:
             annotation_preprocess_fn(orig_annotation_path, out_label_path)
+
+def rgb2mask(img, color2index):
+    """Convert a 3-channel color image into a 2D mask of class indices.
+
+    Arguments:
+        img: array of shape (height, width, 3); channel order must match the keys.
+        color2index: dict mapping a color tuple, e.g. (b, g, r), to its class index.
+    Returns:
+        a float array of shape (height, width); unmapped colors are left as 0.
+    Source: https://stackoverflow.com/a/62170172
+    """
+    assert len(img.shape) == 3
+    assert img.shape[2] == 3
+
+    # Collapse each pixel's three channels into one integer id (base-256 digits)
+    # so that distinct colors can be enumerated with a single `np.unique` call.
+    W = np.power(256, [[0], [1], [2]])
+    img_id = img.dot(W).squeeze(-1)
+
+    mask = np.zeros(img_id.shape)
+    for color_id in np.unique(img_id):
+        selected = img_id == color_id
+        color = tuple(img[selected][0].tolist())
+        try:
+            mask[selected] = color2index[color]
+        except KeyError:
+            pass  # color has no class assigned: keep the default index 0
+    return mask
\ No newline at end of file