From bd34f48d69083223529321985cf3b8b353113718 Mon Sep 17 00:00:00 2001
From: Ooberaj
Date: Tue, 29 Mar 2022 00:04:48 -0700
Subject: [PATCH] Add sugarbeet_weed_segmentation_europe dataset

---
 agml/_assets/public_datasources.json | 20 ++++++++
 agml/_assets/source_citations.json   |  4 ++
 agml/_internal/preprocess.py         | 76 ++++++++++++++++++++++++++--
 agml/_internal/process_utils.py      | 36 +++++++++++++
 4 files changed, 132 insertions(+), 4 deletions(-)

diff --git a/agml/_assets/public_datasources.json b/agml/_assets/public_datasources.json
index fa31ef876..4dd965bb3 100644
--- a/agml/_assets/public_datasources.json
+++ b/agml/_assets/public_datasources.json
@@ -919,5 +919,25 @@
                 0.22038120031356812
             ]
         }
+    },
+    "sugarbeet_weed_segmentation_europe": {
+        "classes": {
+            "1": "background",
+            "2": "sugarbeet",
+            "3": "weed"
+        },
+        "ml_task": "semantic_segmentation",
+        "ag_task": "weed_segmentation",
+        "location": {
+            "continent": "Europe",
+            "country": "Switzerland and Germany"
+        },
+        "sensor_modality": "multispectral",
+        "platform": "uav",
+        "input_data_format": "png",
+        "annotation_format": "image",
+        "n_images": "11971",
+        "docs_url": "https://projects.asl.ethz.ch/datasets/doku.php?id=weedmap:remotesensing2018weedmap#dataset_summary",
+        "external_image_sources": ["rgb-images", "g-images", "b-images", "cir-images", "ndvi-images", "nir-images", "re-images", "binary_masks-images"]
     }
 }
\ No newline at end of file
diff --git a/agml/_assets/source_citations.json b/agml/_assets/source_citations.json
index d13ba9433..89083ca57 100644
--- a/agml/_assets/source_citations.json
+++ b/agml/_assets/source_citations.json
@@ -102,5 +102,9 @@
     "plant_doc_detection": {
         "license": "CC BY-SA 4.0",
         "citation": "@inproceedings{10.1145/3371158.3371196,\n author = {Singh, Davinder and Jain, Naman and Jain, Pranjali and Kayal, Pratik and Kumawat, Sudhakar and Batra, Nipun},\n title = {PlantDoc: A Dataset for Visual Plant Disease Detection},\n year = {2020},\n isbn = {9781450377386},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n url = {https://doi.org/10.1145/3371158.3371196},\n doi = {10.1145/3371158.3371196},\n booktitle = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD},\n pages = {249–253},\n numpages = {5},\n keywords = {Deep Learning, Object Detection, Image Classification},\n location = {Hyderabad, India},\n series = {CoDS COMAD 2020}\n }"
+    },
+    "sugarbeet_weed_segmentation_europe": {
+        "license": "CC BY-SA 4.0",
+        "citation": "@ARTICLE{weedMap-2018, \n author={I. Sa and M. Popovic and R. Khanna and Z. Chen and P. Lottes and F. Liebisch and J. Nieto and C. Stachniss and A. Walter and R. Siegwart}, \n journal={MDPI Remote Sensing}, \n title={WeedMap: A large-scale semantic weed mapping framework using aerial multispectral imaging and deep neural network for precision farming}, \n year={2018}, \n volume={10}, \n number={9}, \n doi={10.3390/rs10091423}, \n month={Aug}}"
     }
 }
\ No newline at end of file
diff --git a/agml/_internal/preprocess.py b/agml/_internal/preprocess.py
index 91b6e291a..611ffae19 100644
--- a/agml/_internal/preprocess.py
+++ b/agml/_internal/preprocess.py
@@ -41,7 +41,7 @@
     read_txt_file, get_image_info, get_label2id,
     convert_bbox_to_coco, get_coco_annotation_from_obj, convert_xmls_to_cocojson,
     mask_annotation_per_bbox, move_segmentation_dataset,
-    create_sub_masks, create_sub_mask_annotation_per_bbox
+    create_sub_masks, create_sub_mask_annotation_per_bbox, rgb2mask
 )
 
 
@@ -815,10 +815,78 @@ def plant_doc_detection(self, dataset_name):
             output_imgpath = output_img_path,
             extract_num_from_imgid=False
         )
+
+    def sugarbeet_weed_segmentation_europe(self, dataset_name):
+        """Preprocess the WeedMap sugarbeet/weed tiles.
+
+        Copies the tiles found under `Tiles/` into one output directory
+        per image type and converts every `*_color.png` ground truth
+        image into a mask of class indices under `annotations/`.
+        """
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+        tiles_dir = os.path.join(dataset_dir, 'Tiles')
+
+        # Name of a directory under `Tiles/` -> output directory that
+        # receives its files. `R` tiles go to the plain `images` folder.
+        channel_dirs = {
+            'RGB': 'rgb-images', 'R': 'images', 'G': 'g-images',
+            'B': 'b-images', 'CIR': 'cir-images', 'NDVI': 'ndvi-images',
+            'NIR': 'nir-images', 'RE': 're-images',
+        }
+        mask_dir = 'binary_masks-images'
+        copies, rgb_masks = [], []
+
+        # Get image paths for each type of image
+        for root, _, files in os.walk(tiles_dir):
+            # Split on the platform separator, not a literal '/', so
+            # that the directory names are also found on Windows.
+            parts = os.path.normpath(root).split(os.sep)
+            dir_ = parts[-1]
+            if dir_ == 'groundtruth':
+                rgb_masks.extend(
+                    (os.path.join(root, file), file)
+                    for file in sorted(files)
+                    if file.split('_')[-1] == 'color.png')
+                continue
+            # Output names are prefixed with an ancestor folder's name:
+            # two levels up for channel folders, one for `mask`.
+            if dir_ in channel_dirs:
+                out_dir, prefix = channel_dirs[dir_], parts[-3]
+            elif dir_ == 'mask':
+                out_dir, prefix = mask_dir, parts[-2]
+            else:
+                continue
+            copies.extend(
+                (os.path.join(root, file), out_dir, prefix + file)
+                for file in sorted(files))
+
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        processed_annotation_dir = os.path.join(processed_dir, 'annotations')
+        os.makedirs(processed_annotation_dir, exist_ok = True)
+        # Every output directory is created, even if it stays empty.
+        for out_dir in list(channel_dirs.values()) + [mask_dir]:
+            os.makedirs(os.path.join(processed_dir, out_dir), exist_ok = True)
+        for src, out_dir, name in copies:
+            shutil.copyfile(src, os.path.join(processed_dir, out_dir, name))
+
+        # Keys are in (b, g, r) order, the channel order `cv2.imread` returns.
+        # NOTE(review): public_datasources.json numbers the classes 1-3
+        # while the masks written here use 0-2 -- confirm which is meant.
+        color2index = {
+            (0, 0, 0) : 0, # black is background
+            (0, 255, 0) : 1, # green is sugarbeet
+            (0, 0, 255) : 2, # red is weed
+        }
 
-
-
-
+        for mask_path, mask_file in rgb_masks:
+            rgb_mask_img = cv2.imread(mask_path)
+            if rgb_mask_img is None:
+                raise ValueError(f'Could not read mask image: {mask_path}')
+            index_mask = rgb2mask(rgb_mask_img, color2index)
+            name_parts = mask_file.split('_')
+            mask_name = name_parts[0] + name_parts[1] + ".png"
+            anno_out = os.path.join(processed_annotation_dir, mask_name)
+            cv2.imwrite(anno_out, index_mask)
 
 
 
diff --git a/agml/_internal/process_utils.py b/agml/_internal/process_utils.py
index 3dc0154f5..db6653a42 100644
--- a/agml/_internal/process_utils.py
+++ b/agml/_internal/process_utils.py
@@ -568,3 +568,39 @@ def move_segmentation_dataset(
             shutil.copyfile(orig_annotation_path, out_label_path)
         else:
             annotation_preprocess_fn(orig_annotation_path, out_label_path)
+
+
+def rgb2mask(img, color2index):
+    '''
+    Convert a 3-channel color image to a mask of class indices.
+
+    Arguments:
+        img: array of shape (height, width, 3) whose channels are in
+            the same order as the keys of `color2index`.
+        color2index: dictionary. key: tuple containing color values
+            (b, g, r). value: corresponding index.
+    Returns:
+        a (height, width) array with the index assigned to each pixel.
+        Pixels whose color is not a key of `color2index` are left at 0.
+    Raises:
+        ValueError: if `img` is not a 3-dimensional, 3-channel array.
+    Source: https://stackoverflow.com/a/62170172
+    '''
+    if len(img.shape) != 3 or img.shape[2] != 3:
+        raise ValueError(
+            f'Expected an image of shape (height, width, 3), got {img.shape}')
+
+    # Fold the three channel values of a pixel into one integer so
+    # each distinct color can be selected with a single comparison.
+    W = np.power(256, [[0],[1],[2]])
+    img_id = img.dot(W).squeeze(-1)
+
+    mask = np.zeros(img_id.shape)
+    for c in np.unique(img_id):
+        selected = img_id == c
+        color = tuple(img[selected][0])
+        # Only colors missing from the mapping are skipped; any other
+        # error now propagates instead of being silently swallowed.
+        if color in color2index:
+            mask[selected] = color2index[color]
+    return mask
\ No newline at end of file