From 744949698cf6b900d02d7cdda697032eb6b10afa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Thu, 27 Mar 2025 00:45:06 +0100 Subject: [PATCH 1/3] Changing CSV annotations uploads and fixing sources delete --- bigmler/delete/dispatcher.py | 6 ++- bigmler/processing/annotations.py | 81 ++++++++----------------------- bigmler/processing/args.py | 3 +- bigmler/resourcesapi/sources.py | 16 ++---- bigmler/utils.py | 2 +- 5 files changed, 30 insertions(+), 78 deletions(-) diff --git a/bigmler/delete/dispatcher.py b/bigmler/delete/dispatcher.py index 844d325d..4b8b033a 100644 --- a/bigmler/delete/dispatcher.py +++ b/bigmler/delete/dispatcher.py @@ -346,7 +346,6 @@ def delete_resources(command_args, api, deleted_list=None, step=0): res_type for res_type in resource_types if res_type in types] else: command_args.resource_types_ = types - if command_args.resource_types_: delete_list = [] # by ids @@ -479,6 +478,11 @@ def delete_resources(command_args, api, deleted_list=None, step=0): deleted_list.extend(delete_list) delete_resources(command_args, api, deleted_list=deleted_list, step=step + 1) + else: + # no resources to delete in that group. 
Try the next + if step < 2: + delete_resources(command_args, api, deleted_list=deleted_list, + step=step + 1) if step == 2 and deleted_list: u.print_generated_files(path, log_file=session_file, diff --git a/bigmler/processing/annotations.py b/bigmler/processing/annotations.py index 4b1aa37e..5d97f6cd 100644 --- a/bigmler/processing/annotations.py +++ b/bigmler/processing/annotations.py @@ -27,7 +27,6 @@ import csv -from PIL import Image from zipfile import ZipFile import cv2 @@ -42,8 +41,6 @@ FILE_ATTR = "file" BBOXES_ATTR = "boxes" -REGION_FIELD_SEPARATOR = "] [" -REGION_FIELD_JSON_SEPARATOR = "],[" def relative_path(base_dir, absolute_path): @@ -177,7 +174,6 @@ def bigml_coco_file(args, session_file): MSCOCO to the format accepted by BigML """ - if args.annotations_file is not None: args.original_annotations_file = args.annotations_file args.annotations_file = os.path.join(args.output_dir, "annotations.json") @@ -196,7 +192,7 @@ def bigml_coco_file(args, session_file): args.annotations_field = BBOXES_ATTR return bigml_metadata(args, images_list=filenames, - new_fields=[{"name": BBOXES_ATTR, + new_fields=[{"name": args.annotations_field, "optype": "regions"}]) @@ -358,7 +354,7 @@ def yolo_to_cocojson(yolo_dir, args, session_file): one_image_dict = { ## possible args options for full path or basename FILE_ATTR: image_filename_base, - BBOXES_ATTR: [] + args.annotations_field: [] } ## yolo format - (label, xc, yc, width, height) @@ -488,7 +484,7 @@ def yolo_to_cocojson(yolo_dir, args, session_file): 'xmax': x_max, 'ymax': y_max } - one_image_dict[BBOXES_ATTR].append(annotation) + one_image_dict[args.annotations_field].append(annotation) # output_json_dict['annotations'].append(one_image_dict) output_json_array.append(one_image_dict) @@ -571,13 +567,13 @@ def voc_to_cocojson(voc_dir, args, session_file): one_image_dict = { ## possible args options for full path or basename FILE_ATTR: image_filename_base, - BBOXES_ATTR: [] + args.annotations_field: [] } for obj in 
annotation_root.findall('object'): annotation, warnings = get_coco_annotation_from_object( \ obj, filename, logfile, warnings) - one_image_dict[BBOXES_ATTR].append(annotation) + one_image_dict[args.annotations_field].append(annotation) bndbox_id = bndbox_id + 1 output_json_array.append(one_image_dict) @@ -636,7 +632,8 @@ def mscoco_to_cocojson(mscoco_file, args, session_file): # Extracting the file_name and id into a dict images = dict([image['id'], - { FILE_ATTR: image['file_name'], BBOXES_ATTR: [] }] + { FILE_ATTR: image['file_name'], + args.annotations_field: [] }] for image in data['images'] if image['file_name'] in filenames) if data.get("categories") and data['categories'][0].get("name"): @@ -648,7 +645,7 @@ def mscoco_to_cocojson(mscoco_file, args, session_file): # Adding the regions data if data.get('annotations'): for annotation in data['annotations']: - images[annotation["image_id"]]["boxes"].append({ + images[annotation["image_id"]][args.annotations_field].append({ "label": labels[annotation['category_id']]['name'], "xmin": int(annotation["bbox"][0]), "ymin": int(annotation["bbox"][1]), @@ -657,7 +654,8 @@ def mscoco_to_cocojson(mscoco_file, args, session_file): }) if labels[annotation['category_id']]['super']: - images[annotation["image_id"]]["boxes"].append({ + images[annotation["image_id"]][ + args.annotations_field].append({ "label": labels[annotation['category_id']]['super'], "xmin": int(annotation["bbox"][0]), "ymin": int(annotation["bbox"][1]), @@ -683,8 +681,10 @@ def mscoco_to_cocojson(mscoco_file, args, session_file): filenames] -def expand_regions(data, regions_field): - """Expanding the regions information as exported in a CSV from a dataset""" +def transform_regions(data, regions_field): + """Escaping quotes in regions labels as exported in a CSV from a dataset + and setting the expected attributes. 
+ """ annotations = {} for record in data: @@ -693,33 +693,10 @@ def expand_regions(data, regions_field): if regions != "": annotations[filename] = [] boxes = [] - regions = regions.replace(REGION_FIELD_SEPARATOR, - REGION_FIELD_JSON_SEPARATOR) - # includes scientific notation. E.g. - # [["label" 0.0 6.262755E-4 7.608954E-5 7.238262E-4]] - regions = re.sub(r'(.+?) (\d+?\.?\d*?E\-\d+) (.+?)', '\\1,\\2,\\3', - regions) - regions = re.sub(r'(.+?) (\d+?\.?\d*?E\-\d+),', '\\1,\\2,', regions) - regions = re.sub(r'(.+?) (\d+?\.?\d*?E\-\d+)]', '\\1,\\2]', - regions) - regions = re.sub(r'(.+?) (\d+?\.?\d*?) (.+?)', '\\1,\\2,\\3', - regions) - regions = re.sub(r'(.+?) (\d+?\.?\d*?),', '\\1,\\2,', regions) - regions = re.sub(r'(.+?) (\d+?\.?\d*?)]', '\\1,\\2]', - regions) - regions_list = json.loads(regions) - label_components = len(regions_list) - for region_index, region in enumerate(regions_list): - annotation = {"label": region[0], - "xmin": float(region[1]), - "ymin": float(region[2]), - "xmax": float(region[3]), - "ymax": float(region[4])} - if len(region) > 5: - annotation.update({"score": float(region[5])}) - boxes.append(annotation) - annotations[filename] = {FILE_ATTR: filename, BBOXES_ATTR: boxes} - + # we keep the compact format, but escape quotes + regions = re.sub(r'""', '\\"', regions) + annotations[filename] = {FILE_ATTR: filename, + regions_field: regions} return annotations @@ -763,8 +740,7 @@ def csv_to_cocojson(csv_file, args, session_file): filenames = [os.path.relpath(path, start=args.images_dir) for path in paths] - annotations = expand_regions( - data, args.annotations_field or BBOXES_ATTR) + annotations = transform_regions(data, args.annotations_field) annotated_images = list(annotations.keys()) annotation_boxes = list(annotations.values()) @@ -776,25 +752,6 @@ def csv_to_cocojson(csv_file, args, session_file): sys.exit(f"Failed to find the annotated file {image} in" f" {args.images_dir}.") - for boxes in annotation_boxes: - filename = 
boxes["file"].replace("/", os.path.sep) - path = paths[filenames.index(filename)] - try: - img = Image.open(path) - except ValueError: - sys.exit(f"Failed to find the annotated file: {path}.") - - width, height = img.size - for index, box in enumerate(boxes["boxes"]): - boxes["boxes"][index].update( - {"xmin": int(round(box["xmin"] * width, 0))}) - boxes["boxes"][index].update( - {"ymin": int(round(box["ymin"] * height, 0))}) - boxes["boxes"][index].update( - {"xmax": int(round(box["xmax"] * width, 0))}) - boxes["boxes"][index].update( - {"ymax": int(round(box["ymax"] * height, 0))}) - if warnings > 0: message = f"\nThere are {warnings} warnings, " \ f"see the log file {logfile_name}\n" diff --git a/bigmler/processing/args.py b/bigmler/processing/args.py index f0abbed0..6c146c37 100644 --- a/bigmler/processing/args.py +++ b/bigmler/processing/args.py @@ -312,7 +312,8 @@ def get_api_instance(command_args, storage_path): 'api_key': command_args.api_key, 'debug': command_args.debug, 'storage': retrieve_dir}) - + if command_args.debug: + print(api_command_args) return command_args.api_ diff --git a/bigmler/resourcesapi/sources.py b/bigmler/resourcesapi/sources.py index 1bb56db6..234f36f8 100644 --- a/bigmler/resourcesapi/sources.py +++ b/bigmler/resourcesapi/sources.py @@ -113,7 +113,8 @@ def set_source_args(args, name=None, multi_label_data=None, "delete_sources_": "delete_sources", "sources_": "sources"} set_config_attrs(args, exclusive_attrs, - source_args, attr_aliases=attr_aliases, exclusive=True) + source_args, attr_aliases=attr_aliases, + exclusive=True) row_attrs = ["row_components", "row_indices", "row_values"] set_config_attrs(args, row_attrs, source_args) @@ -132,11 +133,6 @@ def set_source_args(args, name=None, multi_label_data=None, fields_struct = fields.new_fields_structure(args.import_fields) check_fields_struct(fields_struct, "source") update_attributes(source_args, fields_struct) - if args.annotations_field: - update_attributes(source_args, - 
{"fields": {"boxes": { - "name": args.annotations_field}}}, - fields=fields) if 'source' in args.json_args: update_json_args(source_args, args.json_args.get('source'), fields) return source_args @@ -243,7 +239,7 @@ def data_to_source(args): """ data_set = None data_set_header = None - if (args.training_set and not args.source and not + if (args.training_set and (not args.source or args.annotations_file) and not (hasattr(args, "dataset") and args.dataset) and not args.has_models_): data_set = args.training_set @@ -310,12 +306,6 @@ def update_source(source, source_args, args, log_message(message, log_file=session_file, console=args.verbosity) log_message("%s\n" % source["object"]["resource"], log_file=log) - if args.annotations_file and args.images_file: - source = api.update_composite_annotations( - source, args.images_file, args.annotations_file, - new_fields=None, - source_changes=source_args) - source = api.update_source(source, source_args) check_resource_error(source, "Failed to update source: ") source = check_resource(source, api.get_source) diff --git a/bigmler/utils.py b/bigmler/utils.py index 37591427..f1507152 100644 --- a/bigmler/utils.py +++ b/bigmler/utils.py @@ -60,7 +60,7 @@ # resources that can have other resources as components COMPOSED_RESOURCES = [ "cluster", "ensemble", "fusion", "composites", "optiml", - "evaluation", "timeseries"] + "evaluation", "timeseries", "source"] def read_description(path): From 5ae35f1800e5a6504ab4f3c0c0777ae6912acdc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Thu, 27 Mar 2025 00:58:49 +0100 Subject: [PATCH 2/3] Updating version and bindings bump --- HISTORY.rst | 6 ++++++ README.rst | 2 +- bigmler/__init__.py | 2 +- docs/index.rst | 2 +- setup.py | 11 +++++------ 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 256dc0a5..b08b4af3 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,12 @@ History ------- +5.10.2 (2025-03-27) 
+~~~~~~~~~~~~~~~~~~~ + +- Improving annotations updates to avoid temporary failures. +- Fixing composite source deletes. + 5.10.1 (2025-03-11) ~~~~~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index d40552d4..40258dc2 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Requirements BigMLer needs Python 3.8 or higher versions to work. Compatibility with Python 2.X was discontinued in version 3.27.2. -BigMLer requires `bigml 9.8.1 `_ or +BigMLer requires `bigml 9.8.2 `_ or higher, that contains the bindings providing support to use the ``BigML`` platform to create, update, get and delete resources, but also to produce local predictions using the diff --git a/bigmler/__init__.py b/bigmler/__init__.py index b58764c7..cdd32d86 100644 --- a/bigmler/__init__.py +++ b/bigmler/__init__.py @@ -1,2 +1,2 @@ # -*- coding: utf-8 -*- -__version__ = '5.10.1' +__version__ = '5.10.2' diff --git a/docs/index.rst b/docs/index.rst index 20dd8897..bb3ed927 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -70,7 +70,7 @@ Requirements BigMLer needs Python 3.8 or higher versions to work. Compatibility with Python 2.X was discontinued in version 3.27.2. 
-BigMLer requires `bigml 9.8.1 `_ or +BigMLer requires `bigml 9.8.2 `_ or higher, that contains the bindings providing support to use the ``BigML`` platform to create, update, get and delete resources, but also to produce local predictions using the diff --git a/setup.py b/setup.py index 7781076e..b86a7f95 100644 --- a/setup.py +++ b/setup.py @@ -59,14 +59,13 @@ 'bigmler.linearregression', 'bigmler.pca', 'bigmler.fusion', 'bigmler.dataset', 'bigmler.externalconnector', 'bigmler.export.out_tree', 'bigmler.source'], - install_requires=['bigml>=9.8.1, <10.0.0', + install_requires=['bigml>=9.8.2, <10.0.0', 'jsonschema>=2.6.0', 'nbformat>=4.4.0', - 'opencv-python>=4.5.3', - 'pillow'], - extras_require={"images": "bigml[images]>=9.8.1, <10.0.0", - "topics": "bigml[topics]>=9.8.1, <10.0.0", - "full": "bigml[full]>=9.8.1, <10.0.0"}, + 'opencv-python>=4.5.3'], + extras_require={"images": "bigml[images]>=9.8.2, <10.0.0", + "topics": "bigml[topics]>=9.8.2, <10.0.0", + "full": "bigml[full]>=9.8.2, <10.0.0"}, package_data={'bigmler':['static/*.json', 'static/*.html', 'static/out_model/*', From 445c29f1f2c0cfa450b7fd6711e8356983041e8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Merc=C3=A8=20Mart=C3=ADn=20Prats?= Date: Thu, 27 Mar 2025 23:31:58 +0100 Subject: [PATCH 3/3] Fixing annotations field reference name --- README.rst | 2 +- bigmler/processing/annotations.py | 10 +++++++++- docs/index.rst | 2 +- setup.py | 8 ++++---- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 40258dc2..79ee8793 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ Requirements BigMLer needs Python 3.8 or higher versions to work. Compatibility with Python 2.X was discontinued in version 3.27.2. 
-BigMLer requires `bigml 9.8.2 `_ or +BigMLer requires `bigml 9.8.3 `_ or higher, that contains the bindings providing support to use the ``BigML`` platform to create, update, get and delete resources, but also to produce local predictions using the diff --git a/bigmler/processing/annotations.py b/bigmler/processing/annotations.py index 5d97f6cd..4507563d 100644 --- a/bigmler/processing/annotations.py +++ b/bigmler/processing/annotations.py @@ -268,6 +268,8 @@ def yolo_to_cocojson(yolo_dir, args, session_file): output_json_array = [] filenames = [] + if args.annotations_field is None: + args.annotations_field = BBOXES_ATTR logfile_name = args.annotations_file + ".log" with open(logfile_name, "w") as logfile: @@ -519,6 +521,9 @@ def voc_to_cocojson(voc_dir, args, session_file): filenames = [] annotation_file_list = [] + if args.annotations_field is None: + args.annotations_field = BBOXES_ATTR + for file in os.listdir(voc_dir): if file.endswith(".xml"): annotation_file_list.append(os.path.join(voc_dir, file)) @@ -563,7 +568,7 @@ def voc_to_cocojson(voc_dir, args, session_file): ## possible args options for full path or basename logfile.write("converting for: " + filename + "\n") logfile.write("taking as filename: " + image_filename_base + "\n") - + one_image_dict = { ## possible args options for full path or basename FILE_ATTR: image_filename_base, @@ -630,6 +635,9 @@ def mscoco_to_cocojson(mscoco_file, args, session_file): IMAGE_EXTENSIONS] filenames = [os.path.basename(path) for path in paths] + if args.annotations_field is None: + args.annotations_field = BBOXES_ATTR + # Extracting the file_name and id into a dict images = dict([image['id'], { FILE_ATTR: image['file_name'], diff --git a/docs/index.rst b/docs/index.rst index bb3ed927..0a2fd106 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -70,7 +70,7 @@ Requirements BigMLer needs Python 3.8 or higher versions to work. Compatibility with Python 2.X was discontinued in version 3.27.2. 
-BigMLer requires `bigml 9.8.2 `_ or +BigMLer requires `bigml 9.8.3 `_ or higher, that contains the bindings providing support to use the ``BigML`` platform to create, update, get and delete resources, but also to produce local predictions using the diff --git a/setup.py b/setup.py index b86a7f95..35f150a5 100644 --- a/setup.py +++ b/setup.py @@ -59,13 +59,13 @@ 'bigmler.linearregression', 'bigmler.pca', 'bigmler.fusion', 'bigmler.dataset', 'bigmler.externalconnector', 'bigmler.export.out_tree', 'bigmler.source'], - install_requires=['bigml>=9.8.2, <10.0.0', + install_requires=['bigml>=9.8.3, <10.0.0', 'jsonschema>=2.6.0', 'nbformat>=4.4.0', 'opencv-python>=4.5.3'], - extras_require={"images": "bigml[images]>=9.8.2, <10.0.0", - "topics": "bigml[topics]>=9.8.2, <10.0.0", - "full": "bigml[full]>=9.8.2, <10.0.0"}, + extras_require={"images": "bigml[images]>=9.8.3, <10.0.0", + "topics": "bigml[topics]>=9.8.3, <10.0.0", + "full": "bigml[full]>=9.8.3, <10.0.0"}, package_data={'bigmler':['static/*.json', 'static/*.html', 'static/out_model/*',