Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
History
-------

5.10.2 (2025-03-27)
~~~~~~~~~~~~~~~~~~~

- Improving annotation updates to avoid temporary failures.
- Fixing composite source deletes.

5.10.1 (2025-03-11)
~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Requirements
BigMLer needs Python 3.8 or higher versions to work.
Compatibility with Python 2.X was discontinued in version 3.27.2.

BigMLer requires `bigml 9.8.1 <https://github.com/bigmlcom/python>`_ or
BigMLer requires `bigml 9.8.3 <https://github.com/bigmlcom/python>`_ or
higher, that contains the bindings providing support to use the ``BigML``
platform to create, update, get and delete resources,
but also to produce local predictions using the
Expand Down
2 changes: 1 addition & 1 deletion bigmler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# -*- coding: utf-8 -*-
__version__ = '5.10.1'
__version__ = '5.10.2'
6 changes: 5 additions & 1 deletion bigmler/delete/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,6 @@ def delete_resources(command_args, api, deleted_list=None, step=0):
res_type for res_type in resource_types if res_type in types]
else:
command_args.resource_types_ = types

if command_args.resource_types_:
delete_list = []
# by ids
Expand Down Expand Up @@ -479,6 +478,11 @@ def delete_resources(command_args, api, deleted_list=None, step=0):
deleted_list.extend(delete_list)
delete_resources(command_args, api, deleted_list=deleted_list,
step=step + 1)
else:
# no resources to delete in this group; try the next one
if step < 2:
delete_resources(command_args, api, deleted_list=deleted_list,
step=step + 1)

if step == 2 and deleted_list:
u.print_generated_files(path, log_file=session_file,
Expand Down
91 changes: 28 additions & 63 deletions bigmler/processing/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import csv


from PIL import Image
from zipfile import ZipFile

import cv2
Expand All @@ -42,8 +41,6 @@

FILE_ATTR = "file"
BBOXES_ATTR = "boxes"
REGION_FIELD_SEPARATOR = "] ["
REGION_FIELD_JSON_SEPARATOR = "],["


def relative_path(base_dir, absolute_path):
Expand Down Expand Up @@ -177,7 +174,6 @@ def bigml_coco_file(args, session_file):
MSCOCO to the format accepted by BigML

"""

if args.annotations_file is not None:
args.original_annotations_file = args.annotations_file
args.annotations_file = os.path.join(args.output_dir, "annotations.json")
Expand All @@ -196,7 +192,7 @@ def bigml_coco_file(args, session_file):
args.annotations_field = BBOXES_ATTR

return bigml_metadata(args, images_list=filenames,
new_fields=[{"name": BBOXES_ATTR,
new_fields=[{"name": args.annotations_field,
"optype": "regions"}])


Expand Down Expand Up @@ -272,6 +268,8 @@ def yolo_to_cocojson(yolo_dir, args, session_file):
output_json_array = []

filenames = []
if args.annotations_field is None:
args.annotations_field = BBOXES_ATTR

logfile_name = args.annotations_file + ".log"
with open(logfile_name, "w") as logfile:
Expand Down Expand Up @@ -358,7 +356,7 @@ def yolo_to_cocojson(yolo_dir, args, session_file):
one_image_dict = {
## possible args options for full path or basename
FILE_ATTR: image_filename_base,
BBOXES_ATTR: []
args.annotations_field: []
}

## yolo format - (label, xc, yc, width, height)
Expand Down Expand Up @@ -488,7 +486,7 @@ def yolo_to_cocojson(yolo_dir, args, session_file):
'xmax': x_max,
'ymax': y_max
}
one_image_dict[BBOXES_ATTR].append(annotation)
one_image_dict[args.annotations_field].append(annotation)

# output_json_dict['annotations'].append(one_image_dict)
output_json_array.append(one_image_dict)
Expand Down Expand Up @@ -523,6 +521,9 @@ def voc_to_cocojson(voc_dir, args, session_file):
filenames = []

annotation_file_list = []
if args.annotations_field is None:
args.annotations_field = BBOXES_ATTR

for file in os.listdir(voc_dir):
if file.endswith(".xml"):
annotation_file_list.append(os.path.join(voc_dir, file))
Expand Down Expand Up @@ -567,17 +568,17 @@ def voc_to_cocojson(voc_dir, args, session_file):
## possible args options for full path or basename
logfile.write("converting for: " + filename + "\n")
logfile.write("taking as filename: " + image_filename_base + "\n")

one_image_dict = {
## possible args options for full path or basename
FILE_ATTR: image_filename_base,
BBOXES_ATTR: []
args.annotations_field: []
}

for obj in annotation_root.findall('object'):
annotation, warnings = get_coco_annotation_from_object( \
obj, filename, logfile, warnings)
one_image_dict[BBOXES_ATTR].append(annotation)
one_image_dict[args.annotations_field].append(annotation)
bndbox_id = bndbox_id + 1

output_json_array.append(one_image_dict)
Expand Down Expand Up @@ -634,9 +635,13 @@ def mscoco_to_cocojson(mscoco_file, args, session_file):
IMAGE_EXTENSIONS]
filenames = [os.path.basename(path) for path in paths]

if args.annotations_field is None:
args.annotations_field = BBOXES_ATTR

# Extracting the file_name and id into a dict
images = dict([image['id'],
{ FILE_ATTR: image['file_name'], BBOXES_ATTR: [] }]
{ FILE_ATTR: image['file_name'],
args.annotations_field: [] }]
for image in data['images'] if image['file_name'] in
filenames)
if data.get("categories") and data['categories'][0].get("name"):
Expand All @@ -648,7 +653,7 @@ def mscoco_to_cocojson(mscoco_file, args, session_file):
# Adding the regions data
if data.get('annotations'):
for annotation in data['annotations']:
images[annotation["image_id"]]["boxes"].append({
images[annotation["image_id"]][args.annotations_field].append({
"label": labels[annotation['category_id']]['name'],
"xmin": int(annotation["bbox"][0]),
"ymin": int(annotation["bbox"][1]),
Expand All @@ -657,7 +662,8 @@ def mscoco_to_cocojson(mscoco_file, args, session_file):
})

if labels[annotation['category_id']]['super']:
images[annotation["image_id"]]["boxes"].append({
images[annotation["image_id"]][
args.annotations_field].append({
"label": labels[annotation['category_id']]['super'],
"xmin": int(annotation["bbox"][0]),
"ymin": int(annotation["bbox"][1]),
Expand All @@ -683,8 +689,10 @@ def mscoco_to_cocojson(mscoco_file, args, session_file):
filenames]


def expand_regions(data, regions_field):
"""Expanding the regions information as exported in a CSV from a dataset"""
def transform_regions(data, regions_field):
"""Escaping quotes in regions labels as exported in a CSV from a dataset
and setting the expected attributes.
"""

annotations = {}
for record in data:
Expand All @@ -693,33 +701,10 @@ def expand_regions(data, regions_field):
if regions != "":
annotations[filename] = []
boxes = []
regions = regions.replace(REGION_FIELD_SEPARATOR,
REGION_FIELD_JSON_SEPARATOR)
# includes scientific notation. E.g.
# [["label" 0.0 6.262755E-4 7.608954E-5 7.238262E-4]]
regions = re.sub(r'(.+?) (\d+?\.?\d*?E\-\d+) (.+?)', '\\1,\\2,\\3',
regions)
regions = re.sub(r'(.+?) (\d+?\.?\d*?E\-\d+),', '\\1,\\2,', regions)
regions = re.sub(r'(.+?) (\d+?\.?\d*?E\-\d+)]', '\\1,\\2]',
regions)
regions = re.sub(r'(.+?) (\d+?\.?\d*?) (.+?)', '\\1,\\2,\\3',
regions)
regions = re.sub(r'(.+?) (\d+?\.?\d*?),', '\\1,\\2,', regions)
regions = re.sub(r'(.+?) (\d+?\.?\d*?)]', '\\1,\\2]',
regions)
regions_list = json.loads(regions)
label_components = len(regions_list)
for region_index, region in enumerate(regions_list):
annotation = {"label": region[0],
"xmin": float(region[1]),
"ymin": float(region[2]),
"xmax": float(region[3]),
"ymax": float(region[4])}
if len(region) > 5:
annotation.update({"score": float(region[5])})
boxes.append(annotation)
annotations[filename] = {FILE_ATTR: filename, BBOXES_ATTR: boxes}

# we keep the compact format, but escape quotes
regions = re.sub(r'""', '\\"', regions)
annotations[filename] = {FILE_ATTR: filename,
regions_field: regions}
return annotations


Expand Down Expand Up @@ -763,8 +748,7 @@ def csv_to_cocojson(csv_file, args, session_file):
filenames = [os.path.relpath(path, start=args.images_dir)
for path in paths]

annotations = expand_regions(
data, args.annotations_field or BBOXES_ATTR)
annotations = transform_regions(data, args.annotations_field)

annotated_images = list(annotations.keys())
annotation_boxes = list(annotations.values())
Expand All @@ -776,25 +760,6 @@ def csv_to_cocojson(csv_file, args, session_file):
sys.exit(f"Failed to find the annotated file {image} in"
f" {args.images_dir}.")

for boxes in annotation_boxes:
filename = boxes["file"].replace("/", os.path.sep)
path = paths[filenames.index(filename)]
try:
img = Image.open(path)
except ValueError:
sys.exit(f"Failed to find the annotated file: {path}.")

width, height = img.size
for index, box in enumerate(boxes["boxes"]):
boxes["boxes"][index].update(
{"xmin": int(round(box["xmin"] * width, 0))})
boxes["boxes"][index].update(
{"ymin": int(round(box["ymin"] * height, 0))})
boxes["boxes"][index].update(
{"xmax": int(round(box["xmax"] * width, 0))})
boxes["boxes"][index].update(
{"ymax": int(round(box["ymax"] * height, 0))})

if warnings > 0:
message = f"\nThere are {warnings} warnings, " \
f"see the log file {logfile_name}\n"
Expand Down
3 changes: 2 additions & 1 deletion bigmler/processing/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,8 @@ def get_api_instance(command_args, storage_path):
'api_key': command_args.api_key,
'debug': command_args.debug,
'storage': retrieve_dir})

if command_args.debug:
print(api_command_args)
return command_args.api_


Expand Down
16 changes: 3 additions & 13 deletions bigmler/resourcesapi/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ def set_source_args(args, name=None, multi_label_data=None,
"delete_sources_": "delete_sources",
"sources_": "sources"}
set_config_attrs(args, exclusive_attrs,
source_args, attr_aliases=attr_aliases, exclusive=True)
source_args, attr_aliases=attr_aliases,
exclusive=True)

row_attrs = ["row_components", "row_indices", "row_values"]
set_config_attrs(args, row_attrs, source_args)
Expand All @@ -132,11 +133,6 @@ def set_source_args(args, name=None, multi_label_data=None,
fields_struct = fields.new_fields_structure(args.import_fields)
check_fields_struct(fields_struct, "source")
update_attributes(source_args, fields_struct)
if args.annotations_field:
update_attributes(source_args,
{"fields": {"boxes": {
"name": args.annotations_field}}},
fields=fields)
if 'source' in args.json_args:
update_json_args(source_args, args.json_args.get('source'), fields)
return source_args
Expand Down Expand Up @@ -243,7 +239,7 @@ def data_to_source(args):
"""
data_set = None
data_set_header = None
if (args.training_set and not args.source and not
if (args.training_set and (not args.source or args.annotations_file) and not
(hasattr(args, "dataset") and args.dataset) and
not args.has_models_):
data_set = args.training_set
Expand Down Expand Up @@ -310,12 +306,6 @@ def update_source(source, source_args, args,
log_message(message, log_file=session_file, console=args.verbosity)
log_message("%s\n" % source["object"]["resource"], log_file=log)

if args.annotations_file and args.images_file:
source = api.update_composite_annotations(
source, args.images_file, args.annotations_file,
new_fields=None,
source_changes=source_args)

source = api.update_source(source, source_args)
check_resource_error(source, "Failed to update source: ")
source = check_resource(source, api.get_source)
Expand Down
2 changes: 1 addition & 1 deletion bigmler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
# resources that can have other resources as components
COMPOSED_RESOURCES = [
"cluster", "ensemble", "fusion", "composites", "optiml",
"evaluation", "timeseries"]
"evaluation", "timeseries", "source"]


def read_description(path):
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Requirements
BigMLer needs Python 3.8 or higher versions to work.
Compatibility with Python 2.X was discontinued in version 3.27.2.

BigMLer requires `bigml 9.8.1 <https://github.com/bigmlcom/python>`_ or
BigMLer requires `bigml 9.8.3 <https://github.com/bigmlcom/python>`_ or
higher, that contains the bindings providing support to use the ``BigML``
platform to create, update, get and delete resources,
but also to produce local predictions using the
Expand Down
11 changes: 5 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,13 @@
'bigmler.linearregression', 'bigmler.pca', 'bigmler.fusion',
'bigmler.dataset', 'bigmler.externalconnector',
'bigmler.export.out_tree', 'bigmler.source'],
install_requires=['bigml>=9.8.1, <10.0.0',
install_requires=['bigml>=9.8.3, <10.0.0',
'jsonschema>=2.6.0',
'nbformat>=4.4.0',
'opencv-python>=4.5.3',
'pillow'],
extras_require={"images": "bigml[images]>=9.8.1, <10.0.0",
"topics": "bigml[topics]>=9.8.1, <10.0.0",
"full": "bigml[full]>=9.8.1, <10.0.0"},
'opencv-python>=4.5.3'],
extras_require={"images": "bigml[images]>=9.8.3, <10.0.0",
"topics": "bigml[topics]>=9.8.3, <10.0.0",
"full": "bigml[full]>=9.8.3, <10.0.0"},
package_data={'bigmler':['static/*.json',
'static/*.html',
'static/out_model/*',
Expand Down