diff --git a/docs/docs/answer-modes/4-bounding-box-detectors.md b/docs/docs/answer-modes/4-bounding-box-detectors.md new file mode 100644 index 00000000..4f40fbaa --- /dev/null +++ b/docs/docs/answer-modes/4-bounding-box-detectors.md @@ -0,0 +1,130 @@ +# Bounding Box Detectors + +Bounding box detectors are used to detect and localize objects in an image by returning bounding boxes around each detected object. + +```python notest +from groundlight import ExperimentalApi +gl = ExperimentalApi() + +# highlight-start +detector = gl.create_bounding_box_detector( + name="dog-detector", + query="Draw a bounding box around each dog in the image", + class_name="dog", + max_num_bboxes=25, + confidence_threshold=0.6, +) +# highlight-end +``` + +Bounding box detectors should be provided with a query that asks the model to identify and localize objects in an image, such as "Draw a bounding box around each dog in the image". + +The `class_name` parameter specifies the type of object to detect, and this label will be assigned to each returned bounding box. + +The `max_num_bboxes` parameter sets the maximum number of bounding boxes that the detector will return (default: 10). If there are more objects in the image than the maximum, the result label will be `GREATER_THAN_MAX`. + +The `confidence_threshold` parameter sets the minimum confidence level required for the ML model's predictions. If the model's confidence falls below this threshold, the query will be sent for human review. + +:::note +Bounding Box Detectors are currently in beta and available through the `ExperimentalApi`. They are available on [Business and Enterprise plans](https://www.groundlight.ai/pricing). +::: + +## Submit an Image Query to a Bounding Box Detector + +Now that you have created a bounding box detector, you can submit an image query to it. 
+ +```python notest +from groundlight import ExperimentalApi +gl = ExperimentalApi() + +detector = gl.get_detector_by_name("dog-detector") + +# highlight-start +# Detect dogs in an image +image_query = gl.submit_image_query(detector, "path/to/image.jpg") +# highlight-end + +print(f"Label: {image_query.result.label}") +print(f"Confidence: {image_query.result.confidence}") +print(f"Bounding Boxes: {image_query.rois}") +``` + +For bounding box detectors, the `label` attribute of the result object will be one of: +- `NO_OBJECTS`: No objects of the specified class were detected in the image +- `BOUNDING_BOX`: Objects were detected and bounding boxes are available in `image_query.rois` +- `GREATER_THAN_MAX`: More objects were detected than the `max_num_bboxes` limit +- `UNCLEAR`: The result was unclear + +The `rois` (regions of interest) attribute contains the list of bounding boxes, each with: +- `geometry`: Bounding box coordinates (`left`, `top`, `right`, `bottom`) as values between 0 and 1 +- `label`: The class name of the detected object +- `score`: Confidence score for this specific object + + + +:::tip Drawing Bounding Boxes +You can visualize the bounding boxes returned by bounding box detectors using a library like OpenCV. Here's an example of how to draw bounding boxes on an image: + +```python notest +import cv2 +import numpy as np + +def draw_bounding_boxes(image_path, rois): + """ + Draw bounding boxes on an image based on ROIs returned from a bounding box detector. 
+ + Args: + image_path: Path to the image file + rois: List of ROI objects returned from image_query.rois + """ + image = cv2.imread(image_path) + if image is None: + raise ValueError(f"Could not read image from {image_path}") + height, width = image.shape[:2] + + # Draw bounding boxes + for roi in rois: + x1 = int(roi.geometry.left * width) + y1 = int(roi.geometry.top * height) + x2 = int(roi.geometry.right * width) + y2 = int(roi.geometry.bottom * height) + cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2) + label_text = f"{roi.label}: {roi.score:.2f}" + cv2.putText(image, label_text, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + + # Display the image + cv2.imshow("Image with Bounding Boxes", image) + cv2.waitKey(0) + cv2.destroyAllWindows() + +# Example usage: +# image_query = gl.submit_image_query(detector, "path/to/image.jpg") +# draw_bounding_boxes("path/to/image.jpg", image_query.rois) +``` +::: + +## Add a Label to a Bounding Box Detector + +The Groundlight API allows you to add labels to image queries, including Region of Interest (ROI) data. +When adding a `BOUNDING_BOX` label to a bounding box detector, you must include the ROIs that correspond to the objects in the image. + +```python notest +from groundlight import ExperimentalApi +gl = ExperimentalApi() + +# highlight-start +# Add a bounding box label with corresponding ROIs to the image query from the previous example. +# ROIs are specified as (left, top) and (right, bottom) coordinates, with values +# between 0 and 1 representing fractions of the image width and height. 
+roi1 = gl.create_roi("dog", (0.1, 0.2), (0.3, 0.4)) +roi2 = gl.create_roi("dog", (0.5, 0.3), (0.7, 0.6)) +rois = [roi1, roi2] +gl.add_label(image_query, label="BOUNDING_BOX", rois=rois) +# highlight-end +``` + +Valid label values for bounding box detectors are: +- `"NO_OBJECTS"`: Use when there are no objects of the target class in the image (no ROIs needed) +- `"BOUNDING_BOX"`: Use when objects are present and you are providing ROIs +- `"GREATER_THAN_MAX"`: Use when there are more objects than `max_num_bboxes` +- `"UNCLEAR"`: Use when the image is unclear or the answer cannot be determined diff --git a/docs/docs/answer-modes/answer-modes.md b/docs/docs/answer-modes/answer-modes.md index 48cdb173..e70b6b41 100644 --- a/docs/docs/answer-modes/answer-modes.md +++ b/docs/docs/answer-modes/answer-modes.md @@ -6,6 +6,5 @@ Groundlight offers several detector modalities to suit different computer vision - **[Binary Detectors](1-binary-detectors.md)**: Learn how to create detectors that answer yes/no questions about images. - **[Multiple Choice (Choose One) Detectors](2-multi-choice-detectors.md)**: Create detectors that select one answer from a predefined list of options. - **[Count Detectors](3-counting-detectors.md)**: Use detectors to count the number of objects present in an image - and return bounding boxes around the counted objects. - - - +- **[Bounding Box Detectors](4-bounding-box-detectors.md)**: Create detectors that identify and localize objects in an image. + diff --git a/src/groundlight/client.py b/src/groundlight/client.py index f8908a27..669d3a7c 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -1193,7 +1193,7 @@ def add_label( Provide a new label (annotation) for an image query. This is used to provide ground-truth labels for training detectors, or to correct the results of detectors. 
- **Example usage**:: + **Example usage for binary detectors**:: gl = Groundlight() @@ -1208,12 +1208,15 @@ def add_label( rois = [ROI(x=100, y=100, width=50, height=50)] gl.add_label(image_query, "YES", rois=rois) + Examples for other answer modes can be found in the documentation for each mode. + :param image_query: Either an ImageQuery object (returned from methods like `ask_ml`) or an image query ID string starting with "iq_". :param label: The label value to assign, typically "YES" or "NO" for binary classification detectors. For multi-class detectors, use one of - the defined class names. + the defined class names. See the answer mode documentation for the + label values accepted by each mode. :param rois: Optional list of ROI objects defining regions of interest in the image. Each ROI specifies a bounding box with x, y coordinates diff --git a/test/unit/test_experimental.py b/test/unit/test_experimental.py index 157cfe04..2b8f55f7 100644 --- a/test/unit/test_experimental.py +++ b/test/unit/test_experimental.py @@ -103,12 +103,6 @@ def test_text_recognition_detector(gl_experimental: ExperimentalApi): assert mc_iq.result.text is not None -@pytest.mark.skip( - reason=( - "General users currently currently can't use bounding box detectors. If you have questions, reach out" - " to Groundlight support, or upgrade your plan." - ) -) def test_bounding_box_detector(gl_experimental: ExperimentalApi): """ Verify that we can create and submit to a bounding box detector @@ -123,12 +117,6 @@ def test_bounding_box_detector(gl_experimental: ExperimentalApi): assert bbox_iq.rois is not None -@pytest.mark.skip( - reason=( - "General users currently currently can't use bounding box detectors. If you have questions, reach out" - " to Groundlight support, or upgrade your plan." 
- ) -) def test_bounding_box_detector_async(gl_experimental: ExperimentalApi): """ Verify that we can create and submit to a bounding box detector with ask_async diff --git a/test/unit/test_labels.py b/test/unit/test_labels.py index 770a379e..6851a8c5 100644 --- a/test/unit/test_labels.py +++ b/test/unit/test_labels.py @@ -66,6 +66,20 @@ def test_multiclass_labels(gl_experimental: ExperimentalApi): gl_experimental.add_label(iq1, "MAYBE") +def test_bounding_box_labels(gl_experimental: ExperimentalApi): + name = f"Test bounding box labels{datetime.utcnow()}" + det = gl_experimental.create_bounding_box_detector(name, "test_query", "test_class") + iq1 = gl_experimental.submit_image_query(det, "test/assets/cat.jpeg") + gl_experimental.add_label(iq1, "NO_OBJECTS") + iq1 = gl_experimental.get_image_query(iq1.id) + assert iq1.result.label == "NO_OBJECTS" + gl_experimental.add_label(iq1, "BOUNDING_BOX", rois=[ROI(x=0.1, y=0.1, width=0.5, height=0.5)]) + iq1 = gl_experimental.get_image_query(iq1.id) + assert iq1.result.label == "BOUNDING_BOX" + with pytest.raises(ApiException) as _: + gl_experimental.add_label(iq1, "MAYBE") + + def test_text_recognition_labels(gl_experimental: ExperimentalApi): name = f"Test text recognition labels{datetime.utcnow()}" det = gl_experimental.create_text_recognition_detector(name, "test_query")