diff --git a/docs/docs/03-hooks/02-computer-vision/useClassification.md b/docs/docs/03-hooks/02-computer-vision/useClassification.md
index 9c70a04d2..f57b7b254 100644
--- a/docs/docs/03-hooks/02-computer-vision/useClassification.md
+++ b/docs/docs/03-hooks/02-computer-vision/useClassification.md
@@ -52,7 +52,7 @@ You need more details? Check the following resources:

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ClassificationType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The function returns a promise, which can resolve either to an error or an object containing categories with their probabilities.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ClassificationType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The function returns a promise, which can resolve either to an error or an object containing categories with their probabilities.

 :::info
 Images from external sources are stored in your application's temporary directory.
diff --git a/docs/docs/03-hooks/02-computer-vision/useImageEmbeddings.md b/docs/docs/03-hooks/02-computer-vision/useImageEmbeddings.md
index c85e9c67e..caef87cdf 100644
--- a/docs/docs/03-hooks/02-computer-vision/useImageEmbeddings.md
+++ b/docs/docs/03-hooks/02-computer-vision/useImageEmbeddings.md
@@ -63,7 +63,7 @@ You need more details? Check the following resources:

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ImageEmbeddingsType.md#forward) method. It accepts one argument which is a URI/URL to an image you want to encode. The function returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ImageEmbeddingsType.md#forward) method. It accepts one argument, which is the image to encode. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The function returns a promise, which can resolve either to an error or an array of numbers representing the embedding.

 ## Example
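Both hunks above extend the same contract: `forward` now takes four input shapes. A minimal sketch of the JavaScript side, assuming a hook configured with a `model` option (the option name and surrounding setup are illustrative assumptions, not the library's verbatim API):

```tsx
import { useClassification } from 'react-native-executorch';

// Illustrative only: the hook's option name (`model`) is an assumption.
export function useTopCategory(modelSource: string) {
  const model = useClassification({ model: modelSource });

  return async (rawBase64: string) => {
    // forward() should now accept any of:
    //   'https://example.com/cat.jpg'            (remote URL)
    //   'file:///path/to/cat.jpg'                (local file URI)
    //   `data:image/jpeg;base64,${rawBase64}`    (full data URI)
    //   rawBase64                                (raw base64 content)
    const probabilities = await model.forward(rawBase64);
    // Resolves to an object mapping category names to probabilities.
    return Object.entries(probabilities).sort((a, b) => b[1] - a[1])[0];
  };
}
```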
diff --git a/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md b/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md
index edcb6de05..390bb96c2 100644
--- a/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md
+++ b/docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md
@@ -51,7 +51,7 @@ You need more details? Check the following resources:

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ImageSegmentationType.md#forward) method. It accepts three arguments: a required image, an optional list of classes, and an optional flag whether to resize the output to the original dimensions.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ImageSegmentationType.md#forward) method. It accepts three arguments: a required image, which can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content); an optional list of classes; and an optional flag indicating whether to resize the output to the original dimensions.

 - The image can be a remote URL, a local file URI, or a base64-encoded image.
 - The [`classesOfInterest`](../../06-api-reference/interfaces/ImageSegmentationType.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes.
diff --git a/docs/docs/03-hooks/02-computer-vision/useOCR.md b/docs/docs/03-hooks/02-computer-vision/useOCR.md
index e7fa60d07..76e7ad695 100644
--- a/docs/docs/03-hooks/02-computer-vision/useOCR.md
+++ b/docs/docs/03-hooks/02-computer-vision/useOCR.md
@@ -50,7 +50,7 @@ You need more details? Check the following resources:

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/OCRType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The function returns an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the text recognized within the box, and the confidence score. For more information, please refer to the reference or type definitions.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/OCRType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The function returns an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the text recognized within the box, and the confidence score. For more information, please refer to the reference or type definitions.

 ## Detection object

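The segmentation `forward` is the only three-argument variant in this patch. A sketch under the same assumptions, with `DeeplabLabel.CAT` standing in for whichever member of the linked enum you care about (the label choice is an assumption):

```tsx
import { useImageSegmentation, DeeplabLabel } from 'react-native-executorch';

// Illustrative only: the option name and the label choice are assumptions.
export function useCatMask(modelSource: string) {
  const model = useImageSegmentation({ model: modelSource });

  return async (image: string) => {
    // image: remote URL, file URI, data URI, or raw base64.
    // [DeeplabLabel.CAT]: classes to return full per-pixel outputs for.
    // true: resize the output back to the input image's dimensions.
    return model.forward(image, [DeeplabLabel.CAT], true);
  };
}
```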
diff --git a/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md b/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md
index 796caf0be..4dc96eafb 100644
--- a/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md
+++ b/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md
@@ -54,7 +54,7 @@ You need more details? Check the following resources:

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ObjectDetectionType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The function returns an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects. Each object contains coordinates of the bounding box, the label of the detected object, and the confidence score. For more information, please refer to the reference or type definitions.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/ObjectDetectionType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The function returns an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects. Each object contains coordinates of the bounding box, the label of the detected object, and the confidence score. For more information, please refer to the reference or type definitions.

 ## Detection object

diff --git a/docs/docs/03-hooks/02-computer-vision/useStyleTransfer.md b/docs/docs/03-hooks/02-computer-vision/useStyleTransfer.md
index 10c9a11b3..471bde35e 100644
--- a/docs/docs/03-hooks/02-computer-vision/useStyleTransfer.md
+++ b/docs/docs/03-hooks/02-computer-vision/useStyleTransfer.md
@@ -51,7 +51,7 @@ You need more details? Check the following resources:

 ## Running the model

-To run the model, you can use [`forward`](../../06-api-reference/interfaces/StyleTransferType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The function returns a promise which can resolve either to an error or a URL to generated image.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/StyleTransferType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The function returns a promise which can resolve either to an error or a URL to the generated image.

 :::info
 Images from external sources and the generated image are stored in your application's temporary directory.
diff --git a/docs/docs/03-hooks/02-computer-vision/useVerticalOCR.md b/docs/docs/03-hooks/02-computer-vision/useVerticalOCR.md
index 19f1eb344..b9d29fc42 100644
--- a/docs/docs/03-hooks/02-computer-vision/useVerticalOCR.md
+++ b/docs/docs/03-hooks/02-computer-vision/useVerticalOCR.md
@@ -58,7 +58,7 @@ You need more details? Check the following resources:

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/OCRType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The function returns an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the text recognized within the box, and the confidence score. For more information, please refer to the reference or type definitions.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/OCRType.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The function returns an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the text recognized within the box, and the confidence score. For more information, please refer to the reference or type definitions.

 ## Detection object

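The three detection-style hooks above all resolve to arrays of result objects, so one consumption sketch covers them; the `bbox`, `label`, and `score` field names are assumptions based on the `Detection` interface these docs link to:

```tsx
import { useObjectDetection } from 'react-native-executorch';

// Illustrative only: option name and Detection field names are assumptions.
export function useDetectionLogger(modelSource: string) {
  const model = useObjectDetection({ model: modelSource });

  return async (image: string) => {
    const detections = await model.forward(image);
    for (const { bbox, label, score } of detections) {
      console.log(`${label}: ${(score * 100).toFixed(1)}%`, bbox);
    }
    return detections;
  };
}
```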
diff --git a/docs/docs/04-typescript-api/02-computer-vision/ClassificationModule.md b/docs/docs/04-typescript-api/02-computer-vision/ClassificationModule.md
index 6aa8a0430..df94656e7 100644
--- a/docs/docs/04-typescript-api/02-computer-vision/ClassificationModule.md
+++ b/docs/docs/04-typescript-api/02-computer-vision/ClassificationModule.md
@@ -48,7 +48,7 @@ For more information on loading resources, take a look at [loading models](../..

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/classes/ClassificationModule.md#forward) method on the module object. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The method returns a promise, which can resolve either to an error or an object containing categories with their probabilities.
+To run the model, you can use the [`forward`](../../06-api-reference/classes/ClassificationModule.md#forward) method on the module object. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The method returns a promise, which can resolve either to an error or an object containing categories with their probabilities.

 ## Managing memory

diff --git a/docs/docs/04-typescript-api/02-computer-vision/ImageEmbeddingsModule.md b/docs/docs/04-typescript-api/02-computer-vision/ImageEmbeddingsModule.md
index 4700222f7..8c6691754 100644
--- a/docs/docs/04-typescript-api/02-computer-vision/ImageEmbeddingsModule.md
+++ b/docs/docs/04-typescript-api/02-computer-vision/ImageEmbeddingsModule.md
@@ -48,4 +48,4 @@ For more information on loading resources, take a look at [loading models](../..

 ## Running the model

-[`forward`](../../06-api-reference/classes/ImageEmbeddingsModule.md#forward) accepts one argument, which is a URI/URL to an image you want to encode. The function returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+[`forward`](../../06-api-reference/classes/ImageEmbeddingsModule.md#forward) accepts one argument, which is the image to encode. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The function returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
diff --git a/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md b/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md
index 373da8b9d..6a16694a7 100644
--- a/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md
+++ b/docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md
@@ -48,7 +48,7 @@ For more information on loading resources, take a look at [loading models](../..

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/classes/ImageSegmentationModule.md#forward) method on the module object. It accepts three arguments: a required image, an optional list of classes, and an optional flag whether to resize the output to the original dimensions.
+To run the model, you can use the [`forward`](../../06-api-reference/classes/ImageSegmentationModule.md#forward) method on the module object. It accepts three arguments: a required image, which can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content); an optional list of classes; and an optional flag indicating whether to resize the output to the original dimensions.

 - The image can be a remote URL, a local file URI, or a base64-encoded image.
 - The [`classesOfInterest`](../../06-api-reference/classes/ImageSegmentationModule.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes.
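For the module-style API, a sketch of consuming the embedding array that `forward` resolves to; the already-loaded module instance is taken as a parameter so that nothing beyond the documented `forward` signature is assumed:

```tsx
import { ImageEmbeddingsModule } from 'react-native-executorch';

// Cosine similarity between two images. Each input may be a remote URL,
// a local file URI, a data URI, or a raw base64 string.
async function imageSimilarity(
  embedder: ImageEmbeddingsModule,
  imageA: string,
  imageB: string,
): Promise<number> {
  const a = await embedder.forward(imageA);
  const b = await embedder.forward(imageB);
  const dot = a.reduce((sum, v, i) => sum + v * b[i], 0);
  const normA = Math.sqrt(a.reduce((s, v) => s + v * v, 0));
  const normB = Math.sqrt(b.reduce((s, v) => s + v * v, 0));
  return dot / (normA * normB);
}
```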
diff --git a/docs/docs/04-typescript-api/02-computer-vision/OCRModule.md b/docs/docs/04-typescript-api/02-computer-vision/OCRModule.md
index 306ea67da..cfcc14a05 100644
--- a/docs/docs/04-typescript-api/02-computer-vision/OCRModule.md
+++ b/docs/docs/04-typescript-api/02-computer-vision/OCRModule.md
@@ -46,4 +46,4 @@ For more information on loading resources, take a look at [loading models](../..

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/classes/OCRModule.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The method returns a promise, which can resolve either to an error or an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the label of the detected object, and the confidence score.
+To run the model, you can use the [`forward`](../../06-api-reference/classes/OCRModule.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The method returns a promise, which can resolve either to an error or an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the text recognized within the box, and the confidence score.
diff --git a/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md b/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md
index 05f039174..1fa95b1ba 100644
--- a/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md
+++ b/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md
@@ -48,7 +48,7 @@ For more information on loading resources, take a look at [loading models](../..

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/classes/ObjectDetectionModule.md#forward) method on the module object. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The method returns a promise, which can resolve either to an error or an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects. Each object contains coordinates of the bounding box, the label of the detected object, and the confidence score.
+To run the model, you can use the [`forward`](../../06-api-reference/classes/ObjectDetectionModule.md#forward) method on the module object. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The method returns a promise, which can resolve either to an error or an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects. Each object contains coordinates of the bounding box, the label of the detected object, and the confidence score.

 ## Managing memory

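A matching sketch for the OCR module, flattening the `OCRDetection` array into plain text; the `text` and `score` field names follow the linked interface and are otherwise assumptions:

```tsx
import { OCRModule } from 'react-native-executorch';

// Join recognized fragments, skipping low-confidence boxes.
async function readText(ocr: OCRModule, image: string): Promise<string> {
  const detections = await ocr.forward(image);
  return detections
    .filter((d) => d.score > 0.5)
    .map((d) => d.text)
    .join('\n');
}
```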
diff --git a/docs/docs/04-typescript-api/02-computer-vision/StyleTransferModule.md b/docs/docs/04-typescript-api/02-computer-vision/StyleTransferModule.md
index abce81bf7..3f26a44bb 100644
--- a/docs/docs/04-typescript-api/02-computer-vision/StyleTransferModule.md
+++ b/docs/docs/04-typescript-api/02-computer-vision/StyleTransferModule.md
@@ -48,7 +48,7 @@ For more information on loading resources, take a look at [loading models](../..

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/classes/StyleTransferModule.md#forward) method on the module object. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The method returns a promise, which can resolve either to an error or a URL to generated image.
+To run the model, you can use the [`forward`](../../06-api-reference/classes/StyleTransferModule.md#forward) method on the module object. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The method returns a promise, which can resolve either to an error or a URL to the generated image.

 ## Managing memory

diff --git a/docs/docs/04-typescript-api/02-computer-vision/VerticalOCRModule.md b/docs/docs/04-typescript-api/02-computer-vision/VerticalOCRModule.md
index 3cd750740..94c222518 100644
--- a/docs/docs/04-typescript-api/02-computer-vision/VerticalOCRModule.md
+++ b/docs/docs/04-typescript-api/02-computer-vision/VerticalOCRModule.md
@@ -49,4 +49,4 @@ For more information on loading resources, take a look at [loading models](../..

 ## Running the model

-To run the model, you can use the [`forward`](../../06-api-reference/classes/VerticalOCRModule.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image. The method returns a promise, which can resolve either to an error or an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the label of the detected object, and the confidence score.
+To run the model, you can use the [`forward`](../../06-api-reference/classes/VerticalOCRModule.md#forward) method. It accepts one argument, which is the image. The image can be a remote URL, a local file URI, or a base64-encoded image (either a full data URI or raw base64 content). The method returns a promise, which can resolve either to an error or an array of [`OCRDetection`](../../06-api-reference/interfaces/OCRDetection.md) objects. Each object contains coordinates of the bounding box, the text recognized within the box, and the confidence score.
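Because the style-transfer `forward` resolves to a URL rather than structured data, the natural consumer is an `Image` source. A sketch; the loaded module is passed in as a prop so no construction details are assumed:

```tsx
import React, { useState } from 'react';
import { Button, Image } from 'react-native';
import { StyleTransferModule } from 'react-native-executorch';

// Illustrative only: `styler` is assumed to be loaded elsewhere.
export function StylizedPreview({
  styler,
  image,
}: {
  styler: StyleTransferModule;
  image: string;
}) {
  const [resultUri, setResultUri] = useState<string | null>(null);

  const stylize = async () => setResultUri(await styler.forward(image));

  return (
    <>
      <Button title="Stylize" onPress={stylize} />
      {resultUri && (
        <Image source={{ uri: resultUri }} style={{ width: 256, height: 256 }} />
      )}
    </>
  );
}
```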
diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp b/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp
index ab28d1263..bdf3f97cb 100644
--- a/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp
@@ -86,7 +86,7 @@ cv::Mat readImage(const std::string &imageURI) {
     while (std::getline(uriStream, stringData, ',')) {
       ++segmentIndex;
     }
-    if (segmentIndex != 1) {
+    if (segmentIndex != 2) {
      throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed,
                              "Read image error: invalid base64 URI");
    }
@@ -104,8 +104,10 @@ cv::Mat readImage(const std::string &imageURI) {
         cv::Mat(1, imageData.size(), CV_8UC1, (void *)imageData.data()),
         cv::IMREAD_COLOR);
   } else {
-    throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed,
-                            "Read image error: unknown protocol");
+    // fallback to raw base64 content
+    auto data = base64_decode(imageURI);
+    cv::Mat encodedData(1, data.size(), CV_8UC1, (void *)data.data());
+    image = cv::imdecode(encodedData, cv::IMREAD_COLOR);
   }

   if (image.empty()) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
index b0c99586a..e2a8c16bf 100644
--- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
@@ -136,6 +136,13 @@ endfunction()

 add_rn_test(NumericalTests unit/NumericalTest.cpp)
 add_rn_test(LogTests unit/LogTest.cpp)
+add_rn_test(FileUtilsTest unit/FileUtilsTest.cpp)
+add_rn_test(ImageProcessingTest unit/ImageProcessingTest.cpp
+  SOURCES
+    ${IMAGE_UTILS_SOURCES}
+  LIBS opencv_deps
+)
+
 add_rn_test(BaseModelTests integration/BaseModelTest.cpp)

 add_rn_test(ClassificationTests integration/ClassificationTest.cpp
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh b/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh
index 6f2de5228..360aa9d11 100755
--- a/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh
@@ -17,6 +17,8 @@ MODELS_DIR="$SCRIPT_DIR/integration/assets/models"
 TEST_EXECUTABLES=(
   "NumericalTests"
   "LogTests"
+  "FileUtilsTest"
+  "ImageProcessingTest"
   "BaseModelTests"
   "ClassificationTests"
   "ObjectDetectionTests"
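The `readImage` change above is what enables the raw-base64 form throughout this patch: input without a recognized protocol is no longer rejected as `unknown protocol` but decoded as raw base64. From JavaScript, both calls below should therefore decode the same image; `expo-file-system` is an assumed dependency, used here only to produce a base64 string:

```tsx
import * as FileSystem from 'expo-file-system';

// `model` can be any hook result or module exposing forward(image).
async function forwardFromDisk(
  model: { forward(image: string): Promise<unknown> },
  fileUri: string,
) {
  const rawBase64 = await FileSystem.readAsStringAsync(fileUri, {
    encoding: FileSystem.EncodingType.Base64,
  });
  await model.forward(`data:image/jpeg;base64,${rawBase64}`); // data URI path
  return model.forward(rawBase64); // raw payload, handled by the new fallback
}
```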
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/unit/ImageProcessingTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/unit/ImageProcessingTest.cpp
new file mode 100644
index 000000000..d8a5a2a7f
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/unit/ImageProcessingTest.cpp
@@ -0,0 +1,63 @@
+#include "../data_processing/ImageProcessing.h"
+#include <gtest/gtest.h>
+#include <opencv2/opencv.hpp>
+#include <string>
+
+#include <rnexecutorch/RnExecutorchError.h>     // assumed header path
+#include <rnexecutorch/RnExecutorchErrorCode.h> // assumed header path
+
+const std::string RAW_BASE64_JPEG =
+    "/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAP////////////////////////////////////////"
+    "//////////////////////////////////////////////2wBDAf//////////////////////"
+    "////////////////////////////////////////////////////////////////"
+    "wAARCAABAAEDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAj/"
+    "xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFAEBAQAAAAAAAAAAAAAAAAAAAAH/"
+    "xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwC/gAf/2Q==";
+
+namespace rnexecutorch::image_processing {
+TEST(ReadImageTest, WorksWithWholeUriBase64) {
+  std::string dataUri = "data:image/jpeg;base64," + RAW_BASE64_JPEG;
+
+  cv::Mat image;
+  ASSERT_NO_THROW({ image = readImage(dataUri); });
+
+  EXPECT_FALSE(image.empty());
+  EXPECT_EQ(image.rows, 1);
+  EXPECT_EQ(image.cols, 1);
+}
+
+TEST(ReadImageTest, WorksWithRawBase64Content) {
+  cv::Mat image;
+  ASSERT_NO_THROW({ image = readImage(RAW_BASE64_JPEG); });
+
+  EXPECT_FALSE(image.empty());
+  EXPECT_EQ(image.rows, 1);
+  EXPECT_EQ(image.cols, 1);
+}
+
+TEST(ReadImageTest, FailsForInvalidBase64UriFormat) {
+  std::string invalidUri =
+      "data:image/jpeg;base64,extra,comma," + RAW_BASE64_JPEG;
+
+  EXPECT_THROW({ readImage(invalidUri); }, RnExecutorchError);
+
+  try {
+    readImage(invalidUri);
+  } catch (const RnExecutorchError &e) {
+    EXPECT_EQ(e.getNumericCode(),
+              static_cast<int>(RnExecutorchErrorCode::FileReadFailed));
+  }
+}
+
+TEST(ReadImageTest, FailsForInvalidBase64Data) {
+  std::string badDataUri = "data:image/jpeg;base64,NOT_A_VALID_IMAGE_DATA";
+
+  EXPECT_THROW({ readImage(badDataUri); }, RnExecutorchError);
+
+  try {
+    readImage(badDataUri);
+  } catch (const RnExecutorchError &e) {
+    EXPECT_STREQ(e.what(), "Read image error: invalid argument");
+  }
+}
+} // namespace rnexecutorch::image_processing
\ No newline at end of file
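The new tests pin down the failure modes worth guarding against in application code: a `data:` URI containing more than one comma still throws `FileReadFailed`, and undecodable base64 content fails during decoding. Both surface on the JavaScript side as rejected promises, so a small guard (the error shape is an assumption) is enough:

```tsx
// `model` can be any hook result or module exposing forward(image).
async function safeForward(
  model: { forward(image: string): Promise<unknown> },
  image: string,
) {
  try {
    return await model.forward(image);
  } catch (e) {
    // e.g. 'data:image/jpeg;base64,a,b,c'        -> "invalid base64 URI"
    // e.g. base64 that is not a decodable image  -> decode error
    console.warn('Image decoding failed:', e);
    return null;
  }
}
```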