diff --git a/benches/benchmark.rs b/benches/benchmark.rs
index d61abad..254b4d0 100644
--- a/benches/benchmark.rs
+++ b/benches/benchmark.rs
@@ -1,7 +1,7 @@
 use std::{path::Path, time::Duration};
 
 use criterion::{Criterion, criterion_group, criterion_main};
-use oneocr_rs::OneOcrError;
+use oneocr_rs::{ImageInput, OcrOptions, OneOcrError};
 
 pub fn criterion_benchmark(c: &mut Criterion) {
     let mut group = c.benchmark_group("ocr_bench_group");
@@ -20,7 +20,7 @@ pub fn ocr_simple() -> Result<(), OneOcrError> {
 
     // Perform OCR on an image
     let image_path = Path::new("./assets/sample.jpg");
-    let _ocr_result = ocr_engine.run(image_path, false)?;
+    let _ocr_result = ocr_engine.run(ImageInput::FilePath(image_path.to_path_buf()))?;
 
     Ok(())
 }
@@ -28,11 +28,15 @@ pub fn ocr_simple() -> Result<(), OneOcrError> {
 #[inline]
 pub fn ocr_advance() -> Result<(), OneOcrError> {
     // Create a new OCR instance
-    let ocr_engine = oneocr_rs::OcrEngine::new()?;
+    let ocr_options = OcrOptions {
+        include_word_level_details: true,
+        ..Default::default()
+    };
+    let ocr_engine = oneocr_rs::OcrEngine::new_with_options(ocr_options)?;
 
     // Perform OCR on an image
     let image_path = Path::new("./assets/sample.jpg");
-    let _ocr_result = ocr_engine.run(image_path, true)?;
+    let _ocr_result = ocr_engine.run(ImageInput::FilePath(image_path.to_path_buf()))?;
 
     Ok(())
 }
diff --git a/examples/bbox_draw.rs b/examples/bbox_draw.rs
index 562d961..91b5ada 100644
--- a/examples/bbox_draw.rs
+++ b/examples/bbox_draw.rs
@@ -1,6 +1,6 @@
 use image::Rgba;
 use imageproc::drawing::draw_line_segment_mut;
-use oneocr_rs::{OcrEngine, OneOcrError};
+use oneocr_rs::{OcrEngine, OcrOptions, OneOcrError};
 use std::path::Path;
 
 // cargo run --example bbox_draw -- "/path/to/input/image.jpg" "/path/to/draw_output.jpg"
@@ -18,14 +18,17 @@ fn main() -> Result<(), OneOcrError> {
     let output_image_path = Path::new(&output_image_path);
 
     // Create a new OCR instance
-    let ocr_engine = OcrEngine::new()?;
+    let options = OcrOptions {
+        include_word_level_details: true,
+        ..Default::default()
+    };
+    let ocr_engine = OcrEngine::new_with_options(options)?;
 
     // Set to the max recognition line count possible.
     ocr_engine.set_max_recognition_line_count(1000)?;
 
     // Perform OCR on an image
-    let include_word_level_detail = true;
-    let ocr_result = ocr_engine.run(input_image_path, include_word_level_detail)?;
+    let ocr_result = ocr_engine.run(input_image_path.into())?;
 
     // Load the image
     let mut img = image::open(input_image_path)?;
diff --git a/examples/ocr_advance.rs b/examples/ocr_advance.rs
index 1602b5f..7608e3a 100644
--- a/examples/ocr_advance.rs
+++ b/examples/ocr_advance.rs
@@ -1,4 +1,4 @@
-use oneocr_rs::{OcrEngine, OneOcrError};
+use oneocr_rs::{OcrEngine, OcrOptions, OneOcrError};
 use std::path::Path;
 
 // cargo run --example ocr_advance -- "/path/to/input/image.png"
@@ -11,14 +11,17 @@ fn main() -> Result<(), OneOcrError> {
     let image_path = Path::new(&input_image_path);
 
     // Create a new OCR instance
-    let ocr_engine = OcrEngine::new()?;
+    let ocr_options = OcrOptions {
+        include_word_level_details: true,
+        ..Default::default()
+    };
+    let ocr_engine = OcrEngine::new_with_options(ocr_options)?;
 
     // Set to the max recognition line count possible.
     ocr_engine.set_max_recognition_line_count(1000)?;
 
     // Perform OCR on an image
-    let include_word_level_detail = true;
-    let ocr_result = ocr_engine.run(image_path, include_word_level_detail)?;
+    let ocr_result = ocr_engine.run(image_path.into())?;
 
     // Print the OCR result
     println!("Image angle: {:.2}", ocr_result.image_angle);
@@ -30,8 +33,7 @@ fn main() -> Result<(), OneOcrError> {
 
         let (handwritten, confidence) = line.get_line_style()?;
         println!(
-            "Line style: handwritten: {}, handwritten style confidence: {}",
-            handwritten, confidence
+            "Line style: handwritten: {handwritten}, handwritten style confidence: {confidence}"
         );
 
         if let Some(words) = &line.words {
diff --git a/examples/ocr_simple.rs b/examples/ocr_simple.rs
index 506ec4b..882a6c2 100644
--- a/examples/ocr_simple.rs
+++ b/examples/ocr_simple.rs
@@ -15,7 +15,7 @@ fn main() -> Result<(), OneOcrError> {
     let ocr_engine = OcrEngine::new()?;
 
     // Perform OCR on an image
-    let ocr_result = ocr_engine.run(image_path, false)?;
+    let ocr_result = ocr_engine.run(image_path.into())?;
 
     // Print the OCR lines.
     for line in &ocr_result.lines {
diff --git a/src/bounding_box.rs b/src/bounding_box.rs
index e2861f0..1a7f9bf 100644
--- a/src/bounding_box.rs
+++ b/src/bounding_box.rs
@@ -52,7 +52,7 @@ impl std::fmt::Display for BoundingBox {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(
             f,
-            "[ ⌜ {}, ⌝ {}, ⌟ {}, ⌞ {}",
+            "[ ⌜ {}, ⌝ {}, ⌟ {}, ⌞ {} ]",
             self.top_left, self.top_right, self.bottom_right, self.bottom_left,
         )
     }
diff --git a/src/errors.rs b/src/errors.rs
index 8766b63..db7874b 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -1,4 +1,4 @@
-// Define a custom error type named OneOcrError using thiserror crate for better error handling
+/// Custom error type for OneOcr operations.
 #[derive(Debug, thiserror::Error)]
 pub enum OneOcrError {
     #[error("Failed to open image: {0}")]
diff --git a/src/ffi.rs b/src/ffi.rs
index 3bb6088..36fa5ac 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -2,7 +2,7 @@ use std::ffi::{c_char, c_void};
 use windows_link::link;
 
 link!("oneocr.dll" "system" fn CreateOcrInitOptions(init_option: *mut *mut c_void) -> i32);
-link!("oneocr.dll" "system" fn OcrInitOptionsSetUseModelDelayLoad(init_option: *mut c_void) -> i32);
+link!("oneocr.dll" "system" fn OcrInitOptionsSetUseModelDelayLoad(init_option: *mut c_void, delay_load: u8) -> i32);
 link!("oneocr.dll" "system" fn CreateOcrPipeline(
     model_path: *const c_char,
     key: *const c_char,
diff --git a/src/image_input.rs b/src/image_input.rs
new file mode 100644
index 0000000..94efefb
--- /dev/null
+++ b/src/image_input.rs
@@ -0,0 +1,39 @@
+use std::path::{Path, PathBuf};
+
+use image::{DynamicImage, ImageBuffer, Rgba};
+
+/// Input source for OCR processing.
+#[derive(Debug)]
+pub enum ImageInput {
+    /// Process an image from a file path.
+    FilePath(PathBuf),
+    /// Process an image from an in-memory buffer.
+    /// The buffer should contain RGBA pixel data.
+    Buffer(ImageBuffer<Rgba<u8>, Vec<u8>>),
+    /// Process a dynamic image.
+    Dynamic(DynamicImage),
+}
+
+impl From<&Path> for ImageInput {
+    fn from(path: &Path) -> Self {
+        ImageInput::FilePath(path.to_path_buf())
+    }
+}
+
+impl From<PathBuf> for ImageInput {
+    fn from(path: PathBuf) -> Self {
+        ImageInput::FilePath(path)
+    }
+}
+
+impl From<ImageBuffer<Rgba<u8>, Vec<u8>>> for ImageInput {
+    fn from(buffer: ImageBuffer<Rgba<u8>, Vec<u8>>) -> Self {
+        ImageInput::Buffer(buffer)
+    }
+}
+
+impl From<DynamicImage> for ImageInput {
+    fn from(image: DynamicImage) -> Self {
+        ImageInput::Dynamic(image)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 5b82804..8c85d9e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,10 @@
 mod bounding_box;
 mod errors;
 mod ffi;
+mod image_input;
 mod ocr_engine;
 mod ocr_line;
+mod ocr_options;
 mod ocr_result;
 mod ocr_word;
 
@@ -10,8 +12,10 @@ mod ocr_word;
 pub use bounding_box::BoundingBox;
 pub use bounding_box::Point;
 pub use errors::OneOcrError;
+pub use image_input::ImageInput;
 pub use ocr_engine::OcrEngine;
 pub use ocr_line::OcrLine;
+pub use ocr_options::{OcrOptions, Resolution};
 pub use ocr_result::OcrResult;
 pub use ocr_word::OcrWord;
 
diff --git a/src/ocr_engine.rs b/src/ocr_engine.rs
index 2927f43..f557ca9 100644
--- a/src/ocr_engine.rs
+++ b/src/ocr_engine.rs
@@ -7,10 +7,9 @@ use crate::ffi::{
     ReleaseOcrProcessOptions, RunOcrPipeline,
 };
 use crate::ocr_result::OcrResult;
-use crate::{ONE_OCR_MODEL_FILE_NAME, ONE_OCR_MODEL_KEY};
-use image::DynamicImage;
+use crate::{ImageInput, ONE_OCR_MODEL_FILE_NAME, ONE_OCR_MODEL_KEY, OcrOptions};
+use image::{DynamicImage, ImageBuffer, Rgba};
 use std::ffi::{CString, c_void};
-use std::path::Path;
 use std::ptr;
 
 // Macros
@@ -22,33 +21,32 @@ pub struct OcrEngine {
     init_options: *mut c_void,
     pipeline: *mut c_void,
     process_options: *mut c_void,
+    ocr_options: OcrOptions,
 }
 
 impl OcrEngine {
-    /// Creates a new instance of the OCR engine.
-    /// This function loads the necessary library and initializes the OCR pipeline.
-    pub fn new() -> Result<Self, OneOcrError> {
+    /// Creates a new instance of the OCR engine with specified options.
+    /// This function loads the necessary library and initializes the OCR pipeline with the provided options.
+    pub fn new_with_options(ocr_options: OcrOptions) -> Result<Self, OneOcrError> {
         let mut init_options: *mut c_void = ptr::null_mut();
         check_ocr_call!(
             unsafe { CreateOcrInitOptions(&mut init_options) },
             "Failed to create init options"
         );
 
+        // Disable model delay load
         check_ocr_call!(
-            unsafe { OcrInitOptionsSetUseModelDelayLoad(init_options) },
+            unsafe { OcrInitOptionsSetUseModelDelayLoad(init_options, 0) },
             "Failed to set model delay load"
         );
 
         let model_path = Self::get_model_path()?;
         let model_path_cstr = CString::new(model_path).map_err(|e| {
-            OneOcrError::ModelFileLoadError(format!(
-                "Failed to convert model path to CString: {}",
-                e
-            ))
+            OneOcrError::ModelFileLoadError(format!("Failed to convert model path to CString: {e}"))
         })?;
 
         let key_cstr = CString::new(ONE_OCR_MODEL_KEY).map_err(|e| {
-            OneOcrError::InvalidModelKey(format!("Failed to convert model key to CString: {}", e))
+            OneOcrError::InvalidModelKey(format!("Failed to convert model key to CString: {e}"))
         })?;
 
         let mut pipeline: *mut c_void = ptr::null_mut();
@@ -70,13 +68,41 @@ impl OcrEngine {
             "Failed to create OCR process options"
         );
 
+        check_ocr_call!(
+            unsafe {
+                OcrProcessOptionsSetMaxRecognitionLineCount(
+                    process_options,
+                    ocr_options.max_recognition_line_count,
+                )
+            },
+            "Failed to set max recognition line count"
+        );
+
+        check_ocr_call!(
+            unsafe {
+                OcrProcessOptionsSetResizeResolution(
+                    process_options,
+                    ocr_options.resize_resolution.width,
+                    ocr_options.resize_resolution.height,
+                )
+            },
+            "Failed to set resize resolution"
+        );
+
         Ok(Self {
             init_options,
             pipeline,
             process_options,
+            ocr_options,
         })
     }
 
+    /// Creates a new instance of the OCR engine with default options.
+    /// This function loads the necessary library and initializes the OCR pipeline.
+    pub fn new() -> Result<Self, OneOcrError> {
+        Self::new_with_options(OcrOptions::default())
+    }
+
     /// Retrieves the maximum number of lines that can be recognized.
     /// Default is 100.
     pub fn get_max_recognition_line_count(&self) -> Result<i32, OneOcrError> {
@@ -132,28 +158,72 @@ impl OcrEngine {
         Ok(())
     }
 
-    /// Run the OCR pipeline on the given image path.
-    pub fn run(
+    /// Run OCR processing on an image.
+    ///
+    /// This method accepts various input types through the `ImageInput` enum
+    /// and allows configuration through `OcrOptions`.
+    ///
+    /// # Arguments
+    ///
+    /// * `input` - The image input source (file path, image buffer, or dynamic image)
+    ///
+    /// # Returns
+    ///
+    /// Returns an `OcrResult` containing the recognized text and associated metadata,
+    /// or an error if the OCR processing fails.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// use oneocr_rs::{OcrEngine, OcrOptions, ImageInput};
+    /// use std::path::Path;
+    /// let engine = OcrEngine::new().unwrap();
+    ///
+    /// // Process from file path
+    /// let result = engine.run(Path::new("image.jpg").into()).unwrap();
+    /// ```
+    ///
+    /// ```ignore
+    /// // Process from in-memory image buffer
+    /// let img_buffer: ImageBuffer<Rgba<u8>, Vec<u8>> = capture_screenshot(); // Your screenshot function
+    /// let result = engine.run(img_buffer.into()).unwrap();
+    /// ```
+    pub fn run(&self, input: ImageInput) -> Result<OcrResult, OneOcrError> {
+        let img_rgba = self.load_image(input)?;
+        self.run_ocr_on_rgba_image(&img_rgba, self.ocr_options.include_word_level_details)
+    }
+
+    /// Loads an image from various input sources and converts it to RGBA format.
+    fn load_image(&self, input: ImageInput) -> Result<ImageBuffer<Rgba<u8>, Vec<u8>>, OneOcrError> {
+        match input {
+            ImageInput::FilePath(path) => {
+                let img = image::open(path)?;
+                Ok(self.convert_to_rgba(img))
+            }
+            ImageInput::Buffer(buffer) => Ok(buffer),
+            ImageInput::Dynamic(img) => Ok(self.convert_to_rgba(img)),
+        }
+    }
+
+    /// Converts a DynamicImage to RGBA format.
+    fn convert_to_rgba(&self, img: DynamicImage) -> ImageBuffer<Rgba<u8>, Vec<u8>> {
+        match img {
+            DynamicImage::ImageRgba8(i) => i,
+            _ => img.to_rgba8(),
+        }
+    }
+
+    /// Performs OCR on an RGBA image buffer.
+    fn run_ocr_on_rgba_image(
         &self,
-        image_path: &Path,
+        img_rgba: &ImageBuffer<Rgba<u8>, Vec<u8>>,
         word_level_detail: bool,
     ) -> Result<OcrResult, OneOcrError> {
-        let img = image::open(Path::new(image_path))?;
-        let img_rgba = match img {
-            DynamicImage::ImageRgba8(i) => i,
-            DynamicImage::ImageRgb8(i) => DynamicImage::ImageRgb8(i).to_rgba8(),
-            _ => {
-                return Err(OneOcrError::ImageFormatError(format!(
-                    "Unsupported image format: {:?}",
-                    img
-                )));
-            }
-        };
         let (rows, cols) = (img_rgba.height() as i32, img_rgba.width() as i32);
         let step = (img_rgba.sample_layout().height_stride) as i64;
         let data_ptr = img_rgba.as_ptr() as i64;
         let image = RawImage {
-            t: 3, // Assuming 3 means RGBA or a type the C API expects
+            t: 3, // RGBA format identifier expected by the C API
             col: cols,
             row: rows,
             _unk: 0,
@@ -173,7 +243,7 @@ impl OcrEngine {
     /// Retrieves the path to the model file.
     fn get_model_path() -> Result<String, OneOcrError> {
         let exe_path = std::env::current_exe().map_err(|e| {
-            OneOcrError::ModelFileLoadError(format!("Failed to get current executable path: {}", e))
+            OneOcrError::ModelFileLoadError(format!("Failed to get current executable path: {e}"))
         })?;
         let model_path_buf = exe_path
             .parent()
diff --git a/src/ocr_options.rs b/src/ocr_options.rs
new file mode 100644
index 0000000..a982b0b
--- /dev/null
+++ b/src/ocr_options.rs
@@ -0,0 +1,40 @@
+/// A simple width×height pair.
+#[derive(Debug, Clone, Copy)]
+pub struct Resolution {
+    pub width: i32,
+    pub height: i32,
+}
+
+/// Configuration for OCR processing behavior.
+#[derive(Debug, Clone)]
+pub struct OcrOptions {
+    /// The maximum number of lines that can be recognized.
+    /// Default is 100, range is 0-1000.
+    pub max_recognition_line_count: i32,
+
+    /// The maximum internal resize resolution (width, height).
+    ///
+    /// The `resize resolution` defines the maximum dimensions to which an image will be automatically scaled internally before OCR processing.
+    /// It’s a performance and accuracy trade-off rather than a restriction on the original image’s resolution.
+    ///
+    /// The default and maximum resolution is (1152, 768).
+    pub resize_resolution: Resolution,
+
+    /// Whether to include word-level details in the result.
+    /// If `true`, the result will contain bounding boxes and confidence scores for individual words.
+    /// If `false`, only line-level information will be available.
+    pub include_word_level_details: bool,
+}
+
+impl Default for OcrOptions {
+    fn default() -> Self {
+        OcrOptions {
+            max_recognition_line_count: 100,
+            resize_resolution: Resolution {
+                width: 1152,
+                height: 768,
+            },
+            include_word_level_details: false,
+        }
+    }
+}