Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{path::Path, time::Duration};

use criterion::{Criterion, criterion_group, criterion_main};
use oneocr_rs::OneOcrError;
use oneocr_rs::{ImageInput, OcrOptions, OneOcrError};

pub fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("ocr_bench_group");
Expand All @@ -20,19 +20,23 @@ pub fn ocr_simple() -> Result<(), OneOcrError> {

// Perform OCR on an image
let image_path = Path::new("./assets/sample.jpg");
let _ocr_result = ocr_engine.run(image_path, false)?;
let _ocr_result = ocr_engine.run(ImageInput::FilePath(image_path.to_path_buf()))?;

Ok(())
}

#[inline]
pub fn ocr_advance() -> Result<(), OneOcrError> {
// Create a new OCR instance
let ocr_engine = oneocr_rs::OcrEngine::new()?;
let ocr_options = OcrOptions {
include_word_level_details: true,
..Default::default()
};
let ocr_engine = oneocr_rs::OcrEngine::new_with_options(ocr_options)?;

// Perform OCR on an image
let image_path = Path::new("./assets/sample.jpg");
let _ocr_result = ocr_engine.run(image_path, true)?;
let _ocr_result = ocr_engine.run(ImageInput::FilePath(image_path.to_path_buf()))?;

Ok(())
}
Expand Down
11 changes: 7 additions & 4 deletions examples/bbox_draw.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use image::Rgba;
use imageproc::drawing::draw_line_segment_mut;
use oneocr_rs::{OcrEngine, OneOcrError};
use oneocr_rs::{OcrEngine, OcrOptions, OneOcrError};
use std::path::Path;

// cargo run --example bbox_draw -- "/path/to/input/image.jpg" "/path/to/draw_output.jpg"
Expand All @@ -18,14 +18,17 @@ fn main() -> Result<(), OneOcrError> {
let output_image_path = Path::new(&output_image_path);

// Create a new OCR instance
let ocr_engine = OcrEngine::new()?;
let options = OcrOptions {
include_word_level_details: true,
..Default::default()
};
let ocr_engine = OcrEngine::new_with_options(options)?;

// Set to the max recognition line count possible.
ocr_engine.set_max_recognition_line_count(1000)?;

// Perform OCR on an image
let include_word_level_detail = true;
let ocr_result = ocr_engine.run(input_image_path, include_word_level_detail)?;
let ocr_result = ocr_engine.run(input_image_path.into())?;

// Load the image
let mut img = image::open(input_image_path)?;
Expand Down
14 changes: 8 additions & 6 deletions examples/ocr_advance.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use oneocr_rs::{OcrEngine, OneOcrError};
use oneocr_rs::{OcrEngine, OcrOptions, OneOcrError};
use std::path::Path;

// cargo run --example ocr_advance -- "/path/to/input/image.png"
Expand All @@ -11,14 +11,17 @@ fn main() -> Result<(), OneOcrError> {
let image_path = Path::new(&input_image_path);

// Create a new OCR instance
let ocr_engine = OcrEngine::new()?;
let ocr_options = OcrOptions {
include_word_level_details: true,
..Default::default()
};
let ocr_engine = OcrEngine::new_with_options(ocr_options)?;

// Set to the max recognition line count possible.
ocr_engine.set_max_recognition_line_count(1000)?;

// Perform OCR on an image
let include_word_level_detail = true;
let ocr_result = ocr_engine.run(image_path, include_word_level_detail)?;
let ocr_result = ocr_engine.run(image_path.into())?;

// Print the OCR result
println!("Image angle: {:.2}", ocr_result.image_angle);
Expand All @@ -30,8 +33,7 @@ fn main() -> Result<(), OneOcrError> {

let (handwritten, confidence) = line.get_line_style()?;
println!(
"Line style: handwritten: {}, handwritten style confidence: {}",
handwritten, confidence
"Line style: handwritten: {handwritten}, handwritten style confidence: {confidence}"
);

if let Some(words) = &line.words {
Expand Down
2 changes: 1 addition & 1 deletion examples/ocr_simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn main() -> Result<(), OneOcrError> {
let ocr_engine = OcrEngine::new()?;

// Perform OCR on an image
let ocr_result = ocr_engine.run(image_path, false)?;
let ocr_result = ocr_engine.run(image_path.into())?;

// Print the OCR lines.
for line in &ocr_result.lines {
Expand Down
2 changes: 1 addition & 1 deletion src/bounding_box.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl std::fmt::Display for BoundingBox {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"[ ⌜ {}, ⌝ {}, ⌟ {}, ⌞ {}",
"[ ⌜ {}, ⌝ {}, ⌟ {}, ⌞ {} ]",
self.top_left, self.top_right, self.bottom_right, self.bottom_left,
)
}
Expand Down
2 changes: 1 addition & 1 deletion src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Define a custom error type named OneOcrError using thiserror crate for better error handling
/// Custom error type for OneOcr operations.
#[derive(Debug, thiserror::Error)]
pub enum OneOcrError {
#[error("Failed to open image: {0}")]
Expand Down
2 changes: 1 addition & 1 deletion src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::ffi::{c_char, c_void};
use windows_link::link;

link!("oneocr.dll" "system" fn CreateOcrInitOptions(init_option: *mut *mut c_void) -> i32);
link!("oneocr.dll" "system" fn OcrInitOptionsSetUseModelDelayLoad(init_option: *mut c_void) -> i32);
link!("oneocr.dll" "system" fn OcrInitOptionsSetUseModelDelayLoad(init_option: *mut c_void, delay_load: u8) -> i32);
link!("oneocr.dll" "system" fn CreateOcrPipeline(
model_path: *const c_char,
key: *const c_char,
Expand Down
39 changes: 39 additions & 0 deletions src/image_input.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use std::path::{Path, PathBuf};

use image::{DynamicImage, ImageBuffer, Rgba};

/// Image source accepted by the OCR engine.
///
/// Every variant owns its data, so a value can be passed to the engine by
/// value without lifetime constraints.
///
/// The type is `Clone` so that the same input can be submitted more than
/// once; note that cloning the `Buffer` or `Dynamic` variants copies the
/// pixel data.
#[derive(Debug, Clone)]
pub enum ImageInput {
    /// An image stored on disk, identified by its path.
    FilePath(PathBuf),
    /// An in-memory image buffer holding 8-bit RGBA pixels.
    Buffer(ImageBuffer<Rgba<u8>, Vec<u8>>),
    /// An already decoded `DynamicImage`.
    Dynamic(DynamicImage),
}

impl From<&Path> for ImageInput {
fn from(path: &Path) -> Self {
ImageInput::FilePath(path.to_path_buf())
}
}

impl From<PathBuf> for ImageInput {
fn from(path: PathBuf) -> Self {
ImageInput::FilePath(path)
}
}

impl From<ImageBuffer<Rgba<u8>, Vec<u8>>> for ImageInput {
fn from(buffer: ImageBuffer<Rgba<u8>, Vec<u8>>) -> Self {
ImageInput::Buffer(buffer)
}
}

impl From<DynamicImage> for ImageInput {
fn from(image: DynamicImage) -> Self {
ImageInput::Dynamic(image)
}
}
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
mod bounding_box;
mod errors;
mod ffi;
mod image_input;
mod ocr_engine;
mod ocr_line;
mod ocr_options;
mod ocr_result;
mod ocr_word;

// Re-export the public structs for easier access
pub use bounding_box::BoundingBox;
pub use bounding_box::Point;
pub use errors::OneOcrError;
pub use image_input::ImageInput;
pub use ocr_engine::OcrEngine;
pub use ocr_line::OcrLine;
pub use ocr_options::{OcrOptions, Resolution};
pub use ocr_result::OcrResult;
pub use ocr_word::OcrWord;

Expand Down
126 changes: 98 additions & 28 deletions src/ocr_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@ use crate::ffi::{
ReleaseOcrProcessOptions, RunOcrPipeline,
};
use crate::ocr_result::OcrResult;
use crate::{ONE_OCR_MODEL_FILE_NAME, ONE_OCR_MODEL_KEY};
use image::DynamicImage;
use crate::{ImageInput, ONE_OCR_MODEL_FILE_NAME, ONE_OCR_MODEL_KEY, OcrOptions};
use image::{DynamicImage, ImageBuffer, Rgba};
use std::ffi::{CString, c_void};
use std::path::Path;
use std::ptr;

// Macros
Expand All @@ -22,33 +21,32 @@ pub struct OcrEngine {
init_options: *mut c_void,
pipeline: *mut c_void,
process_options: *mut c_void,
ocr_options: OcrOptions,
}

impl OcrEngine {
/// Creates a new instance of the OCR engine.
/// This function loads the necessary library and initializes the OCR pipeline.
pub fn new() -> Result<Self, OneOcrError> {
/// Creates a new instance of the OCR engine with specified options.
/// This function loads the necessary library and initializes the OCR pipeline with the provided options.
pub fn new_with_options(ocr_options: OcrOptions) -> Result<Self, OneOcrError> {
let mut init_options: *mut c_void = ptr::null_mut();
check_ocr_call!(
unsafe { CreateOcrInitOptions(&mut init_options) },
"Failed to create init options"
);

// Disable model delay load
check_ocr_call!(
unsafe { OcrInitOptionsSetUseModelDelayLoad(init_options) },
unsafe { OcrInitOptionsSetUseModelDelayLoad(init_options, 0) },
"Failed to set model delay load"
);

let model_path = Self::get_model_path()?;
let model_path_cstr = CString::new(model_path).map_err(|e| {
OneOcrError::ModelFileLoadError(format!(
"Failed to convert model path to CString: {}",
e
))
OneOcrError::ModelFileLoadError(format!("Failed to convert model path to CString: {e}"))
})?;

let key_cstr = CString::new(ONE_OCR_MODEL_KEY).map_err(|e| {
OneOcrError::InvalidModelKey(format!("Failed to convert model key to CString: {}", e))
OneOcrError::InvalidModelKey(format!("Failed to convert model key to CString: {e}"))
})?;

let mut pipeline: *mut c_void = ptr::null_mut();
Expand All @@ -70,13 +68,41 @@ impl OcrEngine {
"Failed to create OCR process options"
);

check_ocr_call!(
unsafe {
OcrProcessOptionsSetMaxRecognitionLineCount(
process_options,
ocr_options.max_recognition_line_count,
)
},
"Failed to set max recognition line count"
);

check_ocr_call!(
unsafe {
OcrProcessOptionsSetResizeResolution(
process_options,
ocr_options.resize_resolution.width,
ocr_options.resize_resolution.height,
)
},
"Failed to set resize resolution"
);

Ok(Self {
init_options,
pipeline,
process_options,
ocr_options,
})
}

/// Builds an OCR engine configured with `OcrOptions::default()`.
///
/// This is a shorthand for calling `new_with_options` with the default
/// options; any error from that constructor is returned unchanged.
pub fn new() -> Result<Self, OneOcrError> {
    let default_options = OcrOptions::default();
    Self::new_with_options(default_options)
}

/// Retrieves the maximum number of lines that can be recognized.
/// Default is 100.
pub fn get_max_recognition_line_count(&self) -> Result<i32, OneOcrError> {
Expand Down Expand Up @@ -132,28 +158,72 @@ impl OcrEngine {
Ok(())
}

/// Run the OCR pipeline on the given image path.
pub fn run(
/// Runs OCR on a single image and returns the recognition result.
///
/// The input is first turned into an RGBA8 buffer (see `load_image`) and is
/// then passed to the OCR pipeline. Whether word-level details are requested
/// is taken from the `OcrOptions` this engine holds, not from a per-call
/// argument.
///
/// # Arguments
///
/// * `input` - The image to process: a file path, an RGBA image buffer, or a
///   `DynamicImage`. Types with a `From` conversion can be passed via `.into()`.
///
/// # Errors
///
/// Returns a `OneOcrError` if the image cannot be loaded from the given file
/// path or if the OCR step itself fails.
///
/// # Examples
///
/// ```no_run
/// use oneocr_rs::OcrEngine;
/// use std::path::Path;
/// let engine = OcrEngine::new().unwrap();
///
/// // Process from file path
/// let result = engine.run(Path::new("image.jpg").into()).unwrap();
/// ```
///
/// ```ignore
/// // Process from in-memory image buffer
/// let img_buffer: ImageBuffer<Rgba<u8>, Vec<u8>> = capture_screenshot(); // Your screenshot function
/// let result = engine.run(img_buffer.into()).unwrap();
/// ```
pub fn run(&self, input: ImageInput) -> Result<OcrResult, OneOcrError> {
    let rgba = self.load_image(input)?;
    let with_word_details = self.ocr_options.include_word_level_details;
    self.run_ocr_on_rgba_image(&rgba, with_word_details)
}

/// Resolves an `ImageInput` into an owned RGBA8 image buffer.
///
/// * `Buffer` inputs are returned as they are.
/// * `Dynamic` inputs are converted with `convert_to_rgba`.
/// * `FilePath` inputs are opened with `image::open` and then converted;
///   a failure to open the file is propagated as a `OneOcrError`.
fn load_image(&self, input: ImageInput) -> Result<ImageBuffer<Rgba<u8>, Vec<u8>>, OneOcrError> {
    let rgba = match input {
        ImageInput::Buffer(pixels) => pixels,
        ImageInput::Dynamic(decoded) => self.convert_to_rgba(decoded),
        ImageInput::FilePath(location) => self.convert_to_rgba(image::open(location)?),
    };
    Ok(rgba)
}

/// Converts a DynamicImage to RGBA format.
fn convert_to_rgba(&self, img: DynamicImage) -> ImageBuffer<Rgba<u8>, Vec<u8>> {
match img {
DynamicImage::ImageRgba8(i) => i,
_ => img.to_rgba8(),
}
}

/// Performs OCR on an RGBA image buffer.
fn run_ocr_on_rgba_image(
&self,
image_path: &Path,
img_rgba: &ImageBuffer<Rgba<u8>, Vec<u8>>,
word_level_detail: bool,
) -> Result<OcrResult, OneOcrError> {
let img = image::open(Path::new(image_path))?;
let img_rgba = match img {
DynamicImage::ImageRgba8(i) => i,
DynamicImage::ImageRgb8(i) => DynamicImage::ImageRgb8(i).to_rgba8(),
_ => {
return Err(OneOcrError::ImageFormatError(format!(
"Unsupported image format: {:?}",
img
)));
}
};
let (rows, cols) = (img_rgba.height() as i32, img_rgba.width() as i32);
let step = (img_rgba.sample_layout().height_stride) as i64;
let data_ptr = img_rgba.as_ptr() as i64;
let image = RawImage {
t: 3, // Assuming 3 means RGBA or a type the C API expects
t: 3, // RGBA format identifier expected by the C API
col: cols,
row: rows,
_unk: 0,
Expand All @@ -173,7 +243,7 @@ impl OcrEngine {
/// Retrieves the path to the model file.
fn get_model_path() -> Result<String, OneOcrError> {
let exe_path = std::env::current_exe().map_err(|e| {
OneOcrError::ModelFileLoadError(format!("Failed to get current executable path: {}", e))
OneOcrError::ModelFileLoadError(format!("Failed to get current executable path: {e}"))
})?;
let model_path_buf = exe_path
.parent()
Expand Down
Loading