feat: coreml compute-unit, model-format and cache-dir options; neural-engine provider in bench/run

rlaope · rlaope · commit ac62503f2246 · 2026-01-31T16:46:12.000+09:00
diff --git a/crates/airml-providers/src/coreml.rs b/crates/airml-providers/src/coreml.rs
@@ -1,38 +1,69 @@
 //! CoreML Execution Provider
 //!
 //! Execution provider for Apple CoreML/Metal acceleration on macOS.
+//! Supports CPU, GPU, and Neural Engine (ANE) on Apple Silicon.
 
-use ort::execution_providers::{CoreMLExecutionProvider, ExecutionProviderDispatch};
+use ort::ep::coreml::{ComputeUnits as OrtComputeUnits, CoreML, ModelFormat};
+use ort::execution_providers::ExecutionProviderDispatch;
 
 /// Compute units for CoreML execution
-#[derive(Debug, Clone, Copy, Default)]
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
 pub enum ComputeUnits {
     /// Use all available compute units (CPU, GPU, Neural Engine)
     #[default]
     All,
-    /// Use CPU and GPU only
+    /// Use CPU and Neural Engine (ANE), no GPU - often a good fit for models on Apple Silicon
+    CpuAndNeuralEngine,
+    /// Use CPU and GPU only (no ANE)
     CpuAndGpu,
     /// Use CPU only
     CpuOnly,
 }
 
+impl ComputeUnits {
+    /// Convert to ort's ComputeUnits enum
+    fn to_ort(self) -> OrtComputeUnits {
+        match self {
+            ComputeUnits::All => OrtComputeUnits::All,
+            ComputeUnits::CpuAndNeuralEngine => OrtComputeUnits::CPUAndNeuralEngine,
+            ComputeUnits::CpuAndGpu => OrtComputeUnits::CPUAndGPU,
+            ComputeUnits::CpuOnly => OrtComputeUnits::CPUOnly,
+        }
+    }
+}
+
 /// CoreML execution provider configuration
 #[derive(Debug, Clone)]
 pub struct CoreMLConfig {
     /// Which compute units to use
     pub compute_units: ComputeUnits,
+    /// Enable subgraph execution (for models with control flow)
+    pub enable_subgraphs: bool,
     /// Require static input shapes
     pub require_static_shapes: bool,
-    /// Enable model caching
-    pub enable_cache: bool,
+    /// Model format (NeuralNetwork or MLProgram); `None` leaves the provider's default
+    pub model_format: Option<CoreMLModelFormat>,
+    /// Cache directory for compiled models; `None` leaves the cache directory unset
+    pub cache_dir: Option<String>,
+}
+
+/// CoreML model format
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CoreMLModelFormat {
+    /// NeuralNetwork format - better compatibility with older macOS/iOS
+    NeuralNetwork,
+    /// MLProgram format - supports more operators, potentially better performance
+    MLProgram,
 }
 
 impl Default for CoreMLConfig {
     fn default() -> Self {
         Self {
             compute_units: ComputeUnits::All,
+            enable_subgraphs: false,
             require_static_shapes: false,
-            enable_cache: true,
+            model_format: None,
+            cache_dir: None,
         }
     }
 }
@@ -60,36 +91,77 @@ impl CoreMLProvider {
         self
     }
 
-    /// Set neural engine only mode
+    /// Restrict CoreML to CPU + Neural Engine (ANE); the GPU is not used.
+    /// Despite the name, the CPU stays enabled. Often a good choice for inference on Apple Silicon.
     pub fn neural_engine_only(self) -> Self {
-        self.with_compute_units(ComputeUnits::All)
+        self.with_compute_units(ComputeUnits::CpuAndNeuralEngine)
     }
 
-    /// Set GPU only mode (no ANE)
+    /// Set GPU only mode (CPU + GPU, no ANE)
     pub fn gpu_only(self) -> Self {
         self.with_compute_units(ComputeUnits::CpuAndGpu)
     }
 
+    /// Set CPU only mode
+    pub fn cpu_only(self) -> Self {
+        self.with_compute_units(ComputeUnits::CpuOnly)
+    }
+
+    /// Enable subgraph execution for models with control flow operators
+    pub fn with_subgraphs(mut self, enable: bool) -> Self {
+        self.config.enable_subgraphs = enable;
+        self
+    }
+
+    /// Require static input shapes
+    pub fn with_static_shapes(mut self, require: bool) -> Self {
+        self.config.require_static_shapes = require;
+        self
+    }
+
+    /// Set model format
+    pub fn with_model_format(mut self, format: CoreMLModelFormat) -> Self {
+        self.config.model_format = Some(format);
+        self
+    }
+
+    /// Set cache directory for compiled models
+    pub fn with_cache_dir(mut self, dir: impl Into<String>) -> Self {
+        self.config.cache_dir = Some(dir.into());
+        self
+    }
+
     /// Convert to ORT execution provider dispatch
     pub fn into_dispatch(self) -> ExecutionProviderDispatch {
-        let mut provider = CoreMLExecutionProvider::default();
-
-        match self.config.compute_units {
-            ComputeUnits::All => {
-                // Default - uses all available compute units
-            }
-            ComputeUnits::CpuAndGpu => {
-                provider = provider.with_ane_only();
-            }
-            ComputeUnits::CpuOnly => {
-                provider = provider.with_cpu_only();
-            }
+        let mut provider = CoreML::default();
+
+        // Set compute units
+        provider = provider.with_compute_units(self.config.compute_units.to_ort());
+
+        // Enable subgraphs if requested
+        if self.config.enable_subgraphs {
+            provider = provider.with_subgraphs(true);
         }
 
+        // Require static shapes if requested
         if self.config.require_static_shapes {
-            provider = provider.with_subgraphs();
+            provider = provider.with_static_input_shapes(true);
+        }
+
+        // Set model format if specified
+        if let Some(format) = self.config.model_format {
+            let ort_format = match format {
+                CoreMLModelFormat::NeuralNetwork => ModelFormat::NeuralNetwork,
+                CoreMLModelFormat::MLProgram => ModelFormat::MLProgram,
+            };
+            provider = provider.with_model_format(ort_format);
+        }
+
+        // Set cache directory if specified
+        if let Some(dir) = &self.config.cache_dir {
+            provider = provider.with_model_cache_dir(dir);
         }
 
-        provider.build().into()
+        provider.build()
     }
 }
diff --git a/crates/airml-providers/src/lib.rs b/crates/airml-providers/src/lib.rs
@@ -11,7 +11,7 @@ pub use cpu::CpuProvider;
 pub use ort::execution_providers::ExecutionProviderDispatch;
 
 #[cfg(feature = "coreml")]
-pub use coreml::{CoreMLConfig, CoreMLProvider};
+pub use coreml::{ComputeUnits, CoreMLConfig, CoreMLModelFormat, CoreMLProvider};
 
 /// Available execution providers
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
diff --git a/src/commands/bench.rs b/src/commands/bench.rs
@@ -22,6 +22,10 @@ pub fn execute(args: &BenchArgs) -> Result<()> {
         "cpu" => vec![airml_providers::CpuProvider::default().into_dispatch()],
         #[cfg(feature = "coreml")]
         "coreml" => vec![airml_providers::CoreMLProvider::default().into_dispatch()],
+        #[cfg(feature = "coreml")]
+        "neural-engine" => vec![airml_providers::CoreMLProvider::default()
+            .neural_engine_only()
+            .into_dispatch()],
         _ => auto_select_providers(),
     };
 
diff --git a/src/commands/run.rs b/src/commands/run.rs
@@ -72,9 +72,13 @@ fn select_providers(provider_name: &str) -> Result<Vec<airml_providers::Executio
         "auto" => Ok(auto_select_providers()),
         "cpu" => Ok(vec![airml_providers::CpuProvider::default().into_dispatch()]),
         #[cfg(feature = "coreml")]
-        "coreml" | "neural-engine" => {
-            Ok(vec![airml_providers::CoreMLProvider::default().into_dispatch()])
-        }
+        "coreml" => Ok(vec![airml_providers::CoreMLProvider::default().into_dispatch()]),
+        #[cfg(feature = "coreml")]
+        "neural-engine" => Ok(vec![
+            airml_providers::CoreMLProvider::default()
+                .neural_engine_only()
+                .into_dispatch(),
+        ]),
         _ => {
             println!("Warning: Unknown provider '{}', using auto-selection", provider_name);
             Ok(auto_select_providers())