helix-agh · WojtAcht · Feb 22, 2026 · Feb 22, 2026 · Copilot · Feb 22, 2026
diff --git a/src/lonpy/__init__.py b/src/lonpy/__init__.py
@@ -1,5 +1,6 @@
 from lonpy.lon import CMLON, LON, LONConfig
 from lonpy.sampling import BasinHoppingSampler, BasinHoppingSamplerConfig, compute_lon
+from lonpy.step_size import StepSizeEstimator, StepSizeEstimatorConfig, StepSizeResult
 from lonpy.visualization import LONVisualizer
 
 __version__ = "0.1.0"
@@ -10,5 +11,8 @@
     "BasinHoppingSamplerConfig",
     "LONConfig",
     "LONVisualizer",
+    "StepSizeEstimator",
+    "StepSizeEstimatorConfig",
+    "StepSizeResult",
     "compute_lon",
 ]
diff --git a/src/lonpy/step_size.py b/src/lonpy/step_size.py
@@ -0,0 +1,191 @@
+from collections.abc import Callable
+from dataclasses import dataclass, field
+
+import numpy as np
+from scipy.optimize import minimize
+
+from lonpy.sampling import BasinHoppingSampler, BasinHoppingSamplerConfig
+
+
+@dataclass
+class StepSizeEstimatorConfig:
+    """
+    Configuration for step size estimation.
+
+    Attributes:
+        n_samples: Number of random initial points to evaluate.
+        n_perturbations: Perturbations per sample point.
+        target_escape_rate: Target escape rate to find (0.5 = 50% of perturbations escape).
+        search_precision: Decimal digits of precision for step size search.
+        coordinate_precision: Precision for identifying distinct optima.
+            Use None for full double precision.
+        minimizer_method: Scipy minimizer method (default: "L-BFGS-B").
+        minimizer_options: Options passed to scipy.optimize.minimize.
+        bounded: Whether to enforce domain bounds during perturbation.
+        seed: Random seed for reproducibility.
+    """
+
+    n_samples: int = 100
+    n_perturbations: int = 30
+    target_escape_rate: float = 0.5
+    search_precision: int = 4
+    coordinate_precision: int | None = 4
+    minimizer_method: str = "L-BFGS-B"
+    minimizer_options: dict = field(
+        default_factory=lambda: {"ftol": 1e-07, "gtol": 0, "maxiter": 15000}
+    )
+    bounded: bool = True
+    seed: int | None = None
+
+
+@dataclass(frozen=True)
+class StepSizeResult:
+    """
+    Result of step size estimation.
+
+    Attributes:
+        step_size: Estimated optimal step size (percentage of domain range).
+        escape_rate: Achieved escape rate at this step size.
+        error: Absolute difference between achieved and target escape rate.
+    """
+
+    step_size: float
+    escape_rate: float
+    error: float
+
+
+class StepSizeEstimator:
+    """
+    Estimates the optimal fixed step size for basin-hopping sampling.
+
+    The optimal step size is defined as the one that produces an escape rate
+    closest to a target (default 0.5), meaning ~50% of perturbations lead to
+    a different local optimum.
+
+    The search uses a decimal refinement approach, progressively narrowing
+    the step size to the configured precision.
+
+    Example:
+        >>> import numpy as np
+        >>> estimator = StepSizeEstimator()
+        >>> result = estimator.estimate(problem, [(-5, 5)] * 2)
+        >>> print(f"Step size: {result.step_size}, escape rate: {result.escape_rate:.3f}")
+    """
+
+    def __init__(self, config: StepSizeEstimatorConfig | None = None):
+        self.config = config or StepSizeEstimatorConfig()
+
+    def _make_sampler(self) -> BasinHoppingSampler:
+        sampler_config = BasinHoppingSamplerConfig(
+            coordinate_precision=self.config.coordinate_precision,
+            bounded=self.config.bounded,
+            minimizer_method=self.config.minimizer_method,
+            minimizer_options=self.config.minimizer_options,
+            step_mode="percentage",
+        )
+        return BasinHoppingSampler(sampler_config)
+
+    def _compute_escape_rate(
+        self,
+        func: Callable[[np.ndarray], float],
+        domain_array: np.ndarray,
+        step_size: float,
+        sampler: BasinHoppingSampler,
+    ) -> float:
+        """Compute the average escape rate for a given step size."""
+        bounds_array = domain_array if self.config.bounded else None
+        p = step_size * np.abs(domain_array[:, 1] - domain_array[:, 0])
+
+        escape_rates = []
+
+        for _ in range(self.config.n_samples):
+            x0 = np.random.uniform(domain_array[:, 0], domain_array[:, 1])
+            res = minimize(
+                func,
+                x0,
+                method=self.config.minimizer_method,
+                options=self.config.minimizer_options,
+                bounds=bounds_array,
+            )
+            optimum = res.x
+            optimum_hash = sampler._hash_solution(optimum)
+
+            escapes = 0
+            for _ in range(self.config.n_perturbations):
+                x_perturbed = sampler._perturbation(optimum, p, bounds_array)
+                res_perturbed = minimize(
+                    func,
+                    x_perturbed,
+                    method=self.config.minimizer_method,
+                    options=self.config.minimizer_options,
+                    bounds=bounds_array,
+                )
+                new_hash = sampler._hash_solution(res_perturbed.x)
+                if new_hash != optimum_hash:
+                    escapes += 1
+
+            escape_rates.append(escapes / self.config.n_perturbations)
+
+        return float(np.mean(escape_rates))
+
+    def estimate(
+        self,
+        func: Callable[[np.ndarray], float],
+        domain: list[tuple[float, float]],
+    ) -> StepSizeResult:
+        """
+        Estimate the optimal step size for basin-hopping sampling.
+
+        Args:
+            func: Objective function to minimize (f: R^n -> R).
+            domain: List of (lower, upper) bounds per dimension.
+
+        Returns:
+            StepSizeResult with the estimated step size, achieved escape rate, and error.
+        """
+        if self.config.seed is not None:
+            np.random.seed(self.config.seed)
+
+        domain_array = np.array(domain)
+        sampler = self._make_sampler()
+
+        step = 0.1
+        increment = 0.1
+        target = self.config.target_escape_rate
+
+        best_lower: tuple[float, float] | None = None  # (step, rate)
+        best_upper: tuple[float, float] | None = None  # (step, rate)
+        last_tested: tuple[float, float] | None = None
+
+        for _ in range(self.config.search_precision):
+            while step <= 1.0 + 1e-12:
-            while step <= 1.0 + 1e-12:
+            while step <= 1.0:
-            while step <= 1.0 + 1e-12:
+            while step <= 1.0:
+                rate = self._compute_escape_rate(func, domain_array, step, sampler)
+                last_tested = (step, rate)
+
+                if rate < target:
+                    if best_lower is None or abs(rate - target) < abs(best_lower[1] - target):
+                        best_lower = (step, rate)
+                    step += increment
+                else:
+                    if best_upper is None or abs(rate - target) < abs(best_upper[1] - target):
+                        best_upper = (step, rate)
+                    # Refine: reduce increment and resume from lower bound
+                    increment /= 10
+                    if best_lower is not None:
+                        step = best_lower[0] + increment
+                    else:
+                        step = increment
+                    break
+            else:
+                # Reached step > 1.0 without finding upper bound
+                break
+
+        # Select the candidate closest to target
+        candidates = [c for c in (best_lower, best_upper, last_tested) if c is not None]
+        best_step, best_rate = min(candidates, key=lambda c: abs(c[1] - target))
+
+        return StepSizeResult(
+            step_size=round(best_step, self.config.search_precision),
+            escape_rate=best_rate,
+            error=abs(best_rate - target),
+        )