Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/lonpy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from lonpy.lon import CMLON, LON, LONConfig
from lonpy.sampling import BasinHoppingSampler, BasinHoppingSamplerConfig, compute_lon
from lonpy.step_size import StepSizeEstimator, StepSizeEstimatorConfig, StepSizeResult
from lonpy.visualization import LONVisualizer

__version__ = "0.1.0"
Expand All @@ -10,5 +11,8 @@
"BasinHoppingSamplerConfig",
"LONConfig",
"LONVisualizer",
"StepSizeEstimator",
"StepSizeEstimatorConfig",
"StepSizeResult",
"compute_lon",
]
191 changes: 191 additions & 0 deletions src/lonpy/step_size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
from collections.abc import Callable
from dataclasses import dataclass, field

import numpy as np
from scipy.optimize import minimize

from lonpy.sampling import BasinHoppingSampler, BasinHoppingSamplerConfig


@dataclass
class StepSizeEstimatorConfig:
"""
Configuration for step size estimation.

Attributes:
n_samples: Number of random initial points to evaluate.
n_perturbations: Perturbations per sample point.
target_escape_rate: Target escape rate to find (0.5 = 50% of perturbations escape).
search_precision: Decimal digits of precision for step size search.
coordinate_precision: Precision for identifying distinct optima.
Use None for full double precision.
minimizer_method: Scipy minimizer method (default: "L-BFGS-B").
minimizer_options: Options passed to scipy.optimize.minimize.
bounded: Whether to enforce domain bounds during perturbation.
seed: Random seed for reproducibility.
"""

n_samples: int = 100
n_perturbations: int = 30
target_escape_rate: float = 0.5
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The target_escape_rate configuration parameter has no validation. Values outside [0, 1] would not make sense for an escape rate (which is a probability/ratio), but there's no check to prevent invalid values. Consider adding validation in init or post_init to ensure 0 <= target_escape_rate <= 1.

Copilot uses AI. Check for mistakes.
search_precision: int = 4
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The search_precision parameter controls the number of refinement iterations in the search algorithm (line 163), but this name is misleading. It's described in the docstring as "Decimal digits of precision for step size search" (line 19), which suggests it should control decimal precision. However, it's used as an iteration count, not precision digits. The actual decimal precision is applied on line 191 where round() is called with search_precision. This creates confusion because search_precision=4 means 4 iterations AND rounds to 4 decimal places, but these are different concepts. Consider renaming to search_iterations or clarifying the documentation to explain this dual purpose.

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The search_precision parameter has no validation. If set to 0 or negative, the outer loop (line 163) won't execute any iterations, leading to an empty candidates list and a ValueError when calling min() on line 188. Consider adding validation to ensure search_precision >= 1.

Copilot uses AI. Check for mistakes.
coordinate_precision: int | None = 4
minimizer_method: str = "L-BFGS-B"
minimizer_options: dict = field(
default_factory=lambda: {"ftol": 1e-07, "gtol": 0, "maxiter": 15000}
)
bounded: bool = True
seed: int | None = None


@dataclass(frozen=True)
class StepSizeResult:
"""
Result of step size estimation.

Attributes:
step_size: Estimated optimal step size (percentage of domain range).
escape_rate: Achieved escape rate at this step size.
error: Absolute difference between achieved and target escape rate.
"""

step_size: float
escape_rate: float
error: float


class StepSizeEstimator:
"""
Estimates the optimal fixed step size for basin-hopping sampling.

The optimal step size is defined as the one that produces an escape rate
closest to a target (default 0.5), meaning ~50% of perturbations lead to
a different local optimum.

The search uses a decimal refinement approach, progressively narrowing
the step size to the configured precision.

Example:
>>> import numpy as np
>>> estimator = StepSizeEstimator()
>>> result = estimator.estimate(problem, [(-5, 5)] * 2)
>>> print(f"Step size: {result.step_size}, escape rate: {result.escape_rate:.3f}")
"""

def __init__(self, config: StepSizeEstimatorConfig | None = None):
self.config = config or StepSizeEstimatorConfig()

def _make_sampler(self) -> BasinHoppingSampler:
sampler_config = BasinHoppingSamplerConfig(
coordinate_precision=self.config.coordinate_precision,
bounded=self.config.bounded,
minimizer_method=self.config.minimizer_method,
minimizer_options=self.config.minimizer_options,
step_mode="percentage",
)
return BasinHoppingSampler(sampler_config)

def _compute_escape_rate(
self,
func: Callable[[np.ndarray], float],
domain_array: np.ndarray,
step_size: float,
sampler: BasinHoppingSampler,
) -> float:
"""Compute the average escape rate for a given step size."""
bounds_array = domain_array if self.config.bounded else None
p = step_size * np.abs(domain_array[:, 1] - domain_array[:, 0])

escape_rates = []

for _ in range(self.config.n_samples):
x0 = np.random.uniform(domain_array[:, 0], domain_array[:, 1])
res = minimize(
func,
x0,
method=self.config.minimizer_method,
options=self.config.minimizer_options,
bounds=bounds_array,
)
optimum = res.x
optimum_hash = sampler._hash_solution(optimum)

escapes = 0
for _ in range(self.config.n_perturbations):
x_perturbed = sampler._perturbation(optimum, p, bounds_array)
res_perturbed = minimize(
func,
x_perturbed,
method=self.config.minimizer_method,
options=self.config.minimizer_options,
bounds=bounds_array,
)
new_hash = sampler._hash_solution(res_perturbed.x)
if new_hash != optimum_hash:
escapes += 1

Comment on lines +110 to +126
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The step size estimator accesses private methods (_perturbation, _round_value, _hash_solution) of BasinHoppingSampler. While this works, it creates tight coupling and fragility. If these private methods change signatures or behavior, this code will break. Consider either making these methods public (removing the underscore prefix), or creating public helper methods in BasinHoppingSampler that expose this functionality for reuse.

Copilot uses AI. Check for mistakes.
escape_rates.append(escapes / self.config.n_perturbations)

return float(np.mean(escape_rates))

def estimate(
self,
Comment on lines +88 to +132
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _compute_escape_rate method is called multiple times during the search (once per step size tested), and each call runs n_samples * n_perturbations minimizations. With defaults (n_samples=100, n_perturbations=30), that's 3000 minimizations per step size tested. With search_precision=4, this could be ~4-40+ step sizes tested (depending on when the upper bound is found), resulting in 12,000-120,000+ minimizations. For expensive objective functions, this could be very slow. Consider documenting the computational cost in the class or method docstring, or providing guidance on adjusting n_samples/n_perturbations for expensive functions.

Copilot uses AI. Check for mistakes.
func: Callable[[np.ndarray], float],
domain: list[tuple[float, float]],
) -> StepSizeResult:
"""
Estimate the optimal step size for basin-hopping sampling.

Args:
func: Objective function to minimize (f: R^n -> R).
domain: List of (lower, upper) bounds per dimension.

Returns:
StepSizeResult with the estimated step size, achieved escape rate, and error.
"""
if self.config.seed is not None:
np.random.seed(self.config.seed)

domain_array = np.array(domain)
sampler = self._make_sampler()

step = 0.1
Comment on lines +149 to +152
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The estimate method doesn't validate the domain parameter. If an empty domain list is passed, domain_array will be empty and could lead to errors in _compute_escape_rate when computing p (line 97) or generating random initial points (line 102). Consider adding validation to ensure domain is non-empty and that each bound tuple has valid lower < upper values.

Copilot uses AI. Check for mistakes.
increment = 0.1
target = self.config.target_escape_rate

best_lower: tuple[float, float] | None = None # (step, rate)
best_upper: tuple[float, float] | None = None # (step, rate)
last_tested: tuple[float, float] | None = None

for _ in range(self.config.search_precision):
while step <= 1.0 + 1e-12:
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition allows step to exceed 1.0 (step <= 1.0 + 1e-12), but step_size is meant to represent a percentage of domain range. A step_size > 1.0 means perturbations larger than the entire domain range, which may not be meaningful for bounded problems. While this might be intentional to explore edge cases, it could lead to unexpected behavior. Consider documenting why values > 1.0 are allowed, or constraining the search to step <= 1.0 if that's the intended maximum.

Suggested change
while step <= 1.0 + 1e-12:
while step <= 1.0:

Copilot uses AI. Check for mistakes.
rate = self._compute_escape_rate(func, domain_array, step, sampler)
last_tested = (step, rate)

if rate < target:
if best_lower is None or abs(rate - target) < abs(best_lower[1] - target):
best_lower = (step, rate)
step += increment
else:
if best_upper is None or abs(rate - target) < abs(best_upper[1] - target):
best_upper = (step, rate)
# Refine: reduce increment and resume from lower bound
increment /= 10
if best_lower is not None:
step = best_lower[0] + increment
else:
step = increment
break
else:
# Reached step > 1.0 without finding upper bound
break
Comment on lines +172 to +181
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the search algorithm, when rate >= target (line 172-181), the code breaks out of the inner while loop and continues with the next iteration of the outer for loop. However, the step variable may not be reset appropriately if best_lower is None. In this case, step is set to increment (line 180), but increment was just divided by 10 (line 176). On subsequent outer loop iterations, if rate < target on the first test, step will be incremented by the small increment value and could remain below the previously tested values, causing redundant testing or inefficient search. Consider whether the initialization of step should account for previously explored ranges.

Copilot uses AI. Check for mistakes.

# Select the candidate closest to target
candidates = [c for c in (best_lower, best_upper, last_tested) if c is not None]
best_step, best_rate = min(candidates, key=lambda c: abs(c[1] - target))

return StepSizeResult(
step_size=round(best_step, self.config.search_precision),
escape_rate=best_rate,
error=abs(best_rate - target),
)