diff --git a/ocean/mip/_builders/model.py b/ocean/mip/_builders/model.py index 806f698..eb2aa60 100644 --- a/ocean/mip/_builders/model.py +++ b/ocean/mip/_builders/model.py @@ -1,3 +1,4 @@ +import warnings from collections.abc import Iterable from typing import Protocol @@ -139,7 +140,7 @@ def _cset( # :: mu[j] <= 1 - flow[node.left], # :: mu[j] >= epsilon * flow[node.right]. - epsilon = self._epsilon + epsilon = self._find_best_epsilon(model, var) threshold = node.threshold j = int(np.searchsorted(var.levels, threshold)) @@ -225,6 +226,29 @@ def _eset( model.addConstr(x <= 1 - tree[node.left.node_id]) model.addConstr(x >= tree[node.right.node_id]) + @staticmethod + def _find_best_epsilon(model: BaseModel, var: FeatureVar) -> float: + # Find the best epsilon value for the given feature variable. + # This + tol: float = model.getParamInfo("FeasibilityTol")[2] + min_tol: float = 1e-9 + delta: float = min(*np.diff(var.levels)) + eps: float = 1.0 - min_tol + if delta <= 2 * min_tol: + msg = "The difference between the split levels" + msg += f" is too small (={delta}) compared to" + msg += f" the tolerance minimum of Gurobi ({min_tol})." + msg += " There could be some precision errors." + msg += " Consider not scaling the data or using bigger intervals." + warnings.warn(msg, category=UserWarning, stacklevel=2) + return eps + while 2 * tol / delta >= 1.0: + tol /= 2 + feas_tol = model.getParamInfo("FeasibilityTol")[2] + if feas_tol != tol: + model.setParam("FeasibilityTol", min(feas_tol, tol)) + return 2 * tol / delta + class ModelBuilderFactory: MIP: type[MixedIntegerProgramBuilder] = MixedIntegerProgramBuilder diff --git a/ocean/mip/_explainer.py b/ocean/mip/_explainer.py index 6731c39..3373d8a 100644 --- a/ocean/mip/_explainer.py +++ b/ocean/mip/_explainer.py @@ -2,7 +2,6 @@ import warnings import gurobipy as gp -import numpy as np from sklearn.ensemble import IsolationForest from ..abc import Mapper @@ -47,23 +46,13 @@ def __init__( max_samples=max_samples, name=name, env=env, - epsilon=self.__find_best_num_epsilon(epsilon, mapper), + epsilon=epsilon, num_epsilon=num_epsilon, model_type=model_type, flow_type=flow_type, ) self.build() - @staticmethod - def __find_best_num_epsilon( - epsilon: float, mapper: Mapper[Feature] - ) -> float: - eps = epsilon - for f in mapper.values(): - if f.is_numeric: - eps = min(eps, *np.diff(f.levels) / 2.0) - return eps - def get_objective_value(self) -> float: return self.ObjVal