From ae0c6606f95194704f7fda998bc2b2594a76329f Mon Sep 17 00:00:00 2001
From: Awa Khouna
Date: Thu, 13 Nov 2025 15:59:48 -0500
Subject: [PATCH 1/7] fix: Adjust objective coefficients in CP Model class for
 better optimization handling

---
 ocean/cp/_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ocean/cp/_model.py b/ocean/cp/_model.py
index 39e33aa..34e8dd5 100644
--- a/ocean/cp/_model.py
+++ b/ocean/cp/_model.py
@@ -159,17 +159,18 @@ def L1(
                 intervals_cost = self.get_intervals_cost(v.levels, x)
                 variables = [v.mget(i) for i in range(len(v.levels) - 1)]
                 obj_expr = cp.LinearExpr.WeightedSum(variables, intervals_cost)
+                obj_coefs.append(1)
             else:
                 obj_expr = v.objvarget()
                 self.add_garbage(
                     self.AddAbsEquality(obj_expr, int(x) - v.xget())
                 )
+                obj_coefs.append(self._obj_scale)
             obj_exprs.append(obj_expr)
-            obj_coefs.append(1)
         elif v.is_one_hot_encoded:
             obj_expr = v.xget(code) if x == 0.0 else 1 - v.xget(code)
             obj_exprs.append(obj_expr)
-            obj_coefs.append(self._obj_scale)
+            obj_coefs.append(self._obj_scale // 2)
         else:
             obj_expr = v.xget() if x == 0.0 else 1 - v.xget()
             obj_exprs.append(obj_expr)

From 6a24b5357ff1f148e8b4c7ce7b4d33f7b607f824 Mon Sep 17 00:00:00 2001
From: jferry
Date: Fri, 14 Nov 2025 10:56:00 -0500
Subject: [PATCH 2/7] Improve continuous feature distance computation using
 AddElement; fix and improve ordinal feature distance computation

---
 ocean/cp/_model.py              | 24 +++++++++++++++++++++---
 ocean/cp/_variables/_feature.py | 28 +++++----------------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/ocean/cp/_model.py b/ocean/cp/_model.py
index 34e8dd5..98cd7a8 100644
--- a/ocean/cp/_model.py
+++ b/ocean/cp/_model.py
@@ -125,7 +125,7 @@ def _add_objective(self, x: Array1D, norm: int) -> cp.ObjLinearExprT:
         k = 0
         for v in variables:
             if v.is_one_hot_encoded:
-                for code in v.codes:
+                for code in sorted(v.codes, key=int):
                     objective += self.L1(x_arr[k], v, code=code)
                     k += 1
             else:
@@ -157,10 +157,28 @@ def L1(
         if v.is_numeric:
             if v.is_continuous:
                 intervals_cost = self.get_intervals_cost(v.levels, x)
-                variables = [v.mget(i) for i in range(len(v.levels) - 1)]
-                obj_expr = cp.LinearExpr.WeightedSum(variables, intervals_cost)
+                # tighten domain of objvar based on x itself ----------
+                v.objvarget().Proto().domain[:] = []
+                v.objvarget().Proto().domain.extend( cp.Domain(min(intervals_cost), max(intervals_cost)).FlattenedIntervals())
+                # -----------------------------------------------------
+                obj_expr = v.objvarget()
+                self.add_garbage(
+                    self.AddElement(v.xget(), list(intervals_cost), obj_expr)
+                )
                 obj_coefs.append(1)
             else:
+                # include the value of x itself on the domain ---------
+                if len(v.thresholds) != 0:
+                    values = [
+                        val for v in v.thresholds for val in [int(v), int(v) + 1]
+                    ]
+                else:
+                    values = np.asarray(v.levels).astype(int).tolist()
+                values.append(int(x))
+                #print("new values for discrete feature ", v._name, ": ", values) # debug
+                v.xget().Proto().domain[:] = []
+                v.xget().Proto().domain.extend( cp.Domain.FromValues(values).FlattenedIntervals())
+                # -----------------------------------------------------
                 obj_expr = v.objvarget()
                 self.add_garbage(
                     self.AddAbsEquality(obj_expr, int(x) - v.xget())
                 )
diff --git a/ocean/cp/_variables/_feature.py b/ocean/cp/_variables/_feature.py
index 1fc4b51..9bda67e 100644
--- a/ocean/cp/_variables/_feature.py
+++ b/ocean/cp/_variables/_feature.py
@@ -14,7 +14,6 @@ class FeatureVar(Var, FeatureKeeper):
     _x: cp.IntVar
     _u: Mapping[Key, cp.IntVar]
-    _mu: list[cp.IntVar]
     _objvar: cp.IntVar

     def __init__(self, feature: Feature, name: str) -> None:
@@ -24,10 +23,7 @@ def __init__(self, feature: Feature, name: str) -> None:
     def build(self, model: BaseModel) -> None:
         if not self.is_one_hot_encoded:
             self._x = self._add_x(model)
-            if self.is_continuous:
-                self._mu = self._add_mu(model)
-                model.add_map_domain(self.xget(), self._mu)
-        elif self.is_one_hot_encoded:
+        if self.is_one_hot_encoded:
             self._u = self._add_u(model)

     def xget(self, code: Key | None = None) -> cp.IntVar:
@@ -38,16 +34,10 @@ def xget(self, code: Key | None = None) -> cp.IntVar:
             raise ValueError(msg)
         return self._x

-    def mget(self, key: int) -> cp.IntVar:
-        if not self.is_continuous:
-            msg = "The 'mget' method is only supported for continuous features"
-            raise ValueError(msg)
-        return self._mu[key]
-
     def objvarget(self) -> cp.IntVar:
-        if not self.is_discrete:
+        if not self.is_discrete and not self.is_continuous:
             msg = (
-                "The 'objvarget' method is only supported for discrete features"
+                "The 'objvarget' method is only supported for continuous and discrete features"
             )
             raise ValueError(msg)
         return self._objvar
@@ -76,16 +66,6 @@ def _add_u(self, model: BaseModel) -> Mapping[Key, cp.IntVar]:
         model.AddExactlyOne(u.values())
         return u

-    def _set_mu(self, model: BaseModel, m: int) -> list[cp.IntVar]:
-        return [model.NewBoolVar(f"{self._name}_mu_{i}") for i in range(m)]
-
-    def _add_mu(self, model: BaseModel) -> list[cp.IntVar]:
-        if not self.is_continuous:
-            msg = "Mu variables are only supported for continuous features"
-            raise ValueError(msg)
-        m = len(self.levels) - 1
-        return self._set_mu(model, m=m)
-
     def _add_one_hot_encoded(
         self,
         model: BaseModel,
@@ -100,6 +80,7 @@ def _add_binary(model: BaseModel, name: str) -> cp.IntVar:
         return model.NewBoolVar(name)

     def _add_continuous(self, model: BaseModel, name: str) -> cp.IntVar:
+        self._objvar = model.NewIntVar(0, int(self.levels[-1]), f"u_{name}")
         m = len(self.levels)
         return model.NewIntVar(0, m - 2, name)

@@ -110,6 +91,7 @@ def _add_discrete(self, model: BaseModel, name: str) -> cp.IntVar:
             ]
         else:
             values = np.asarray(self.levels).astype(int).tolist()
+        #print("values for discrete feature ", self._name, ": ", values) #debug
         val = cp.Domain.FromValues(values)
         self._objvar = model.NewIntVar(0, int(self.levels[-1]), f"u_{name}")
         return model.NewIntVarFromDomain(val, name)

From f6518e4b91bb254ee502dc6ee1fc868ba7c4de7f Mon Sep 17 00:00:00 2001
From: jferry
Date: Fri, 14 Nov 2025 13:36:19 -0500
Subject: [PATCH 3/7] Modify initial objvar domain for continuous features

---
 ocean/cp/_variables/_feature.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/cp/_variables/_feature.py b/ocean/cp/_variables/_feature.py
index 9bda67e..fdf1fc5 100644
--- a/ocean/cp/_variables/_feature.py
+++ b/ocean/cp/_variables/_feature.py
@@ -80,7 +80,7 @@ def _add_binary(model: BaseModel, name: str) -> cp.IntVar:
         return model.NewBoolVar(name)

     def _add_continuous(self, model: BaseModel, name: str) -> cp.IntVar:
-        self._objvar = model.NewIntVar(0, int(self.levels[-1]), f"u_{name}")
+        self._objvar = model.NewIntVar(0, 42, f"u_{name}") # arbitrary, will be adapted for each query
         m = len(self.levels)
         return model.NewIntVar(0, m - 2, name)

From a84a0861bb216e3e2fb37376e15adf67e0023c20 Mon Sep 17 00:00:00 2001
From: Awa Khouna
Date: Tue, 18 Nov 2025 14:21:52 -0500
Subject: [PATCH 4/7] fix: Refactor objective variable handling and improve
 error messaging in FeatureVar class

---
 ocean/cp/_model.py              | 28 +++++++++++++++++++++---------
 ocean/cp/_variables/_feature.py | 12 ++++++------
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/ocean/cp/_model.py b/ocean/cp/_model.py
index 98cd7a8..79bdc89 100644
--- a/ocean/cp/_model.py
+++ b/ocean/cp/_model.py
@@ -121,12 +121,15 @@ def _add_objective(self, x: Array1D, norm: int) -> cp.ObjLinearExprT:
         x_arr = np.asarray(x, dtype=float).ravel()
         variables = self.mapper.values()
+        names = [n for n, _ in self.mapper.items()]
         objective: cp.LinearExpr = 0  # type: ignore[assignment]
         k = 0
-        for v in variables:
+        indexer = self.mapper.idx
+        for v, name in zip(variables, names, strict=True):
             if v.is_one_hot_encoded:
-                for code in sorted(v.codes, key=int):
-                    objective += self.L1(x_arr[k], v, code=code)
+                for code in v.codes:
+                    idx = indexer.get(name, code)
+                    objective += self.L1(x_arr[idx], v, code=code)
                     k += 1
             else:
                 objective += self.L1(x_arr[k], v)
@@ -159,7 +162,11 @@ def L1(
                 intervals_cost = self.get_intervals_cost(v.levels, x)
                 # tighten domain of objvar based on x itself ----------
                 v.objvarget().Proto().domain[:] = []
-                v.objvarget().Proto().domain.extend( cp.Domain(min(intervals_cost), max(intervals_cost)).FlattenedIntervals())
+                v.objvarget().Proto().domain.extend(
+                    cp.Domain(
+                        min(intervals_cost), max(intervals_cost)
+                    ).FlattenedIntervals()
+                )
                 # -----------------------------------------------------
                 obj_expr = v.objvarget()
                 self.add_garbage(
@@ -170,14 +177,17 @@ def L1(
                 # include the value of x itself on the domain ---------
                 if len(v.thresholds) != 0:
                     values = [
-                        val for v in v.thresholds for val in [int(v), int(v) + 1]
+                        val
+                        for v in v.thresholds
+                        for val in [int(v), int(v) + 1]
                     ]
                 else:
                     values = np.asarray(v.levels).astype(int).tolist()
                 values.append(int(x))
-                #print("new values for discrete feature ", v._name, ": ", values) # debug
                 v.xget().Proto().domain[:] = []
-                v.xget().Proto().domain.extend( cp.Domain.FromValues(values).FlattenedIntervals())
+                v.xget().Proto().domain.extend(
+                    cp.Domain.FromValues(values).FlattenedIntervals()
+                )
                 # -----------------------------------------------------
                 obj_expr = v.objvarget()
                 self.add_garbage(
@@ -186,11 +196,11 @@ def L1(
                 obj_coefs.append(self._obj_scale)
             obj_exprs.append(obj_expr)
         elif v.is_one_hot_encoded:
-            obj_expr = v.xget(code) if x == 0.0 else 1 - v.xget(code)
+            obj_expr = v.xget(code) if x == 0.0 else 1 - v.xget(code)  # type: ignore[assignment]
             obj_exprs.append(obj_expr)
             obj_coefs.append(self._obj_scale // 2)
         else:
-            obj_expr = v.xget() if x == 0.0 else 1 - v.xget()
+            obj_expr = v.xget() if x == 0.0 else 1 - v.xget()  # type: ignore[assignment]
             obj_exprs.append(obj_expr)
             obj_coefs.append(self._obj_scale)
         return cp.LinearExpr.WeightedSum(obj_exprs, obj_coefs)
diff --git a/ocean/cp/_variables/_feature.py b/ocean/cp/_variables/_feature.py
index fdf1fc5..ce189ac 100644
--- a/ocean/cp/_variables/_feature.py
+++ b/ocean/cp/_variables/_feature.py
@@ -35,10 +35,9 @@ def xget(self, code: Key | None = None) -> cp.IntVar:
         return self._x

     def objvarget(self) -> cp.IntVar:
-        if not self.is_discrete and not self.is_continuous:
-            msg = (
-                "The 'objvarget' method is only supported for continuous and discrete features"
-            )
+        if not self.is_numeric:
+            msg = "The 'objvarget' method is only supported"
+            msg += " for continuous and discrete features"
             raise ValueError(msg)
         return self._objvar

@@ -80,7 +79,9 @@ def _add_binary(model: BaseModel, name: str) -> cp.IntVar:
         return model.NewBoolVar(name)

     def _add_continuous(self, model: BaseModel, name: str) -> cp.IntVar:
-        self._objvar = model.NewIntVar(0, 42, f"u_{name}") # arbitrary, will be adapted for each query
+        self._objvar = model.NewIntVar(
+            0, 42, f"u_{name}"
+        )  # arbitrary, will be adapted for each query
         m = len(self.levels)
         return model.NewIntVar(0, m - 2, name)

@@ -91,7 +92,6 @@ def _add_discrete(self, model: BaseModel, name: str) -> cp.IntVar:
             ]
         else:
             values = np.asarray(self.levels).astype(int).tolist()
-        #print("values for discrete feature ", self._name, ": ", values) #debug
         val = cp.Domain.FromValues(values)
         self._objvar = model.NewIntVar(0, int(self.levels[-1]), f"u_{name}")
         return model.NewIntVarFromDomain(val, name)

From 3b1337f8dc399f897576278a5cd48b83b73cb3ff Mon Sep 17 00:00:00 2001
From: Awa Khouna
Date: Tue, 18 Nov 2025 14:22:19 -0500
Subject: [PATCH 5/7] fix: Halve L1 and L2 distances for one-hot encoded
 variables in objective computation

---
 ocean/mip/_model.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/ocean/mip/_model.py b/ocean/mip/_model.py
index 4e5468e..45a956c 100644
--- a/ocean/mip/_model.py
+++ b/ocean/mip/_model.py
@@ -147,19 +147,32 @@ def _add_objective(self, x: Array1D, norm: int) -> Objective:
         if norm not in {1, 2}:
             msg = f"Unsupported norm: {norm}"
             raise ValueError(msg)
-
+        names = self.mapper.names
+        is_ohe = [self.mapper[name].is_one_hot_encoded for name in names]
         variables = map(self.vget, range(self.n_columns))
         if norm == 1:
-            return sum(map(self.L1, x, variables), start=gp.LinExpr())
-        return sum(map(self.L2, x, variables), start=gp.QuadExpr())
+            return sum(
+                (
+                    self.L1(x, v, is_ohe=ohe)
+                    for x, v, ohe in zip(x, variables, is_ohe, strict=True)
+                ),
+                start=gp.LinExpr(),
+            )
+        return sum(
+            (
+                self.L2(x, v, is_ohe=ohe)
+                for x, v, ohe in zip(x, variables, is_ohe, strict=True)
+            ),
+            start=gp.QuadExpr(),
+        )

-    def L1(self, x: np.float64, v: gp.Var) -> gp.LinExpr:
+    def L1(self, x: np.float64, v: gp.Var, *, is_ohe: bool) -> gp.LinExpr:
         u = self.addVar()
         neg = self.addConstr(u >= v - x)
         pos = self.addConstr(u >= x - v)
         self.add_garbage(u, pos, neg)
-        return gp.LinExpr(u)
+        return gp.LinExpr(u) / 2.0 if is_ohe else gp.LinExpr(u)

     @staticmethod
-    def L2(x: np.float64, v: gp.Var) -> gp.QuadExpr:
-        return (v - x) ** 2
+    def L2(x: np.float64, v: gp.Var, *, is_ohe: bool) -> gp.QuadExpr:
+        return ((v - x) ** 2) / 2.0 if is_ohe else (v - x) ** 2

From 988f496226026671dbd7f01e370ae61ab5b9897e Mon Sep 17 00:00:00 2001
From: Awa Khouna
Date: Tue, 18 Nov 2025 14:22:43 -0500
Subject: [PATCH 6/7] fix: Update MIP and CP objective values and explanations
 in README

---
 README.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index adfbab4..ab16c68 100644
--- a/README.md
+++ b/README.md
@@ -67,33 +67,33 @@ print("CP : ",cp_explanation)
 Expected output:

 ```plaintext
-MIP objective value: 5.0
+MIP objective value: 3.0
 MIP Explanation:
 Age : 39.0
 CapitalGain : 2174.0
 CapitalLoss : 0
 EducationNumber : 13.0
-HoursPerWeek : 41.0
+HoursPerWeek : 40.0
 MaritalStatus : 3
 NativeCountry : 0
-Occupation : 1
+Occupation : 10
 Relationship : 0
 Sex : 0
 WorkClass : 6

-CP objective value: 5.0
+CP objective value: 3.0
 CP Explanation:
-Age : 38.0
+Age : 39.0
 CapitalGain : 2174.0
 CapitalLoss : 0.0
 EducationNumber : 13.0
-HoursPerWeek : 41.0
+HoursPerWeek : 40.0
 MaritalStatus : 3
 NativeCountry : 0
 Occupation : 1
 Relationship : 0
 Sex : 0
-WorkClass : 6
+WorkClass : 4
 ```

From 006c263b447534cb8a738e5cbbc0dfee91d6f040 Mon Sep 17 00:00:00 2001
From: Awa Khouna
Date: Tue, 18 Nov 2025 14:22:55 -0500
Subject: [PATCH 7/7] fix: Update feature variable handling and constraints in
 tests for accuracy

---
 tests/cp/builder/test_model.py             | 16 ++++++++--------
 tests/cp/model/test_feasible.py            |  4 ++--
 tests/cp/variable/feature/test_build.py    | 11 -----------
 tests/cp/variable/feature/test_optimize.py | 20 +++++++++++---------
 4 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/tests/cp/builder/test_model.py b/tests/cp/builder/test_model.py
index 3e4a1ba..68a8882 100644
--- a/tests/cp/builder/test_model.py
+++ b/tests/cp/builder/test_model.py
@@ -175,15 +175,15 @@ def test_build_continuous_feature(
         feature_var = FeatureVar(feature=feature, name="x")
         tree_var = TreeVar(tree=tree, name="tree")

-        feature_var.build(model)  # 3 variables and 6 constraints
+        feature_var.build(model)  # 2 variables
         tree_var.build(model)  # 4 variables and 1 constraint

         mapper = Mapper[FeatureVar]({"x": feature_var}, columns=pd.Index(["x"]))

         builder.build(model, trees=[tree_var], mapper=mapper)  # 8 constraints

-        assert len(model.Proto().constraints) == 13
-        assert len(model.Proto().variables) == 7
+        assert len(model.Proto().constraints) == 9
+        assert len(model.Proto().variables) == 6

     @staticmethod
     def test_build_discrete_feature(
@@ -200,7 +200,7 @@ def test_build_discrete_feature(
         feature_var = FeatureVar(feature=feature, name="x")
         tree_var = TreeVar(tree=tree, name="tree")

-        feature_var.build(model)  # 2 variables and 0 constraints
+        feature_var.build(model)  # 2 variables
         tree_var.build(model)  # 4 variables and 1 constraint

         mapper = Mapper[FeatureVar]({"x": feature_var}, columns=pd.Index(["x"]))
@@ -259,7 +259,7 @@ def test_build_multiple_features(
         tree_var = TreeVar(tree=tree, name="tree")

         for feature_var in feature_vars:
-            # 6 variables and 1 constraints
+            # 7 variables and 1 constraints
             feature_var.build(model)
         tree_var.build(model)  # 6 variables and 1 constraint
@@ -275,7 +275,7 @@ def test_build_multiple_features(
         )
         builder.build(model, trees=[tree_var], mapper=mapper)  # 16 constraints
         assert len(model.Proto().constraints) == 18
-        assert len(model.Proto().variables) == 12
+        assert len(model.Proto().variables) == 13

     @staticmethod
     def test_build_multiple_trees(
@@ -305,7 +305,7 @@ def test_build_multiple_trees(
             TreeVar(tree=tree, name=f"tree_{i}") for i, tree in enumerate(trees)
         ]
         for feature_var in feature_vars:
-            # 6 variables and 1 constraints
+            # 7 variables and 1 constraints
             feature_var.build(model)
         for tree_var in tree_vars:
             # 18 variables and 3 constraints
@@ -323,4 +323,4 @@ def test_build_multiple_trees(
         builder.build(model, trees=tree_vars, mapper=mapper)  # 48 constraints

         assert len(model.Proto().constraints) == 52
-        assert len(model.Proto().variables) == 24
+        assert len(model.Proto().variables) == 25
diff --git a/tests/cp/model/test_feasible.py b/tests/cp/model/test_feasible.py
index 0424078..a93bd5b 100644
--- a/tests/cp/model/test_feasible.py
+++ b/tests/cp/model/test_feasible.py
@@ -51,7 +51,7 @@ def test_build(
         if feature.is_binary:
             feature_vars += 1
         elif feature.is_continuous:
-            feature_vars += len(feature.levels)
+            feature_vars += 2
             feature_constraints += 2 * (len(feature.levels) - 1)
         elif feature.is_discrete:
             feature_vars += 2
@@ -60,7 +60,7 @@ def test_build(
             feature_constraints += 1
     lb = n_nodes - n_leaves
     ub = (n_nodes - n_leaves) * (n_nodes - n_leaves + 1)
-    lb += feature_constraints + n_estimators
+    lb += n_estimators
     ub += feature_constraints + n_estimators
     assert len(model.Proto().variables) == n_leaves + feature_vars
     assert len(model.Proto().constraints) >= lb
diff --git a/tests/cp/variable/feature/test_build.py b/tests/cp/variable/feature/test_build.py
index cf82305..ced548a 100644
--- a/tests/cp/variable/feature/test_build.py
+++ b/tests/cp/variable/feature/test_build.py
@@ -16,9 +16,6 @@ def test_binary() -> None:
    var.build(model)
    v = var.xget()
    assert isinstance(v, cp.IntVar)
-    msg = r"The 'mget' method is only supported for continuous features"
-    with pytest.raises(ValueError, match=msg):
-        _ = var.mget(0)

    msg = r"Get by code is only supported for one-hot encoded features"
    with pytest.raises(ValueError, match=msg):
@@ -58,10 +55,6 @@ def test_continuous(seed: int, n_levels: int, lower: int, upper: int) -> None:
    var.build(model)
    v = var.xget()
    assert isinstance(v, cp.IntVar)
-    n = len(var.levels)
-    for i in range(n - 1):
-        mu = var.mget(i)
-        assert isinstance(mu, cp.IntVar)

    msg = r"Get by code is only supported for one-hot encoded features"
    with pytest.raises(ValueError, match=msg):
@@ -78,10 +71,6 @@ def test_one_hot_encoded(seed: int, n_codes: int) -> None:
    var = FeatureVar(feature=feature, name="x")
    var.build(model)

-    msg = r"The 'mget' method is only supported for continuous features"
-    with pytest.raises(ValueError, match=msg):
-        _ = var.mget(0)
-
    msg = r"Code is required for one-hot encoded features get"
    with pytest.raises(ValueError, match=msg):
        _ = var.xget()
diff --git a/tests/cp/variable/feature/test_optimize.py b/tests/cp/variable/feature/test_optimize.py
index 3d47719..efcdb43 100644
--- a/tests/cp/variable/feature/test_optimize.py
+++ b/tests/cp/variable/feature/test_optimize.py
@@ -96,7 +96,6 @@ def solve_and_assert(val: float, expected: float) -> None:
         var.build(model)
         scale = int(1e6)
         v = var.xget()
-        variables = [var.mget(i) for i in range(len(levels) - 1)]
         intervals_cost = np.zeros(len(levels) - 1, dtype=int)
         for i in range(len(intervals_cost)):
             if levels[i] < val <= levels[i + 1]:
@@ -105,21 +104,24 @@ def solve_and_assert(val: float, expected: float) -> None:
                 intervals_cost[i] = int(abs(val - levels[i]) * scale)
             elif levels[i + 1] < val:
                 intervals_cost[i] = int(abs(val - levels[i + 1]) * scale)
+        # tighten domain of objvar based on val itself ----------
+        var.objvarget().Proto().domain[:] = []
+        var.objvarget().Proto().domain.extend(
+            cp.Domain(
+                min(intervals_cost), max(intervals_cost)
+            ).FlattenedIntervals()
+        )
+        # -----------------------------------------------------

-        objective = cp.LinearExpr.WeightedSum(variables, intervals_cost)
+        objective = var.objvarget()
+        model.AddElement(v, list(intervals_cost), objective)
         model.Minimize(objective)
         solver.Solve(model)
         value = levels[solver.Value(v) + 1]
-        mu_values = [solver.Value(mu) for mu in variables]
-        assert sum(mu_values) == 1.0, (
-            f"Solver status {solver.StatusName()}, "
-            f"mu values {mu_values}, "
-            f"model variables {model.Proto().variables}, "
-        )
         assert np.isclose(value, expected), (
             f"Expected {expected}, but got {value}, with levels {levels} "
             f"and val={val} and solver status {solver.StatusName()}"
-            f" v = {solver.Value(v)} and mu_values {mu_values}"
+            f" v = {solver.Value(v)}, obj_value = {solver.Value(objective)}"
         )

     # Test within bounds
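
A minimal, self-contained sketch of the AddElement encoding that patches 2 through 4 build on, assuming only that OR-Tools is installed; `levels` and `x_query` are illustrative values, and the cost rule mirrors the one in `test_optimize.py` (zero if the query already falls in the interval, otherwise the distance to the nearest interval endpoint):

```python
from ortools.sat.python import cp_model

levels = [0, 10, 20, 30]  # interval bounds: [0, 10], (10, 20], (20, 30]
x_query = 12              # hypothetical query value to stay close to

# Cost of landing in each interval.
costs = []
for lo, hi in zip(levels[:-1], levels[1:]):
    if lo < x_query <= hi:
        costs.append(0)
    elif x_query <= lo:
        costs.append(lo - x_query)
    else:
        costs.append(x_query - hi)

model = cp_model.CpModel()
interval_idx = model.NewIntVar(0, len(costs) - 1, "x")  # chosen interval index
obj = model.NewIntVar(min(costs), max(costs), "u")      # distance variable
model.AddElement(interval_idx, costs, obj)              # obj == costs[interval_idx]
model.Minimize(obj)

solver = cp_model.CpSolver()
solver.Solve(model)
print(solver.Value(interval_idx), solver.Value(obj))    # -> 1 0
```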
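
The halving applied to one-hot encoded features (the `self._obj_scale // 2` coefficient on the CP side and the `/ 2.0` in the MIP `L1`/`L2` of patch 5) reflects that switching the category of a one-hot group always flips exactly two binaries, one from 1 to 0 and one from 0 to 1, so an unscaled L1 term would count each categorical change twice. A small worked check with made-up vectors:

```python
import numpy as np

x = np.array([1, 0, 0])   # original: category A, one-hot over (A, B, C)
cf = np.array([0, 1, 0])  # counterfactual: category B

raw_l1 = int(np.abs(cf - x).sum())  # 2: both flipped bits are counted
halved = raw_l1 / 2.0               # 1.0: one categorical change costs 1
print(raw_l1, halved)
```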
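
Patches 2 through 4 also rewrite a variable's domain in place between queries via the underlying proto (`Proto().domain[:] = []` followed by `extend(...FlattenedIntervals())`) rather than rebuilding the model. A minimal sketch of that pattern, with an illustrative value set:

```python
from ortools.sat.python import cp_model

model = cp_model.CpModel()
x = model.NewIntVar(0, 42, "x")  # placeholder bounds, as in patch 3

# Overwrite the domain for a new query: clear the proto's flattened
# intervals, then write the tightened domain back.
new_domain = cp_model.Domain.FromValues([3, 4, 7, 8])
x.Proto().domain[:] = []
x.Proto().domain.extend(new_domain.FlattenedIntervals())
```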