README.md (7 additions, 7 deletions)

@@ -67,33 +67,33 @@ print("CP : ",cp_explanation)
 Expected output:
 
 ```plaintext
-MIP objective value: 5.0
+MIP objective value: 3.0
 MIP Explanation:
 Age : 39.0
 CapitalGain : 2174.0
 CapitalLoss : 0
 EducationNumber : 13.0
-HoursPerWeek : 41.0
+HoursPerWeek : 40.0
 MaritalStatus : 3
 NativeCountry : 0
-Occupation : 1
+Occupation : 10
 Relationship : 0
 Sex : 0
 WorkClass : 6
 
-CP objective value: 5.0
+CP objective value: 3.0
 CP Explanation:
-Age : 38.0
+Age : 39.0
 CapitalGain : 2174.0
 CapitalLoss : 0.0
 EducationNumber : 13.0
-HoursPerWeek : 41.0
+HoursPerWeek : 40.0
 MaritalStatus : 3
 NativeCountry : 0
 Occupation : 1
 Relationship : 0
 Sex : 0
-WorkClass : 6
+WorkClass : 4
 ```

ocean/cp/_model.py (37 additions, 8 deletions)

@@ -121,12 +121,15 @@ def _add_objective(self, x: Array1D, norm: int) -> cp.ObjLinearExprT:
         x_arr = np.asarray(x, dtype=float).ravel()
 
         variables = self.mapper.values()
+        names = [n for n, _ in self.mapper.items()]
         objective: cp.LinearExpr = 0  # type: ignore[assignment]
         k = 0
-        for v in variables:
+        indexer = self.mapper.idx
+        for v, name in zip(variables, names, strict=True):
             if v.is_one_hot_encoded:
                 for code in v.codes:
-                    objective += self.L1(x_arr[k], v, code=code)
+                    idx = indexer.get(name, code)
+                    objective += self.L1(x_arr[idx], v, code=code)
                     k += 1
             else:
                 objective += self.L1(x_arr[k], v)
@@ -157,21 +160,47 @@ def L1(
         if v.is_numeric:
             if v.is_continuous:
                 intervals_cost = self.get_intervals_cost(v.levels, x)
-                variables = [v.mget(i) for i in range(len(v.levels) - 1)]
-                obj_expr = cp.LinearExpr.WeightedSum(variables, intervals_cost)
+                # tighten domain of objvar based on x itself ----------
+                v.objvarget().Proto().domain[:] = []
+                v.objvarget().Proto().domain.extend(
+                    cp.Domain(
+                        min(intervals_cost), max(intervals_cost)
+                    ).FlattenedIntervals()
+                )
+                # -----------------------------------------------------
+                obj_expr = v.objvarget()
+                self.add_garbage(
+                    self.AddElement(v.xget(), list(intervals_cost), obj_expr)
+                )
+                obj_coefs.append(1)
             else:
+                # include the value of x itself in the domain ---------
+                if len(v.thresholds) != 0:
+                    values = [
+                        val
+                        for t in v.thresholds
+                        for val in [int(t), int(t) + 1]
+                    ]
+                else:
+                    values = np.asarray(v.levels).astype(int).tolist()
+                values.append(int(x))
+                v.xget().Proto().domain[:] = []
+                v.xget().Proto().domain.extend(
+                    cp.Domain.FromValues(values).FlattenedIntervals()
+                )
+                # -----------------------------------------------------
                 obj_expr = v.objvarget()
                 self.add_garbage(
                     self.AddAbsEquality(obj_expr, int(x) - v.xget())
                 )
+                obj_coefs.append(self._obj_scale)
             obj_exprs.append(obj_expr)
-            obj_coefs.append(1)
         elif v.is_one_hot_encoded:
-            obj_expr = v.xget(code) if x == 0.0 else 1 - v.xget(code)
+            obj_expr = v.xget(code) if x == 0.0 else 1 - v.xget(code)  # type: ignore[assignment]
             obj_exprs.append(obj_expr)
-            obj_coefs.append(self._obj_scale)
+            obj_coefs.append(self._obj_scale // 2)
         else:
-            obj_expr = v.xget() if x == 0.0 else 1 - v.xget()
+            obj_expr = v.xget() if x == 0.0 else 1 - v.xget()  # type: ignore[assignment]
             obj_exprs.append(obj_expr)
             obj_coefs.append(self._obj_scale)
         return cp.LinearExpr.WeightedSum(obj_exprs, obj_coefs)
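Note on the continuous branch above: it replaces the old `WeightedSum` over per-interval `mu` Booleans with CP-SAT's element constraint. `AddElement(index, table, target)` enforces `target == table[index]`, so the interval selector `v.xget()` directly looks up its precomputed L1 cost. A minimal, self-contained sketch of the pattern (the cost values and variable names here are illustrative, not taken from this library):

```python
from ortools.sat.python import cp_model

model = cp_model.CpModel()

# Precomputed (scaled) L1 distances from a query point to each interval,
# in the spirit of get_intervals_cost; the numbers are made up.
costs = [340, 0, 120, 560]

x = model.NewIntVar(0, len(costs) - 1, "x")           # interval selector
obj = model.NewIntVar(min(costs), max(costs), "obj")  # cost variable
model.AddElement(x, costs, obj)                       # obj == costs[x]

model.Minimize(obj)
solver = cp_model.CpSolver()
solver.Solve(model)
print(solver.Value(x), solver.Value(obj))  # 1 0: the zero-cost interval wins
```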
ocean/cp/_variables/_feature.py (7 additions, 25 deletions)

@@ -14,7 +14,6 @@ class FeatureVar(Var, FeatureKeeper):
 
     _x: cp.IntVar
     _u: Mapping[Key, cp.IntVar]
-    _mu: list[cp.IntVar]
     _objvar: cp.IntVar
 
     def __init__(self, feature: Feature, name: str) -> None:
@@ -24,10 +23,7 @@ def __init__(self, feature: Feature, name: str) -> None:
     def build(self, model: BaseModel) -> None:
         if not self.is_one_hot_encoded:
             self._x = self._add_x(model)
-        if self.is_continuous:
-            self._mu = self._add_mu(model)
-            model.add_map_domain(self.xget(), self._mu)
-        elif self.is_one_hot_encoded:
+        if self.is_one_hot_encoded:
             self._u = self._add_u(model)
 
     def xget(self, code: Key | None = None) -> cp.IntVar:
@@ -38,17 +34,10 @@ def xget(self, code: Key | None = None) -> cp.IntVar:
             raise ValueError(msg)
         return self._x
 
-    def mget(self, key: int) -> cp.IntVar:
-        if not self.is_continuous:
-            msg = "The 'mget' method is only supported for continuous features"
-            raise ValueError(msg)
-        return self._mu[key]
-
     def objvarget(self) -> cp.IntVar:
-        if not self.is_discrete:
-            msg = (
-                "The 'objvarget' method is only supported for discrete features"
-            )
+        if not self.is_numeric:
+            msg = "The 'objvarget' method is only supported"
+            msg += " for continuous and discrete features"
             raise ValueError(msg)
         return self._objvar
 
@@ -76,16 +65,6 @@ def _add_u(self, model: BaseModel) -> Mapping[Key, cp.IntVar]:
         model.AddExactlyOne(u.values())
         return u
 
-    def _set_mu(self, model: BaseModel, m: int) -> list[cp.IntVar]:
-        return [model.NewBoolVar(f"{self._name}_mu_{i}") for i in range(m)]
-
-    def _add_mu(self, model: BaseModel) -> list[cp.IntVar]:
-        if not self.is_continuous:
-            msg = "Mu variables are only supported for continuous features"
-            raise ValueError(msg)
-        m = len(self.levels) - 1
-        return self._set_mu(model, m=m)
-
     def _add_one_hot_encoded(
         self,
         model: BaseModel,
@@ -100,6 +79,9 @@ def _add_binary(model: BaseModel, name: str) -> cp.IntVar:
         return model.NewBoolVar(name)
 
     def _add_continuous(self, model: BaseModel, name: str) -> cp.IntVar:
+        self._objvar = model.NewIntVar(
+            0, 42, f"u_{name}"
+        )  # arbitrary bounds, adapted for each query
         m = len(self.levels)
         return model.NewIntVar(0, m - 2, name)
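The `(0, 42)` bounds given to `_objvar` here are deliberate placeholders: `L1` in `ocean/cp/_model.py` rewrites the variable's domain through the proto once the query point is known. A standalone sketch of that in-place rewrite, assuming OR-Tools CP-SAT's Python API (the tightened bounds are made up):

```python
from ortools.sat.python import cp_model

model = cp_model.CpModel()
u = model.NewIntVar(0, 42, "u")  # placeholder bounds at build time

# Query arrives: clear the proto's domain and install tight bounds,
# mirroring the objvarget().Proto().domain manipulation above.
tight = cp_model.Domain(0, 17)
u.Proto().domain[:] = []
u.Proto().domain.extend(tight.FlattenedIntervals())
```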
ocean/mip/_model.py (20 additions, 7 deletions)

@@ -147,19 +147,32 @@ def _add_objective(self, x: Array1D, norm: int) -> Objective:
         if norm not in {1, 2}:
             msg = f"Unsupported norm: {norm}"
             raise ValueError(msg)
 
+        names = self.mapper.names
+        is_ohe = [self.mapper[name].is_one_hot_encoded for name in names]
         variables = map(self.vget, range(self.n_columns))
         if norm == 1:
-            return sum(map(self.L1, x, variables), start=gp.LinExpr())
-        return sum(map(self.L2, x, variables), start=gp.QuadExpr())
+            return sum(
+                (
+                    self.L1(xi, v, is_ohe=ohe)
+                    for xi, v, ohe in zip(x, variables, is_ohe, strict=True)
+                ),
+                start=gp.LinExpr(),
+            )
+        return sum(
+            (
+                self.L2(xi, v, is_ohe=ohe)
+                for xi, v, ohe in zip(x, variables, is_ohe, strict=True)
+            ),
+            start=gp.QuadExpr(),
+        )
 
-    def L1(self, x: np.float64, v: gp.Var) -> gp.LinExpr:
+    def L1(self, x: np.float64, v: gp.Var, *, is_ohe: bool) -> gp.LinExpr:
         u = self.addVar()
         neg = self.addConstr(u >= v - x)
         pos = self.addConstr(u >= x - v)
         self.add_garbage(u, pos, neg)
-        return gp.LinExpr(u)
+        return gp.LinExpr(u) / 2.0 if is_ohe else gp.LinExpr(u)
 
     @staticmethod
-    def L2(x: np.float64, v: gp.Var) -> gp.QuadExpr:
-        return (v - x) ** 2
+    def L2(x: np.float64, v: gp.Var, *, is_ohe: bool) -> gp.QuadExpr:
+        return ((v - x) ** 2) / 2.0 if is_ohe else (v - x) ** 2
tests/cp/builder/test_model.py (8 additions, 8 deletions)

@@ -175,15 +175,15 @@ def test_build_continuous_feature(
         feature_var = FeatureVar(feature=feature, name="x")
         tree_var = TreeVar(tree=tree, name="tree")
 
-        feature_var.build(model)  # 3 variables and 6 constraints
+        feature_var.build(model)  # 2 variables
         tree_var.build(model)  # 4 variables and 1 constraint
 
         mapper = Mapper[FeatureVar]({"x": feature_var}, columns=pd.Index(["x"]))
 
         builder.build(model, trees=[tree_var], mapper=mapper)  # 8 constraints
 
-        assert len(model.Proto().constraints) == 13
-        assert len(model.Proto().variables) == 7
+        assert len(model.Proto().constraints) == 9
+        assert len(model.Proto().variables) == 6
 
     @staticmethod
     def test_build_discrete_feature(
@@ -200,7 +200,7 @@ def test_build_discrete_feature(
         feature_var = FeatureVar(feature=feature, name="x")
         tree_var = TreeVar(tree=tree, name="tree")
 
-        feature_var.build(model)  # 2 variables and 0 constraints
+        feature_var.build(model)  # 2 variables
         tree_var.build(model)  # 4 variables and 1 constraint
 
         mapper = Mapper[FeatureVar]({"x": feature_var}, columns=pd.Index(["x"]))
@@ -259,7 +259,7 @@ def test_build_multiple_features(
 
         tree_var = TreeVar(tree=tree, name="tree")
         for feature_var in feature_vars:
-            # 6 variables and 1 constraint
+            # 7 variables and 1 constraint
             feature_var.build(model)
 
         tree_var.build(model)  # 6 variables and 1 constraint
@@ -275,7 +275,7 @@ def test_build_multiple_features(
         )
         builder.build(model, trees=[tree_var], mapper=mapper)  # 16 constraints
         assert len(model.Proto().constraints) == 18
-        assert len(model.Proto().variables) == 12
+        assert len(model.Proto().variables) == 13
 
     @staticmethod
     def test_build_multiple_trees(
@@ -305,7 +305,7 @@ def test_build_multiple_trees(
             TreeVar(tree=tree, name=f"tree_{i}") for i, tree in enumerate(trees)
         ]
         for feature_var in feature_vars:
-            # 6 variables and 1 constraint
+            # 7 variables and 1 constraint
             feature_var.build(model)
         for tree_var in tree_vars:
             # 18 variables and 3 constraints
@@ -323,4 +323,4 @@ def test_build_multiple_trees(
         builder.build(model, trees=tree_vars, mapper=mapper)
         # 48 constraints
         assert len(model.Proto().constraints) == 52
-        assert len(model.Proto().variables) == 24
+        assert len(model.Proto().variables) == 25
tests/cp/model/test_feasible.py (2 additions, 2 deletions)

@@ -51,7 +51,7 @@ def test_build(
         if feature.is_binary:
             feature_vars += 1
         elif feature.is_continuous:
-            feature_vars += len(feature.levels)
+            feature_vars += 2
             feature_constraints += 2 * (len(feature.levels) - 1)
         elif feature.is_discrete:
             feature_vars += 2
@@ -60,7 +60,7 @@ def test_build(
             feature_constraints += 1
     lb = n_nodes - n_leaves
     ub = (n_nodes - n_leaves) * (n_nodes - n_leaves + 1)
-    lb += feature_constraints + n_estimators
+    lb += n_estimators
    ub += feature_constraints + n_estimators
     assert len(model.Proto().variables) == n_leaves + feature_vars
     assert len(model.Proto().constraints) >= lb
tests/cp/variable/feature/test_build.py (0 additions, 11 deletions)

@@ -16,9 +16,6 @@ def test_binary() -> None:
     var.build(model)
     v = var.xget()
     assert isinstance(v, cp.IntVar)
-    msg = r"The 'mget' method is only supported for continuous features"
-    with pytest.raises(ValueError, match=msg):
-        _ = var.mget(0)
 
     msg = r"Get by code is only supported for one-hot encoded features"
     with pytest.raises(ValueError, match=msg):
@@ -58,10 +55,6 @@ def test_continuous(seed: int, n_levels: int, lower: int, upper: int) -> None:
     var.build(model)
     v = var.xget()
     assert isinstance(v, cp.IntVar)
-    n = len(var.levels)
-    for i in range(n - 1):
-        mu = var.mget(i)
-        assert isinstance(mu, cp.IntVar)
 
     msg = r"Get by code is only supported for one-hot encoded features"
     with pytest.raises(ValueError, match=msg):
@@ -78,10 +71,6 @@ def test_one_hot_encoded(seed: int, n_codes: int) -> None:
     var = FeatureVar(feature=feature, name="x")
     var.build(model)
 
-    msg = r"The 'mget' method is only supported for continuous features"
-    with pytest.raises(ValueError, match=msg):
-        _ = var.mget(0)
-
     msg = r"Code is required for one-hot encoded features get"
     with pytest.raises(ValueError, match=msg):
         _ = var.xget()
tests/cp/variable/feature/test_optimize.py (11 additions, 9 deletions)

@@ -96,7 +96,6 @@ def solve_and_assert(val: float, expected: float) -> None:
         var.build(model)
         scale = int(1e6)
         v = var.xget()
-        variables = [var.mget(i) for i in range(len(levels) - 1)]
         intervals_cost = np.zeros(len(levels) - 1, dtype=int)
         for i in range(len(intervals_cost)):
             if levels[i] < val <= levels[i + 1]:
@@ -105,21 +104,24 @@ def solve_and_assert(val: float, expected: float) -> None:
                 intervals_cost[i] = int(abs(val - levels[i]) * scale)
             elif levels[i + 1] < val:
                 intervals_cost[i] = int(abs(val - levels[i + 1]) * scale)
+        # tighten domain of objvar based on val itself ---------
+        var.objvarget().Proto().domain[:] = []
+        var.objvarget().Proto().domain.extend(
+            cp.Domain(
+                min(intervals_cost), max(intervals_cost)
+            ).FlattenedIntervals()
+        )
+        # -------------------------------------------------------
 
-        objective = cp.LinearExpr.WeightedSum(variables, intervals_cost)
+        objective = var.objvarget()
+        model.AddElement(v, list(intervals_cost), objective)
         model.Minimize(objective)
         solver.Solve(model)
         value = levels[solver.Value(v) + 1]
-        mu_values = [solver.Value(mu) for mu in variables]
-        assert sum(mu_values) == 1.0, (
-            f"Solver status {solver.StatusName()}, "
-            f"mu values {mu_values}, "
-            f"model variables {model.Proto().variables}, "
-        )
         assert np.isclose(value, expected), (
             f"Expected {expected}, but got {value}, with levels {levels} "
             f"and val={val} and solver status {solver.StatusName()}"
-            f" v = {solver.Value(v)} and mu_values {mu_values}"
+            f" v = {solver.Value(v)}, obj_value = {solver.Value(objective)}"
         )
 
         # Test within bounds