README.md (7 additions, 7 deletions)

@@ -67,33 +67,33 @@ print("CP : ",cp_explanation)
 Expected output:
 
 ```plaintext
-MIP objective value: 5.0
+MIP objective value: 3.0
 MIP Explanation:
 Age : 39.0
 CapitalGain : 2174.0
 CapitalLoss : 0
 EducationNumber : 13.0
-HoursPerWeek : 41.0
+HoursPerWeek : 40.0
 MaritalStatus : 3
 NativeCountry : 0
-Occupation : 1
+Occupation : 10
 Relationship : 0
 Sex : 0
 WorkClass : 6
 
-CP objective value: 5.0
+CP objective value: 3.0
 CP Explanation:
-Age : 38.0
+Age : 39.0
 CapitalGain : 2174.0
 CapitalLoss : 0.0
 EducationNumber : 13.0
-HoursPerWeek : 41.0
+HoursPerWeek : 40.0
 MaritalStatus : 3
 NativeCountry : 0
 Occupation : 1
 Relationship : 0
 Sex : 0
-WorkClass : 6
+WorkClass : 4
 ```

ocean/cp/_model.py (37 additions, 8 deletions)

@@ -121,12 +121,15 @@ def _add_objective(self, x: Array1D, norm: int) -> cp.ObjLinearExprT:
         x_arr = np.asarray(x, dtype=float).ravel()
 
         variables = self.mapper.values()
+        names = [n for n, _ in self.mapper.items()]
         objective: cp.LinearExpr = 0  # type: ignore[assignment]
         k = 0
-        for v in variables:
+        indexer = self.mapper.idx
+        for v, name in zip(variables, names, strict=True):
             if v.is_one_hot_encoded:
                 for code in v.codes:
-                    objective += self.L1(x_arr[k], v, code=code)
+                    idx = indexer.get(name, code)
+                    objective += self.L1(x_arr[idx], v, code=code)
                     k += 1
             else:
                 objective += self.L1(x_arr[k], v)
@@ -157,21 +160,47 @@ def L1(
         if v.is_numeric:
             if v.is_continuous:
                 intervals_cost = self.get_intervals_cost(v.levels, x)
-                variables = [v.mget(i) for i in range(len(v.levels) - 1)]
-                obj_expr = cp.LinearExpr.WeightedSum(variables, intervals_cost)
+                # tighten domain of objvar based on x itself ----------
+                v.objvarget().Proto().domain[:] = []
+                v.objvarget().Proto().domain.extend(
+                    cp.Domain(
+                        min(intervals_cost), max(intervals_cost)
+                    ).FlattenedIntervals()
+                )
+                # -----------------------------------------------------
+                obj_expr = v.objvarget()
+                self.add_garbage(
+                    self.AddElement(v.xget(), list(intervals_cost), obj_expr)
+                )
+                obj_coefs.append(1)
             else:
+                # include the value of x itself in the domain ---------
+                if len(v.thresholds) != 0:
+                    values = [
+                        val
+                        for t in v.thresholds
+                        for val in [int(t), int(t) + 1]
+                    ]
+                else:
+                    values = np.asarray(v.levels).astype(int).tolist()
+                values.append(int(x))
+                v.xget().Proto().domain[:] = []
+                v.xget().Proto().domain.extend(
+                    cp.Domain.FromValues(values).FlattenedIntervals()
+                )
+                # -----------------------------------------------------
                 obj_expr = v.objvarget()
                 self.add_garbage(
                     self.AddAbsEquality(obj_expr, int(x) - v.xget())
                 )
+                obj_coefs.append(self._obj_scale)
             obj_exprs.append(obj_expr)
-            obj_coefs.append(1)
         elif v.is_one_hot_encoded:
-            obj_expr = v.xget(code) if x == 0.0 else 1 - v.xget(code)
+            obj_expr = v.xget(code) if x == 0.0 else 1 - v.xget(code)  # type: ignore[assignment]
             obj_exprs.append(obj_expr)
-            obj_coefs.append(self._obj_scale)
+            obj_coefs.append(self._obj_scale // 2)
         else:
-            obj_expr = v.xget() if x == 0.0 else 1 - v.xget()
+            obj_expr = v.xget() if x == 0.0 else 1 - v.xget()  # type: ignore[assignment]
             obj_exprs.append(obj_expr)
             obj_coefs.append(self._obj_scale)
         return cp.LinearExpr.WeightedSum(obj_exprs, obj_coefs)
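Note on the continuous branch above: it replaces the old `WeightedSum` over per-interval `mu` Booleans with CP-SAT's element constraint. `AddElement(index, table, target)` enforces `target == table[index]`, so the interval selector `v.xget()` directly looks up its precomputed L1 cost. A minimal, self-contained sketch of the pattern (the cost values and variable names here are illustrative, not taken from this library):

```python
from ortools.sat.python import cp_model

model = cp_model.CpModel()

# Precomputed (scaled) L1 distances from a query point to each interval,
# in the spirit of get_intervals_cost; the numbers are made up.
costs = [340, 0, 120, 560]

x = model.NewIntVar(0, len(costs) - 1, "x")           # interval selector
obj = model.NewIntVar(min(costs), max(costs), "obj")  # cost variable
model.AddElement(x, costs, obj)                       # obj == costs[x]

model.Minimize(obj)
solver = cp_model.CpSolver()
solver.Solve(model)
print(solver.Value(x), solver.Value(obj))  # 1 0: the zero-cost interval wins
```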
ocean/cp/_variables/_feature.py (7 additions, 25 deletions)

@@ -14,7 +14,6 @@ class FeatureVar(Var, FeatureKeeper):
 
     _x: cp.IntVar
     _u: Mapping[Key, cp.IntVar]
-    _mu: list[cp.IntVar]
     _objvar: cp.IntVar
 
     def __init__(self, feature: Feature, name: str) -> None:
@@ -24,10 +23,7 @@ def __init__(self, feature: Feature, name: str) -> None:
     def build(self, model: BaseModel) -> None:
         if not self.is_one_hot_encoded:
             self._x = self._add_x(model)
-        if self.is_continuous:
-            self._mu = self._add_mu(model)
-            model.add_map_domain(self.xget(), self._mu)
-        elif self.is_one_hot_encoded:
+        if self.is_one_hot_encoded:
             self._u = self._add_u(model)
 
     def xget(self, code: Key | None = None) -> cp.IntVar:
@@ -38,17 +34,10 @@ def xget(self, code: Key | None = None) -> cp.IntVar:
             raise ValueError(msg)
         return self._x
 
-    def mget(self, key: int) -> cp.IntVar:
-        if not self.is_continuous:
-            msg = "The 'mget' method is only supported for continuous features"
-            raise ValueError(msg)
-        return self._mu[key]
-
     def objvarget(self) -> cp.IntVar:
-        if not self.is_discrete:
-            msg = (
-                "The 'objvarget' method is only supported for discrete features"
-            )
+        if not self.is_numeric:
+            msg = "The 'objvarget' method is only supported"
+            msg += " for continuous and discrete features"
             raise ValueError(msg)
         return self._objvar
 
@@ -76,16 +65,6 @@ def _add_u(self, model: BaseModel) -> Mapping[Key, cp.IntVar]:
         model.AddExactlyOne(u.values())
         return u
 
-    def _set_mu(self, model: BaseModel, m: int) -> list[cp.IntVar]:
-        return [model.NewBoolVar(f"{self._name}_mu_{i}") for i in range(m)]
-
-    def _add_mu(self, model: BaseModel) -> list[cp.IntVar]:
-        if not self.is_continuous:
-            msg = "Mu variables are only supported for continuous features"
-            raise ValueError(msg)
-        m = len(self.levels) - 1
-        return self._set_mu(model, m=m)
-
     def _add_one_hot_encoded(
         self,
         model: BaseModel,
@@ -100,6 +79,9 @@ def _add_binary(model: BaseModel, name: str) -> cp.IntVar:
         return model.NewBoolVar(name)
 
     def _add_continuous(self, model: BaseModel, name: str) -> cp.IntVar:
+        self._objvar = model.NewIntVar(
+            0, 42, f"u_{name}"
+        )  # arbitrary bounds, adapted for each query
         m = len(self.levels)
         return model.NewIntVar(0, m - 2, name)
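The `(0, 42)` bounds given to `_objvar` here are deliberate placeholders: `L1` in `ocean/cp/_model.py` rewrites the variable's domain through the proto once the query point is known. A standalone sketch of that in-place rewrite, assuming OR-Tools CP-SAT's Python API (the tightened bounds are made up):

```python
from ortools.sat.python import cp_model

model = cp_model.CpModel()
u = model.NewIntVar(0, 42, "u")  # placeholder bounds at build time

# Query arrives: clear the proto's domain and install tight bounds,
# mirroring the objvarget().Proto().domain manipulation above.
tight = cp_model.Domain(0, 17)
u.Proto().domain[:] = []
u.Proto().domain.extend(tight.FlattenedIntervals())
```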
ocean/mip/_model.py (20 additions, 7 deletions)

@@ -147,19 +147,32 @@ def _add_objective(self, x: Array1D, norm: int) -> Objective:
         if norm not in {1, 2}:
             msg = f"Unsupported norm: {norm}"
             raise ValueError(msg)
 
+        names = self.mapper.names
+        is_ohe = [self.mapper[name].is_one_hot_encoded for name in names]
         variables = map(self.vget, range(self.n_columns))
         if norm == 1:
-            return sum(map(self.L1, x, variables), start=gp.LinExpr())
-        return sum(map(self.L2, x, variables), start=gp.QuadExpr())
+            return sum(
+                (
+                    self.L1(xi, v, is_ohe=ohe)
+                    for xi, v, ohe in zip(x, variables, is_ohe, strict=True)
+                ),
+                start=gp.LinExpr(),
+            )
+        return sum(
+            (
+                self.L2(xi, v, is_ohe=ohe)
+                for xi, v, ohe in zip(x, variables, is_ohe, strict=True)
+            ),
+            start=gp.QuadExpr(),
+        )
 
-    def L1(self, x: np.float64, v: gp.Var) -> gp.LinExpr:
+    def L1(self, x: np.float64, v: gp.Var, *, is_ohe: bool) -> gp.LinExpr:
         u = self.addVar()
         neg = self.addConstr(u >= v - x)
         pos = self.addConstr(u >= x - v)
         self.add_garbage(u, pos, neg)
-        return gp.LinExpr(u)
+        return gp.LinExpr(u) / 2.0 if is_ohe else gp.LinExpr(u)
 
     @staticmethod
-    def L2(x: np.float64, v: gp.Var) -> gp.QuadExpr:
-        return (v - x) ** 2
+    def L2(x: np.float64, v: gp.Var, *, is_ohe: bool) -> gp.QuadExpr:
+        return ((v - x) ** 2) / 2.0 if is_ohe else (v - x) ** 2
tests/cp/builder/test_model.py (8 additions, 8 deletions)

@@ -175,15 +175,15 @@ def test_build_continuous_feature(
         feature_var = FeatureVar(feature=feature, name="x")
         tree_var = TreeVar(tree=tree, name="tree")
 
-        feature_var.build(model)  # 3 variables and 6 constraints
+        feature_var.build(model)  # 2 variables
         tree_var.build(model)  # 4 variables and 1 constraint
 
         mapper = Mapper[FeatureVar]({"x": feature_var}, columns=pd.Index(["x"]))
 
         builder.build(model, trees=[tree_var], mapper=mapper)  # 8 constraints
 
-        assert len(model.Proto().constraints) == 13
-        assert len(model.Proto().variables) == 7
+        assert len(model.Proto().constraints) == 9
+        assert len(model.Proto().variables) == 6
 
     @staticmethod
     def test_build_discrete_feature(
@@ -200,7 +200,7 @@ def test_build_discrete_feature(
         feature_var = FeatureVar(feature=feature, name="x")
         tree_var = TreeVar(tree=tree, name="tree")
 
-        feature_var.build(model)  # 2 variables and 0 constraints
+        feature_var.build(model)  # 2 variables
         tree_var.build(model)  # 4 variables and 1 constraint
 
         mapper = Mapper[FeatureVar]({"x": feature_var}, columns=pd.Index(["x"]))
@@ -259,7 +259,7 @@ def test_build_multiple_features(
 
         tree_var = TreeVar(tree=tree, name="tree")
         for feature_var in feature_vars:
-            # 6 variables and 1 constraint
+            # 7 variables and 1 constraint
             feature_var.build(model)
 
         tree_var.build(model)  # 6 variables and 1 constraint
@@ -275,7 +275,7 @@ def test_build_multiple_features(
         )
         builder.build(model, trees=[tree_var], mapper=mapper)  # 16 constraints
         assert len(model.Proto().constraints) == 18
-        assert len(model.Proto().variables) == 12
+        assert len(model.Proto().variables) == 13
 
     @staticmethod
     def test_build_multiple_trees(
@@ -305,7 +305,7 @@ def test_build_multiple_trees(
             TreeVar(tree=tree, name=f"tree_{i}") for i, tree in enumerate(trees)
         ]
         for feature_var in feature_vars:
-            # 6 variables and 1 constraint
+            # 7 variables and 1 constraint
             feature_var.build(model)
         for tree_var in tree_vars:
             # 18 variables and 3 constraints
@@ -323,4 +323,4 @@ def test_build_multiple_trees(
         builder.build(model, trees=tree_vars, mapper=mapper)
         # 48 constraints
         assert len(model.Proto().constraints) == 52
-        assert len(model.Proto().variables) == 24
+        assert len(model.Proto().variables) == 25
tests/cp/model/test_feasible.py (2 additions, 2 deletions)

@@ -51,7 +51,7 @@ def test_build(
         if feature.is_binary:
             feature_vars += 1
         elif feature.is_continuous:
-            feature_vars += len(feature.levels)
+            feature_vars += 2
             feature_constraints += 2 * (len(feature.levels) - 1)
         elif feature.is_discrete:
             feature_vars += 2
@@ -60,7 +60,7 @@ def test_build(
             feature_constraints += 1
     lb = n_nodes - n_leaves
     ub = (n_nodes - n_leaves) * (n_nodes - n_leaves + 1)
-    lb += feature_constraints + n_estimators
+    lb += n_estimators
    ub += feature_constraints + n_estimators
     assert len(model.Proto().variables) == n_leaves + feature_vars
     assert len(model.Proto().constraints) >= lb
tests/cp/variable/feature/test_build.py (0 additions, 11 deletions)

@@ -16,9 +16,6 @@ def test_binary() -> None:
     var.build(model)
     v = var.xget()
     assert isinstance(v, cp.IntVar)
-    msg = r"The 'mget' method is only supported for continuous features"
-    with pytest.raises(ValueError, match=msg):
-        _ = var.mget(0)
 
     msg = r"Get by code is only supported for one-hot encoded features"
     with pytest.raises(ValueError, match=msg):
@@ -58,10 +55,6 @@ def test_continuous(seed: int, n_levels: int, lower: int, upper: int) -> None:
     var.build(model)
     v = var.xget()
     assert isinstance(v, cp.IntVar)
-    n = len(var.levels)
-    for i in range(n - 1):
-        mu = var.mget(i)
-        assert isinstance(mu, cp.IntVar)
 
     msg = r"Get by code is only supported for one-hot encoded features"
     with pytest.raises(ValueError, match=msg):
@@ -78,10 +71,6 @@ def test_one_hot_encoded(seed: int, n_codes: int) -> None:
     var = FeatureVar(feature=feature, name="x")
     var.build(model)
 
-    msg = r"The 'mget' method is only supported for continuous features"
-    with pytest.raises(ValueError, match=msg):
-        _ = var.mget(0)
-
     msg = r"Code is required for one-hot encoded features get"
     with pytest.raises(ValueError, match=msg):
         _ = var.xget()
tests/cp/variable/feature/test_optimize.py (11 additions, 9 deletions)

@@ -96,7 +96,6 @@ def solve_and_assert(val: float, expected: float) -> None:
         var.build(model)
         scale = int(1e6)
         v = var.xget()
-        variables = [var.mget(i) for i in range(len(levels) - 1)]
         intervals_cost = np.zeros(len(levels) - 1, dtype=int)
         for i in range(len(intervals_cost)):
             if levels[i] < val <= levels[i + 1]:
@@ -105,21 +104,24 @@ def solve_and_assert(val: float, expected: float) -> None:
                 intervals_cost[i] = int(abs(val - levels[i]) * scale)
             elif levels[i + 1] < val:
                 intervals_cost[i] = int(abs(val - levels[i + 1]) * scale)
+        # tighten domain of objvar based on val itself ---------
+        var.objvarget().Proto().domain[:] = []
+        var.objvarget().Proto().domain.extend(
+            cp.Domain(
+                min(intervals_cost), max(intervals_cost)
+            ).FlattenedIntervals()
+        )
+        # -------------------------------------------------------
 
-        objective = cp.LinearExpr.WeightedSum(variables, intervals_cost)
+        objective = var.objvarget()
+        model.AddElement(v, list(intervals_cost), objective)
         model.Minimize(objective)
         solver.Solve(model)
         value = levels[solver.Value(v) + 1]
-        mu_values = [solver.Value(mu) for mu in variables]
-        assert sum(mu_values) == 1.0, (
-            f"Solver status {solver.StatusName()}, "
-            f"mu values {mu_values}, "
-            f"model variables {model.Proto().variables}, "
-        )
         assert np.isclose(value, expected), (
             f"Expected {expected}, but got {value}, with levels {levels} "
             f"and val={val} and solver status {solver.StatusName()}"
-            f" v = {solver.Value(v)} and mu_values {mu_values}"
+            f" v = {solver.Value(v)}, obj_value = {solver.Value(objective)}"
         )
 
         # Test within bounds