Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bemb/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .bemb import *
from .bemb_chunked import *
from .bemb_flex_lightning import *
220 changes: 202 additions & 18 deletions bemb/model/bayesian_coefficient.py

Large diffs are not rendered by default.

82 changes: 75 additions & 7 deletions bemb/model/bemb.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,26 @@ def parse_utility(utility_string: str) -> List[Dict[str, Union[List[str], None]]
A helper function that parses a utility string into a list of additive terms.

Example:
utility_string = 'lambda_item + theta_user * alpha_item + gamma_user * beta_item * price_obs'
utility_string = 'lambda_item + theta_user * alpha_item - gamma_user * beta_item * price_obs'
output = [
{
'coefficient': ['lambda_item'],
'observable': None
'observable': None,
'sign': 1.0,

},
{
'coefficient': ['theta_user', 'alpha_item'],
'observable': None,
'sign': 1.0,
},
{
'coefficient': ['gamma_user', 'beta_item'],
'observable': 'price_obs',
'sign': -1.0,
}
]
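Note that subtraction is allowed in the utility string: separate terms with ' - ' (a space on each side). If the first term is negative, write the minus sign directly in front of it with no space, e.g., '-lambda_item + theta_user * alpha_item'.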
"""
# split additive terms
coefficient_suffix = ('_item', '_user', '_constant', '_category')
Expand All @@ -76,10 +81,16 @@ def is_coefficient(name: str) -> bool:
def is_observable(name: str) -> bool:
    """Return True when ``name`` starts with one of the prefixes in ``observable_prefix``.

    Args:
        name: a single element of a multiplicative term in the utility string.

    Returns:
        True if ``name`` begins with any entry of ``observable_prefix``, else False.
    """
    # str.startswith accepts a tuple of candidates, so one call covers every prefix.
    return name.startswith(tuple(observable_prefix))

utility_string = utility_string.replace(' - ', ' + -')
additive_terms = utility_string.split(' + ')
additive_decomposition = list()
for term in additive_terms:
atom = {'coefficient': [], 'observable': None}
if term.startswith('-'):
sign = -1.0
term = term[1:]
else:
sign = 1.0
atom = {'coefficient': [], 'observable': None, 'sign': sign}
# split multiplicative terms.
for x in term.split(' * '):
assert not (is_observable(x) and is_coefficient(x)), f"The element {x} is ambiguous, it follows naming convention of both an observable and a coefficient."
Expand Down Expand Up @@ -113,6 +124,7 @@ def __init__(self,
num_items: int,
pred_item: bool,
num_classes: int = 2,
coef_dist_dict: Dict[str, str] = {'default' : 'gaussian'},
H_zero_mask_dict: Optional[Dict[str, torch.BoolTensor]] = None,
prior_mean: Union[float, Dict[str, float]] = 0.0,
prior_variance: Union[float, Dict[str, float]] = 1.0,
Expand Down Expand Up @@ -140,6 +152,14 @@ def __init__(self,
lambda_item + theta_user * alpha_item + gamma_user * beta_item * price_obs
See the doc-string of parse_utility for an example.

coef_dist_dict (Dict[str, str]): a dictionary mapping coefficient name to coefficient distribution name.
The coefficient distribution name can be one of the following:
1. 'gaussian'
2. 'gamma' - obs2prior is not supported for gamma coefficients
If a coefficient does not appear in the dictionary, it will be assigned the distribution specified
by the 'default' key. By default, the default distribution is 'gaussian'.
For coefficients with gamma distributions, the prior mean and variance MUST be specified in the prior_mean and prior_variance arguments if obs2prior is False for this coefficient. If obs2prior is True, prior_variance is still required.

obs2prior_dict (Dict[str, bool]): a dictionary that maps each coefficient name (e.g., 'lambda_item')
to a boolean indicating if observable (e.g., item_obs) enters the prior of the coefficient.

Expand Down Expand Up @@ -184,6 +204,8 @@ def __init__(self,
If no `prior_mean['default']` is provided, the default prior mean will be 0.0 for those coefficients
not in the prior_mean.keys().

For coefficients with gamma distributions, prior_mean specifies the shape parameter of the gamma prior.

Defaults to 0.0.

prior_variance (Union[float, Dict[str, float], Dict[str, torch.Tensor]]): the variance of prior distribution
Expand All @@ -203,6 +225,8 @@ def __init__(self,
If no `prior_variance['default']` is provided, the default prior variance will be 1.0 for those coefficients
not in the prior_variance.keys().

For coefficients with gamma distributions, prior_variance specifies the concentration parameter of the gamma prior.

Defaults to 1.0, which means all priors have identity matrix as the covariance matrix.

num_users (int, optional): number of users, required only if coefficient or observable
Expand Down Expand Up @@ -233,6 +257,7 @@ def __init__(self,
self.utility_formula = utility_formula
self.obs2prior_dict = obs2prior_dict
self.coef_dim_dict = coef_dim_dict
self.coef_dist_dict = coef_dist_dict
if H_zero_mask_dict is not None:
self.H_zero_mask_dict = H_zero_mask_dict
else:
Expand Down Expand Up @@ -325,6 +350,21 @@ def __init__(self,
for additive_term in self.formula:
for coef_name in additive_term['coefficient']:
variation = coef_name.split('_')[-1]

if coef_name not in self.coef_dist_dict.keys():
if 'default' in self.coef_dist_dict.keys():
self.coef_dist_dict[coef_name] = self.coef_dist_dict['default']
else:
warnings.warn(f"You provided a dictionary of coef_dist_dict, but coefficient {coef_name} is not a key in it. Supply a value for 'default' in the coef_dist_dict dictionary to use that as default value (e.g., coef_dist_dict['default'] = 'gaussian'); now using distribution='gaussian' since this is not supplied.")
self.coef_dist_dict[coef_name] = 'gaussian'

elif self.coef_dist_dict[coef_name] == 'gamma':
if not self.obs2prior_dict[coef_name]:
assert isinstance(self.prior_mean, dict) and coef_name in self.prior_mean.keys(), \
f"Prior mean for {coef_name} needs to be provided because it's posterior is estimated as a gamma distribution."
assert isinstance(self.prior_variance, dict) and coef_name in self.prior_variance.keys(), \
f"Prior variance for {coef_name} needs to be provided because it's posterior is estimated as a gamma distribution."

if isinstance(self.prior_mean, dict):
# the user didn't specify prior mean for this coefficient.
if coef_name not in self.prior_mean.keys():
Expand All @@ -345,7 +385,7 @@ def __init__(self,
if 'default' in self.prior_variance.keys():
self.prior_variance[coef_name] = self.prior_variance['default']
else:
warnings.warn(f"You provided a dictionary of prior variance, but coefficient {coef_name} is not a key in it. Supply a value for 'default' in the prior_variance dictionary to use that as default value (e.g., prior_variance['default'] = 0.3); now using variance=1.0 since this is not supplied.")
# warnings.warn(f"You provided a dictionary of prior variance, but coefficient {coef_name} is not a key in it. Supply a value for 'default' in the prior_variance dictionary to use that as default value (e.g., prior_variance['default'] = 0.3); now using variance=1.0 since this is not supplied.")
self.prior_variance[coef_name] = 1.0

s2 = self.prior_variance[coef_name] if isinstance(
Expand All @@ -359,6 +399,7 @@ def __init__(self,
if (not self.obs2prior_dict[coef_name]) and (H_zero_mask is not None):
raise ValueError(f'You specified H_zero_mask for {coef_name}, but obs2prior is False for this coefficient.')

print(coef_name)
coef_dict[coef_name] = BayesianCoefficient(variation=variation,
num_classes=variation_to_num_classes[variation],
obs2prior=self.obs2prior_dict[coef_name],
Expand All @@ -367,7 +408,8 @@ def __init__(self,
prior_mean=mean,
prior_variance=s2,
H_zero_mask=H_zero_mask,
is_H=False)
is_H=False,
distribution=self.coef_dist_dict[coef_name])
self.coef_dict = nn.ModuleDict(coef_dict)

# ==============================================================================================================
Expand All @@ -380,6 +422,10 @@ def __init__(self,
'Additional modules are temporarily disabled for further development.')
self.additional_modules = nn.ModuleList(additional_modules)

def clamp_coefs(self):
    """Call ``clamp_params()`` on every coefficient stored in ``self.coef_dict``.

    What ``clamp_params()`` does is defined by the coefficient class, which is
    not shown here.

    Returns:
        None.
    """
    # Iterate the values directly; the coefficient names are not needed here.
    for coefficient in self.coef_dict.values():
        coefficient.clamp_params()

def __str__(self):
return f'Bayesian EMBedding Model with U[user, item, session] = {self.raw_formula}\n' \
+ f'Total number of parameters: {self.num_params}.\n' \
Expand Down Expand Up @@ -654,19 +700,31 @@ def sample_coefficient_dictionary(self, num_seeds: int, deterministic: bool = Fa
sample_dict = dict()
for coef_name, coef in self.coef_dict.items():
if deterministic:
sample_dict[coef_name] = coef.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
s = coef.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
# print(torch.min(s), torch.max(s))
# breakpoint()
# if coef.distribution == 'lognormal':
# s = torch.exp(s)
sample_dict[coef_name] = s
if coef.obs2prior:
sample_dict[coef_name + '.H'] = coef.prior_H.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
else:
s = coef.rsample(num_seeds)
if coef.obs2prior:
# sample both obs2prior weight and realization of variable.
assert isinstance(s, tuple) and len(s) == 2
sample_dict[coef_name] = s[0]
# if coef.distribution == 'lognormal':
if False:
ss = torch.exp(s[0])
else:
ss = s[0]
sample_dict[coef_name] = ss
sample_dict[coef_name + '.H'] = s[1]
else:
# only sample the realization of variable.
assert torch.is_tensor(s)
# if coef.distribution == 'lognormal':
# s = torch.exp(s)
sample_dict[coef_name] = s
return sample_dict

Expand Down Expand Up @@ -907,6 +965,7 @@ def reshape_observable(obs, name):
sample_dict[coef_name], coef_name)
assert coef_sample.shape == (R, P, I, 1)
additive_term = coef_sample.view(R, P, I)
additive_term *= term['sign']

# Type II: factorized coefficient, e.g., <theta_user, lambda_item>.
elif len(term['coefficient']) == 2 and term['observable'] is None:
Expand All @@ -922,6 +981,7 @@ def reshape_observable(obs, name):
R, P, I, positive_integer)

additive_term = (coef_sample_0 * coef_sample_1).sum(dim=-1)
additive_term *= term['sign']

# Type III: single coefficient multiplied by observable, e.g., theta_user * x_obs_item.
elif len(term['coefficient']) == 1 and term['observable'] is not None:
Expand All @@ -935,6 +995,7 @@ def reshape_observable(obs, name):
assert obs.shape == (R, P, I, positive_integer)

additive_term = (coef_sample * obs).sum(dim=-1)
additive_term *= term['sign']

# Type IV: factorized coefficient multiplied by observable.
# e.g., gamma_user * beta_item * price_obs.
Expand All @@ -961,10 +1022,13 @@ def reshape_observable(obs, name):
R, P, I, num_obs, latent_dim)
coef_sample_1 = coef_sample_1.view(
R, P, I, num_obs, latent_dim)
# coef_sample_0 = torch.exp(coef_sample_0)
# coef_sample_1 = torch.exp(coef_sample_1)
# compute the factorized coefficient with shape (R, P, I, O).
coef = (coef_sample_0 * coef_sample_1).sum(dim=-1)

additive_term = (coef * obs).sum(dim=-1)
additive_term *= term['sign']

else:
raise ValueError(f'Undefined term type: {term}')
Expand Down Expand Up @@ -1138,6 +1202,7 @@ def reshape_observable(obs, name):
sample_dict[coef_name], coef_name)
assert coef_sample.shape == (R, total_computation, 1)
additive_term = coef_sample.view(R, total_computation)
additive_term *= term['sign']

# Type II: factorized coefficient, e.g., <theta_user, lambda_item>.
elif len(term['coefficient']) == 2 and term['observable'] is None:
Expand All @@ -1153,6 +1218,7 @@ def reshape_observable(obs, name):
R, total_computation, positive_integer)

additive_term = (coef_sample_0 * coef_sample_1).sum(dim=-1)
additive_term *= term['sign']

# Type III: single coefficient multiplied by observable, e.g., theta_user * x_obs_item.
elif len(term['coefficient']) == 1 and term['observable'] is not None:
Expand All @@ -1167,6 +1233,7 @@ def reshape_observable(obs, name):
assert obs.shape == (R, total_computation, positive_integer)

additive_term = (coef_sample * obs).sum(dim=-1)
additive_term *= term['sign']

# Type IV: factorized coefficient multiplied by observable.
# e.g., gamma_user * beta_item * price_obs.
Expand Down Expand Up @@ -1196,6 +1263,7 @@ def reshape_observable(obs, name):
coef = (coef_sample_0 * coef_sample_1).sum(dim=-1)

additive_term = (coef * obs).sum(dim=-1)
additive_term *= term['sign']

else:
raise ValueError(f'Undefined term type: {term}')
Expand Down
Loading