gsbDBI · kanodiaayush · Oct 31, 2022 · Nov 2, 2022 · Nov 2, 2022 · Dec 5, 2022
diff --git a/bemb/model/__init__.py b/bemb/model/__init__.py
@@ -1,2 +1,3 @@
 from .bemb import *
+from .bemb_chunked import *
 from .bemb_flex_lightning import *
diff --git a/bemb/model/bayesian_coefficient.py b/bemb/model/bayesian_coefficient.py
diff --git a/bemb/model/bemb.py b/bemb/model/bemb.py
@@ -39,21 +39,26 @@ def parse_utility(utility_string: str) -> List[Dict[str, Union[List[str], None]]
     A helper function parse utility string into a list of additive terms.
 
     Example:
-        utility_string = 'lambda_item + theta_user * alpha_item + gamma_user * beta_item * price_obs'
+        utility_string = 'lambda_item + theta_user * alpha_item - gamma_user * beta_item * price_obs'
         output = [
             {
                 'coefficient': ['lambda_item'],
-                'observable': None
+                'observable': None,
+                'sign': 1.0,
+
             },
             {
                 'coefficient': ['theta_user', 'alpha_item'],
                 'observable': None
+                'sign': 1.0,
             },
             {
                 'coefficient': ['gamma_user', 'beta_item'],
                 'observable': 'price_obs'
+                'sign': -1.0,
             }
             ]
+    Note that subtraction is supported: join terms with ' - ' (a space on each side). To negate the first term, write the minus sign directly in front of it with no space, e.g., '-lambda_item + theta_user * alpha_item'.
     """
     # split additive terms
     coefficient_suffix = ('_item', '_user', '_constant', '_category')
@@ -76,10 +81,16 @@ def is_coefficient(name: str) -> bool:
     def is_observable(name: str) -> bool:
         return any(name.startswith(prefix) for prefix in observable_prefix)
 
+    utility_string = utility_string.replace(' - ', ' + -')
     additive_terms = utility_string.split(' + ')
     additive_decomposition = list()
     for term in additive_terms:
-        atom = {'coefficient': [], 'observable': None}
+        if term.startswith('-'):
+            sign = -1.0
+            term = term[1:]
+        else:
+            sign = 1.0
+        atom = {'coefficient': [], 'observable': None, 'sign': sign}
         # split multiplicative terms.
         for x in term.split(' * '):
             assert not (is_observable(x) and is_coefficient(x)), f"The element {x} is ambiguous, it follows naming convention of both an observable and a coefficient."
@@ -113,6 +124,7 @@ def __init__(self,
                  num_items: int,
                  pred_item: bool,
                  num_classes: int = 2,
+                 coef_dist_dict: Dict[str, str] = {'default' : 'gaussian'},
                  H_zero_mask_dict: Optional[Dict[str, torch.BoolTensor]] = None,
                  prior_mean: Union[float, Dict[str, float]] = 0.0,
                  prior_variance: Union[float, Dict[str, float]] = 1.0,
@@ -140,6 +152,14 @@ def __init__(self,
                     lambda_item + theta_user * alpha_item + gamma_user * beta_item * price_obs
                 See the doc-string of parse_utility for an example.
 
+            coef_dist_dict (Dict[str, str]): a dictionary mapping coefficient name to coefficient distribution name.
+                The coefficient distribution name can be one of the following:
+                1. 'gaussian'
+                2. 'gamma' - obs2prior is not supported for gamma coefficients
+                If a coefficient does not appear in the dictionary, it will be assigned the distribution specified
+                by the 'default' key. By default, the default distribution is 'gaussian'.
+                For a coefficient with a gamma distribution and obs2prior set to False, prior_mean and prior_variance MUST both be dictionaries that contain an entry for that coefficient. If obs2prior is True, prior_variance is still required.
+
             obs2prior_dict (Dict[str, bool]): a dictionary maps coefficient name (e.g., 'lambda_item')
                 to a boolean indicating if observable (e.g., item_obs) enters the prior of the coefficient.
 
@@ -184,6 +204,8 @@ def __init__(self,
                 If no `prior_mean['default']` is provided, the default prior mean will be 0.0 for those coefficients
                 not in the prior_mean.keys().
 
+                For coefficients with gamma distributions, prior_mean specifies the shape parameter of the gamma prior.
+
                 Defaults to 0.0.
 
             prior_variance (Union[float, Dict[str, float]], Dict[str, torch. Tensor]): the variance of prior distribution
@@ -203,6 +225,8 @@ def __init__(self,
                 If no `prior_variance['default']` is provided, the default prior variance will be 1.0 for those coefficients
                 not in the prior_variance.keys().
 
+                For coefficients with gamma distributions, prior_variance specifies the concentration parameter of the gamma prior.
+
                 Defaults to 1.0, which means all priors have identity matrix as the covariance matrix.
 
             num_users (int, optional): number of users, required only if coefficient or observable
@@ -233,6 +257,7 @@ def __init__(self,
         self.utility_formula = utility_formula
         self.obs2prior_dict = obs2prior_dict
         self.coef_dim_dict = coef_dim_dict
+        self.coef_dist_dict = coef_dist_dict
         if H_zero_mask_dict is not None:
             self.H_zero_mask_dict = H_zero_mask_dict
         else:
@@ -325,6 +350,21 @@ def __init__(self,
         for additive_term in self.formula:
             for coef_name in additive_term['coefficient']:
                 variation = coef_name.split('_')[-1]
+
+                if coef_name not in self.coef_dist_dict.keys():
+                    if 'default' in self.coef_dist_dict.keys():
+                        self.coef_dist_dict[coef_name] = self.coef_dist_dict['default']
+                    else:
+                        warnings.warn(f"You provided a dictionary of coef_dist_dict, but coefficient {coef_name} is not a key in it. Supply a value for 'default' in the coef_dist_dict dictionary to use that as default value (e.g., coef_dist_dict['default'] = 'gaussian'); now using distribution='gaussian' since this is not supplied.")
+                        self.coef_dist_dict[coef_name] = 'gaussian'
+
+                elif self.coef_dist_dict[coef_name] == 'gamma':
+                    if not self.obs2prior_dict[coef_name]:
+                        assert isinstance(self.prior_mean, dict) and coef_name in self.prior_mean.keys(), \
+                            f"Prior mean for {coef_name} needs to be provided because it's posterior is estimated as a gamma distribution."
+                        assert isinstance(self.prior_variance, dict) and coef_name in self.prior_variance.keys(), \
+                            f"Prior variance for {coef_name} needs to be provided because it's posterior is estimated as a gamma distribution."
+
                 if isinstance(self.prior_mean, dict):
                     # the user didn't specify prior mean for this coefficient.
                     if coef_name not in self.prior_mean.keys():
@@ -345,7 +385,7 @@ def __init__(self,
                         if 'default' in self.prior_variance.keys():
                             self.prior_variance[coef_name] = self.prior_variance['default']
                         else:
-                            warnings.warn(f"You provided a dictionary of prior variance, but coefficient {coef_name} is not a key in it. Supply a value for 'default' in the prior_variance dictionary to use that as default value (e.g., prior_variance['default'] = 0.3); now using variance=1.0 since this is not supplied.")
+                            # warnings.warn(f"You provided a dictionary of prior variance, but coefficient {coef_name} is not a key in it. Supply a value for 'default' in the prior_variance dictionary to use that as default value (e.g., prior_variance['default'] = 0.3); now using variance=1.0 since this is not supplied.")
                             self.prior_variance[coef_name] = 1.0
 
                 s2 = self.prior_variance[coef_name] if isinstance(
@@ -359,6 +399,7 @@ def __init__(self,
                 if (not self.obs2prior_dict[coef_name]) and (H_zero_mask is not None):
                     raise ValueError(f'You specified H_zero_mask for {coef_name}, but obs2prior is False for this coefficient.')
 
+                print(coef_name)
                 coef_dict[coef_name] = BayesianCoefficient(variation=variation,
                                                            num_classes=variation_to_num_classes[variation],
                                                            obs2prior=self.obs2prior_dict[coef_name],
@@ -367,7 +408,8 @@ def __init__(self,
                                                            prior_mean=mean,
                                                            prior_variance=s2,
                                                            H_zero_mask=H_zero_mask,
-                                                           is_H=False)
+                                                           is_H=False,
+                                                           distribution=self.coef_dist_dict[coef_name])
         self.coef_dict = nn.ModuleDict(coef_dict)
 
         # ==============================================================================================================
@@ -380,6 +422,10 @@ def __init__(self,
                 'Additional modules are temporarily disabled for further development.')
             self.additional_modules = nn.ModuleList(additional_modules)
 
+    def clamp_coefs(self):  # call clamp_params() on every coefficient module held in self.coef_dict
+        for coefficient in self.coef_dict.values():
+            coefficient.clamp_params()
+
     def __str__(self):
         return f'Bayesian EMBedding Model with U[user, item, session] = {self.raw_formula}\n' \
                + f'Total number of parameters: {self.num_params}.\n' \
@@ -654,19 +700,31 @@ def sample_coefficient_dictionary(self, num_seeds: int, deterministic: bool = Fa
         sample_dict = dict()
         for coef_name, coef in self.coef_dict.items():
             if deterministic:
-                sample_dict[coef_name] = coef.variational_distribution.mean.unsqueeze(dim=0)  # (1, num_*, dim)
+                s = coef.variational_distribution.mean.unsqueeze(dim=0)  # (1, num_*, dim)
+                # print(torch.min(s), torch.max(s))
+                # breakpoint()
+                # if coef.distribution == 'lognormal':
+                #     s = torch.exp(s)
+                sample_dict[coef_name] = s
                 if coef.obs2prior:
                     sample_dict[coef_name + '.H'] = coef.prior_H.variational_distribution.mean.unsqueeze(dim=0)  # (1, num_*, dim)
             else:
                 s = coef.rsample(num_seeds)
                 if coef.obs2prior:
                     # sample both obs2prior weight and realization of variable.
                     assert isinstance(s, tuple) and len(s) == 2
-                    sample_dict[coef_name] = s[0]
+                    # if coef.distribution == 'lognormal':
+                    if False:
+                        ss = torch.exp(s[0])
+                    else:
+                        ss = s[0]
+                    sample_dict[coef_name] = ss
                     sample_dict[coef_name + '.H'] = s[1]
                 else:
                     # only sample the realization of variable.
                     assert torch.is_tensor(s)
+                    # if coef.distribution == 'lognormal':
+                    #     s = torch.exp(s)
                     sample_dict[coef_name] = s
         return sample_dict
 
@@ -907,6 +965,7 @@ def reshape_observable(obs, name):
                     sample_dict[coef_name], coef_name)
                 assert coef_sample.shape == (R, P, I, 1)
                 additive_term = coef_sample.view(R, P, I)
+                additive_term *= term['sign']
 
             # Type II: factorized coefficient, e.g., <theta_user, lambda_item>.
             elif len(term['coefficient']) == 2 and term['observable'] is None:
@@ -922,6 +981,7 @@ def reshape_observable(obs, name):
                     R, P, I, positive_integer)
 
                 additive_term = (coef_sample_0 * coef_sample_1).sum(dim=-1)
+                additive_term *= term['sign']
 
             # Type III: single coefficient multiplied by observable, e.g., theta_user * x_obs_item.
             elif len(term['coefficient']) == 1 and term['observable'] is not None:
@@ -935,6 +995,7 @@ def reshape_observable(obs, name):
                 assert obs.shape == (R, P, I, positive_integer)
 
                 additive_term = (coef_sample * obs).sum(dim=-1)
+                additive_term *= term['sign']
 
             # Type IV: factorized coefficient multiplied by observable.
             # e.g., gamma_user * beta_item * price_obs.
@@ -961,10 +1022,13 @@ def reshape_observable(obs, name):
                     R, P, I, num_obs, latent_dim)
                 coef_sample_1 = coef_sample_1.view(
                     R, P, I, num_obs, latent_dim)
+                # coef_sample_0 = torch.exp(coef_sample_0)
+                # coef_sample_1 = torch.exp(coef_sample_1)
                 # compute the factorized coefficient with shape (R, P, I, O).
                 coef = (coef_sample_0 * coef_sample_1).sum(dim=-1)
 
                 additive_term = (coef * obs).sum(dim=-1)
+                additive_term *= term['sign']
 
             else:
                 raise ValueError(f'Undefined term type: {term}')
@@ -1138,6 +1202,7 @@ def reshape_observable(obs, name):
                     sample_dict[coef_name], coef_name)
                 assert coef_sample.shape == (R, total_computation, 1)
                 additive_term = coef_sample.view(R, total_computation)
+                additive_term *= term['sign']
 
             # Type II: factorized coefficient, e.g., <theta_user, lambda_item>.
             elif len(term['coefficient']) == 2 and term['observable'] is None:
@@ -1153,6 +1218,7 @@ def reshape_observable(obs, name):
                     R, total_computation, positive_integer)
 
                 additive_term = (coef_sample_0 * coef_sample_1).sum(dim=-1)
+                additive_term *= term['sign']
 
             # Type III: single coefficient multiplied by observable, e.g., theta_user * x_obs_item.
             elif len(term['coefficient']) == 1 and term['observable'] is not None:
@@ -1167,6 +1233,7 @@ def reshape_observable(obs, name):
                 assert obs.shape == (R, total_computation, positive_integer)
 
                 additive_term = (coef_sample * obs).sum(dim=-1)
+                additive_term *= term['sign']
 
             # Type IV: factorized coefficient multiplied by observable.
             # e.g., gamma_user * beta_item * price_obs.
@@ -1196,6 +1263,7 @@ def reshape_observable(obs, name):
                 coef = (coef_sample_0 * coef_sample_1).sum(dim=-1)
 
                 additive_term = (coef * obs).sum(dim=-1)
+                additive_term *= term['sign']
 
             else:
                 raise ValueError(f'Undefined term type: {term}')