diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 000000000..5008ddfcf Binary files /dev/null and b/.DS_Store differ diff --git a/dist/selection-0+untagged.2432.gfe15626-py2.7-macosx-10.6-x86_64.egg b/dist/selection-0+untagged.2432.gfe15626-py2.7-macosx-10.6-x86_64.egg new file mode 100644 index 000000000..ff55d1a9f Binary files /dev/null and b/dist/selection-0+untagged.2432.gfe15626-py2.7-macosx-10.6-x86_64.egg differ diff --git a/dist/selection-0+untagged.2432.gfe15626-py3.7-macosx-10.7-x86_64.egg b/dist/selection-0+untagged.2432.gfe15626-py3.7-macosx-10.7-x86_64.egg new file mode 100644 index 000000000..0af51405a Binary files /dev/null and b/dist/selection-0+untagged.2432.gfe15626-py3.7-macosx-10.7-x86_64.egg differ diff --git a/dist/selection-0+untagged.2432.gfe15626.dirty-py3.7-macosx-10.7-x86_64.egg b/dist/selection-0+untagged.2432.gfe15626.dirty-py3.7-macosx-10.7-x86_64.egg new file mode 100644 index 000000000..fc19df296 Binary files /dev/null and b/dist/selection-0+untagged.2432.gfe15626.dirty-py3.7-macosx-10.7-x86_64.egg differ diff --git a/selection.egg-info/PKG-INFO b/selection.egg-info/PKG-INFO new file mode 100644 index 000000000..d619aaca9 --- /dev/null +++ b/selection.egg-info/PKG-INFO @@ -0,0 +1,49 @@ +Metadata-Version: 2.1 +Name: selection +Version: 0+untagged.2485.g1019ee0.dirty +Summary: Testing a fixed value of lambda +Home-page: http://github.org/jonathan.taylor/fixed_lambda +Author: fixed_lambda developers +Author-email: +Maintainer: Jonathan Taylor +Maintainer-email: +License: BSD license +Description: # The selection project + + This project contains software for selective inference, with + emphasis on selective inference in regression. + + Some key references: + + * `A significance test for the lasso`: http://arxiv.org/abs/1301.7161 + * `Tests in adaptive regression via the Kac-Rice formula`: http://arxiv.org/abs/1308.3020 + * `Post-selection adaptive inference for Least Angle Regression and the Lasso`: http://arxiv.org/abs/1401.3889 + * `Exact post-selection inference with the lasso`: http://arxiv.org/abs/1311.6238 + * `Exact Post Model Selection Inference for Marginal Screening`: http://arxiv.org/abs/1402.5596 + + Install + ------- + + + ``` + git submodule init # travis_tools and C-software + git submodule update + pip install -r requirements.txt + python setup.py install + ``` + +Platform: OS Independent +Classifier: Development Status :: 3 - Alpha +Classifier: Environment :: Console +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Topic :: Scientific/Engineering +Requires: numpy (>=1.7.1) +Requires: scipy (>=0.9) +Requires: mpmath (>=0.18) +Requires: pyinter +Provides: fixed_lambda +Provides-Extra: test +Provides-Extra: doc diff --git a/selection.egg-info/SOURCES.txt b/selection.egg-info/SOURCES.txt new file mode 100644 index 000000000..cfdc98eab --- /dev/null +++ b/selection.egg-info/SOURCES.txt @@ -0,0 +1,186 @@ +LICENSE +MANIFEST.in +README.md +TODO +cythexts.py +setup.cfg +setup.py +setup_helpers.py +setup_helpers.pyc +versioneer.py +C-software/src/debias.h +C-software/src/matrixcomps.h +C-software/src/quadratic_program_wide.c +C-software/src/randomized_lasso.h +C-software/src/selective_mle.c +C-software/src/selective_mle.h +doc/Makefile +doc/examples/compute_coverages.rst +doc/examples/conditional_sampling.py +doc/examples/hiv_approx_ci.py +doc/examples/power_comparison.py +doc/notebooks/UMPU.ipynb +doc/notebooks/covtest.ipynb +doc/notebooks/isotonic.ipynb +doc/notebooks/lasso.ipynb +doc/notebooks/pca_rank1.ipynb +doc/notebooks/quadratic_decisions.ipynb +doc/notebooks/reduced_covtest.ipynb +doc/notebooks/screening.ipynb +doc/notebooks/selection_objects.ipynb +doc/notebooks/spacings.ipynb +doc/source/conf.py +doc/source/docattribute.rst +doc/source/documentation.rst +doc/source/download.rst +doc/source/index.rst +doc/source/links_names.txt +doc/source/spacings.rst +doc/source/_static/logo.png +doc/source/_static/selection.css +doc/source/_templates/layout.html +doc/source/spacings_files/spacings_23_0.png +doc/source/spacings_files/spacings_25_0.png +doc/source/spacings_files/spacings_27_0.png +doc/source/spacings_files/spacings_29_0.png +doc/source/spacings_files/spacings_31_0.png +doc/source/spacings_files/spacings_3_0.png +doc/source/spacings_files/spacings_4_0.png +doc/source/spacings_files/spacings_5_0.png +doc/source/spacings_files/spacings_6_0.png +doc/source/spacings_files/spacings_7_0.png +doc/source/spacings_files/spacings_9_0.png +doc/source/sphinxext/math_dollar.py +selection/__init__.py +selection/_version.py +selection/api.py +selection/base.py +selection/glm.py +selection/info.py +selection.egg-info/PKG-INFO +selection.egg-info/SOURCES.txt +selection.egg-info/dependency_links.txt +selection.egg-info/not-zip-safe +selection.egg-info/requires.txt +selection.egg-info/top_level.txt +selection/algorithms/__init__.py +selection/algorithms/api.py +selection/algorithms/change_point.py +selection/algorithms/covtest.py +selection/algorithms/cv.py +selection/algorithms/cv_glmnet.py +selection/algorithms/debiased_lasso.py +selection/algorithms/debiased_lasso_utils.pyx +selection/algorithms/forward_step.py +selection/algorithms/lasso.py +selection/algorithms/pca.py +selection/algorithms/screening.py +selection/algorithms/softmax.py +selection/algorithms/sqrt_lasso.py +selection/algorithms/stopping_rules.py +selection/algorithms/tests/__init__.py +selection/algorithms/tests/test_IC.py +selection/algorithms/tests/test_ROSI.py +selection/algorithms/tests/test_change_point.py +selection/algorithms/tests/test_compareR.py +selection/algorithms/tests/test_covtest.py +selection/algorithms/tests/test_data_carving.py +selection/algorithms/tests/test_debiased_lasso.py +selection/algorithms/tests/test_forward_step.py +selection/algorithms/tests/test_lasso.py +selection/algorithms/tests/test_screening.py +selection/algorithms/tests/test_softmax.py +selection/algorithms/tests/test_sqrt_lasso.py +selection/constraints/__init__.py +selection/constraints/affine.py +selection/constraints/api.py +selection/constraints/base.py +selection/constraints/estimation.py +selection/constraints/intervals.py +selection/constraints/quadratic.py +selection/constraints/quasi_affine.py +selection/constraints/tests/__init__.py +selection/constraints/tests/test_affine.py +selection/constraints/tests/test_estimation.py +selection/constraints/tests/test_quadratic_tests.py +selection/constraints/tests/test_quasi.py +selection/constraints/tests/test_unknown_sigma.py +selection/distributions/__init__.py +selection/distributions/api.py +selection/distributions/chain.py +selection/distributions/chisq.py +selection/distributions/discrete_family.py +selection/distributions/discrete_multiparameter.py +selection/distributions/intervals.py +selection/distributions/pvalue.py +selection/distributions/tests/__init__.py +selection/distributions/tests/test_chains.py +selection/distributions/tests/test_discreteExFam.py +selection/distributions/tests/test_multiparameter.py +selection/randomized/__init__.py +selection/randomized/api.py +selection/randomized/cv_view.py +selection/randomized/gen_lasso.py +selection/randomized/gen_lasso_utils.py +selection/randomized/lasso.py +selection/randomized/modelQ.py +selection/randomized/qp.py +selection/randomized/query.py +selection/randomized/randomization.py +selection/randomized/screening.py +selection/randomized/selective_MLE_utils.pyx +selection/randomized/slope.py +selection/randomized/tests/__init__.py +selection/randomized/tests/test_BH.py +selection/randomized/tests/test_genlasso.py +selection/randomized/tests/test_lasso.py +selection/randomized/tests/test_marginal_screening.py +selection/randomized/tests/test_modelQ.py +selection/randomized/tests/test_multiple_queries.py +selection/randomized/tests/test_naive.py +selection/randomized/tests/test_qp.py +selection/randomized/tests/test_randomization.py +selection/randomized/tests/test_selective_MLE.py +selection/randomized/tests/test_selective_MLE_high.py +selection/randomized/tests/test_selective_MLE_onedim.py +selection/randomized/tests/test_slope.py +selection/randomized/tests/test_slope_subgrad.py +selection/randomized/tests/test_topK.py +selection/sampling/__init__.py +selection/sampling/api.py +selection/sampling/langevin.py +selection/sampling/sequential.py +selection/sampling/sqrt_lasso.pyx +selection/sampling/truncnorm.pyx +selection/sampling/truncnorm_quadratic.pyx +selection/sampling/tests/__init__.py +selection/sampling/tests/plots_fs.py +selection/sampling/tests/test_fstep_langevin.py +selection/sampling/tests/test_kfstep.py +selection/sampling/tests/test_pca_langevin.py +selection/sampling/tests/test_sample_sphere.py +selection/sampling/tests/test_sequential.py +selection/tests/__init__.py +selection/tests/decorators.py +selection/tests/flags.py +selection/tests/instance.py +selection/tests/test_instance.py +selection/tests/tests.py +selection/truncated/F.py +selection/truncated/T.py +selection/truncated/__init__.py +selection/truncated/api.py +selection/truncated/base.py +selection/truncated/chi.py +selection/truncated/gaussian.py +selection/truncated/tests/__init__.py +selection/truncated/tests/test_truncated.py +selection/truncated/tests/test_truncatedFT.py +selection/utils/__init__.py +selection/utils/tools.py +tools/apigen.py +tools/build_modref_templates.py +tools/gitwash_dumper.py +tools/nbtools.py +tools/noseall_with_coverage +tools/strip_notebook.py \ No newline at end of file diff --git a/selection.egg-info/dependency_links.txt b/selection.egg-info/dependency_links.txt new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/selection.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/selection.egg-info/not-zip-safe b/selection.egg-info/not-zip-safe new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/selection.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/selection.egg-info/requires.txt b/selection.egg-info/requires.txt new file mode 100644 index 000000000..eac3fc62b --- /dev/null +++ b/selection.egg-info/requires.txt @@ -0,0 +1,7 @@ +Cython>=0.21 + +[doc] +Sphinx>=1.0 + +[test] +nose>=0.10.1 diff --git a/selection.egg-info/top_level.txt b/selection.egg-info/top_level.txt new file mode 100644 index 000000000..8be36ee44 --- /dev/null +++ b/selection.egg-info/top_level.txt @@ -0,0 +1 @@ +selection diff --git a/selection/constraints/affine.py b/selection/constraints/affine.py index 94e7ceeff..c9eac7975 100644 --- a/selection/constraints/affine.py +++ b/selection/constraints/affine.py @@ -452,9 +452,30 @@ def whiten(self): """ sqrt_cov, sqrt_inv = self.covariance_factors()[:2] + # if np.isnan(sqrt_cov).any(): + # print("sqrt cov has nan:", sqrt_cov) + # if (sqrt_cov==0).any(): + # print("sqrt cov has 0:", sqrt_cov) + # if np.isnan(sqrt_inv).any(): + # print("sqrt inv has nan:", sqrt_inv) + # if (sqrt_inv==0).any(): + # print("sqrt inv has 0:", sqrt_inv) + # print("linear part", self.linear_part) + new_A = self.linear_part.dot(sqrt_cov) + # if np.isnan(new_A).any(): + # print("new_A has nan:", new_A) + # if (new_A==0).any(): + # print("new_A has 0:", new_A) den = np.sqrt((new_A**2).sum(1)) + # if np.isnan(den).any(): + # print("den has nan:", den) + # if (den==0).any(): + # print("den has 0:", den) + # print("sqrt cov col sums:", sqrt_cov.sum(1)) new_b = self.offset - self.linear_part.dot(self.mean) + # if np.isnan(new_b).any(): + # print("new_b has nan:", new_b) new_con = constraints(new_A / den[:,None], new_b / den) mu = self.mean.copy() diff --git a/selection/randomized/.DS_Store b/selection/randomized/.DS_Store new file mode 100644 index 000000000..5008ddfcf Binary files /dev/null and b/selection/randomized/.DS_Store differ diff --git a/selection/randomized/gen_lasso.py b/selection/randomized/gen_lasso.py new file mode 100644 index 000000000..13a3de822 --- /dev/null +++ b/selection/randomized/gen_lasso.py @@ -0,0 +1,971 @@ +from __future__ import print_function +import functools +from copy import copy + +import numpy as np +from scipy.stats import norm as ndist, t as tdist + +# import functools +# from copy import copy + +# import numpy as np +# from scipy.stats import norm as ndist + +import regreg.api as rr +import regreg.affine as ra + +from .qp import qp_problem + +from ..constraints.affine import constraints +from ..algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda + +from .query import (gaussian_query, + affine_gaussian_sampler) + +from .reconstruction import reconstruct_opt +from .randomization import randomization +from ..base import restricted_estimator +from ..glm import (pairs_bootstrap_glm, + glm_nonparametric_bootstrap, + glm_parametric_covariance) +from ..algorithms.debiased_lasso import debiasing_matrix + +from .gen_lasso_utils import find_trendfiltering_nspaceb, find_fusedlasso_nspaceb, find_nspaceb_identity, find_nullspace_basis, create_penalty_matrix + +#### High dimensional version +#### - parametric covariance +#### - Gaussian randomization + +class gen_lasso(gaussian_query): + r""" + A class for the randomized generalized LASSO for post-selection inference. + The problem solved is + + .. math:: + + \text{minimize}_{\beta} \ell(\beta) + + \sum_{i=1}^p \lambda_i |D_i\beta| - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 + + where $\lambda$ is `lam`, $D$ is the penalty matrix, $\omega$ is a + randomization generated below and the last term is a small ridge + penalty. Each static method forms $\ell$ as well as the $\ell_1$ + penalty. The generic class forms the remaining two terms in the + objective. + + """ + + def __init__(self, + loglike, + feature_weights, + ridge_term, + randomizer, + perturb=None, + penalty_param=None, + penalty_type=None, + fused_dims=None): + r""" + Create a new post-selection object for the LASSO problem + + Parameters + ---------- + + loglike : `regreg.smooth.glm.glm` + A (negative) log-likelihood as implemented in `regreg`. + + feature_weights : float + $\lambda$ value for L-1 penalty. + + penalty_param : [np.ndarray, int] + Either the penalty matrix for L-1 penalty (the D in |D\beta|_1), + or if penalty_type is provided, this parametrizes the construction + of the penalty matrix D (k-D fused or k-th order polynomial + trend-filtering), or is a custom penalty matrix of the specified + type in penalty_matrix. If None, defaults to 1D fused lasso, if + penalty_type='fused' or linear trend-filtering if + penalty_type='trendfiltering'. At least one of penalty_param and + penalty_type must not be None. + + ridge_term : float + How big a ridge term to add? + + randomizer : object + Randomizer -- contains representation of randomization density. + + perturb : np.ndarray + Random perturbation subtracted as a linear + term in the objective function. + + penalty_type : [str, np.ndarray] + If None (default), a general solver is used, or a string in + ["lasso", "fused","trendfiltering"], in which case an optimized + solver is used. At least one of penalty_param and + penalty_type must not be None. + + fused_dims : integer sequence + Can't be None if penalty_matrix is "fused" and k is an int > 1, + must be a sequence of length k specifying the dimensions of the grid. + """ + + self.loglike = loglike + self.nfeature = p = self.loglike.shape[0] + + # if np.asarray(feature_weights).shape == (): + # feature_weights = np.ones(loglike.shape) * feature_weights + # else: + if np.asarray(feature_weights).shape != (): + raise ValueError("'feature_weights' must be a scalar") + self.feature_weights = feature_weights # np.asarray(feature_weights) + + self.ridge_term = ridge_term + + if penalty_param is None and penalty_type is None: + raise ValueError("'penalty_param' and 'penalty_type' cannot both be None") + if type(penalty_param) is np.ndarray: + self.D = penalty_param + elif type(penalty_type) is str: + if penalty_param is None or type(penalty_param) is int: + self.D = create_penalty_matrix(penalty_type,p,k=penalty_param,fused_dims=fused_dims) + else: + raise ValueError("'penalty_param' must be an int or None if 'penalty_type' is not None") + else: + raise ValueError("'penalty_param' must be a numpy array or 'penalty_type' must be a string") + + self.structure = penalty_type + + + self.penalty = rr.l1norm(self.D.shape[0],lagrange=feature_weights) + self._initial_omega = perturb # random perturbation + + self.randomizer = randomizer + + def fit(self, + use_admm=True, + use_closed_form=True, + solve_args={'tol': 1.e-12, 'min_its': 50}, + perturb=None): + """ + Fit the randomized lasso using `regreg`. + Parameters + ---------- + solve_args : keyword args + Passed to `regreg.problems.simple_problem.solve`. + Returns + ------- + signs : np.float + Support and non-zero signs of randomized lasso solution. + + """ + + p = self.nfeature + + # take a new perturbation if supplied + if perturb is not None: + self._initial_omega = perturb + if self._initial_omega is None: + self._initial_omega = self.randomizer.sample() + + quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0) + + X, y = self.loglike.data + if use_admm: + admm_quad = (X.T.dot(X) + self.ridge_term * np.eye(p)) if use_closed_form else None + problem = rr.admm_problem(self.loglike, + self.penalty, + self.D, + 0.5, + X.T.dot(y), + self._initial_omega, + admm_quad) + # problem = rr.admm_problem(self.loglike, + # self.penalty, + # self.D, + # quadratic=self.loglike.quadratic, + # rho_quadratic=rho_quadratic) + problem.solve(niter=25) + # problem.solve(quadratic=quad, niter=250) + self.initial_soln = problem.loss_coefs # \beta + self.initial_penalty_soln = problem.atom_coefs # D\beta + else: + problem = qp_problem(X, y, self.D, + ridge_term=self.ridge_term, + lam=self.feature_weights, + randomization=self._initial_omega) + # problem.solve() + self.exact_initial_soln = self.initial_soln = problem.solve() # \beta + self.exact_initial_penalty_soln = self.initial_penalty_soln = self.D.dot(self.initial_soln) # D\beta + self.initial_penalty_soln = self.initial_penalty_soln * (np.fabs(self.initial_penalty_soln) > 1e-6*np.max(X)) + + active_signs = self.active_signs = np.sign(self.initial_penalty_soln) + active = self._active = active_signs != 0 # used for finding nullspace basis + + coef_active_signs = np.sign(self.initial_soln) + coef_active_variables = coef_active_signs != 0 + self.selection_variable = {'sign': active_signs, + 'variables': active} + + # initial state for opt variables + initial_subgrad = -(self.loglike.smooth_objective(self.initial_soln, 'grad') + + quad.objective(self.initial_soln, 'grad')) + self.initial_subgrad = initial_subgrad + + if self.structure == "fused": + nsb = find_fusedlasso_nspaceb(self.D, self._active) + elif self.structure == "trendfiltering": + nsb = find_trendfiltering_nspaceb(self.D, self._active) + elif self.structure == "lasso": + nsb = find_nspaceb_identity(self._active) + else: + nsb = find_nullspace_basis(self.D, self._active) + + self.nsb = nsb + + if active.sum() > 0: + nsb_pinv = np.linalg.inv(nsb.T.dot(nsb)).dot(nsb.T) + self.initial_alpha = nsb_pinv.dot(self.initial_soln) # \beta = nsb\alpha => (nsb^Tnsb)^{-1}nsb^T\beta = \alpha + self.observed_opt_state = self.initial_alpha + else: + self.observed_opt_state = self.initial_alpha = self.initial_soln + + self.num_opt_var = self.observed_opt_state.shape[0] ###### opt_var is now alpha + + W = self._W = self.loglike.saturated_loss.hessian(X.dot(self.initial_soln)) + _hessian = np.dot(X.T,X*W[:,None]) + + _score_linear_term = -_hessian + + # set the observed score (data dependent) state + self.observed_score_state = self.loglike.smooth_objective(self.initial_soln, 'grad') + _score_linear_term.dot(self.initial_soln) + + _opt_linear_term = (_hessian + self.ridge_term * np.eye(p)).dot(nsb) + + self.opt_transform = (_opt_linear_term, self.initial_subgrad) + self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0])) + + # now store everything needed for the projections + # the projection acts only on the optimization + # variables + + self._setup = True + self.ndim = self.loglike.shape[0] + + if False:#active.sum() == 0: + self.sampler = None + else: + # compute implied mean and covariance + + _, prec = self.randomizer.cov_prec + opt_linear, opt_offset = self.opt_transform + A = _hessian + self.ridge_term * np.eye(p) + if active.sum() > 0: + A = A.dot(nsb) + + if np.asarray(prec).shape in [(), (0,)]: + cond_precision = A.T.dot(A) * prec + cond_cov = np.linalg.inv(cond_precision) + logdens_linear = cond_cov.dot(A.T) * prec + else: + cond_precision = A.T.dot(prec.dot(A)) + cond_cov = np.linalg.inv(cond_precision) + logdens_linear = cond_cov.dot(A.T).dot(prec) + + cond_mean = - logdens_linear.dot(self._initial_omega - (_hessian + self.ridge_term * np.eye(p)).dot(self.initial_soln)) + + # density as a function of score and optimization variables + + def log_density(logdens_linear, offset, cond_prec, score, opt): + if score.ndim == 1: + mean_term = logdens_linear.dot(score.T + offset).T + else: + mean_term = logdens_linear.dot(score.T + offset[:, None]).T + arg = opt + mean_term + return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) + + log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision) + + + # constrain |D\beta| = 0 + if active.sum() == 0: + A_scaling = np.vstack((self.D, -self.D)) + b_scaling = np.ones(2*self.D.shape[0]) * 1e-8 * np.max(X) ## tolerance since QP soln is not exact + # constrain -sign(D\hat\beta)D\Gamma\aplha <= 0 + else: + A_scaling = -np.diag(active_signs[active]).dot(self.D[active,:].dot(nsb)) + b_scaling = np.zeros(active.sum()) + + affine_con = constraints(A_scaling, + b_scaling, + mean=cond_mean, + covariance=cond_cov) + + + logdens_transform = (logdens_linear, opt_offset) + + self.sampler = affine_gaussian_sampler(affine_con, + self.observed_opt_state, + self.observed_score_state, + log_density, + logdens_transform, + selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on + + return coef_active_signs, active_signs # if active.sum() > 0 else 2*np.ones_like(active_signs) + + def summary(self, + observed_target, + cov_target, + cov_target_score, + alternatives, + opt_sample=None, + parameter=None, + level=0.9, + ndraw=10000, + burnin=2000, + compute_intervals=False): + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + target : one of ['selected', 'full'] + features : np.bool + Binary encoding of which features to use in final + model and targets. + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + level : float + Confidence level. + ndraw : int (optional) + Defaults to 1000. + burnin : int (optional) + Defaults to 1000. + compute_intervals : bool + Compute confidence intervals? + dispersion : float (optional) + Use a known value for dispersion, or Pearson's X^2? + """ + + + if parameter is None: + parameter = np.zeros_like(observed_target) + + ## handle case where active set is empty + if self.structure != "fused" and self._active.sum() == 0: + (ind_unbiased_estimator, + cov_unbiased_estimator, + unbiased_Z_scores, + unbiased_pvalues, + unbiased_intervals) = self.selective_MLE(observed_target, + cov_target, + cov_target_score, + level=level)[5:] + + sd_target = np.sqrt(np.diag(cov_unbiased_estimator)) + pivots = ndist.cdf((ind_unbiased_estimator - parameter)/sd_target) + + if not np.all(parameter == 0): + pvalues = ndist.cdf((ind_unbiased_estimator - np.zeros_like(parameter))/sd_target) + else: + pvalues = pivots + + if alternatives == 'twosided': + pvalues = 2 * np.minimum(pvalues, 1-pvalues) + if alternatives == 'greater': + pvalues = 1 - pvalues + + + intervals = None + if compute_intervals: + _, y = self.loglike.data + n = y.shape[0] + alpha = 1 - level + z = ndist.ppf(1 - alpha/2) + # t = tdist.ppf(1 - alpha/2, n - ind_unbiased_estimator.shape[0]) + lower_limit = ind_unbiased_estimator - z*sd_target + upper_limit = ind_unbiased_estimator + z*sd_target + + lower_limit = np.expand_dims(lower_limit, 1) + upper_limit = np.expand_dims(upper_limit, 1) + intervals = np.hstack((lower_limit, upper_limit)) + else: + if opt_sample is None: + opt_sample = self.sampler.sample(ndraw, burnin) + else: + ndraw = opt_sample.shape[0] + + pivots = self.sampler.coefficient_pvalues(observed_target, + cov_target, + cov_target_score, + parameter=parameter, + sample=opt_sample, + alternatives=alternatives) + + if not np.all(parameter == 0): + pvalues = self.sampler.coefficient_pvalues(observed_target, + cov_target, + cov_target_score, + parameter=np.zeros_like(parameter), + sample=opt_sample, + alternatives=alternatives) + else: + pvalues = pivots + + intervals = None + if compute_intervals: + + # MLE_intervals = self.selective_MLE(observed_target, + # cov_target, + # cov_target_score, + # level=level)[4] + + intervals = self.sampler.confidence_intervals(observed_target, + cov_target, + cov_target_score, + sample=opt_sample, + # initial_guess=MLE_intervals, + level=level) + + return pivots, pvalues, intervals + + @staticmethod + def gaussian(X, + Y, + feature_weights, + penalty_param=None, + penalty_type=None, + sigma=1., + quadratic=None, + ridge_term=None, + randomizer_scale=None): + r""" + Squared-error LASSO with feature weights. + Objective function is (before randomization) + + $$ + \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i| + $$ + + where $\lambda$ is `feature_weights`. The ridge term + is determined by the Hessian and `np.std(Y)` by default, + as is the randomizer scale. + Parameters + ---------- + X : ndarray + Shape (n,p) -- the design matrix. + Y : ndarray + Shape (n,) -- the response. + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + sigma : float (optional) + Noise variance. Set to 1 if `covariance_estimator` is not None. + This scales the loglikelihood by `sigma**(-2)`. + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + ridge_term : float + How big a ridge term to add? + randomizer_scale : float + Scale for IID components of randomizer. + randomizer : str + One of ['laplace', 'logistic', 'gaussian'] + Returns + ------- + L : `selection.randomized.convenience.lasso` + + """ + + loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic) + n, p = X.shape + + mean_diag = np.mean((X ** 2).sum(0)) + if ridge_term is None: + ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + + if randomizer_scale is None: + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) + + randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) + + return gen_lasso(loglike, + np.asarray(feature_weights) / sigma ** 2, + ridge_term, randomizer, + penalty_param=penalty_param, + penalty_type=penalty_type) + + @staticmethod + def logistic(X, + successes, + feature_weights, + trials=None, + quadratic=None, + ridge_term=None, + randomizer_scale=None): + r""" + Logistic LASSO with feature weights (before randomization) + $$ + \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + $$ + where $\ell$ is the negative of the logistic + log-likelihood (half the logistic deviance) + and $\lambda$ is `feature_weights`. + Parameters + ---------- + X : ndarray + Shape (n,p) -- the design matrix. + successes : ndarray + Shape (n,) -- response vector. An integer number of successes. + For data that is proportions, multiply the proportions + by the number of trials first. + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + trials : ndarray (optional) + Number of trials per response, defaults to + ones the same shape as Y. + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + ridge_term : float + How big a ridge term to add? + randomizer_scale : float + Scale for IID components of randomizer. + randomizer : str + One of ['laplace', 'logistic', 'gaussian'] + Returns + ------- + L : `selection.randomized.convenience.lasso` + + """ + n, p = X.shape + + loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic) + + mean_diag = np.mean((X ** 2).sum(0)) + + if ridge_term is None: + ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + + if randomizer_scale is None: + randomizer_scale = np.sqrt(mean_diag) * 0.5 + + randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) + + return lasso(loglike, + np.asarray(feature_weights), + ridge_term, randomizer) + + @staticmethod + def coxph(X, + times, + status, + feature_weights, + quadratic=None, + ridge_term=None, + randomizer_scale=None): + r""" + Cox proportional hazards LASSO with feature weights. + Objective function is (before randomization) + + $$ + \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + $$ + where $\ell^{\text{Cox}}$ is the + negative of the log of the Cox partial + likelihood and $\lambda$ is `feature_weights`. + Uses Efron's tie breaking method. + Parameters + ---------- + X : ndarray + Shape (n,p) -- the design matrix. + times : ndarray + Shape (n,) -- the survival times. + status : ndarray + Shape (n,) -- the censoring status. + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + covariance_estimator : optional + If None, use the parameteric + covariance estimate of the selected model. + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + ridge_term : float + How big a ridge term to add? + randomizer_scale : float + Scale for IID components of randomizer. + randomizer : str + One of ['laplace', 'logistic', 'gaussian'] + Returns + ------- + L : `selection.randomized.convenience.lasso` + + """ + loglike = coxph_obj(X, times, status, quadratic=quadratic) + + # scale for randomization seems kind of meaningless here... + + mean_diag = np.mean((X ** 2).sum(0)) + + if ridge_term is None: + ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1) + + if randomizer_scale is None: + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) + + randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) + + return lasso(loglike, + feature_weights, + ridge_term, + randomizer) + + @staticmethod + def poisson(X, + counts, + feature_weights, + quadratic=None, + ridge_term=None, + randomizer_scale=None): + r""" + Poisson log-linear LASSO with feature weights. + Objective function is (before randomization) + + $$ + \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + $$ + where $\ell^{\text{Poisson}}$ is the negative + of the log of the Poisson likelihood (half the deviance) + and $\lambda$ is `feature_weights`. + Parameters + ---------- + X : ndarray + Shape (n,p) -- the design matrix. + counts : ndarray + Shape (n,) -- the response. + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + ridge_term : float + How big a ridge term to add? + randomizer_scale : float + Scale for IID components of randomizer. + randomizer : str + One of ['laplace', 'logistic', 'gaussian'] + Returns + ------- + L : `selection.randomized.convenience.lasso` + + """ + n, p = X.shape + loglike = rr.glm.poisson(X, counts, quadratic=quadratic) + + # scale for randomizer seems kind of meaningless here... + + mean_diag = np.mean((X ** 2).sum(0)) + + if ridge_term is None: + ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1) + + if randomizer_scale is None: + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.)) + + randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) + + return lasso(loglike, + feature_weights, + ridge_term, + randomizer) + + @staticmethod + def sqrt_lasso(X, + Y, + feature_weights, + quadratic=None, + ridge_term=None, + randomizer_scale=None, + solve_args={'min_its': 200}, + perturb=None): + r""" + Use sqrt-LASSO to choose variables. + Objective function is (before randomization) + + $$ + \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i| + $$ + where $\lambda$ is `feature_weights`. After solving the problem + treat as if `gaussian` with implied variance and choice of + multiplier. See arxiv.org/abs/1504.08031 for details. + Parameters + ---------- + X : ndarray + Shape (n,p) -- the design matrix. + Y : ndarray + Shape (n,) -- the response. + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + covariance : str + One of 'parametric' or 'sandwich'. Method + used to estimate covariance for inference + in second stage. + solve_args : dict + Arguments passed to solver. + ridge_term : float + How big a ridge term to add? + randomizer_scale : float + Scale for IID components of randomizer. + randomizer : str + One of ['laplace', 'logistic', 'gaussian'] + Returns + ------- + L : `selection.randomized.convenience.lasso` + + Notes + ----- + Unlike other variants of LASSO, this + solves the problem on construction as the active + set is needed to find equivalent gaussian LASSO. + Assumes parametric model is correct for inference, + i.e. does not accept a covariance estimator. + """ + + n, p = X.shape + + if np.asarray(feature_weights).shape == (): + feature_weights = np.ones(p) * feature_weights + + mean_diag = np.mean((X ** 2).sum(0)) + if ridge_term is None: + ridge_term = np.sqrt(mean_diag) / (n - 1) + + if randomizer_scale is None: + randomizer_scale = 0.5 * np.sqrt(mean_diag) / np.sqrt(n - 1) + + if perturb is None: + perturb = np.random.standard_normal(p) * randomizer_scale + + randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0) # a ridge + linear term + + if quadratic is not None: + totalQ = randomQ + quadratic + else: + totalQ = randomQ + + soln, sqrt_loss = solve_sqrt_lasso(X, + Y, + weights=feature_weights, + quadratic=totalQ, + solve_args=solve_args, + force_fat=True) + + denom = np.linalg.norm(Y - X.dot(soln)) + loglike = rr.glm.gaussian(X, Y) + + randomizer = randomization.isotropic_gaussian((p,), randomizer_scale * denom) + + obj = lasso(loglike, + np.asarray(feature_weights) * denom, + ridge_term * denom, + randomizer, + perturb=perturb * denom) + obj._sqrt_soln = soln + + return obj + +# Targets of inference +# and covariance with score representation + +def fused_targets(loglike, + W, + ccs, + sign_info={}, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + X, y = loglike.data + n, p = X.shape + + if (len(ccs) == 0): + raise ValueError("No connected components provided, no targets to estimate.") + if (len(ccs) > 1): + Xcc = np.vstack([X[:,cc].sum(axis=1) for cc in ccs]).T + else: + Xcc = np.array(X[:,ccs[0]].sum(axis=1))[:,None] + ncc = Xcc.shape[1] + loglikecc = rr.glm.gaussian(Xcc, y, coef=loglike.coef, quadratic=None)#loglike.quadratic) + observed_target = loglikecc.solve(**solve_args) + if W.ndim == 1: + Qcc = Xcc.T.dot(W[:,None] * Xcc) + _score_linear = -Xcc.T.dot(W[:,None] * X).T + else: + Qcc = Xcc.T.dot(W.dot(Xcc)) + _score_linear = -Xcc.T.dot(W.dot(X)).T + cov_target = Qcc/(np.diag(Xcc.T.dot(Xcc))**2) # np.linalg.inv(Qcc) + crosscov_target_score = _score_linear.dot(np.linalg.inv(Xcc.T.dot(Xcc))) + # assert(0==1) + alternatives = ['twosided'] * ncc + ccs_idx = np.arange(ncc) + + for i in range(len(alternatives)): + if ccs_idx[i] in sign_info.keys(): + alternatives[i] = sign_info[ccs_idx[i]] + + if dispersion is None: + if W.ndim == 1: + dispersion = ((y-loglikecc.saturated_loss.mean_function( + Xcc.dot(observed_target))) ** 2 / W).sum() / (n-ncc)#-1) + else: + dispersion = ((y-loglikecc.saturated_loss.mean_function( + Xcc.dot(observed_target))) ** 2 / np.diag(W)).sum() / (n-ncc)#-1) + + return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + +def selected_targets(loglike, + W, + features, + sign_info={}, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + # if features.sum() == 0: + # raise ValueError('Empty feature set, no targets to estimate.') + + X, y = loglike.data + n, p = X.shape + + Xfeat = X[:, features] + Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) + observed_target = restricted_estimator(loglike, features, solve_args=solve_args) + cov_target = np.linalg.inv(Qfeat) + _score_linear = -Xfeat.T.dot(W[:, None] * X).T + crosscov_target_score = _score_linear.dot(cov_target) + alternatives = ['twosided'] * features.sum() + features_idx = np.arange(p)[features] + + for i in range(len(alternatives)): + if features_idx[i] in sign_info.keys(): + alternatives[i] = sign_info[features_idx[i]] + + if dispersion is None: # use Pearson's X^2 + dispersion = ((y - loglike.saturated_loss.mean_function( + Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + + return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + +def full_targets(loglike, + W, + features, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + if features.sum() == 0: + raise ValueError('Empty feature set, no targets to estimate.') + + X, y = loglike.data + n, p = X.shape + features_bool = np.zeros(p, np.bool) + features_bool[features] = True + features = features_bool + + # target is one-step estimator + + Qfull = X.T.dot(W[:, None] * X) + Qfull_inv = np.linalg.inv(Qfull) + full_estimator = loglike.solve(**solve_args) + cov_target = Qfull_inv[features][:, features] + observed_target = full_estimator[features] + crosscov_target_score = np.zeros((p, cov_target.shape[0])) + crosscov_target_score[features] = -np.identity(cov_target.shape[0]) + + if dispersion is None: # use Pearson's X^2 + dispersion = (((y - loglike.saturated_loss.mean_function(X.dot(full_estimator))) ** 2 / W).sum() / + (n - p)) + + alternatives = ['twosided'] * features.sum() + return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + +def debiased_targets(loglike, + W, + features, + sign_info={}, + penalty=None, #required kwarg + dispersion=None, + debiasing_args={}): + + if features.sum() == 0: + raise ValueError('Empty feature set, no targets to estimate.') + + if penalty is None: + raise ValueError('require penalty for consistent estimator') + + X, y = loglike.data + n, p = X.shape + features_bool = np.zeros(p, np.bool) + features_bool[features] = True + features = features_bool + + # relevant rows of approximate inverse + + Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(W)[:, None], + np.nonzero(features)[0], + **debiasing_args)) / n + + problem = rr.simple_problem(loglike, penalty) + nonrand_soln = problem.solve() + G_nonrand = loglike.smooth_objective(nonrand_soln, 'grad') + + observed_target = nonrand_soln[features] - Qinv_hat.dot(G_nonrand) + + if p > n: + M1 = Qinv_hat.dot(X.T) + cov_target = (M1 * W[None, :]).dot(M1.T) + crosscov_target_score = -(M1 * W[None, :]).dot(X).T + else: + Qfull = X.T.dot(W[:, None] * X) + cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T)) + crosscov_target_score = -Qinv_hat.dot(Qfull).T + + if dispersion is None: # use Pearson's X^2 + Xfeat = X[:, features] + Qrelax = Xfeat.T.dot(W[:, None] * Xfeat) + relaxed_soln = nonrand_soln[features] - np.linalg.inv(Qrelax).dot(G_nonrand[features]) + dispersion = (((y - loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / W).sum() / + (n - features.sum())) + + alternatives = ['twosided'] * features.sum() + return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + +def form_targets(target, + loglike, + W, + features, + **kwargs): + _target = {'full':full_targets, + 'selected':selected_targets, + 'debiased':debiased_targets, + 'fused':fused_targets}[target] + return _target(loglike, + W, + features, + **kwargs) diff --git a/selection/randomized/gen_lasso_utils.py b/selection/randomized/gen_lasso_utils.py new file mode 100644 index 000000000..d4dfe968b --- /dev/null +++ b/selection/randomized/gen_lasso_utils.py @@ -0,0 +1,247 @@ +import numpy as np +from scipy.linalg import svd +import networkx as nx + + +def create_penalty_matrix(matrix_type,p,k=None,fused_dims=None): + """ + create a penalty matrix according to the specified parameters + ---------- + matrix_type : string + One of ['lasso','fused','trendfiltering'], specifying the + type of the penalty matrix + p : int + the number of features + k : [int, np.ndarray] + This parametrizes the construction of the penalty matrix D + (k-D fused or k-th order polynomial trend filtering), If None, + defaults to 1D fused lasso or linear trend filtering. + fused_dims : sequence + Can't be None if penalty_matrix is "fused" and k is an int > 1, + must be a sequence of length k specifying the dimensions of the grid. + """ + if matrix_type == "lasso": + ## maybe should change to just call plain lasso? + return np.eye(p) + elif matrix_type == "fused": + return create_fused_lasso_matrix(p,k,fused_dims) + elif matrix_type == "trendfiltering": + return create_trendfiltering_matrix(p,k) + +def create_trendfiltering_matrix(p,k=None): + """ + p : int + the number of features + k : [int, np.ndarray] + This parametrizes the construction of the penalty matrix D + (k-th order polynomial trend filtering), If None, defaults + to linear trend filtering. + """ + if k is None: + k = 1 + if isinstance(k,int): + if k == 0: + return create_fused_lasso_matrix(p) + elif k == 1: + D = -np.eye(p)[:-2,:] + D[range(p-2),range(1,p-1)] = 2 + D[range(p-2),range(2,p)] = -1 + elif k == 2: + D = np.eye(p)[:-3,:] + D[range(p-3),range(1,p-2)] = -3 + D[range(p-3),range(2,p-1)] = 3 + D[range(p-3),range(3,p)] = -1 + elif k == 3: + D = np.eye(p)[:-4,:] + D[range(p-4),range(1,p-3)] = -4 + D[range(p-4),range(2,p-2)] = 6 + D[range(p-4),range(3,p-1)] = -4 + D[range(p-4),range(4,p)] = 1 + elif k == 4: + D = np.eye(p)[:-5,:] + D[range(p-5),range(1,p-4)] = -5 + D[range(p-5),range(2,p-3)] = 10 + D[range(p-5),range(3,p-2)] = -10 + D[range(p-5),range(4,p-1)] = 5 + D[range(p-5),range(5,p)] = -1 + else: + raise ValueError("We currently onnly support polynomial trend filtering up to order 4") + return D + else: + raise ValueError("'k' must be either an integer (0,1,2,3,4) or a 2D numpy array") + +def create_fused_lasso_matrix(p,k=None,fused_dims=None): + """ + p : int + the number of features + k : [int, np.ndarray] + This parametrizes the construction of the penalty matrix D + (k-D fused or k-th order polynomial trend filtering) If None, + defaults to 1D fused lasso or linear trend filtering. + fused_dims : sequence + Can't be None if k is an int > 1, must be a sequence + of length k specifying the dimensions of the grid. + """ + if k is None: + k = 1 + if isinstance(k,int): + D = -np.eye(p)[:-1,:] + D[range(p-1),range(1,p)] = 1 + if k == 1: + return D + if fused_dims is None: + raise ValueError("If 'k' > 1, fused_dims must be provided") + elif len(fused_dims) != k: + raise ValueError("length of 'fused_dims' must equal 'k'") + else: + x=fused_dims[0] + y=fused_dims[1] + add = np.zeros(((x-1)*y,p)) + add[range(x*y-y),range(y,x*y)] = 1 + D = np.hstack((D,add)) + if k == 2: + return D + if k == 3: + for i in range(2,k+1): + add = np.zeros(((x-1)*y,p)) + a=(i-1)*x*y-y + b=i*x*y-y + add[range(a,b-y),range(a+y,b)] = 1 + D = np.hstack((D,add)) + + for i in range(k-1): + add = np.zeros((x*y,p)) + a = i*x*y + b = (i+1)*x*y + c = (i+2)*x*y + add[range(a,b),range(b,c)] = 1 + D = np.hstack((D,add)) + + return D + else: + raise ValueError("We only support 'k' = 1,2,3") + else: + raise ValueError("'k' must be either an integer (1,2,3) or a 2D numpy array") + +def find_trendfiltering_nspaceb(D,active): + """ + D : np.ndarray + penalty matrix + active : np.ndarray + boolean vector specifying which rows of D are active (E) + """ + k = np.fabs(D[0,1]).astype(int) + p = D.shape[1] + if k < 2 or k > 5: + raise ValueError('k must be an integer in [2,5].') + nullity = active.sum() + if nullity == active.shape[0]: + return np.eye(p) + D_inactive = D[(~active),:] + nonzero_idx = np.fabs(D_inactive).sum(axis=0) + cum_nonzero_idx = nonzero_idx.cumsum() + + # index of first nonzero column in D_{-E} + if (cum_nonzero_idx==0).any(): + head_offset = np.amax(np.where(cum_nonzero_idx==0)) + 1 + else: + head_offset = 0 + + # index of last nonzerocolumn in D_{-E} + where_max = np.where(cum_nonzero_idx==np.amax(cum_nonzero_idx)) + tail_offset = len(cum_nonzero_idx) - np.amin(where_max) - 1 + + # where to put initial ones in basis + active_idx = np.where(active[head_offset:(len(active)-tail_offset)])[0] + head_offset + + mat_dtype = np.longdouble if (k > 2 and p > 50) else np.float64 + basis = np.zeros((p,nullity+k),dtype=mat_dtype) + head_cols = range(head_offset,nullity-tail_offset) + basis[(active_idx),head_cols] = 1 + + tail_idx = -np.array(range(1,k+1+tail_offset)) + basis[tail_idx,tail_idx] = 1 + + basis[range(head_offset),range(head_offset)] = 1 + + restr_basis = basis[:,(head_offset):(basis.shape[1]-tail_offset)] + for r in reversed(range(1+head_offset,p-tail_offset-k+1)): + if (r-1) in active_idx: + continue + else: + if k == 2: restr_basis[r-1,:] = -restr_basis[r+1,:] + 2*restr_basis[r,:] + elif k == 3: restr_basis[r-1,:] = restr_basis[r+2,:] - 3*restr_basis[r+1,:] \ + + 3*restr_basis[r,:] + elif k == 4: restr_basis[r-1,:] = -restr_basis[r+3,:] + 4*restr_basis[r+2,:] \ + - 6*restr_basis[r+1,:] + 4*restr_basis[r,:] + elif k == 5: restr_basis[r-1,:] = restr_basis[r+4,:] - 5*restr_basis[r+3,:] \ + + 10*restr_basis[r+2,:] - 10*restr_basis[r+1,:] \ + + 5*restr_basis[r,:] + + basis[:,(head_offset):(basis.shape[1]-tail_offset)] = restr_basis + return basis + +def find_fusedlasso_nspaceb(D,active): + """ + D : np.ndarray + penalty matrix + active : np.ndarray + boolean vector specifying which rows of D are active (E) + """ + p = D.shape[1] + # create graph from D_{-E} + D_inactive = D[(~active),:] + src_nodes = np.where(D_inactive == -1)[1] + dst_nodes = np.where(D_inactive == 1)[1] + edge_tuples = [(src_nodes[i],dst_nodes[i]) for i in range(D_inactive.shape[0])] + + G = nx.empty_graph(n=p) + G.add_edges_from(edge_tuples) + + ccs = list(nx.connected_components(G)) + n_ccs = len(ccs)#nx.number_connected_components(G) + # isos = list(nx.isolates(G)) + # n_isos = len(isos) + # print("Isolates:", isos) + basis = np.zeros((p, n_ccs))# + n_isos)) + + count = 0 + for i,cc in enumerate(ccs): + count += len(list(cc)) + # print("~~~~~~") + # print(list(cc)) + # print(i) + basis[list(cc),i] = 1 + assert(count==p) + assert(basis.sum()==p) + # stop2 + # for i,iso in enumerate(isos): + # basis[iso,i+n_ccs] = 1 + + return basis + + +# In the case of D = I (ordinary lasso), null(D_{-E}) = D_E +def find_nspaceb_identity(active): + """ + active : np.ndarray + boolean vector specifying which rows of D are active (E) + """ + return np.identity(len(active))[:,active] + +def find_nullspace_basis(D,active): + """ + D : np.ndarray + penalty matrix + active : np.ndarray + boolean vector specifying which rows of D are active (E) + """ + if (D.shape[0] == D.shape[1]) and (D == np.identity(D.shape[0])).all(): + return find_nspaceb_identity(active) + + _,s,v = svd(D[(~active),:]) + basis = v[len(s):,:].T # v is returned w/ rows being right singular vectors of D_{-E} + return basis + + + diff --git a/selection/randomized/qp.py b/selection/randomized/qp.py new file mode 100644 index 000000000..0cccb14f6 --- /dev/null +++ b/selection/randomized/qp.py @@ -0,0 +1,87 @@ +#!/usr/bin/python + +# Copyright 2018, Gurobi Optimization, LLC + +# This example formulates and solves the following simple QP model: +# minimize +# x^2 + x*y + y^2 + y*z + z^2 + 2 x +# subject to +# x + 2 y + 3 z >= 4 +# x + y >= 1 +# +# It solves it once as a continuous model, and once as an integer model. + +import numpy as np +# from ..smooth import sum as smooth_sum +# from ..smooth.quadratic import quadratic_loss +# from ..affine import aslinear, astransform +# from ..identity_quadratic import identity_quadratic +from gurobipy import * + + +##### NOTE: rn only for squared error loss +##### TODO: make general later +class qp_problem(object): + + + def __init__(self, + X, + y, + D, + ridge_term=0.01, + lam=1.5, + randomization=None): + + self.n, self.p = n, p = X.shape + q = D.shape[0] + + # Create a new model + self.m = Model("qp") + self.m.setParam( 'OutputFlag', False ) + + # Create variables + beta = self.m.addVars(p, lb=-GRB.INFINITY, name="beta") + t = self.m.addVars(q, lb=-GRB.INFINITY, name="t") + + # Set objective: beta^T(X^TX + ridge_term/2)beta - 2(y^TX+w)beta + lam*1^Tt + quad = 0.5 * (X.T.dot(X))# + ridge_term * np.eye(p)) + quad_dict = {i: beta.prod({j: quad[j,i] for j in range(p)}) for i in range(p)} + + if randomization is None: + randomization = np.random.standard_normal(p) + + linear = y.T.dot(X) + randomization + linear_dict = {i: lin for i,lin in enumerate(linear)} + + obj = beta.prod(quad_dict) - beta.prod(linear_dict) + lam*t.sum() + self.m.setObjective(obj) + + # Add constraints: -t <= Dbeta <= t + for i in range(q): + D_dict = beta.prod({j: D[i,j] for j in range(p)}) + self.m.addConstr(D_dict <= t[i], "pc{}".format(i)) + self.m.addConstr(D_dict >= -t[i], "nc{}".format(i)) + + def solve(self): + self.m.optimize() + + soln = np.zeros(self.p) + i = 0 + + for v in self.m.getVars(): + if 'beta' in v.VarName: + soln[i] = v.x + i += 1 + + self.soln = soln + + return soln + + + + + + + + + diff --git a/selection/randomized/query.py b/selection/randomized/query.py index c385d69d5..8cd0485d4 100644 --- a/selection/randomized/query.py +++ b/selection/randomized/query.py @@ -135,9 +135,10 @@ def summary(self, normal_sample=target_sample, alternatives=alternatives) - MLE_intervals = self.selective_MLE(observed_target, - cov_target, - cov_target_score)[5] + # MLE_intervals = self.selective_MLE(observed_target, + # cov_target, + # cov_target_score, + # level=level)[5] if not np.all(parameter == 0): pvalues = self.sampler.coefficient_pvalues(observed_target, @@ -155,7 +156,8 @@ def summary(self, MLE_intervals = self.selective_MLE(observed_target, cov_target, - cov_target_score)[4] + cov_target_score, + level=level)[4] intervals = self.sampler.confidence_intervals(observed_target, cov_target, @@ -182,6 +184,8 @@ def selective_MLE(self, return self.sampler.selective_MLE(observed_target, cov_target, cov_target_score, + self.randomizer.cov_prec[1], + self._initial_omega, self.observed_opt_state, level=level, solve_args=solve_args) @@ -763,8 +767,10 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, + prec_randomizer, + init_omega, init_soln, # initial (observed) value of optimization variables -- used as a feasible point. - # precise value used only for independent estimator + # precise value used only for independent estimator solve_args={'tol':1.e-12}, level=0.9): """ @@ -776,6 +782,8 @@ def selective_MLE(self, return selective_MLE(observed_target, cov_target, cov_target_score, + prec_randomizer, + init_omega, init_soln, self.affine_con.mean, self.affine_con.covariance, @@ -1296,9 +1304,46 @@ def _solve_barrier_nonneg(conjugate_arg, hess = np.linalg.inv(precision + np.diag(barrier_hessian(current))) return current_value, current, hess +def ind_unbiased_estimate(observed_target, + cov_target, + cov_target_score, + prec_randomizer, + init_omega, + level=0.9): + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + observed_target = np.atleast_1d(observed_target) + prec_target = np.linalg.inv(cov_target) + + gamma = -cov_target_score.T.dot(prec_target) + prec_randomizer = np.asarray(prec_randomizer) + linear_target = np.eye(cov_target.shape[0]) + \ + cov_target.dot(gamma.T.dot(prec_randomizer).dot(gamma)) + # cond_cov_target = np.linalg.inv(linear_target).dot(cov_target) + ind_unbiased_estimator = observed_target - cov_target.dot(gamma.T.dot(prec_randomizer).dot(init_omega)) + cov_unbiased_estimator = cov_target.dot(linear_target.T) + + unbiased_Z_scores = ind_unbiased_estimator / np.sqrt(np.diag(cov_unbiased_estimator)) + unbiased_pvalues = ndist.cdf(unbiased_Z_scores) + unbiased_pvalues = 2 * np.minimum(unbiased_pvalues, 1 - unbiased_pvalues) + + alpha = 1 - level + quantile = ndist.ppf(1 - alpha / 2.) + unbiased_intervals = np.vstack([ind_unbiased_estimator - quantile * np.sqrt(np.diag(cov_unbiased_estimator)), + ind_unbiased_estimator + quantile * np.sqrt(np.diag(cov_unbiased_estimator))]).T + + return (ind_unbiased_estimator, + cov_unbiased_estimator, + unbiased_Z_scores, unbiased_pvalues, + unbiased_intervals) + def selective_MLE(observed_target, cov_target, cov_target_score, + prec_randomizer, + init_omega, init_soln, # initial (observed) value of optimization variables -- used as a feasible point. # precise value used only for independent estimator cond_mean, @@ -1315,9 +1360,6 @@ def selective_MLE(observed_target, """ - if np.asarray(observed_target).shape in [(), (0,)]: - raise ValueError('no target specified') - observed_target = np.atleast_1d(observed_target) prec_target = np.linalg.inv(cov_target) @@ -1361,7 +1403,24 @@ def selective_MLE(observed_target, intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator + (ind_unbiased_estimator, + cov_unbiased_estimator, + unbiased_Z_scores, unbiased_pvalues, + unbiased_intervals) = ind_unbiased_estimate(observed_target, + cov_target, + cov_target_score, + prec_randomizer, + init_omega, + level=level) + + return (final_estimator, + observed_info_mean, + Z_scores, pvalues, + intervals, + ind_unbiased_estimator, + cov_unbiased_estimator, + unbiased_Z_scores, unbiased_pvalues, + unbiased_intervals) def normalizing_constant(target_parameter, diff --git a/selection/randomized/tests/.DS_Store b/selection/randomized/tests/.DS_Store new file mode 100644 index 000000000..886a1bc65 Binary files /dev/null and b/selection/randomized/tests/.DS_Store differ diff --git a/selection/randomized/tests/test_genlasso.py b/selection/randomized/tests/test_genlasso.py new file mode 100644 index 000000000..def585091 --- /dev/null +++ b/selection/randomized/tests/test_genlasso.py @@ -0,0 +1,301 @@ +from __future__ import division, print_function + +import numpy as np +import nose.tools as nt + +from scipy.stats import norm as ndist + +import regreg.api as rr + +# import rpy2.robjects as rpy +# from rpy2.robjects import numpy2ri +# rpy.r('library(selectiveInference)') + +from ..gen_lasso import gen_lasso, selected_targets, full_targets, debiased_targets, fused_targets +from ..lasso import lasso +from ...tests.instance import gaussian_instance +from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso +from ..randomization import randomization + +def test_simple_lasso(n=50, p=10, signal_fac=1.5, s=2, sigma=1, penalty_type="lasso", target='selected', rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000): + zero_int_results=np.array([],dtype=bool) + nonzero_int_results=np.array([],dtype=bool) + zero_results = np.array([],dtype=bool) + nonzero_results = np.array([],dtype=bool) + + for i in range(100): + if i % 10 == 0: print(i) + inst, const = gaussian_instance, gen_lasso.gaussian + signal = np.sqrt(signal_fac * np.log(p)) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + + # print("sigma is ", sigma) + # print("sigma_ is ", sigma_) + if target is not 'debiased': + # W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma_ + W = np.sqrt(1.5 * np.log(p)) * sigma_ + else: + # W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + W = np.sqrt(2 * np.log(p)) * sigma_ + + + conv = const(X, + Y, + W, # make sure just a number + penalty_type=penalty_type, + randomizer_scale=randomizer_scale * sigma_) + + signs, penalty_signs = conv.fit() + # nonzero = signs != 0 + nonzero = penalty_signs != 0 + + if target == 'full': + (observed_target, + cov_target, + cov_target_score, + alternatives) = full_targets(conv.loglike, + conv._W, + nonzero) + elif target == 'selected': + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero) + elif target == 'debiased': + (observed_target, + cov_target, + cov_target_score, + alternatives) = debiased_targets(conv.loglike, + conv._W, + nonzero, + penalty=conv.penalty) + + + _, pval, intervals = conv.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + ndraw=ndraw, + burnin=burnin, + compute_intervals=True) + + zero_idx = beta[nonzero] == 0 + nonzero_idx = beta[nonzero] != 0 + zero_intervals = intervals[zero_idx,:] + nonzero_intervals = intervals[nonzero_idx,:] + + zero_beta = beta[nonzero][zero_idx] + nonzero_beta = beta[nonzero][nonzero_idx] + + # print(zero_intervals) + + zero_int_results = np.concatenate((zero_int_results,np.logical_and(zero_beta > zero_intervals[:,0],zero_beta < zero_intervals[:,1]))) + nonzero_int_results = np.concatenate((nonzero_int_results,np.logical_and(nonzero_beta > nonzero_intervals[:,0],nonzero_beta < nonzero_intervals[:,1]))) + + zero_results = np.concatenate((zero_results,pval[zero_idx])) + nonzero_results = np.concatenate((nonzero_results,pval[nonzero_idx])) + # print(intervals) + + # print(zero_int_results) + # print(nonzero_int_results) + # print(zero_results) + # print(nonzero_results) + + zero_int_results = np.array(zero_int_results).flatten() + nonzero_int_results = np.array(nonzero_int_results).flatten() + print("zero intervals:", zero_int_results.mean()) + print("zero intervals:", zero_int_results.sum()) + print("nonzero intervals:", nonzero_int_results.mean()) + print("nonzero intervals:", nonzero_int_results.sum()) + if len(zero_results) > 0: + print("checking zero targets",(zero_results > .1).sum(),(zero_results > .1).mean()) + if len(nonzero_results) > 0: + print("checking nonzero targets",(nonzero_results < .1).sum(),(nonzero_results < .1).mean()) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] + + + + +# def test_simple_fused_lasso(p=100, +# signal_fac=1.5, +# ncc=2, +# sigma=3, +# penalty_param=None, +# penalty_type="fused", +# target='fused', +# rho=0.4, +# randomizer_scale=1, +# ndraw=5000, +# burnin=1000): +# zero_int_results=np.array([],dtype=bool) +# nonzero_int_results=np.array([],dtype=bool) +# zero_results = np.array([],dtype=bool) +# nonzero_results = np.array([],dtype=bool) +# for i in range(100): +# if i % 10 == 0: print(i) +# const = gen_lasso.gaussian +# signal = np.sqrt(signal_fac * np.log(p)) + +# X = np.eye(p) +# true_beta = beta = np.random.choice([-1,1],size=ncc,replace=False)#np.random.choice(np.linspace(-10,10,5),size=ncc,replace=False) + +# comp_size = int(p/ncc) +# remain = p % ncc + +# beta_stack = [] +# for i in range(ncc): +# beta_stack.append(beta[i]*np.ones(comp_size + (i < remain))) +# tiled_beta = np.hstack(beta_stack) + +# Y = tiled_beta + ndist.rvs(size=p)*sigma + +# # sigma_ = np.std(Y) +# sigma_ = np.std(Y-tiled_beta) + +# # print("sigma is ", sigma) +# # print("sigma_ is ", sigma_) + +# if target is not 'debiased': +# # W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma_ +# W = np.sqrt(1.5 * np.log(p)) * sigma_ +# else: +# # W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ +# W = np.sqrt(2 * np.log(p)) * sigma_ + + +# conv = const(X, +# Y, +# W, # make sure just a number +# penalty_param=penalty_param, +# penalty_type=penalty_type, +# randomizer_scale=randomizer_scale * sigma_) + +# signs, penalty_signs = conv.fit() +# # nonzero = signs != 0 +# nonzero = penalty_signs != 0 + +# if target == 'full': +# (observed_target, +# cov_target, +# cov_target_score, +# alternatives) = full_targets(conv.loglike, +# conv._W, +# nonzero) +# elif target == 'selected': +# (observed_target, +# cov_target, +# cov_target_score, +# alternatives) = selected_targets(conv.loglike, +# conv._W, +# nonzero) +# elif target == 'debiased': +# (observed_target, +# cov_target, +# cov_target_score, +# alternatives) = debiased_targets(conv.loglike, +# conv._W, +# nonzero, +# penalty=conv.penalty) +# elif target == 'fused': +# if nonzero.sum() == 0: +# breaks = np.array([0,p]) +# else: +# breaks = np.where(nonzero)[0] +# breaks += 1 +# breaks=np.hstack((0,breaks,p)) +# I = np.eye(p) +# # Wcc = np.array([conv._W[breaks[i]:breaks[i+1]].sum() for i in range(len(breaks)-1)]).squeeze() +# # Wcc = np.diag(Wcc) +# # print(Wcc) +# # print("---------") +# initial_ccs = [I[:,breaks[i]:breaks[i+1]].sum(axis=1) for i in range(len(breaks)-1)] +# # print(initial_ccs) +# # print("~~~~~~") +# initial_ccs = np.array(initial_ccs,dtype=bool) +# print("initial ccs", initial_ccs.T.astype(int)) +# print("nsb", conv.nsb.astype(int)) +# stop +# # W = conv._W.dot(initial_ccs) +# # print(initial_ccs) +# # print(W) +# (observed_target, +# cov_target, +# cov_target_score, +# alternatives) = fused_targets(conv.loglike, +# conv._W, +# initial_ccs) +# nobserved = observed_target.shape[0] +# nonzero = np.ones(nobserved).astype(bool) +# beta = np.array([np.mean(tiled_beta[initial_ccs[i]]) for i in range(nobserved)]) +# # print("observed,",observed_target) +# # print("beta,",beta) + + + +# _, pval, intervals = conv.summary(observed_target, +# cov_target, +# cov_target_score, +# alternatives, +# ndraw=ndraw, +# burnin=burnin, +# compute_intervals=True) + +# zero_idx = beta[nonzero] == 0 +# nonzero_idx = beta[nonzero] != 0 +# zero_intervals = intervals[zero_idx,:] +# nonzero_intervals = intervals[nonzero_idx,:] + +# zero_beta = beta[zero_idx] +# nonzero_beta = beta[nonzero_idx] + +# # print(zero_intervals) + +# zero_int_results = np.concatenate((zero_int_results,np.logical_and(zero_beta >= zero_intervals[:,0],zero_beta <= zero_intervals[:,1]))) +# nonzero_int_results = np.concatenate((nonzero_int_results,np.logical_and(nonzero_beta >= nonzero_intervals[:,0],nonzero_beta <= nonzero_intervals[:,1]))) + +# zero_results = np.concatenate((zero_results,pval[zero_idx])) +# nonzero_results = np.concatenate((nonzero_results,pval[nonzero_idx])) +# # print(intervals) + +# # print(zero_int_results) +# # print(nonzero_int_results) +# # print(zero_results) +# # print(nonzero_results) + +# zero_int_results = np.array(zero_int_results).flatten() +# nonzero_int_results = np.array(nonzero_int_results).flatten() +# print("zero intervals:", zero_int_results.mean()) +# print("zero intervals:", zero_int_results.sum()) +# print("nonzero intervals:", nonzero_int_results.mean()) +# print("nonzero intervals:", nonzero_int_results.sum()) +# if len(zero_results) > 0: +# print("checking zero targets",(zero_results > .1).sum(),(zero_results > .1).mean()) +# if len(nonzero_results) > 0: +# print("checking nonzero targets",(nonzero_results < .1).sum(),(nonzero_results < .1).mean()) +# # if len(zero_results) > 0: +# # print("zero targets",zero_results) +# # if len(nonzero_results) > 0: +# # print("nonzero targets",nonzero_results) +# # if len(zero_results) > 0: +# # nt.assert_true((zero_results > .1).mean() >= .9) +# # if len(nonzero_results) > 0: +# # nt.assert_true((nonzero_results < .1).mean() >= .9) +# # nt.assert_true(int_results.mean() >= .9) + + + + diff --git a/selection/randomized/tests/test_qp.py b/selection/randomized/tests/test_qp.py new file mode 100644 index 000000000..1e82a7193 --- /dev/null +++ b/selection/randomized/tests/test_qp.py @@ -0,0 +1,37 @@ +import numpy as np +import nose.tools as nt + +import regreg.api as rr +import regreg.affine as ra +from ..qp import qp_problem + +def test_qp(n=500, p=100): + # np.random.seed(0) + X = np.random.standard_normal((n, p)) + y = np.random.standard_normal(n) + # def is_pos_def(x): + # return np.all(np.linalg.eigvals(x) > 0) + # print(is_pos_def(X.T.dot(X))) + # print(X) + # loss = rr.squared_error(X, Y) + D = np.identity(p) + pen = rr.l1norm(p, lagrange=1.5) + + QP = qp_problem(X, y, np.eye(p), ridge_term=0) + soln = QP.solve() + + closed_form_soln = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y) + + # tol = 1e-5 + # yield np.testing.assert_allclose, closed_form_soln, soln, tol, tol, False, 'checking initial true and qp solutions' + + + # print(QP.m.getVars()) + # ADMM.solve(niter=1000) + + # coef1 = ADMM.atom_coefs + # problem2 = rr.simple_problem(loss, pen) + # coef2 = problem2.solve(tol=1.e-12, min_its=500) + + # np.testing.assert_allclose(coef1, coef2, rtol=1.e-3, atol=1.e-4) +