From 58eefdc5e6ab3bcfe957eccc2cba09f6172ccdab Mon Sep 17 00:00:00 2001
From: Hannes Hansen <mail@hannesh.de>
Date: Tue, 18 Jan 2022 09:58:57 +0100
Subject: [PATCH] use fracridge pypi package; fix #20

---
 frrsa/fitting/fitting.py             |   6 +-
 frrsa/fitting/fracridge/__init__.py  |   2 -
 frrsa/fitting/fracridge/_linalg.py   |  99 ------------
 frrsa/fitting/fracridge/_numba.py    |  71 ---------
 frrsa/fitting/fracridge/fracridge.py | 216 ---------------------------
 requirements.txt                     |   1 +
 6 files changed, 2 insertions(+), 393 deletions(-)
 delete mode 100755 frrsa/fitting/fracridge/__init__.py
 delete mode 100755 frrsa/fitting/fracridge/_linalg.py
 delete mode 100755 frrsa/fitting/fracridge/_numba.py
 delete mode 100755 frrsa/fitting/fracridge/fracridge.py

diff --git a/frrsa/fitting/fitting.py b/frrsa/fitting/fitting.py
index a8eb62b..025a934 100755
--- a/frrsa/fitting/fitting.py
+++ b/frrsa/fitting/fitting.py
@@ -13,11 +13,7 @@
 import numpy as np
 from sklearn.preprocessing import StandardScaler
 from sklearn.linear_model import Ridge
-
-if ('dev' not in str(Path(os.getcwd()).parent)) and ('draco' not in str(Path(os.getcwd()).parent)) and ('cobra' not in str(Path(os.getcwd()).parent)):
-    from fitting.fracridge import fracridge
-else:
-    from frrsa.frrsa.fitting.fracridge import fracridge
+from fracridge import fracridge
 
 
 def count_targets(y):
diff --git a/frrsa/fitting/fracridge/__init__.py b/frrsa/fitting/fracridge/__init__.py
deleted file mode 100755
index 3ec645e..0000000
--- a/frrsa/fitting/fracridge/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# from .version import version as __version__  # noqa
-from .fracridge import *  #noqa
\ No newline at end of file
diff --git a/frrsa/fitting/fracridge/_linalg.py b/frrsa/fitting/fracridge/_linalg.py
deleted file mode 100755
index de30a78..0000000
--- a/frrsa/fitting/fracridge/_linalg.py
+++ /dev/null
@@ -1,99 +0,0 @@
-"""
-This code is taken from the hyperlearn library: https://github.com/danielhanchen/hyperlearn/
-And distributed together with the license therein:
-BSD 3-Clause License
-Copyright (c) 2020, Daniel Han-Chen
-All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-1. Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-3. Neither the name of the copyright holder nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""
-
-from . import _numba as numba
-from ._numba import njit, USE_NUMBA, sign, arange
-import numpy as np
-from scipy.linalg import lapack as get_lapack_funcs
-
-
-def svd_flip(U, VT, U_decision=True):
-    """
-    Flips the signs of U and VT for SVD in order to force deterministic output.
-    Follows Sklearn convention by looking at U's maximum in columns
-    as default.
-    """
-    if U_decision:
-        max_abs_cols = abs(U).argmax(0)
-        signs = sign(U[max_abs_cols, arange(U.shape[1])])
-    else:
-        # rows of v, columns of u
-        max_abs_rows = abs(VT).argmax(1)
-        signs = sign( VT[arange(VT.shape[0]), max_abs_rows])
-
-    U *= signs
-    VT *= signs[:, np.newaxis]
-    return U, VT
-
-
-
-def svd(X, fast=True, U_decision=False, transpose=True):
-    """
-    [Edited 9/11/2018 --> Modern Big Data Algorithms p/n ratio check]
-    Computes the Singular Value Decomposition of any matrix.
-    So, X = U * S @ VT. Note will compute svd(X.T) if p > n.
-    Should be 99% same result. This means this implementation's
-    time complexity is O[ min(np^2, n^2p) ]
-    Speed
-    --------------
-    If USE_GPU:
-        Uses PyTorch's SVD. PyTorch uses (for now) a NON divide-n-conquer algo.
-        Submitted report to PyTorch:
-        https://github.com/pytorch/pytorch/issues/11174
-    If CPU:
-        Uses Numpy's Fortran C based SVD.
-        If NUMBA is not installed, uses divide-n-conqeur LAPACK functions.
-    If Transpose:
-        Will compute if possible svd(X.T) instead of svd(X) if p > n.
-        Default setting is TRUE to maintain speed.
-    Stability
-    --------------
-    SVD_Flip is used for deterministic output. Does NOT follow Sklearn convention.
-    This flips the signs of U and VT, using VT_based decision.
-    """
-    transpose = True if (transpose and X.shape[1] > X.shape[0]) else False
-    if transpose:
-        X, U_decision = X.T, not U_decision
-
-    n, p = X.shape
-    ratio = p/n
-    #### TO DO: If memory usage exceeds LWORK, use GESVD
-    if ratio >= 0.001:
-        if USE_NUMBA:
-            U, S, VT = numba.svd(X)
-        else:
-            #### TO DO: If memory usage exceeds LWORK, use GESVD
-            U, S, VT, __ = get_lapack_funcs("gesdd")(X, full_matrices=False)
-    else:
-        U, S, VT, __ = get_lapack_funcs("gesvd")(X, full_matrices=False)
-
-    U, VT = svd_flip(U, VT, U_decision=U_decision)
-
-    if transpose:
-        return VT.T, S, U.T
-    return U, S, VT
\ No newline at end of file
diff --git a/frrsa/fitting/fracridge/_numba.py b/frrsa/fitting/fracridge/_numba.py
deleted file mode 100755
index 279e202..0000000
--- a/frrsa/fitting/fracridge/_numba.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-This code is taken from the hyperlearn library: https://github.com/danielhanchen/hyperlearn/
-And distributed together with the license therein:
-BSD 3-Clause License
-Copyright (c) 2020, Daniel Han-Chen
-All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-1. Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-3. Neither the name of the copyright holder nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""
-
-from numpy import ones, eye, float32, float64, \
-                sum as __sum, arange as _arange, sign as __sign, uint as _uint, \
-                abs as __abs, minimum as _minimum, maximum as _maximum
-from numpy.linalg import svd as _svd, pinv as _pinv, eigh as _eigh, \
-                cholesky as _cholesky, lstsq as _lstsq, qr as _qr, \
-                norm as _norm
-from numba import njit, prange
-USE_NUMBA = True
-
-
-__all__ = ['svd', 'sign', 'arange']
-
-
-@njit(fastmath=True, nogil=True, cache=True)
-def svd(X):
-    return _svd(X, full_matrices=False)
-
-@njit(fastmath=True, nogil=True, cache=True)
-def sign(X):
-    return __sign(X)
-
-@njit(fastmath=True, nogil=True, cache=True)
-def arange(i):
-    return _arange(i)
-
-y32 = ones(2, dtype=float32)
-y64 = ones(2, dtype=float64)
-
-
-X = eye(2, dtype=float32)
-A = svd(X)
-A = sign(X)
-
-X = eye(2, dtype=float64)
-A = svd(X)
-A = sign(X)
-
-A = arange(100)
-
-A = None
-X = None
-y32 = None
-y64 = None
\ No newline at end of file
diff --git a/frrsa/fitting/fracridge/fracridge.py b/frrsa/fitting/fracridge/fracridge.py
deleted file mode 100755
index 191ae08..0000000
--- a/frrsa/fitting/fracridge/fracridge.py
+++ /dev/null
@@ -1,216 +0,0 @@
-"""
-"""
-# =============================================================================
-# Note from P.Kaniuth: the whole package "fracridge", of which this module is a
-# part, is the sole work of A. Rokem K. Kay, see their paper "Fractional ridge
-# regression: a fast, interpretable reparameterization of ridge regression"
-# (2020) GigaScience, Volume 9, Issue 12, December 2020,
-# https://doi.org/10.1093/gigascience/giaa133. and the respective GitHub repo
-# https://github.com/nrdg/fracridge/tree/master/fracridge
-
-# The only changes I implemented are the following:
-#     - the function "fracridge" contains the argument "X_test" which
-#         defaults to None.
-#     - the function "fracridge" can return predictions directly.
-#     - the function "fracridge" contains the argument "pred_wanted" to
-#         specify whether predictions shall be output at all (default True).
-#     - the function "fracridge" contains the argument "betas_wanted" to
-#         specify whether coefs shall be output at all (default False).
-#     - other functions defined in the original module were deleted, since they
-#         are not needed in the framework of the FR-RSA project.
-# Package last updated on: 14th of January 2022.
-# =============================================================================
-
-import numpy as np
-from numpy import interp
-import warnings
-
-# Module-wide constants
-BIG_BIAS = 10e3
-SMALL_BIAS = 10e-3
-BIAS_STEP = 0.2
-
-__all__ = ["fracridge"]
-
-def _do_svd(X, y, jit=True):
-    """
-    Helper function to produce SVD outputs
-    """
-    if len(y.shape) == 1:
-        y = y[:, np.newaxis]
-        
-    # Per default, we'll try to use the jit-compiled SVD, which should be
-    # more performant:
-    use_scipy = False
-    if jit:
-        try:
-            from ._linalg import svd
-        except ImportError:
-            warnings.warn("The `jit` key-word argument is set to `True` "\
-                          "but numba could not be imported, or just-in time "\
-                          "compilation failed. Falling back to "\
-                          "`scipy.linalg.svd`")
-            use_scipy = True
-
-    # If that doesn't work, or you asked not to, we'll use scipy SVD:
-    if not jit or use_scipy:
-        from functools import partial
-        from scipy.linalg import svd  # noqa
-        svd = partial(svd, full_matrices=False)
-
-    if X.shape[0] > X.shape[1]:
-        uu, ss, v_t = svd(X.T @ X)
-        selt = np.sqrt(ss)
-        if y.shape[-1] >= X.shape[0]:
-            ynew = (np.diag(1./selt) @ v_t @ X.T) @ y
-        else:
-            ynew = np.diag(1./selt) @ v_t @ (X.T @ y)
-
-    else:
-        # This rotates the targets by the unitary matrix uu.T:
-        uu, selt, v_t = svd(X)
-        ynew = uu.T @ y
-
-    ols_coef = (ynew.T / selt).T
-    
-    return selt, v_t, ols_coef
-
-
-def fracridge(X, y, X_test=None, fracs=None, tol=1e-10, jit=True, betas_wanted=False, pred_wanted=True):
-    """
-    Approximates alpha parameters to match desired fractions of OLS length.
-    Parameters
-    ----------
-    X : ndarray, shape (n, p)
-        Design matrix for regression, with n number of
-        observations and p number of model parameters.
-    y : ndarray, shape (n, b)
-        Data, with n number of observations and b number of targets.
-    fracs : float or 1d array, optional
-        The desired fractions of the parameter vector length, relative to
-        OLS solution. If 1d array, the shape is (f,). This input is required
-        to be sorted. Otherwise, raises ValueError.
-        Default: np.arange(.1, 1.1, .1).
-    jit : bool, optional
-        Whether to speed up computations by using a just-in-time compiled
-        version of core computations. This may not work well with very large
-        datasets. Default: True
-    Returns
-    -------
-    coef : ndarray, shape (p, f, b)
-        The full estimated parameters across units of measurement for every
-        desired fraction.
-    alphas : ndarray, shape (f, b)
-        The alpha coefficients associated with each solution
-    Examples
-    --------
-    Generate random data:
-    >>> np.random.seed(0)
-    >>> y = np.random.randn(100)
-    >>> X = np.random.randn(100, 10)
-    Calculate coefficients with naive OLS:
-    >>> coef = np.linalg.inv(X.T @ X) @ X.T @ y
-    >>> print(np.linalg.norm(coef))  # doctest: +NUMBER
-    0.35
-    Call fracridge function:
-    >>> coef2, alpha = fracridge(X, y, 0.3)
-    >>> print(np.linalg.norm(coef2))  # doctest: +NUMBER
-    0.10
-    >>> print(np.linalg.norm(coef2) / np.linalg.norm(coef))  # doctest: +NUMBER
-    0.3
-    Calculate coefficients with naive RR:
-    >>> alphaI = alpha * np.eye(X.shape[1])
-    >>> coef3 = np.linalg.inv(X.T @ X + alphaI) @ X.T @ y
-    >>> print(np.linalg.norm(coef2 - coef3))  # doctest: +NUMBER
-    0.0
-    """
-    if fracs is None:
-        fracs = np.arange(.1, 1.1, .1)
-
-    if hasattr(fracs, "__len__"):
-        if np.any(np.diff(fracs) < 0):
-            raise ValueError("The `frac` inputs to the `fracridge` function ",
-                             f"must be sorted. You provided: {fracs}")
-
-    else:
-        fracs = [fracs]
-    fracs = np.array(fracs)
-
-    nn, pp = X.shape
-    if len(y.shape) == 1:
-        y = y[:, np.newaxis]
-
-    bb = y.shape[-1]
-    ff = fracs.shape[0]
-
-    # Calculate the rotation of the data
-    selt, v_t, ols_coef = _do_svd(X, y, jit=jit)
-
-    # Set solutions for small eigenvalues to 0 for all targets:
-    isbad = selt < tol
-    if np.any(isbad):
-        warnings.warn("Some eigenvalues are being treated as 0")
-
-    ols_coef[isbad, ...] = 0
-
-    # Limits on the grid of candidate alphas used for interpolation:
-    val1 = BIG_BIAS * selt[0] ** 2
-    val2 = SMALL_BIAS * selt[-1] ** 2
-
-    # Generates the grid of candidate alphas used in interpolation:
-    alphagrid = np.concatenate(
-        [np.array([0]),
-         10 ** np.arange(np.floor(np.log10(val2)),
-                         np.ceil(np.log10(val1)), BIAS_STEP)])
-
-    # The scaling factor applied to coefficients in the rotated space is
-    # lambda**2 / (lambda**2 + alpha), where lambda are the singular values
-    seltsq = selt**2
-    sclg = seltsq / (seltsq + alphagrid[:, None])
-    sclg_sq = sclg**2
-
-    # Prellocate the solution:
-    if nn >= pp:
-        first_dim = pp
-    else:
-        first_dim = nn
-
-    coef = np.empty((first_dim, ff, bb))
-    alphas = np.empty((ff, bb))
-
-    # The main loop is over targets:
-    for ii in range(y.shape[-1]):
-        # Applies the scaling factors per alpha
-        newlen = np.sqrt(sclg_sq @ ols_coef[..., ii]**2).T
-        # Normalize to the length of the unregularized solution,
-        # because (alphagrid[0] == 0)
-        newlen = (newlen / newlen[0])
-        # Perform interpolation in a log transformed space (so it behaves
-        # nicely), avoiding log of 0.
-        temp = interp(fracs, newlen[::-1], np.log(1 + alphagrid)[::-1])
-        # Undo the log transform from the previous step
-        targetalphas = np.exp(temp) - 1
-        # Allocate the alphas for this target:
-        alphas[:, ii] = targetalphas
-        # Calculate the new scaling factor, based on the interpolated alphas:
-        sc = seltsq / (seltsq + targetalphas[np.newaxis].T)
-        # Use the scaling factor to calculate coefficients in the rotated
-        # space:
-        coef[..., ii] = (sc * ols_coef[..., ii]).T
-
-    # After iterating over all targets, we unrotate using the unitary v
-    # matrix and reshape to conform to desired output:
-    
-    if pred_wanted:
-        pred = np.reshape(X_test @ v_t.T @ coef.reshape((first_dim, ff * bb)),
-                      (X_test.shape[0], ff, bb)).squeeze()
-    else:
-        pred = None
-    
-    if betas_wanted:
-        coef = np.reshape(v_t.T @ coef.reshape((first_dim, ff * bb)),
-          (pp, ff, bb)).squeeze()
-    else:
-        coef = None
-        
-    return pred, coef, alphas
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index b19c823..c0ecda1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ scikit-learn==1.0.*
 scipy==1.7.*
 spyder
 spyder-terminal
+fracridge==1.4.3