Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ if(NOT BUILD_CPU_ONLY)
src/preprocessing/quantize/binary.cu
src/preprocessing/quantize/pq.cu
src/preprocessing/spectral/spectral_embedding.cu
src/preprocessing/pca/pca.cu
src/selection/select_k_float_int64_t.cu
src/selection/select_k_float_int32_t.cu
src/selection/select_k_float_uint32_t.cu
Expand Down
185 changes: 185 additions & 0 deletions cpp/include/cuvs/preprocessing/pca.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/pca_types.hpp>

namespace cuvs::preprocessing::pca {

/** @brief Alias for raft::linalg::solver; this is the type of `params::algorithm`. */
using solver = raft::linalg::solver;

/**
* @brief Parameters for PCA decomposition. Ref:
* http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
*
* Every field carries a default member initializer, so only the fields that differ from the
* defaults need to be set by the caller.
*/
struct params {
/**
* @brief Number of components to keep (default: 1).
*
* This is an integer count. NOTE(review): the scikit-learn form `0 < n_components < 1`
* (select components by explained-variance ratio) cannot be expressed with this type.
*/
int n_components = 1;

/**
* @brief If false, data passed to fit are overwritten and running fit(X).transform(X) will
* not yield the expected results, use fit_transform(X) instead. Defaults to true.
*/
bool copy = true;

/**
* @brief When true (false by default) the components vectors are multiplied by the square
* root of n_samples and then divided by the singular values to ensure uncorrelated outputs with
* unit component-wise variances.
*/
bool whiten = false;

/** @brief The solver algorithm to use (default: solver::COV_EIG_DQ). */
solver algorithm = solver::COV_EIG_DQ;

/**
* @brief Tolerance for singular values computed by svd_solver == 'arpack' or
* the Jacobi solver (default: 0.0f).
*
* NOTE(review): 'arpack' is scikit-learn terminology; confirm which `solver` values actually
* read this field.
*/
float tol = 0.0f;

/**
* @brief Number of iterations for the power method computed by the Jacobi solver
* (default: 15).
*/
int n_iterations = 15;
};

/**
* @defgroup pca PCA (Principal Component Analysis)
* @{
*/

/**
* @brief Perform PCA fit operation.
*
* Computes the principal components, explained variances, singular values, and column means
* from the input data.
*
* All views use `int64_t` extents, and the matrix views are column-major, as fixed by the
* signature below. The example assumes `n_rows` and `n_cols` are defined by the caller.
*
* @code{.cpp}
* #include <raft/core/device_mdarray.hpp>
* #include <raft/core/resources.hpp>
* #include <cuvs/preprocessing/pca.hpp>
*
* raft::resources handle;
*
* cuvs::preprocessing::pca::params params;
* params.n_components = 2;
*
* auto input = raft::make_device_matrix<float, int64_t, raft::col_major>(
* handle, n_rows, n_cols);
* // ... fill input ...
*
* auto components = raft::make_device_matrix<float, int64_t, raft::col_major>(
* handle, params.n_components, n_cols);
* auto explained_var = raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto explained_var_ratio = raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto singular_vals = raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto mu = raft::make_device_vector<float, int64_t>(handle, n_cols);
* auto noise_vars = raft::make_device_scalar<float, int64_t>(handle, 0.0f);
*
* cuvs::preprocessing::pca::fit(handle, params,
* input.view(), components.view(), explained_var.view(),
* explained_var_ratio.view(), singular_vals.view(), mu.view(), noise_vars.view());
* @endcode
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
* @param[out] components principal components [n_components x n_cols] (col-major)
* @param[out] explained_var explained variances [n_components]
* @param[out] explained_var_ratio explained variance ratios [n_components]
* @param[out] singular_vals singular values [n_components]
* @param[out] mu column means [n_cols]
* @param[out] noise_vars noise variance (scalar)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false).
* Defaults to false.
*/
void fit(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false);

Copy link
Member Author

@aamijar aamijar Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making a note here that I don't think the existing cuml implementation has the ability to tune the percentage of explained variance.
For example, in sklearn we can set 0 < n_components < 1 where the user can select a percentage of the explained variance to recover and the n_components is automatically determined by the algorithm in order to satisfy that.

We will have to build that piece out since it doesn't exist in the current implementation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tuning is not what's being asked for. Exposing the explained variance is what's being requested (it is used for tuning / selecting the number of components but that's something the user does, not something we need to do).

/**
* @brief Perform PCA fit and transform operations.
*
* Computes the principal components and transforms the input data into the eigenspace
* in a single operation.
*
* The parameter list matches `fit` with one addition: `trans_input` is inserted directly after
* `input`. All views use `int64_t` extents and the matrix views are column-major.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
* @param[out] trans_input transformed data [n_rows x n_components] (col-major)
* @param[out] components principal components [n_components x n_cols] (col-major)
* @param[out] explained_var explained variances [n_components]
* @param[out] explained_var_ratio explained variance ratios [n_components]
* @param[out] singular_vals singular values [n_components]
* @param[out] mu column means [n_cols]
* @param[out] noise_vars noise variance (scalar)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false).
* Defaults to false.
*/
void fit_transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false);

/**
* @brief Perform PCA transform operation.
*
* Transforms the input data into the eigenspace using previously computed principal components.
*
* Note the argument order: the output `trans_input` is the LAST parameter here, whereas in
* `fit_transform` it directly follows `input`. All views use `int64_t` extents and the matrix
* views are column-major.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input data to transform [n_rows x n_cols] (col-major). Modified temporarily
* (mean-centered then restored).
* @param[in] components principal components [n_components x n_cols] (col-major)
* @param[in] singular_vals singular values [n_components]. NOTE(review): presumably only
* consulted when `config.whiten` is true; confirm against the raft implementation.
* @param[in] mu column means [n_cols]
* @param[out] trans_input transformed data [n_rows x n_components] (col-major)
*/
void transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input);

/**
* @brief Perform PCA inverse transform operation.
*
* Transforms data from the eigenspace back to the original space.
*
* This mirrors `transform` with the data arguments swapped: `trans_input` is the first data
* argument (input) and `output` is the last (result). All views use `int64_t` extents and the
* matrix views are column-major.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[in] trans_input transformed data [n_rows x n_components] (col-major)
* @param[in] components principal components [n_components x n_cols] (col-major)
* @param[in] singular_vals singular values [n_components]. NOTE(review): presumably only
* consulted when `config.whiten` is true; confirm against the raft implementation.
* @param[in] mu column means [n_cols]
* @param[out] output reconstructed data [n_rows x n_cols] (col-major)
*/
void inverse_transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> output);

/** @} */ // end group pca

} // namespace cuvs::preprocessing::pca
113 changes: 113 additions & 0 deletions cpp/src/preprocessing/pca/detail/pca.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cuvs/preprocessing/pca.hpp>

#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/pca.cuh>

namespace cuvs::preprocessing::pca::detail {

/**
* @brief Convert cuvs::preprocessing::pca::params to raft::linalg::paramsPCA.
*
* Copies `n_components`, `algorithm`, `tol`, `n_iterations`, `copy` and `whiten` from `config`
* as-is, and stores the caller-supplied data shape in `n_rows` / `n_cols`.
*
* @param[in] config cuVS PCA parameters
* @param[in] n_rows number of rows of the data matrix, stored in `prms.n_rows`
* @param[in] n_cols number of columns of the data matrix, stored in `prms.n_cols`
* @return the populated raft::linalg::paramsPCA
*
* NOTE(review): `config.n_components` and `config.n_iterations` are signed `int`; confirm the
* destination field types in raft::linalg::paramsPCA and whether negative values need rejecting.
*/
inline auto to_raft_params(const params& config, std::size_t n_rows, std::size_t n_cols)
-> raft::linalg::paramsPCA
{
raft::linalg::paramsPCA prms;
prms.n_rows = n_rows;
prms.n_cols = n_cols;
prms.n_components = config.n_components;
prms.algorithm = config.algorithm;
prms.tol = config.tol;
prms.n_iterations = config.n_iterations;
prms.copy = config.copy;
prms.whiten = config.whiten;
return prms;
Comment on lines +29 to +31
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
prms.copy = config.copy;
prms.whiten = config.whiten;
return prms;
prms.copy = config.copy;
prms.whiten = config.whiten;
prms.verbose = config.verbose;
return prms;

We are missing verbose here, no?

Copy link
Member Author

@aamijar aamijar Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

verbose was a unused parameter, removed it from pca.hpp in 99f32fc

}

/**
 * @brief Templated PCA fit that delegates to raft::linalg::pca_fit.
 *
 * The raft parameter struct is built from `config` plus the two extents of `input`; every other
 * argument is forwarded unchanged.
 *
 * @tparam DataT element type of all views
 * @tparam IndexT index type of all views
 * @param[in] handle raft resource handle
 * @param[in] config PCA parameters
 * @param[inout] input data matrix (col-major); its extents supply n_rows and n_cols
 * @param[out] components principal components (col-major)
 * @param[out] explained_var explained variances
 * @param[out] explained_var_ratio explained variance ratios
 * @param[out] singular_vals singular values
 * @param[out] mu column means
 * @param[out] noise_vars noise variance (scalar)
 * @param[in] flip_signs_based_on_U forwarded to raft::linalg::pca_fit
 */
template <typename DataT, typename IndexT>
void fit(raft::resources const& handle,
         const params& config,
         raft::device_matrix_view<DataT, IndexT, raft::col_major> input,
         raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
         raft::device_vector_view<DataT, IndexT> explained_var,
         raft::device_vector_view<DataT, IndexT> explained_var_ratio,
         raft::device_vector_view<DataT, IndexT> singular_vals,
         raft::device_vector_view<DataT, IndexT> mu,
         raft::device_scalar_view<DataT, IndexT> noise_vars,
         bool flip_signs_based_on_U)
{
  // The data shape is taken from the input view itself.
  const auto n_rows = input.extent(0);
  const auto n_cols = input.extent(1);
  auto pca_params   = to_raft_params(config, n_rows, n_cols);

  raft::linalg::pca_fit(handle,
                        pca_params,
                        input,
                        components,
                        explained_var,
                        explained_var_ratio,
                        singular_vals,
                        mu,
                        noise_vars,
                        flip_signs_based_on_U);
}

/**
 * @brief Templated PCA fit + transform that delegates to raft::linalg::pca_fit_transform.
 *
 * The raft parameter struct is built from `config` plus the two extents of `input`; every other
 * argument is forwarded unchanged.
 *
 * @tparam DataT element type of all views
 * @tparam IndexT index type of all views
 * @param[in] handle raft resource handle
 * @param[in] config PCA parameters
 * @param[inout] input data matrix (col-major); its extents supply n_rows and n_cols
 * @param[out] trans_input transformed data (col-major)
 * @param[out] components principal components (col-major)
 * @param[out] explained_var explained variances
 * @param[out] explained_var_ratio explained variance ratios
 * @param[out] singular_vals singular values
 * @param[out] mu column means
 * @param[out] noise_vars noise variance (scalar)
 * @param[in] flip_signs_based_on_U forwarded to raft::linalg::pca_fit_transform
 */
template <typename DataT, typename IndexT>
void fit_transform(raft::resources const& handle,
                   const params& config,
                   raft::device_matrix_view<DataT, IndexT, raft::col_major> input,
                   raft::device_matrix_view<DataT, IndexT, raft::col_major> trans_input,
                   raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
                   raft::device_vector_view<DataT, IndexT> explained_var,
                   raft::device_vector_view<DataT, IndexT> explained_var_ratio,
                   raft::device_vector_view<DataT, IndexT> singular_vals,
                   raft::device_vector_view<DataT, IndexT> mu,
                   raft::device_scalar_view<DataT, IndexT> noise_vars,
                   bool flip_signs_based_on_U)
{
  // The data shape is taken from the input view itself.
  const auto n_rows = input.extent(0);
  const auto n_cols = input.extent(1);
  auto pca_params   = to_raft_params(config, n_rows, n_cols);

  raft::linalg::pca_fit_transform(handle,
                                  pca_params,
                                  input,
                                  trans_input,
                                  components,
                                  explained_var,
                                  explained_var_ratio,
                                  singular_vals,
                                  mu,
                                  noise_vars,
                                  flip_signs_based_on_U);
}

/**
 * @brief Templated PCA transform that delegates to raft::linalg::pca_transform.
 *
 * The raft parameter struct is built from `config` plus the two extents of `input`; every other
 * argument is forwarded unchanged.
 *
 * @tparam DataT element type of all views
 * @tparam IndexT index type of all views
 * @param[in] handle raft resource handle
 * @param[in] config PCA parameters
 * @param[inout] input data to transform (col-major); its extents supply n_rows and n_cols
 * @param[in] components principal components (col-major)
 * @param[in] singular_vals singular values
 * @param[in] mu column means
 * @param[out] trans_input transformed data (col-major)
 */
template <typename DataT, typename IndexT>
void transform(raft::resources const& handle,
               const params& config,
               raft::device_matrix_view<DataT, IndexT, raft::col_major> input,
               raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
               raft::device_vector_view<DataT, IndexT> singular_vals,
               raft::device_vector_view<DataT, IndexT> mu,
               raft::device_matrix_view<DataT, IndexT, raft::col_major> trans_input)
{
  // The data shape is taken from the (untransformed) input view.
  const auto n_rows = input.extent(0);
  const auto n_cols = input.extent(1);
  auto pca_params   = to_raft_params(config, n_rows, n_cols);

  raft::linalg::pca_transform(
    handle, pca_params, input, components, singular_vals, mu, trans_input);
}

/**
 * @brief Templated PCA inverse transform that delegates to raft::linalg::pca_inverse_transform.
 *
 * The raft parameter struct is built from `config` plus the two extents of `output` (not of
 * `trans_input`); every other argument is forwarded unchanged.
 *
 * @tparam DataT element type of all views
 * @tparam IndexT index type of all views
 * @param[in] handle raft resource handle
 * @param[in] config PCA parameters
 * @param[in] trans_input transformed data (col-major)
 * @param[in] components principal components (col-major)
 * @param[in] singular_vals singular values
 * @param[in] mu column means
 * @param[out] output reconstructed data (col-major); its extents supply n_rows and n_cols
 */
template <typename DataT, typename IndexT>
void inverse_transform(raft::resources const& handle,
                       const params& config,
                       raft::device_matrix_view<DataT, IndexT, raft::col_major> trans_input,
                       raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
                       raft::device_vector_view<DataT, IndexT> singular_vals,
                       raft::device_vector_view<DataT, IndexT> mu,
                       raft::device_matrix_view<DataT, IndexT, raft::col_major> output)
{
  // The shape comes from the reconstructed output view, which has the original data's extents.
  const auto n_rows = output.extent(0);
  const auto n_cols = output.extent(1);
  auto pca_params   = to_raft_params(config, n_rows, n_cols);

  raft::linalg::pca_inverse_transform(
    handle, pca_params, trans_input, components, singular_vals, mu, output);
}

} // namespace cuvs::preprocessing::pca::detail
Loading
Loading