diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cc0fb26..6803be6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,7 +29,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
- python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
@@ -46,7 +46,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
- python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
diff --git a/.gitignore b/.gitignore
index 0a19790..47f98d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+.vscode/
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/README.md b/README.md
index bded931..22e6746 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,40 @@
-# Robust principle component analysis for Python
+# Robust principal component analysis for Python
-Work in progress. Coming soon here and on PyPI.
+This package provides algorithms to solve the Robust Principal Component Analysis (RPCA) problem, as presented by Candès et al.[[1]](#candes2011).
+Currently, a single algorithm is implemented: it solves the Principal Component Pursuit (PCP) convex relaxation of RPCA from the same paper, using the Inexact Augmented Lagrange Multiplier (IALM) method from Lin et al.[[2]](#lin2011)[[3]](#lin2013).
+
+## Example
+```python
+import numpy
+from pyrpca import rpca_pcp_ialm
+# given an m x n data matrix
+data = ...
+
+# decide on sparsity factor.
+# this parameter is also commonly known as `lambda`.
+sparsity_factor = 1.0 / numpy.sqrt(max(data.shape))
+
+# run the ialm algorithm.
+low_rank, sparse = rpca_pcp_ialm(data, sparsity_factor)
+```
## Installing
```shell
pip install pyrpca
```
-## Example
-TODO
+## Feature requests and contributing
+Pull requests and feature requests are welcome. The current version is minimal and suits my personal needs, but feel free to make suggestions. Even if the repository looks inactive, I will still respond :)
+
+## References
+1. [Emmanuel J. Candès, Xiaodong Li, Yi Ma, John Wright. Robust principal component analysis? Association for Computing Machinery 2011.](https://doi.org/10.1145/1970392.1970395) (preprint on [arXiv](https://doi.org/10.48550/arXiv.0912.3599))
+2. [Zhouchen Lin, Risheng Liu, Zhixun Su. Linearized Alternating Direction Method with Adaptive Penalty for Low-Rank Representation. arXiv 2011.](https://doi.org/10.48550/arXiv.1109.0367)
+3. [Zhouchen Lin, Minming Chen, Yi Ma. The Augmented Lagrange Multiplier Method for Exact Recovery of Corrupted Low-Rank Matrices, V3. arXiv 2013.](https://doi.org/10.48550/arXiv.1009.5055)
-## Acknowledgements
-Appreciation is due to various other Python implementations of RPCA; below is a non-exhaustive list.
-The code in this project has been loosely inspired by these works.
+## Acknowledgements
+Appreciation is due to various other Python implementations of RPCA that served as inspiration for this project. Below is a non-exhaustive list:
-- https://github.com/2020leon/rpca
-- https://github.com/dganguli/robust-pca
-- https://github.com/weilinear/PyRPCA
+- https://github.com/2020leon/rpca
+- https://github.com/dganguli/robust-pca
+- https://github.com/weilinear/PyRPCA
- https://github.com/loiccoyle/RPCA
diff --git a/pyproject.toml b/pyproject.toml
index c9790b3..da1b54f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,5 @@
[build-system]
-requires = [
- "setuptools >= 78",
- "setuptools-git-versioning >=2.1, <3",
-]
+requires = ["setuptools >= 78", "setuptools-git-versioning >=2.1, <3"]
build-backend = "setuptools.build_meta"
[tool.setuptools-git-versioning]
@@ -10,41 +7,32 @@ enabled = true
[project]
name = "pyrpca"
-description = "Robust principle component analysis for Python."
+description = "Robust principal component analysis for Python."
dynamic = ["version"]
readme = "README.md"
-requires-python = ">=3.9,<4"
+requires-python = ">=3.10,<4"
classifiers = [
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
]
-dependencies = [
- "numpy >=2.0.2, <3",
-]
-keywords = [
- "rpca",
- "robust pca",
- "robust principal component analysis",
-]
-authors = [
- {name = "Aart Stuurman", email = "aart@astuurman.com"},
-]
-maintainers = [
- {name = "Aart Stuurman", email = "aart@astuurman.com"},
-]
-license = {file = "LICENSE"}
+dependencies = ["numpy >= 2.0.2, < 3", "scipy >= 1.15.2"]
+keywords = ["rpca", "robust pca", "robust principal component analysis"]
+authors = [{ name = "Aart Stuurman", email = "aart@astuurman.com" }]
+maintainers = [{ name = "Aart Stuurman", email = "aart@astuurman.com" }]
+license = { file = "LICENSE" }
[project.optional-dependencies]
dev = [
"ruff == 0.11.2",
"mypy == 1.15.0",
"pytest == 8.3.5",
+ "scipy-stubs >= 1.15.2",
]
[project.urls]
homepage = "https://github.com/surgura/PyRPCA"
[tool.setuptools]
-package-dir = {"pyrpca" = "pyrpca"}
+package-dir = { "pyrpca" = "pyrpca" }
[tool.mypy]
strict = true
diff --git a/pyrpca/__init__.py b/pyrpca/__init__.py
index e69de29..573ceb6 100644
--- a/pyrpca/__init__.py
+++ b/pyrpca/__init__.py
@@ -0,0 +1,3 @@
+from .pcp_ialm import rpca_pcp_ialm
+
+__all__ = ["rpca_pcp_ialm"]
diff --git a/pyrpca/pcp_ialm.py b/pyrpca/pcp_ialm.py
new file mode 100644
index 0000000..084d054
--- /dev/null
+++ b/pyrpca/pcp_ialm.py
@@ -0,0 +1,85 @@
+"""Robust PCA via Principal Component Pursuit, solved with the inexact ALM method."""
+import numpy as np
+import numpy.typing as npt
+from numpy.linalg import norm
+from scipy.linalg import svd
+
+
+def rpca_pcp_ialm(
+    observations: npt.ArrayLike,
+    sparsity_factor: float,
+    max_iter: int = 1000,
+    mu: float | None = None,
+    mu_upper_bound: float | None = None,
+    rho: float = 1.5,
+    tol: float = 1e-7,
+    verbose: bool = True,
+) -> tuple[npt.ArrayLike, npt.ArrayLike]:
+ """
+ Solve the Principal Component Pursuit (PCP) convex relaxation of Robust PCA using the Inexact Augmented Lagrange Multiplier (IALM) method.
+
+ See README for algorithmic details and references.
+
+ Mu is updated every loop by multiplying it by `rho` until reaching `mu_upper_bound`.
+
+ Parameters:
+ observations: The m x n input matrix to decompose ('D' in the IALM paper).
+ sparsity_factor: Weight on the sparse term in the objective ('lambda' in the IALM paper).
+ max_iter: Maximum number of iterations to perform.
+ mu: Initial value for the penalty parameter. If None, defaults to 1/spectral norm of observations.
+ mu_upper_bound: Maximum allowed value for `mu`. If None, defaults to `mu * 1e7`.
+ rho: Multiplicative factor to increase `mu` in each iteration.
+ tol: Tolerance for stopping criterion (relative Frobenius norm of the residual).
+ verbose: If True, print status and debug information during optimization.
+
+ Returns:
+ low_rank_component: The recovered low-rank matrix ('A' in the IALM paper).
+ sparse_component: The recovered sparse matrix ('E' in the IALM paper).
+ """
+ if mu is None:
+ mu = float(1.25 / norm(observations, ord=2))
+ if mu_upper_bound is None:
+ mu_upper_bound = mu * 1e7
+
+ norm_fro_obs = norm(observations, ord="fro")
+
+ dual = observations / np.maximum(
+ norm(observations, ord=2), norm(observations, ord=np.inf) / sparsity_factor
+ )
+ sparse = np.zeros_like(observations)
+
+ i = 0
+ while True:
+ # compute next iteration of a
+ u, s, v = svd(observations - sparse + 1.0 / mu * dual, full_matrices=False)
+ s_thresholded = np.maximum(s - 1.0 / mu, 0)
+ low_rank = (u * s_thresholded) @ v
+
+ # compute next iteration of e
+ residual_for_sparse = observations - low_rank + 1.0 / mu * dual
+ sparse = np.sign(residual_for_sparse) * np.maximum(
+ np.abs(residual_for_sparse) - sparsity_factor / mu, 0
+ )
+
+ # calculate error
+ residual = observations - low_rank - sparse
+ err = norm(residual, ord="fro") / norm_fro_obs
+
+ i += 1
+
+ if verbose:
+ print(f"iter {i:<4} | err {err:<25} | mu {mu:<25}")
+
+ if err < tol:
+ if verbose:
+ print("Finished optimization. Error smaller than tolerance.")
+ break
+ if i == max_iter:
+ if verbose:
+                print("Finished optimization. Max iterations reached.")
+ break
+
+ # update dual and mu
+ dual = dual + mu * (residual)
+ mu = min(mu * rho, mu_upper_bound)
+ return low_rank, sparse
diff --git a/tests/test_dummy.py b/tests/test_dummy.py
deleted file mode 100644
index 5514af1..0000000
--- a/tests/test_dummy.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""
-Dummy test file to see the tests run properly.
-
-To be removed after an actual test is added.
-"""
-
-
-def test_dummpy() -> None:
- """
- Dummy test.
- """
- pass
diff --git a/tests/test_pcp_ialm.py b/tests/test_pcp_ialm.py
new file mode 100644
index 0000000..33fb10d
--- /dev/null
+++ b/tests/test_pcp_ialm.py
@@ -0,0 +1,43 @@
+import numpy as np
+from pyrpca import rpca_pcp_ialm
+from numpy.linalg import norm
+from scipy.sparse import random as sparse_random
+
+
+def test_rpca_separates_low_rank_and_sparse() -> None:
+ np.random.seed(0)
+ m, n, rank = 500, 400, 5
+
+ # create low-rank matrix A
+ u = np.random.randn(m, rank)
+ v = np.random.randn(rank, n)
+ low_rank = u @ v
+
+ # create sparse matrix E
+ sparse = sparse_random(
+ m, n, density=0.1, format="csr", data_rvs=np.random.randn
+ ).toarray()
+
+ # create observation matrix
+ observations = low_rank + sparse
+
+ # Run RPCA
+ low_rank_recovered, sparse_recovered = rpca_pcp_ialm(
+ observations,
+ sparsity_factor=1.0 / np.sqrt(max(observations.shape)),
+ )
+
+ # check that the reconstruction is close
+ reconstruction_error = norm(
+ observations - (low_rank_recovered + sparse_recovered), ord="fro"
+ ) / norm(observations, ord="fro")
+ assert reconstruction_error < 1e-6, (
+ f"Reconstruction error too high: {reconstruction_error}"
+ )
+
+ # check that recovered matrices are low rank and sparse
+ approx_rank = np.linalg.matrix_rank(low_rank_recovered, tol=1e-3)
+ sparsity = np.count_nonzero(sparse_recovered) / sparse_recovered.size
+
+ assert approx_rank <= rank + 2, f"Recovered A not low rank: {approx_rank}"
+ assert sparsity < 0.2, f"Recovered E not sparse: sparsity={sparsity}"