Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
6f42ce7
save code
Oct 17, 2025
36d2aa1
remove warmup
Oct 17, 2025
87c8e69
compile normalize grad + figsize factor on plot
Nov 4, 2025
d86808f
make remove duplicates optional
Nov 17, 2025
11b952b
fix prev_size dataset in split_data script
Dec 1, 2025
e0c9be4
format code
Dec 15, 2025
455c2d5
add map model as option
Dec 16, 2025
fa9baa4
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Dec 19, 2025
1ee0b5c
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Dec 19, 2025
4605d21
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Dec 19, 2025
2cbd4e4
fix merge
Dec 19, 2025
9d9a915
remove 3.14 as torch compile is not supported yet
Dec 19, 2025
716da6f
fix merge
Dec 19, 2025
b6dcefd
add missing keys to args dict in tests
Dec 19, 2025
0bcc1cc
batch function dataset
Jan 9, 2026
1e06197
Merge branch 'develop' of github.com:DsysDML/rbms into papier_ptt_train
Jan 9, 2026
efe31de
use batch method
Jan 9, 2026
4695ab1
add visible_type to EBM class
Jan 13, 2026
0cae04c
change variable_type after conversion
Jan 13, 2026
10af4f2
change variable_type from binary to bernoulli
Jan 13, 2026
7607287
add visible_type
Jan 13, 2026
152803e
add categorical_to_bernoulli implementation
Jan 13, 2026
878f14e
fix variable_type
Jan 13, 2026
3520807
match dataset variable type with model visible type
Jan 13, 2026
1c75c62
sample bernoulli when variable_type is bernoulli
Jan 14, 2026
fc19aef
add log_scale option to PCA plot
Jan 14, 2026
2ca6df6
removed unused variable in non centered gradient
Jan 27, 2026
1f4d543
add conversion print + astype to dataset class
Jan 27, 2026
60c42f6
add __eq__ to class for easier comparison
Jan 27, 2026
852725d
add IIRBM and BGRBM to map_model
Jan 27, 2026
1cfd16e
add model_type and normalize_grad option to parser
Jan 27, 2026
8c69c83
add dataset weights arg
Jan 27, 2026
da1035c
fix binary to bernoulli and add ising to model match
Jan 27, 2026
c654f8f
make normalize_grad optional
Jan 27, 2026
8365a70
save result from get_eigenvalues_history in file to avoid repeating c…
Jan 27, 2026
1a3682b
change version number
Jan 27, 2026
ecbfb08
simplify imports
Jan 27, 2026
9da903c
fix: add __init__ to bernoulli_gaussian
Jan 29, 2026
bdf553e
clip grad
Feb 4, 2026
dc54a16
rework the main loop and add rbms restore script allowing to change m…
Feb 4, 2026
be952e4
new parser, keep the old functions for compatibility
Feb 4, 2026
8559d4e
save learning rate during training and remove the hyperparameters loa…
Feb 4, 2026
feb4e0a
util to handle optimizer declaration
Feb 4, 2026
9544f24
remove test for removed function
Feb 4, 2026
34ee17a
remove weights from init_parameters
Feb 4, 2026
b363fff
add learning_rate
Feb 4, 2026
7ed3c2b
margaret update
Feb 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/codecov.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.14
python-version: 3.13

- name: Install test dependencies
run: pip install pytest pytest-cov
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.12, 3.13, 3.14]
python-version: [3.12, 3.13]
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "rbms"
version = "0.5.0"
version = "0.6.0"
authors = [
{name="Nicolas Béreux", email="nicolas.bereux@gmail.com"},
{name="Aurélien Decelle"},
Expand All @@ -19,12 +19,12 @@ maintainers = [
]
description = "Training and analyzing Restricted Boltzmann Machines in PyTorch"
readme = "README.md"
requires-python = ">=3.12"
requires-python = ">=3.12, <3.14"
dependencies = [
"h5py>=3.12.0",
"numpy>=2.0.0",
"matplotlib>=3.8.0",
"torch>=2.5.0",
"torch>=2.6.0",
"tqdm>=4.65.0",
]

Expand Down
42 changes: 42 additions & 0 deletions rbms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from rbms.bernoulli_bernoulli.classes import BBRBM
from rbms.bernoulli_gaussian.classes import BGRBM
from rbms.dataset import load_dataset
from rbms.dataset.utils import convert_data
from rbms.io import load_model, load_params
from rbms.ising_ising.classes import IIRBM
from rbms.map_model import map_model
from rbms.plot import plot_image, plot_mult_PCA
from rbms.potts_bernoulli.classes import PBRBM
from rbms.utils import (
bernoulli_to_ising,
compute_log_likelihood,
get_categorical_configurations,
get_eigenvalues_history,
get_flagged_updates,
get_saved_updates,
ising_to_bernoulli,
)

__all__ = [
BBRBM,
BGRBM,
IIRBM,
PBRBM,
map_model,
bernoulli_to_ising,
ising_to_bernoulli,
compute_log_likelihood,
get_eigenvalues_history,
get_saved_updates,
get_flagged_updates,
get_categorical_configurations,
plot_mult_PCA,
plot_image,
load_params,
load_model,
load_dataset,
convert_data,
]


__version__ = "0.5.1"
11 changes: 10 additions & 1 deletion rbms/bernoulli_bernoulli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# ruff: noqa
from rbms.bernoulli_bernoulli.classes import BBRBM
from rbms.bernoulli_bernoulli.functional import *
from rbms.bernoulli_bernoulli.functional import (
compute_energy,
compute_energy_hiddens,
compute_energy_visibles,
compute_gradient,
init_chains,
init_parameters,
sample_hiddens,
sample_visibles,
)
2 changes: 2 additions & 0 deletions rbms/bernoulli_bernoulli/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

class BBRBM(RBM):
"""Parameters of the Bernoulli-Bernoulli RBM"""

visible_type: str = "bernoulli"

def __init__(
self,
Expand Down
8 changes: 1 addition & 7 deletions rbms/bernoulli_bernoulli/implement.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import torch
from torch import Tensor
from torch.nn.functional import softmax


@torch.jit.script
Expand Down Expand Up @@ -77,7 +76,7 @@ def _compute_gradient(
w_data = w_data.view(-1, 1)
w_chain = w_chain.view(-1, 1)
# Turn the weights of the chains into normalized weights
chain_weights = softmax(-w_chain, dim=0)
chain_weights = w_chain / w_chain.sum()
w_data_norm = w_data.sum()

# Averages over data and generated samples
Expand All @@ -102,11 +101,6 @@ def _compute_gradient(
grad_vbias = v_data_mean - v_gen_mean - (grad_weight_matrix @ h_data_mean)
grad_hbias = h_data_mean - h_gen_mean - (v_data_mean @ grad_weight_matrix)
else:
v_data_centered = v_data
h_data_centered = mh_data
v_gen_centered = v_chain
h_gen_centered = h_chain

# Gradient
grad_weight_matrix = ((v_data * w_data).T @ mh_data) / w_data_norm - (
(v_chain * chain_weights).T @ h_chain
Expand Down
12 changes: 12 additions & 0 deletions rbms/bernoulli_gaussian/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# ruff: noqa
from rbms.bernoulli_gaussian.classes import BGRBM
from rbms.bernoulli_gaussian.functional import (
compute_energy,
compute_energy_hiddens,
compute_energy_visibles,
compute_gradient,
init_chains,
init_parameters,
sample_hiddens,
sample_visibles,
)
2 changes: 2 additions & 0 deletions rbms/bernoulli_gaussian/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
class BGRBM(RBM):
"""Bernoulli-Gaussian RBM with fixed hidden variance = 1/Nv, 0-1 visibles, hidden and visible biases"""

visible_type: str = "bernoulli"

def __init__(
self,
weight_matrix: Tensor,
Expand Down
8 changes: 1 addition & 7 deletions rbms/bernoulli_gaussian/implement.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import torch
from torch import Tensor
from torch.nn.functional import softmax


@torch.jit.script
Expand Down Expand Up @@ -84,7 +83,7 @@ def _compute_gradient(
) -> None:
w_data = w_data.view(-1, 1)
w_chain = w_chain.view(-1, 1)
chain_weights = softmax(-w_chain, dim=0)
chain_weights = w_chain / w_chain.sum()
w_data_norm = w_data.sum()

v_data_mean = (v_data * w_data).sum(0) / w_data_norm
Expand All @@ -108,11 +107,6 @@ def _compute_gradient(
grad_vbias = v_data_mean - v_gen_mean - (grad_weight_matrix @ h_data_mean)
grad_hbias = h_data_mean - h_gen_mean - (v_data_mean @ grad_weight_matrix)
else:
v_data_centered = v_data
h_data_centered = h_data
v_gen_centered = v_chain
h_gen_centered = h_chain

# Gradient: h_data instead of mh_data
grad_weight_matrix = ((v_data * w_data).T @ h_data) / w_data_norm - (
(v_chain * chain_weights).T @ h_chain
Expand Down
18 changes: 18 additions & 0 deletions rbms/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class EBM(ABC):

name: str
device: torch.device
visible_type: str

@abstractmethod
def __init__(self): ...
Expand All @@ -28,6 +29,13 @@ def __mul__(self, other: float) -> EBM:
"""Multiplies the parameters of the RBM by a float."""
...

def __eq__(self, other: EBM):
other_params = other.named_parameters()
for k, v in self.named_parameters().items():
if not torch.equal(other_params[k], v):
return False
return True

@abstractmethod
def sample_visibles(
self, chains: dict[str, Tensor], beta: float = 1.0
Expand Down Expand Up @@ -209,12 +217,22 @@ def init_grad(self) -> None:
for p in self.parameters():
p.grad = torch.zeros_like(p)

@torch.compile
def normalize_grad(self) -> None:
norm_grad = torch.sqrt(
torch.sum(torch.tensor([p.grad.square().sum() for p in self.parameters()]))
)
for p in self.parameters():
p.grad /= norm_grad
# for p in self.parameters():
# p.grad /= p.grad.norm()

def clip_grad(self, max_norm=5):
for p in self.parameters():
grad_norm = p.grad.norm()
if grad_norm > max_norm:
p.grad /= grad_norm
p.grad *= max_norm


class RBM(EBM):
Expand Down
8 changes: 3 additions & 5 deletions rbms/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,8 @@ def compute_2b_correlations(
)
if full_mat:
res = torch.triu(res, 1) + torch.tril(res).T
return res / torch.sqrt(
torch.diag(res).unsqueeze(1) @ torch.diag(res).unsqueeze(0)
)
return torch.corrcoef(data)
return res #/ torch.sqrt(torch.diag(res).unsqueeze(1) @ torch.diag(res).unsqueeze(0))
return torch.corrcoef(data.T)


@torch.jit.script
Expand Down Expand Up @@ -102,7 +100,7 @@ def compute_3b_correlations(
res = _3b_batched(
centered_data=centered_data,
weights=weights.unsqueeze(1),
batcu_size=batch_size,
batch_size=batch_size,
)
if full_mat:
res = _3b_full_mat(res)
Expand Down
10 changes: 8 additions & 2 deletions rbms/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def load_dataset(
subset_labels: list[int] | None = None,
use_weights: bool = False,
alphabet="protein",
remove_duplicates: bool = False,
device: str = "cpu",
dtype: torch.dtype = torch.float32,
) -> tuple[RBMDataset, RBMDataset | None]:
Expand Down Expand Up @@ -54,10 +55,15 @@ def load_dataset(
if labels is None:
labels = -np.ones(data.shape[0])

# Remove duplicates and internally shuffle the dataset
unique_ind = get_unique_indices(torch.from_numpy(data)).cpu().numpy()
if remove_duplicates:
# Remove duplicates and internally shuffle the dataset
unique_ind = get_unique_indices(torch.from_numpy(data)).cpu().numpy()
else:
unique_ind = np.arange(data.shape[0])

idx = torch.randperm(unique_ind.shape[0])
if unique_ind.shape[0] < data.shape[0]:
print(f"N_samples: {data.shape[0]} -> {unique_ind.shape[0]}")
data = data[unique_ind[idx]]
labels = labels[unique_ind[idx]]
weights = weights[unique_ind[idx]]
Expand Down
19 changes: 18 additions & 1 deletion rbms/dataset/dataset_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import torch
from torch.utils.data import Dataset
from tqdm import tqdm
from tqdm.autonotebook import tqdm


class RBMDataset(Dataset):
Expand Down Expand Up @@ -126,6 +126,13 @@ def get_gzip_entropy(self, mean_size: int = 50, num_samples: int = 100):

def match_model_variable_type(self, visible_type: str):
self.data = convert_data[self.variable_type][visible_type](self.data)
if self.variable_type != visible_type:
print(f"Converting from '{self.variable_type}' to '{visible_type}'")
print(self.data)
self.variable_type = visible_type

def astype(self, target_variable_type: str):
return convert_data[self.variable_type][target_variable_type](self.data)

def split_train_test(
self,
Expand Down Expand Up @@ -173,3 +180,13 @@ def split_train_test(
dtype=self.dtype,
)
return train_dataset, test_dataset

def batch(self, batch_size: int) -> dict[str, Union[np.ndarray, torch.Tensor]]:
rand_idx = torch.randperm(len(self))
sampled_batch = self[rand_idx[:batch_size]]
match self.variable_type:
case "bernoulli":
sampled_batch["data"] = torch.bernoulli(sampled_batch["data"])
case _:
pass
return sampled_batch
6 changes: 3 additions & 3 deletions rbms/dataset/load_h5.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def load_HDF5(
Tuple[np.ndarray, np.ndarray]: The dataset and labels.
"""
labels = None
variable_type = "binary"
variable_type = "bernoulli"
with h5py.File(filename, "r") as f:
if "samples" not in f.keys():
raise ValueError(
Expand All @@ -28,10 +28,10 @@ def load_HDF5(
dataset = np.array(f["samples"][()])
if "variable_type" not in f.keys():
print(
f"No variable_type found in the hdf5 file keys: {f.keys()}. Assuming 'binary'."
f"No variable_type found in the hdf5 file keys: {f.keys()}. Assuming 'bernoulli'."
)
print(
"Set a 'variable_type' with value 'binary', 'categorical' or 'continuous' in the hdf5 archive to remove this message"
"Set a 'variable_type' with value 'bernoulli', 'ising', 'categorical' or 'continuous' in the hdf5 archive to remove this message"
)
else:
variable_type = f["variable_type"][()].decode()
Expand Down
6 changes: 6 additions & 0 deletions rbms/dataset/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ def add_args_dataset(parser: argparse.ArgumentParser) -> argparse.ArgumentParser
default="protein",
help="(Defaults to protein). Type of encoding for the sequences. Choose among ['protein', 'rna', 'dna'] or a user-defined string of tokens.",
)
dataset_args.add_argument(
"--remove_duplicates",
default=False,
action="store_true",
help="Remove duplicates from the dataset before splitting.",
)
dataset_args.add_argument(
"--seed",
default=None,
Expand Down
Loading
Loading