Changes from all commits (34 commits)
bb6d8d8
Add ECG, MGAB, and MITDB datasets from the tsb-uad benchmark
Jad-yehya Aug 1, 2025
95deadb
FIX reshape y_test and y_hat in evaluate_result method; update get_ob…
Jad-yehya Aug 1, 2025
3e53d19
ADD implement Matrix Profile solver
Jad-yehya Aug 1, 2025
4952aca
ADD implement TSB-OCSVM
Jad-yehya Aug 1, 2025
3a0ab4c
ADD Autoencoder for univariate data (inspired from tsb-uad)
Jad-yehya Aug 1, 2025
2cdf79a
ADD RoseCDL solver
Jad-yehya Aug 1, 2025
f6f55ea
ADD DAPHNET dataset
Jad-yehya Aug 1, 2025
4bd99f5
ADD Dodgers dataset
Jad-yehya Aug 1, 2025
8d26697
ADD Genesis dataset
Jad-yehya Aug 1, 2025
95c9a99
ADD GHL dataset
Jad-yehya Aug 1, 2025
01ed75b
ADD Occupancy dataset
Jad-yehya Aug 1, 2025
dfa9fed
ADD SensorScope dataset
Jad-yehya Aug 1, 2025
e05fd97
ADD SensorScope dataset
Jad-yehya Aug 1, 2025
4118e06
Fix formatting
Jad-yehya Aug 1, 2025
30491fe
Fix formatting
Jad-yehya Aug 1, 2025
ae5c1c2
New datasets
Jad-yehya Aug 1, 2025
4ff164e
Refactor datasets and new solvers.
Jad-yehya Sep 15, 2025
fef8251
moved legacy solvers to solvers/legacy
Jad-yehya Sep 22, 2025
ff9e80a
delete duplicate solvers
Jad-yehya Sep 22, 2025
9226905
RFC dataset loading and reshaping for anomaly detection
Jad-yehya Dec 5, 2025
d3c3f24
solvers lint
Jad-yehya Dec 6, 2025
32e701c
Remove TSB-OCSVM solver implementation
Jad-yehya Dec 6, 2025
365875a
lint + removed plotting
Jad-yehya Dec 6, 2025
8e2921d
datasets lint
Jad-yehya Dec 6, 2025
5b4730b
lint and clean objective
Jad-yehya Dec 6, 2025
bc1fd65
lint test_config.py
Jad-yehya Dec 6, 2025
c4042a4
lint
Jad-yehya Dec 6, 2025
023d396
commented unsupported feature
Jad-yehya Dec 6, 2025
1cc55b6
FIX skip test with proper syntax
tomMoral Dec 7, 2025
f7b9e74
FIX test running on get_data
tomMoral Dec 7, 2025
3c64f1c
xfailing anomalybert installation
Jad-yehya Dec 7, 2025
eb61355
lint test_config
Jad-yehya Dec 7, 2025
09cdbb0
CLN remove safe_import_context + adapt pip
tomMoral Dec 7, 2025
b89e3b5
ENH improve tests
tomMoral Dec 8, 2025
248 changes: 248 additions & 0 deletions benchmark_utils/models.py
@@ -1,4 +1,10 @@
from torch import nn
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from tqdm import tqdm


class ARModel(nn.Module):
@@ -122,3 +128,245 @@ def forward(self, x):
x, (_, _) = self.decoder(x)

return x


class SlidingWindowDataset(Dataset):
def __init__(self, data, window_size):
self.data = data
self.window_size = window_size

def __len__(self):
return len(self.data) - self.window_size + 1

def __getitem__(self, idx):
window = self.data[idx:idx + self.window_size]
return window # Input and target are the same for autoencoder


class Autoencoder(nn.Module):
def __init__(
self,
input_size=32,
hidden_size=32,
latent_size=16,
sliding_window=10
):
super(Autoencoder, self).__init__()

self.sliding_window = sliding_window
self.decision_scores_ = None

# Encoder
self.encoder = nn.Sequential(
nn.Linear(input_size, hidden_size),
nn.ReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, latent_size),
nn.ReLU(),
nn.BatchNorm1d(latent_size),
)

# Decoder
self.decoder = nn.Sequential(
nn.Linear(latent_size, hidden_size),
nn.ReLU(),
nn.Linear(hidden_size, input_size),
nn.ReLU(),
)

def forward(self, x):
# Flatten input if needed
x = x.view(x.size(0), -1)

# Encode
encoded = self.encoder(x)

# Decode
decoded = self.decoder(encoded)

return decoded

def encode(self, x):
x = x.view(x.size(0), -1)
return self.encoder(x)

def _create_sliding_windows(self, X):
"""Create sliding windows from input data"""
if isinstance(X, np.ndarray):
X = torch.from_numpy(X).float()

# If X is 1D, reshape to 2D
if X.dim() == 1:
X = X.unsqueeze(1)

windows = []
for i in range(len(X) - self.sliding_window + 1):
window = X[i:i + self.sliding_window].flatten()
windows.append(window)

return torch.stack(windows)

def fit(
self,
X,
num_epochs=50,
learning_rate=1e-3,
device=None,
batch_size=32
):
"""
Train the autoencoder on the provided data.

Args:
X: Input data tensor or numpy array shape (n_samples, n_features)
num_epochs: Number of training epochs
learning_rate: Learning rate for optimizer
device: Device to train on ('cuda' or 'cpu')
batch_size: Batch size for training

Returns:
List of training losses per epoch
"""
if device is None:
device = torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)

# Convert to tensor if numpy array
if isinstance(X, np.ndarray):
X = torch.from_numpy(X).float()

# Ensure X is 2D
if X.dim() == 1:
X = X.unsqueeze(1)
if X.dim() == 3:
# (n_samples, n_timesteps, n_features): flatten everything
# into a single univariate column before windowing
X = X.view(-1, 1)

# Create sliding windows
windowed_data = self._create_sliding_windows(X)

# Create dataset and dataloader
# window_size=1 since we already created windows
dataset = SlidingWindowDataset(windowed_data, window_size=1)
dataloader = DataLoader(
dataset, batch_size=batch_size, shuffle=True, drop_last=True)

self.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(self.parameters(), lr=learning_rate)

self.train()
losses = []

# Progress bar for epochs
epoch_pbar = tqdm(range(num_epochs), desc="Training", unit="epoch")

for epoch in epoch_pbar:
epoch_loss = 0.0

# Progress bar for batches
batch_pbar = tqdm(
dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

for data in batch_pbar:
# Flatten to (batch, window_size * n_features) so the
# reconstruction target matches the model's flattened output
data = data.to(device).view(data.size(0), -1)

# Forward pass
output = self(data)
loss = criterion(output, data)

# Backward pass
optimizer.zero_grad()
loss.backward()
optimizer.step()

epoch_loss += loss.item()

# Update batch progress bar
batch_pbar.set_postfix({"Batch Loss": f"{loss.item():.4f}"})

avg_loss = epoch_loss / len(dataloader)
losses.append(avg_loss)

# Update epoch progress bar
epoch_pbar.set_postfix({"Avg Loss": f"{avg_loss:.4f}"})

return losses

def predict(self, X_test, X_dirty=None, device=None):
"""
Predict anomaly scores for time series data.

Args:
X_test: Test data for reconstruction
X_dirty: Original dirty data (if None, uses X_test)
device: Device to run inference on (auto-detected if None)

Returns:
Reconstructed windows; also sets the decision_scores_ attribute
"""
self.eval()
if device is None:
device = torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)
self.to(device)

# Create sliding windows for test data
if isinstance(X_test, np.ndarray):
X_test = torch.from_numpy(X_test).float()

windowed_test = self._create_sliding_windows(X_test)
windowed_test = windowed_test.to(device)

with torch.no_grad():
test_predict = self(windowed_test).cpu().numpy()

# Calculate MAE loss
test_mae_loss = np.mean(
np.abs(test_predict - windowed_test.cpu().numpy()), axis=1)

# Normalize MAE loss
nor_test_mae_loss = MinMaxScaler().fit_transform(
test_mae_loss.reshape(-1, 1)).ravel()

# Use X_dirty if provided, otherwise use original X_test
if X_dirty is None:
if isinstance(X_test, torch.Tensor):
X_dirty = X_test.cpu().numpy()
else:
X_dirty = X_test

# Initialize score array
score = np.zeros(len(X_dirty))

# Fill the score array, offsetting by half a window so each
# score is centered on the window it was computed from
half = self.sliding_window // 2
score[half:half + len(test_mae_loss)] = nor_test_mae_loss
score[:half] = nor_test_mae_loss[0]
score[half + len(test_mae_loss):] = nor_test_mae_loss[-1]

# Store decision scores
self.decision_scores_ = score

return test_predict

def encode_data(self, x, device=None):
"""
Encode input data to latent representation.

Args:
x: Input tensor or numpy array
device: Device to run inference on (auto-detected if None)

Returns:
Encoded data as numpy array
"""
self.eval()
if device is None:
device = torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
)
self.to(device)

# Convert to tensor if numpy array
if isinstance(x, np.ndarray):
x = torch.from_numpy(x).float()
x = x.to(device)
with torch.no_grad():
encoded = self.encode(x)
return encoded.cpu().numpy()
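
A minimal usage sketch for the Autoencoder above, assuming a univariate NumPy series (parameter values are illustrative; input_size must equal sliding_window times the number of features):

import numpy as np

# 1000-point univariate series; input_size = sliding_window * 1 feature
X = np.random.randn(1000).astype(np.float32)
model = Autoencoder(
    input_size=10, hidden_size=32, latent_size=16, sliding_window=10
)
model.fit(X, num_epochs=5, batch_size=32, device="cpu")
model.predict(X, device="cpu")
scores = model.decision_scores_  # shape (1000,), min-max scaled to [0, 1]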
113 changes: 113 additions & 0 deletions benchmark_utils/windowing.py
@@ -0,0 +1,113 @@
import numpy as np
import torch
from torch.utils.data import TensorDataset


def make_windows(X, window_size=32, stride=1, padding=False):
"""Create a windowed view of the data.

Parameters
----------
X : np.ndarray
Input data of shape (n_samples, n_features, n_times).
window_size : int
Size of the sliding window.
stride : int
Stride of the sliding window.
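padding : bool
If True, zero-pad the end of the series before windowing.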

Returns
-------
windows : np.ndarray
A windowed view of the data in shape:
(n_eff_samples, window_size, n_features)
"""

if padding:
n_samples, n_features, n_times = X.shape
n_pad = (window_size - stride + n_times % stride) % stride
pad_width = ((0, 0), (0, 0), (0, n_pad))
X = np.pad(X, pad_width=pad_width, mode='constant')

return np.lib.stride_tricks.sliding_window_view(
X, window_shape=window_size, axis=-1
)[..., ::stride, :].transpose(0, 2, 1, 3).reshape(
-1, X.shape[1], window_size
).transpose(0, 2, 1)
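
# Example (sketch): for X of shape (2, 3, 100) with window_size=32 and
# stride=4, each sample yields (100 - 32) // 4 + 1 = 18 windows, so the
# result has shape (2 * 18, 32, 3), i.e.
# (n_eff_samples, window_size, n_features).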


def make_windowed_dataset(X, y=None, window_size=32, stride=1):
"""
Create a DataLoader with windowed views of the data.

Parameters
----------
X : np.ndarray
Input data of shape (n_samples, n_features, n_times).
y : np.ndarray, optional
Target data of shape (n_samples, n_times).
window_size : int
Size of the sliding window.
stride : int
Stride of the sliding window.

Returns
-------
Dataset
A PyTorch Dataset with windowed data in shape:
(n_eff_samples, window_size, n_features)
"""

if window_size is not None:
X = make_windows(X, window_size, stride)

X_tensor = torch.tensor(X, dtype=torch.float32)

if y is not None:
if window_size is not None:
y = np.lib.stride_tricks.sliding_window_view(
y, window_shape=window_size, axis=-1
)[..., ::stride, :].reshape(-1, window_size)

y_tensor = torch.tensor(y, dtype=torch.float32)
dataset = TensorDataset(X_tensor, y_tensor)
else:
dataset = TensorDataset(X_tensor)

return dataset
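
# Example (sketch): with X of shape (2, 3, 100) and y of shape (2, 100),
# make_windowed_dataset(X, y, window_size=32, stride=4) returns a
# TensorDataset of 36 (window, label-window) pairs with shapes
# (32, 3) and (32,).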


def reconstruct_from_windows(windows, stride, batch, n_features):
"""Reconstruct the original signal from overlapping windows

Parameters
----------
windows : np.ndarray
The overlapping windows of shape
(batch * n_windows, window_size, n_features)
stride : int
The stride used to create the windows
batch : int
The batch size used when creating the windows
n_features : int
The number of features in the original signal
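
Returns
-------
np.ndarray
The reconstructed signal of shape (batch, n_features, n_times),
with values at overlapping positions averaged.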
"""
# windows: (batch*n_windows, window_size, n_features)
w = windows.shape[1]
windows = windows.reshape(batch, -1, w, n_features)
b, nw, ws, nf = windows.shape
nt = (nw - 1) * stride + ws

# allocate accumulator + counts for correct overlap averaging
acc = np.zeros((b, nf, nt))
cnt = np.zeros((nt,), dtype=int)

# build index map for overlap positions
idx = np.arange(ws)[:, None] + stride * np.arange(nw)

# add windows efficiently
np.add.at(acc, (slice(None), slice(None), idx.ravel()),
windows.transpose(0, 3, 1, 2).reshape(b, nf, -1))

# count contributions
np.add.at(cnt, idx.ravel(), 1)

return acc / cnt
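
A round-trip sketch combining make_windows and reconstruct_from_windows; recovery is exact here because stride < window_size, so every time step is covered by at least one window and the overlaps average back to the original values:

import numpy as np

X = np.random.randn(2, 3, 100)  # (n_samples, n_features, n_times)
W = make_windows(X, window_size=32, stride=4)  # shape (36, 32, 3)
X_rec = reconstruct_from_windows(W, stride=4, batch=2, n_features=3)
assert np.allclose(X_rec, X)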