9 changes: 7 additions & 2 deletions README.md
@@ -64,7 +64,7 @@ results = fault_detector.predict(sensor_data=test_sensor_data)
 
 The pandas `DataFrame` `sensor_data` contains the operational data in wide format with the timestamp as index, the
 pandas `Series` `normal_index` indicates which timestamps are considered 'normal' operation and can be used to create
-a normal behaviour model. The [`base_config.yaml`](energy_fault_detector/base_config.yaml) file contains all model
+a normal behaviour model. The [`base_config.yaml`](energy_fault_detector/base_config.yaml) file contains the model
 settings, an example is found [here](energy_fault_detector/base_config.yaml).
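
For context, a minimal sketch of the inputs described here; the column names and values are illustrative, only the wide format, timestamp index, and boolean `normal_index` come from the README:

```python
import numpy as np
import pandas as pd

# Wide-format operational data: timestamp index, one column per sensor
idx = pd.date_range('2024-01-01', periods=1000, freq='10min')
sensor_data = pd.DataFrame(
    np.random.randn(1000, 3),
    index=idx,
    columns=['power', 'temp_in', 'temp_out'],
)
# Boolean Series marking 'normal' operation, used to fit the normal behaviour model
normal_index = pd.Series(True, index=idx)
```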


@@ -100,12 +100,17 @@ This project is licensed under the [MIT License](./LICENSE).
 ## References
 If you use this work, please cite us:
 
+**Fault detection in district heating substations**:
+- Enabling Predictive Maintenance in District Heating Substations: A Labelled Dataset and Fault Detection Evaluation Framework based on Service Data.
+  PrePrint on ArXiv. https://doi.org/10.48550/arXiv.2511.14791
+- Dataset: PreDist Dataset - Operational data of district heating substations labelled with faults and maintenance information. Zenodo, Nov 2025, https://doi.org/10.5281/zenodo.17522254.
+
 **ARCANA Algorithm**:
 Autoencoder-based anomaly root cause analysis for wind turbines. Energy and AI. 2021;4:100065. https://doi.org/10.1016/j.egyai.2021.100065
 
 **CARE to Compare dataset and CARE-Score**:
 - Paper: CARE to Compare: A Real-World Benchmark Dataset for Early Fault Detection in Wind Turbine Data. Data. 2024; 9(12):138. https://doi.org/10.3390/data9120138
-- Dataset: Wind Turbine SCADA Data For Early Fault Detection. Zenodo, Mar. 2025, https://doi.org/10.5281/ZENODO.14958989.
+- Dataset: Wind Turbine SCADA Data For Early Fault Detection. Zenodo, Oct. 2024, https://doi.org/10.5281/ZENODO.14958989.
 
 **Transfer learning methods**:
 Transfer learning applications for autoencoder-based anomaly detection in wind turbines. Energy and AI. 2024;17:100373. https://doi.org/10.1016/j.egyai.2024.100373
6 changes: 6 additions & 0 deletions energy_fault_detector/config/config.py
@@ -98,6 +98,7 @@
     'train': {'type': 'dict', 'schema': TRAIN_SCHEMA, 'required': False, 'allow_unknown': True},
     'predict': {'type': 'dict', 'schema': PREDICT_SCHEMA, 'required': False},
     'root_cause_analysis': {'type': 'dict', 'schema': ROOT_CAUSE_ANALYSIS_SCHEMA, 'required': False},
+    'dtype': {'type': 'string', 'required': False, 'allowed': ['float32', 'float64']}
 }


@@ -203,3 +204,8 @@ def fit_threshold_on_val(self) -> bool:
     def verbose(self) -> int:
         """Verbosity Level of the Autoencoder."""
         return self.config_dict.get('train', {}).get('autoencoder', {}).get('verbose', 1)
+
+    @property
+    def dtype(self):
+        """Data type, float32 by default."""
+        return self.config_dict.get('dtype', 'float32')
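
For context, a minimal sketch of the new `dtype` option; the `Config` constructor is not shown in this diff, so the raw `config_dict` lookup below mirrors the property rather than the real API:

```python
# Illustrative config dict with the new optional top-level key; the schema
# above only allows 'float32' or 'float64'.
config_dict = {
    'train': {},           # existing sections unchanged
    'dtype': 'float64',
}
# Mirrors the dtype property: falls back to 'float32' when the key is absent
dtype = config_dict.get('dtype', 'float32')
assert dtype == 'float64'
```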
9 changes: 5 additions & 4 deletions energy_fault_detector/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""This module contains class templates for most of the anomaly detection classes, such as
autoencoders, anomaly scores, threshold selectors and data classes."""

from energy_fault_detector.core.anomaly_score import AnomalyScore
from energy_fault_detector.core.autoencoder import Autoencoder
from energy_fault_detector.core.data_transformer import DataTransformer
from energy_fault_detector.core.threshold_selector import ThresholdSelector
from .anomaly_score import AnomalyScore
from .autoencoder import Autoencoder
from .data_transformer import DataTransformer
from .threshold_selector import ThresholdSelector
from .fault_detection_result import FaultDetectionResult, ModelMetadata
17 changes: 9 additions & 8 deletions energy_fault_detector/core/_logs.py
@@ -1,34 +1,35 @@
 """Logging settings"""
 
 import os
+from pathlib import Path
 import logging.config as logging_config
 
 import yaml
 
 
-def setup_logging(default_path: str = 'logging.yaml', env_key: str = 'LOG_CFG') -> None:
+def setup_logging(default_path: str | Path = 'logging.yaml', env_key: str = 'LOG_CFG') -> None:
     """Setup logging configuration
 
     Args:
-        default_path (str): default logging configuration file. Default is 'logging.yaml'
+        default_path (str or Path): default logging configuration file. Default is 'logging.yaml'
         env_key (str): Environment variable holding logging config file path (overrides default_path). Default is
             'LOG_CFG'
     """
 
-    path = default_path
+    path = Path(default_path)
     value = os.getenv(env_key, None)
     if value:
-        path = value
+        path = Path(value)
 
     try:
         with open(path, 'rt', encoding='utf-8') as f:
             config = yaml.safe_load(f.read())
         # check paths exist or create them:
         for _, handler in config['handlers'].items():
-            if handler.get('filename'):
-                dirname = os.path.dirname(handler['filename'])
-                if dirname != '' and not os.path.exists(dirname):
-                    os.makedirs(dirname)
+            filename = handler.get('filename')
+            if filename:
+                # Resolve path and create parent directories if they don't exist
+                Path(filename).parent.mkdir(parents=True, exist_ok=True)
 
         logging_config.dictConfig(config)
     except Exception as e:
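
For context, a usage sketch of the updated `setup_logging`; the config paths are illustrative. The `LOG_CFG` environment variable still takes precedence over `default_path`, and both now accept `str` or `Path`:

```python
import os
from pathlib import Path

from energy_fault_detector.core._logs import setup_logging

os.environ['LOG_CFG'] = 'configs/logging.yaml'    # hypothetical override location
setup_logging(default_path=Path('logging.yaml'))  # ignored while LOG_CFG is set
```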
19 changes: 11 additions & 8 deletions energy_fault_detector/core/fault_detection_model.py
@@ -2,9 +2,10 @@
 
 import os
 from abc import ABC, abstractmethod
-from typing import Any, Optional, Union, List, Tuple
+from typing import Optional, Union, List, Tuple
 import logging
 from datetime import datetime
+from pathlib import Path
 
 import pandas as pd
 import numpy as np
@@ -16,10 +17,10 @@
 from energy_fault_detector.core.model_factory import ModelFactory
 from energy_fault_detector.core.fault_detection_result import ModelMetadata, FaultDetectionResult
 from energy_fault_detector.data_preprocessing import DataPreprocessor
-from energy_fault_detector._logs import setup_logging
+from energy_fault_detector.core._logs import setup_logging
 from energy_fault_detector.data_splitting.data_splitter import BlockDataSplitter
 
-setup_logging(os.path.join(os.path.dirname(__file__), '..', 'logging.yaml'))
+setup_logging(Path(__file__).parent.parent / 'logging.yaml')
 logger = logging.getLogger('energy_fault_detector')
 
 DATA_PREP_DIR = 'data_preprocessor'
@@ -28,6 +29,8 @@
 SCORE_DIR = 'anomaly_score'
 
 DataType = Union[pd.DataFrame, np.ndarray, List]
+PathLike = Union[str, Path]
+ModelPart = Union[DataPreprocessor, Autoencoder, AnomalyScore, ThresholdSelector]
 
 
 class NoTrainingData(Exception):
@@ -50,9 +53,9 @@ class FaultDetectionModel(ABC):
         save_timestamps: a list of string timestamps, indicating when the model was saved.
     """
 
-    def __init__(self, config: Optional[Config] = None, model_directory: str = 'models'):
+    def __init__(self, config: Optional[Config] = None, model_directory: PathLike = 'models'):
         self.config: Optional[Config] = config
-        self.model_directory: str = model_directory
+        self.model_directory: PathLike = model_directory
 
         self.anomaly_score: Optional[AnomalyScore] = None
         self.autoencoder: Optional[Autoencoder] = None
@@ -191,11 +194,11 @@ def save_models(self, model_name: Union[str, int] = None, overwrite: bool = Fals
 
         return os.path.abspath(model_dir), current_datetime
 
-    def load_models(self, model_path: str) -> None:
+    def load_models(self, model_path: PathLike) -> None:
         """Load saved models given the model path.
 
         Args:
-            model_path: Path to the model files.
+            model_path (str, Path): Path to the model files.
         """
 
         data_prep_dir = os.path.join(model_path, DATA_PREP_DIR)
@@ -221,7 +224,7 @@ def load_models(self, model_path: str) -> None:
         self._model_factory = ModelFactory(self.config)
 
     @staticmethod
-    def _load_pickled_model(model_type: str, model_directory: str):
+    def _load_pickled_model(model_type: str, model_directory: str) -> ModelPart:
         """Load a pickled model of given type, using file name (which is the class name)."""
         model_class_name = os.listdir(model_directory)[0].split('.')[0]
         if model_type != 'data_preprocessor':
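
For context, a sketch of the relaxed path handling; `MyDetector` and `config` are hypothetical stand-ins for a concrete `FaultDetectionModel` subclass and its `Config`:

```python
from pathlib import Path

# Hypothetical concrete subclass; str still works for model_directory too
detector = MyDetector(config=config, model_directory=Path('models'))

# save_models returns the absolute model directory and a save timestamp
model_dir, saved_at = detector.save_models(model_name='baseline')

# load_models now accepts the PathLike alias (str or Path)
detector.load_models(Path(model_dir))
```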
96 changes: 82 additions & 14 deletions energy_fault_detector/core/fault_detection_result.py
@@ -1,11 +1,13 @@
 
-import os
 from typing import Optional, List
 from dataclasses import dataclass
+from pathlib import Path
 
 import pandas as pd
 import numpy as np
 
+from ..utils.analysis import calculate_criticality
+
 
 @dataclass
 class FaultDetectionResult:
@@ -27,43 +29,109 @@ class FaultDetectionResult:
     """DataFrame with ARCANA results (ARCANA bias). None if ARCANA was not run."""
 
     arcana_losses: Optional[pd.DataFrame] = None
-    """DataFrame containing recorded values for all losses in ARCANA. None if ARCANA was not run."""
+    """DataFrame containing recorded values for all losses in ARCANA. None if ARCANA was not run.
+    Empty if losses were not tracked."""
 
     tracked_bias: Optional[List[pd.DataFrame]] = None
-    """List of DataFrames containing the ARCANA bias every 50th iteration. None if ARCANA was not run."""
+    """List of DataFrames containing the ARCANA bias every 50th iteration. None if ARCANA was not run.
+    Empty if bias was not tracked."""
+
+    def criticality(self, normal_idx: pd.Series | None = None, init_criticality: int = 0, max_criticality: int = 1000
+                    ) -> pd.Series:
+        """Criticality based on the predicted anomalies.
+
+        Args:
+            normal_idx (pd.Series, optional): A pandas Series with boolean values indicating normal operation, indexed
+                by timestamp. Ignored if None.
+            init_criticality (int, optional): The initial criticality value. Defaults to 0.
+            max_criticality (int, optional): The maximum criticality value. Defaults to 1000.
+
+        """
+        return calculate_criticality(self.predicted_anomalies, normal_idx, init_criticality, max_criticality)
 
-    def save(self, directory: str, **kwargs) -> None:
+    def save(self, directory: str | Path, **kwargs) -> None:
         """Saves the results to CSV files in the specified directory.
 
         Args:
             directory (str): The directory where the CSV files will be saved.
-            kwargs: other keywords args for `pd.DataFrame.to_csv`
+            kwargs: other keywords args for `pd.DataFrame.to_csv` (i.e. sep=',')
         """
         # Ensure the directory exists
-        os.makedirs(directory, exist_ok=True)
+        directory = Path(directory)
+        directory.mkdir(exist_ok=True, parents=True)
 
         # Save each DataFrame as a CSV file
-        self.predicted_anomalies.to_csv(os.path.join(directory, 'predicted_anomalies.csv'), **kwargs)
-        self.reconstruction.to_csv(os.path.join(directory, 'reconstruction.csv'), **kwargs)
-        self.recon_error.to_csv(os.path.join(directory, 'reconstruction_errors.csv'), **kwargs)
-        self.anomaly_score.to_csv(os.path.join(directory, 'anomaly_scores.csv'), **kwargs)
+        self.predicted_anomalies.to_csv(directory / 'predicted_anomalies.csv', **kwargs)
+        self.reconstruction.to_csv(directory / 'reconstruction.csv', **kwargs)
+        self.recon_error.to_csv(directory / 'reconstruction_errors.csv', **kwargs)
+        self.anomaly_score.to_csv(directory / 'anomaly_scores.csv', **kwargs)
 
         if self.bias_data is not None:
-            self.bias_data.to_csv(os.path.join(directory, 'bias_data.csv'), **kwargs)
+            self.bias_data.to_csv(directory / 'bias_data.csv', **kwargs)
 
         if self.arcana_losses is not None:
-            self.arcana_losses.to_csv(os.path.join(directory, 'arcana_losses.csv'), **kwargs)
+            self.arcana_losses.to_csv(directory / 'arcana_losses.csv', **kwargs)
 
         if self.tracked_bias is not None and len(self.tracked_bias) > 0:
             for idx, bias_df in enumerate(self.tracked_bias):
-                bias_df.to_csv(os.path.join(directory, f'tracked_bias_{idx}.csv'), **kwargs)
+                bias_df.to_csv(directory / f'tracked_bias_{idx}.csv', **kwargs)
+
+    @classmethod
+    def load(cls, directory: str | Path, **kwargs) -> "FaultDetectionResult":
+        """Loads the results from CSV files in the specified directory.
+
+        Args:
+            directory (str | Path): The directory where the CSV files are stored.
+            kwargs: other keywords args for `pd.read_csv` (e.g., sep=',')
+
+        Returns:
+            FaultDetectionResult: The loaded result object.
+        """
+        directory = Path(directory)
+
+        # Default pandas loading arguments to ensure indices are restored correctly
+        params = {'index_col': 0, 'parse_dates': True}
+        params.update(kwargs)
+
+        # Load mandatory fields
+        predicted_anomalies = pd.read_csv(directory / 'predicted_anomalies.csv', **params).iloc[:, 0]
+        # Ensure predicted_anomalies is explicitly a Series and boolean
+        predicted_anomalies = predicted_anomalies.astype(bool)
+
+        reconstruction = pd.read_csv(directory / 'reconstruction.csv', **params)
+        recon_error = pd.read_csv(directory / 'reconstruction_errors.csv', **params)
+        anomaly_score = pd.read_csv(directory / 'anomaly_scores.csv', **params).iloc[:, 0]
+
+        # Load optional fields if they exist
+        bias_data = None
+        if (directory / 'bias_data.csv').exists():
+            bias_data = pd.read_csv(directory / 'bias_data.csv', **params)
+
+        arcana_losses = None
+        if (directory / 'arcana_losses.csv').exists():
+            arcana_losses = pd.read_csv(directory / 'arcana_losses.csv', **params)
+
+        tracked_bias = None
+        tracked_files = sorted(directory.glob('tracked_bias_*.csv'))
+        if tracked_files:
+            tracked_bias = [pd.read_csv(f, **params) for f in tracked_files]
+
+        return cls(
+            predicted_anomalies=predicted_anomalies,
+            reconstruction=reconstruction,
+            recon_error=recon_error,
+            anomaly_score=anomaly_score,
+            bias_data=bias_data,
+            arcana_losses=arcana_losses,
+            tracked_bias=tracked_bias
+        )


 @dataclass
 class ModelMetadata:
     """Class to encapsulate metadata about the FaultDetector model."""
 
     model_date: str
-    model_path: str
+    model_path: str | Path
     train_recon_error: np.ndarray
     val_recon_error: Optional[np.ndarray] = None
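
For context, a round-trip sketch for the new `load` classmethod; the toy data is illustrative, only the field and file names come from the code above:

```python
import numpy as np
import pandas as pd

from energy_fault_detector.core.fault_detection_result import FaultDetectionResult

idx = pd.date_range('2024-01-01', periods=4, freq='h')
result = FaultDetectionResult(
    predicted_anomalies=pd.Series([False, False, True, True], index=idx),
    reconstruction=pd.DataFrame({'sensor_a': np.zeros(4)}, index=idx),
    recon_error=pd.DataFrame({'sensor_a': np.ones(4)}, index=idx),
    anomaly_score=pd.Series(np.linspace(0.0, 1.0, 4), index=idx),
)
result.save('results_dir')                           # writes one CSV per field
restored = FaultDetectionResult.load('results_dir')  # indices parsed back as dates
assert restored.predicted_anomalies.equals(result.predicted_anomalies)
```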
5 changes: 3 additions & 2 deletions energy_fault_detector/evaluation/__init__.py
@@ -1,4 +1,5 @@
 """Evaluation classes and methods, including the CARE-Score and Care2CompareDataset."""
 
-from energy_fault_detector.evaluation.care_score import CAREScore
-from energy_fault_detector.evaluation.care2compare import Care2CompareDataset
+from .care_score import CAREScore
+from .care2compare import Care2CompareDataset
+from .predist_dataset import PreDistDataset
3 changes: 0 additions & 3 deletions energy_fault_detector/evaluation/care2compare.py
@@ -18,9 +18,6 @@ class Care2CompareDataset:
     The data can be downloaded either manually from https://doi.org/10.5281/zenodo.14958989 (in this case specify
     `path`) or it can be downloaded automatically by setting download_dataset to True.
 
-    All data is loaded into memory, which might be problematic for large datasets (consider using DataLoader classes of
-    TensorFlow and PyTorch in that case).
-
     By default, only the averages are read. See statistics argument of the data loading methods.
 
     Method overview:
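
For context, a usage sketch based on the docstring above; only `path` and `download_dataset` appear in this diff, so treat the exact constructor signature as an assumption:

```python
from energy_fault_detector.evaluation import Care2CompareDataset

# Point at a manual download from Zenodo...
ds = Care2CompareDataset(path='data/care2compare')
# ...or let the class fetch the dataset automatically
ds = Care2CompareDataset(download_dataset=True)
```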