3 changes: 2 additions & 1 deletion .github/workflows/pages.yml
@@ -23,7 +23,8 @@ jobs:

      - name: Install dependencies
        run: |
          pip install sphinx sphinx-rtd-theme sphinx-autodoc-typehints sphinx-copybutton
          pip install -r requirements-docs.txt
          pip install -r requirements.txt

      - name: Create VERSION file
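        # "${GITHUB_REF##*/}" keeps only the part after the last "/" (e.g. refs/tags/v1.2.3 -> v1.2.3)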
        run: echo "${GITHUB_REF##*/}" > "${PKG_NAME}/VERSION"
12 changes: 7 additions & 5 deletions README.md
@@ -48,11 +48,15 @@ For an example using one of the CARE2Compare datasets, run:
For more information, have a look at the notebook [Quick Fault Detection](./notebooks/Example%20-%20Quick%20Fault%20Detection.ipynb)


## Fault detection in 4 lines of code
## Fault detection quickstart

```python
from energy_fault_detector import FaultDetector, Config
from energy_fault_detector.config import generate_quickstart_config

# 1) Generate and save a base config (YAML)
generate_quickstart_config(output_path="base_config.yaml")
# 2) Train and predict using the generated config
fault_detector = FaultDetector(config=Config('base_config.yaml'))
model_data = fault_detector.train(sensor_data=sensor_data, normal_index=normal_index)
results = fault_detector.predict(sensor_data=test_sensor_data)
```

@@ -84,10 +88,8 @@ All contributions, bug reports, bug fixes, documentation improvements, enhancements
2. Unification, standardisation and generic improvements
   1. Additional options for all autoencoders (e.g. dropout, regularization)
   2. Data preparation (e.g. extend imputation strategies).
   3. Download method for the Care2Compare class.
   3. Unify default value settings.
   4. No or low configuration
   5. Upgrade to Keras 3.0
   3. No or low configuration need (e.g. use defaults where possible).
   4. Upgrade to Keras 3.0

3. Root cause analysis expansion
   1. Integrate SHAP and possibly other XAI methods.
111 changes: 111 additions & 0 deletions docs/advanced_config.yaml
@@ -0,0 +1,111 @@
train:
  # Clip training data to remove outliers (only applied for training)
  data_clipping:  # (optional) if not specified, not applied.
    lower_percentile: 0.01
    upper_percentile: 0.99
    # Choose one of:
    # features_to_exclude:
    #   - do_not_clip_this_feature
    # features_to_clip:
    #   - clip_only_this_feature

  data_preprocessor:
    steps:
      # Replace consecutive duplicate 0-values with NaN
      - name: duplicate_to_nan
        params:
          value_to_replace: 0
          n_max_duplicates: 6
          features_to_exclude:
            - do_not_replace_value_with_nan_for_this_feature
      # Normalize counters to differences (configure your counter columns)
      # If needed, you can create multiple counter_diff_transformer steps with different settings for different counters
      - name: counter_diff_transformer
        step_name: counter_diff_energy
        params:
          counters:
            - energy_total_kwh
          compute_rate: false
          reset_strategy: zero
          fill_first: nan
      # Column selection: drop columns where > 20% is missing and exclude specific features
      - name: column_selector
        params:
          max_nan_frac_per_col: 0.20
          features_to_exclude:
            - feature1
            - feature2
          # Alternatively, keep only selected features:
          # features_to_select:
          #   - temp_outdoor
          #   - flow
          #   - power
      # Filter low-unique-value features or high-zero-fraction columns
      - name: low_unique_value_filter
        params:
          min_unique_value_count: 2
          max_col_zero_frac: 0.99
      # Transform angles to sin/cos
      - name: angle_transformer
        params:
          angles:
            - angle1
            - angle2
      # Imputer (explicit; will be auto-inserted if omitted)
      - name: simple_imputer
        params:
          strategy: mean
      # Scaler (choose one; StandardScaler is auto-added by default if omitted)
      - name: standard_scaler
        params:
          with_mean: true
          with_std: true

  data_splitter:
    # How to split data into train and validation sets for the autoencoder
    type: sklearn
    validation_split: 0.2
    shuffle: true  # false by default (the last part of the data is then taken as validation data)
    # or block splitting, 4 weeks training, 1 week validation:
    # type: DataSplitter
    # train_block_size: 4032
    # val_block_size: 1008

  autoencoder:
    name: MultilayerAutoencoder
    params:
      batch_size: 128
      # Use an ExponentialDecay schedule for the learning rate:
      learning_rate: 0.001  # starting point
      decay_rate: 0.99
      decay_steps: 100000
      # Early stopping with max 1000 epochs, a minimal improvement of 1e-4 and a patience of 5 epochs
      early_stopping: true
      min_delta: 0.0001
      patience: 5
      epochs: 1000
      # Architecture settings
      layers: [200, 100, 50]
      code_size: 20
      act: prelu  # activation to use for hidden layers
      last_act: linear  # output layer activation

  anomaly_score:
    name: rmse
    params:
      scale: false

  threshold_selector:
    name: fbeta
    params:
      beta: 0.5

root_cause_analysis:
  alpha: 0.5
  init_x_bias: recon
  num_iter: 1000
  verbose: true

predict:
  criticality:
    max_criticality: 144
45 changes: 45 additions & 0 deletions docs/basic_config.yaml
@@ -0,0 +1,45 @@
train:
  # Clip training data to remove outliers (only applied for training)
  data_clipping:  # (optional) if not specified, not applied.
    # Use features_to_exclude or features_to_clip: [feature] to skip or to apply to specific features
    lower_percentile: 0.001
    upper_percentile: 0.999

  data_preprocessor:
    steps:
      # This drops features where > 20% is missing
      - name: column_selector
        params:
          max_nan_frac_per_col: 0.2
      # This drops constants by default (controlled by `min_unique_value_count`)
      - name: low_unique_value_filter
      # SimpleImputer and StandardScaler are always added

  data_splitter:
    # How to split data into train and validation sets for the autoencoder
    type: sklearn
    validation_split: 0.2
    shuffle: true

  autoencoder:
    name: default
    params:
      layers:  # Symmetric autoencoder: inputs - 200 - 100 - 50 - 20 - 50 - 100 - 200 - outputs
        - 200
        - 100
        - 50
      code_size: 20  # Size of the bottleneck layer

  anomaly_score:
    name: rmse

  threshold_selector:
    fit_on_val: true
    name: quantile
    params:
      quantile: 0.95

root_cause_analysis:
  alpha: 0.8
  init_x_bias: recon
  num_iter: 1000
5 changes: 5 additions & 0 deletions docs/conf.py
@@ -127,6 +127,11 @@
napoleon_use_param = True
napoleon_use_rtype = True

napoleon_type_aliases = {
    "Config": "energy_fault_detector.Config",
    "FaultDetector": "energy_fault_detector.FaultDetector",
}

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
136 changes: 136 additions & 0 deletions docs/configuration.rst
@@ -0,0 +1,136 @@
.. _configuration_guide:

Configuration
================================
This page explains how to configure training, prediction, and optional root cause analysis (ARCANA).

.. contents:: Table of Contents
   :depth: 3
   :local:

Quick start: minimal configuration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
A minimal configuration that clips outliers, imputes missing values, and scales features:

.. include:: basic_config.yaml
   :literal:

This setup:

- Applies DataClipper if specified.
- Builds a DataPreprocessor with:

  - ColumnSelector that drops columns with more than 20% NaNs (configurable).
  - LowUniqueValueFilter that removes constant features by default (configurable).
  - SimpleImputer (mean) and a scaler (StandardScaler by default). If you do not add an imputer/scaler explicitly,
    the pipeline ensures mean imputation and StandardScaler are added.

- Trains a default autoencoder (with the provided architecture, otherwise default values), with an RMSE anomaly score and a
  quantile threshold selector.
- Runs ARCANA with the provided parameters when calling :py:obj:`FaultDetector.predict(..., root_cause_analysis=True) <energy_fault_detector.fault_detector.FaultDetector.predict>`.
  If not provided, default ARCANA parameters are used (see :py:obj:`ARCANA docs <energy_fault_detector.root_cause_analysis.arcana.Arcana>`).

If you leave out the data_preprocessor configuration (i.e., ``data_preprocessor: {}``), a default preprocessing pipeline
is generated: it drops constant features and features with more than 5% missing data, imputes the remaining missing
values with the mean, and scales the data to zero mean and unit standard deviation.
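For example, a minimal sketch that relies entirely on this default pipeline:

.. code-block:: yaml

   train:
     data_preprocessor: {}  # empty: the default pipeline described above is generated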

Detailed configuration
^^^^^^^^^^^^^^^^^^^^^^
Below is a more thorough configuration. It shows how to specify preprocessing steps and more model parameters.

.. include:: advanced_config.yaml
   :literal:

DataPreprocessor specification
""""""""""""""""""""""""""""""
A steps-based preprocessing pipeline can be configured under ``train.data_preprocessor.steps``. Each step is a dict
with the following keys (see the example after this list):

- ``name`` (str): the registered step name (see table below).
- ``enabled`` (bool, optional): default ``True``; set to ``False`` to skip a step.
- ``params`` (dict, optional): constructor arguments for the step.
- ``step_name`` (str, optional): custom key for the sklearn pipeline; useful if a step is repeated.
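A sketch of a single step entry using all four keys; the chosen step and values are illustrative:

.. code-block:: yaml

   train:
     data_preprocessor:
       steps:
         - name: simple_imputer       # registered step name
           step_name: imputer_mean    # custom key for the sklearn pipeline
           enabled: true              # set to false to skip this step
           params:
             strategy: mean           # constructor argument of the step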

Allowed step names and aliases:

+-------------------------+-----------------------------------------------+------------------------------------------------+
| Step name | Purpose | Aliases |
+=========================+===============================================+================================================+
| column_selector | Drop columns with too many NaNs | \- |
+-------------------------+-----------------------------------------------+------------------------------------------------+
| low_unique_value_filter | Drop columns with low variance/many zeros | \- |
+-------------------------+-----------------------------------------------+------------------------------------------------+
| angle_transformer | Convert angles to sin/cos pairs | angle_transform |
+-------------------------+-----------------------------------------------+------------------------------------------------+
| counter_diff_transformer| Convert counters to differences/rates | counter_diff, counter_diff_transform |
+-------------------------+-----------------------------------------------+------------------------------------------------+
| simple_imputer | Impute missing values | imputer |
+-------------------------+-----------------------------------------------+------------------------------------------------+
| standard_scaler | Standardize features (z-score) | standardize, standardscaler, standard |
+-------------------------+-----------------------------------------------+------------------------------------------------+
| minmax_scaler | Scale to [0, 1] | minmax |
+-------------------------+-----------------------------------------------+------------------------------------------------+
| duplicate_to_nan | Replace consecutive duplicate values with NaN | duplicate_value_to_nan, duplicate_values_to_nan|
+-------------------------+-----------------------------------------------+------------------------------------------------+
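Aliases can be used in place of the registered name. For example, a sketch that scales features to [0, 1] via the
``minmax`` alias instead of the standard scaler:

.. code-block:: yaml

   train:
     data_preprocessor:
       steps:
         - name: minmax   # alias for minmax_scaler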

For detailed documentation of the data preprocessor pipeline, refer to the
:py:obj:`DataPreprocessor <energy_fault_detector.data_preprocessing.data_preprocessor.DataPreprocessor>` docs.

Other training configuration sections
"""""""""""""""""""""""""""""""""""""

- Data clipping:
  :py:obj:`DataClipper <energy_fault_detector.data_preprocessing.data_clipper.DataClipper>` supports
  ``features_to_exclude`` and ``features_to_clip`` for fine-grained control.

- Data splitter (``train.data_splitter``); a sketch follows this list:

  - ``type``: one of ``BlockDataSplitter`` (aliases: ``blocks``, ``DataSplitter``), or ``sklearn`` (alias ``train_test_split``).
  - For sklearn: ``validation_split`` (float in (0, 1)) and ``shuffle`` (bool).
  - For :py:obj:`BlockDataSplitter <energy_fault_detector.data_splitting.data_splitter.BlockDataSplitter>`: ``train_block_size`` and ``val_block_size``.
  - Early stopping guard: if ``train.autoencoder.params.early_stopping`` is true, you must either set a
    valid ``validation_split`` in (0, 1), or use :py:obj:`BlockDataSplitter <energy_fault_detector.data_splitting.data_splitter.BlockDataSplitter>`
    with a positive ``val_block_size``.


- Autoencoder (``train.autoencoder``):

  - ``name``: class name in the registry.
  - ``params``: architecture and training args (e.g., ``layers``, ``epochs``, ``learning_rate``, ``early_stopping``).
    Refer to the autoencoder class docs (:py:obj:`autoencoders <energy_fault_detector.autoencoders>`) for specific params and their defaults.

- Anomaly score (``train.anomaly_score``):

  - ``name``: score name (e.g., ``rmse``, ``mahalanobis``).
  - ``params``: score-specific parameters. Refer to the :py:obj:`anomaly_scores <energy_fault_detector.anomaly_scores>` docs.

- Threshold selector (``train.threshold_selector``):

  - ``name``: e.g., ``quantile``, ``fbeta``.
  - ``fit_on_val``: fit the threshold on validation data only.
  - ``params``: selector-specific parameters (e.g., ``quantile`` for the quantile selector).
    See the :py:obj:`threshold_selectors <energy_fault_detector.threshold_selectors>` docs for more info on the settings.

Prediction options
^^^^^^^^^^^^^^^^^^
Under ``predict``, you can set:

- ``criticality.max_criticality``: cap the calculated criticality (anomaly counter) at this value.


Root cause analysis (ARCANA)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If a ``root_cause_analysis`` section is provided, ARCANA attempts to attribute anomalies to specific features using
those settings; otherwise default settings are used. For detailed documentation refer to
:py:obj:`ARCANA docs <energy_fault_detector.root_cause_analysis.arcana.Arcana>`.


Legacy params-based data preprocessing configuration (older versions)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Older configurations use ``params`` under ``train.data_preprocessor.params``.
These remain supported but are deprecated in favor of the steps-based mode.
When both ``steps`` and legacy ``params`` are present, ``steps`` takes precedence and the legacy ``params`` are ignored with a warning.

.. include:: old_config.yaml
   :literal:
9 changes: 4 additions & 5 deletions docs/index.rst
@@ -1,10 +1,10 @@
Energy Fault Detector - Autoencoder-based Fault Detection for the Future Energy System
============================================================
======================================================================================

**Energy Fault Detector** is an open-source Python package designed for the automated detection of anomalies in
operational data from renewable energy systems as well as power grids. It uses autoencoder-based normal behaviour
models to identify irregularities in operational data. In addition to the classic anomaly detection, the package
includes the unique ARCANA approach for root cause analysis and thus allows interpretable early fault detection.
includes the unique "ARCANA" approach for root cause analysis and thus allows interpretable early fault detection.
In addition to the pure ML models, the package also contains a range of preprocessing methods, which are particularly
useful for analyzing systems in the energy sector. A holistic `EnergyFaultDetector` framework is provided for easy use of all
these methods, which can be adapted to the respective use case via a single configuration file.
@@ -27,11 +27,10 @@ To install the `energy-fault-detector` package, run:
   :glob:
   :maxdepth: 2

   The Energy Fault Detector package <energy_fault_detector>
   usage_examples
   configuration
   logging
   changelog

   The EnergyFaultDetector package <modules>

Module index
==================
Expand Down