remdui
diff --git a/.gitignore b/.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 31 additions & 5 deletions
diff --git a/config/config_default.yml b/config/config_default.yml
Lines changed: 10 additions & 34 deletions
diff --git a/config/config_example_experiment.yml b/config/config_example_experiment.yml
Lines changed: 77 additions & 0 deletions
diff --git a/config/config_example_smri.yml b/config/config_example_smri.yml
Lines changed: 77 additions & 0 deletions
diff --git a/src/config/config_schema.py b/src/config/config_schema.py
Lines changed: 24 additions & 30 deletions
@@ -24,6 +24,8 @@ tests/.hypothesis/
 config/*
 !config/.gitkeep
 !config/config_default.yml
+!config/config_example_smri.yml
+!config/config_example_experiment.yml
 
 # Local data artifacts (never track user datasets)
 data/*
 
@@ -111,8 +111,8 @@ docker run --rm \
 ### Arguments
 
 The `main.py` file accepts the following required arguments:
-- `--config`: Path to the configuration file. Default: `config/config_default.yml`.
-- `--mode`: Mode to run the project in. Options: `train`, `inference`, `validate`, `tune`. Default: `train`.
+- `--config`: Configuration filename located under `./config` (for example `config_default.yml` or `config.yml`).
+- `--mode`: Mode to run the project in. Options: `train`, `inference`, `validate`, `tune`, `experiment`.
 
 The `main.py` file accepts the following optional arguments:
 - `--checkpoint`: Path to a checkpoint to load.
@@ -151,23 +151,49 @@ To skip the preprocessing pipeline, use the `--skip-preprocessing` flag.
 
 ### Configuration
 
-The project configuration is defined in the `config` directory. The configuration file is in `YAML` format. The default configuration file is `config/config_default.yml`.
+The project configuration is defined in the `config` directory and uses `YAML`.
 
-**Important:** Copy this file to create a custom configuration. The default configuration will be **overwritten** each time the application is run.
+- `config/config_default.yml`: generic runtime defaults intended for new projects and new datasets.
+- `config/config_example_smri.yml`: a non-runnable example template showing an sMRI-oriented setup with placeholder paths.
+- `config/config_example_experiment.yml`: an experiment-sweep template showing how to define custom `experiment` mode runs.
+
+At startup, the app ensures `config/config_default.yml` exists. If it already exists, it is kept as-is (not overwritten).
+For your own run configs, create `config/config.yml` (or another filename in `./config`) and pass it with `--config`.
 
 The configuration file contains the following sections:
 
 - `dataset`: Configuration for the dataset.
 - `model`: Configuration for the model.
 - `train`: Configuration for the training process.
-- `inference`: Configuration for the inference process.
 - `validation`: Configuration for the validation process.
 - `meta`: Meta information for the project.
 - `general`: General configuration for the project.
 - `system`: System configuration for the project.
 
 The configuration is validated against the schema before a task is run. Missing values are filled with default values from the default configuration file.
 
+`experiment` mode is fully config-driven via `model.experiment_matrix`:
+
+```yaml
+model:
+  components:
+    vae:
+      covariate_embedding: no_embedding
+      latent_dim: 32
+  experiment_matrix:
+    embedding_methods: [no_embedding, fair_embedding]
+    latent_dims: [8, 16, 32]
+    dataset_files: [dataset_a.csv, dataset_b.csv]
+    repetitions: 3
+```
+
+For a concrete copy-paste example similar to the previous site-based setup, see `config/config_example_experiment.yml`.
+That example uses explicit dataset filenames (`site_dataset_<rep>_site_<site>.rds`) with `repetitions: 1`.
+
+To reproduce the old "harmonized" branch behavior, run a second experiment config where:
+- `model.experiment_matrix.embedding_methods: [no_embedding]`
+- `model.experiment_matrix.dataset_files` contains only `harmonized_site_dataset_<rep>_site_<site>.rds` files.
+
 ### Output Files
 
 The application generates different types of artifacts during the execution of the project.
 
@@ -13,7 +13,7 @@ general:
   verbose: false
 meta:
   config_version: 2
-  description: Variational Autoencoder design experiment setup
+  description: Multivariate normative modeling configuration
   name: vae_basic
   version: 1
 dataset:
@@ -23,7 +23,7 @@ dataset:
   covariates: []
   skipped_covariates: []
   targets: []
-  input_data: generated_data.rds
+  input_data: your_dataset.csv
   data_type: tabular
   image_type: grayscale
   internal_file_format: hdf
@@ -42,40 +42,11 @@ dataset:
   - name: EncodingTransform
     type: preprocessing
     params:
-      default: z-score
-      one_hot_encoding:
-      - site
-      - sex
-      z-score:
-      - age
+      default: min-max
+      one_hot_encoding: []
+      z-score: []
       min-max: []
       raw: []
-  - name: SiteFilterTransform
-    type: preprocessing
-    params:
-      selected_site: -1
-      col_name: site
-  - name: WaveFilterTransform
-    type: preprocessing
-    params:
-      selected_wave: -1
-      col_name: wave
-  - name: AgeFilterTransform
-    type: preprocessing
-    params:
-      age_lowerbound: 0.0
-      age_upperbound: 100.0
-      col_name: age
-  - name: SexFilterTransform
-    type: preprocessing
-    params:
-      sex: -1
-      col_name: sex
-  - name: SampleLimitTransform
-    type: preprocessing
-    params:
-      max_samples: 1000
-      shuffle: true
 data_analysis:
   features:
     reconstruction_mse: true
@@ -388,6 +359,11 @@ model:
       decoder: mlp
       latent_dim: 32
       covariate_embedding: no_embedding
+  experiment_matrix:
+    embedding_methods: []
+    latent_dims: []
+    dataset_files: []
+    repetitions: 1
   hidden_layers:
   - 1024
   - 512
 
@@ -0,0 +1,77 @@
+# Example experiment configuration (customizable template).
+# This file is intended as a reference and uses placeholder dataset names.
+
+meta:
+  name: experiment_site_sweep_template
+  description: Example of a custom experiment sweep over embeddings, latent dims, and site-specific datasets
+
+dataset:
+  input_data: site_dataset_0_site_0.rds
+  covariates:
+  - age
+  - sex
+  - site
+  enable_transforms: true
+  transforms:
+  - name: DataCleaningTransform
+    type: preprocessing
+    params:
+      drop_na: true
+      remove_duplicates: true
+  - name: EncodingTransform
+    type: preprocessing
+    params:
+      default: z-score
+      one_hot_encoding:
+      - sex
+      - site
+      z-score:
+      - age
+      min-max: []
+      raw: []
+
+model:
+  architecture: vae
+  components:
+    vae:
+      encoder: mlp
+      decoder: mlp
+      covariate_embedding: no_embedding
+      latent_dim: 8
+  # Custom experiment sweep keys used by `--mode experiment`.
+  # This setup is similar to the previous thesis sweep, but fully config-driven.
+  experiment_matrix:
+    embedding_methods:
+    - no_embedding
+    - encoderdecoder_embedding
+    - fair_embedding
+    latent_dims:
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 8
+    - 12
+    - 16
+    # Equivalent to the previous setup: every (repetition, site) file is listed
+    # explicitly below, so `repetitions` stays at 1.
+    dataset_files:
+    - site_dataset_0_site_0.rds
+    - site_dataset_0_site_1.rds
+    - site_dataset_0_site_2.rds
+    - site_dataset_1_site_0.rds
+    - site_dataset_1_site_1.rds
+    - site_dataset_1_site_2.rds
+    - site_dataset_2_site_0.rds
+    - site_dataset_2_site_1.rds
+    - site_dataset_2_site_2.rds
+    repetitions: 1
+
+train:
+  loss_function: mse_vae
+  batch_size: 64
+  epochs: 100
+
+validation:
+  model: your_model_best.safetensors
@@ -0,0 +1,77 @@
+# Example-only sMRI configuration template.
+# This file is intentionally NOT runnable as-is because it references placeholder
+# data/model artifacts that are not part of this repository.
+
+meta:
+  name: smri_example_template
+  description: Example-only sMRI template (non-runnable placeholder values)
+
+dataset:
+  input_data: smri_example_dataset_not_in_repo.rds
+  unique_identifier_column: subject_id
+  row_data_leakage_columns:
+  - subject_id
+  skipped_columns:
+  - subject_id
+  - diagnosis
+  covariates:
+  - age
+  - sex
+  - site
+  - wave
+  enable_transforms: true
+  transforms:
+  - name: DataCleaningTransform
+    type: preprocessing
+    params:
+      drop_na: true
+      remove_duplicates: true
+  - name: EncodingTransform
+    type: preprocessing
+    params:
+      default: z-score
+      one_hot_encoding:
+      - sex
+      - site
+      - wave
+      z-score:
+      - age
+      min-max: []
+      raw: []
+  - name: SiteFilterTransform
+    type: preprocessing
+    params:
+      selected_site: -1
+      col_name: site
+  - name: WaveFilterTransform
+    type: preprocessing
+    params:
+      selected_wave: -1
+      col_name: wave
+  - name: AgeFilterTransform
+    type: preprocessing
+    params:
+      age_lowerbound: 6.0
+      age_upperbound: 25.0
+      col_name: age
+  - name: SexFilterTransform
+    type: preprocessing
+    params:
+      sex: -1
+      col_name: sex
+
+model:
+  components:
+    vae:
+      encoder: mlp
+      decoder: mlp
+      latent_dim: 16
+      covariate_embedding: fair_embedding
+
+train:
+  loss_function: mse_vae
+  batch_size: 64
+  epochs: 100
+
+validation:
+  model: smri_example_model_best.safetensors
@@ -102,7 +102,7 @@ class DatasetConfig(BaseModel):
     covariates: list[str] = []
     skipped_covariates: list[str] = []
     targets: list[str] = []
-    input_data: str = "generated_data.rds"
+    input_data: str = "your_dataset.csv"
     data_type: str = "tabular"
     image_type: str = "grayscale"
     internal_file_format: str = "hdf"
@@ -122,38 +122,13 @@ class DatasetConfig(BaseModel):
             name="EncodingTransform",
             type="preprocessing",
             params={
-                "default": "z-score",
-                "one_hot_encoding": ["site", "sex"],
-                "z-score": ["age"],
+                "default": "min-max",
+                "one_hot_encoding": [],
+                "z-score": [],
                 "min-max": [],
                 "raw": [],
             },
         ),
-        TransformConfig(
-            name="SiteFilterTransform",
-            type="preprocessing",
-            params={"selected_site": -1, "col_name": "site"},
-        ),
-        TransformConfig(
-            name="WaveFilterTransform",
-            type="preprocessing",
-            params={"selected_wave": -1, "col_name": "wave"},
-        ),
-        TransformConfig(
-            name="AgeFilterTransform",
-            type="preprocessing",
-            params={"age_lowerbound": 0.0, "age_upperbound": 100.0, "col_name": "age"},
-        ),
-        TransformConfig(
-            name="SexFilterTransform",
-            type="preprocessing",
-            params={"sex": -1, "col_name": "sex"},
-        ),
-        TransformConfig(
-            name="SampleLimitTransform",
-            type="preprocessing",
-            params={"max_samples": 1000, "shuffle": True},
-        ),
     ]
 
     @model_validator(mode="after")
@@ -178,11 +153,27 @@ class MetaConfig(BaseModel):
     """Metadata configuration."""
 
     config_version: int = 2
-    description: str = "Variational Autoencoder design experiment setup"
+    description: str = "Multivariate normative modeling configuration"
     name: str = "vae_basic"
     version: int = 1
 
 
+class ExperimentMatrixConfig(BaseModel):
+    """Experiment sweep configuration used by experiment mode."""
+
+    embedding_methods: list[str] = []
+    latent_dims: list[int] = []
+    dataset_files: list[str] = []
+    repetitions: int = Field(default=1, ge=1)
+
+    @model_validator(mode="after")
+    def validate_latent_dims(self) -> "ExperimentMatrixConfig":
+        """Ensure configured latent dimensions are strictly positive."""
+        if any(latent_dim <= 0 for latent_dim in self.latent_dims):
+            raise ValueError("All experiment_matrix.latent_dims must be > 0.")
+        return self
+
+
 class ModelConfig(BaseModel):
     """Model configuration."""
 
@@ -222,6 +213,9 @@ class ModelConfig(BaseModel):
             # },
         }
     )
+    experiment_matrix: ExperimentMatrixConfig = Field(
+        default_factory=ExperimentMatrixConfig
+    )
 
     # Model components
     hidden_layers: list[int] = [1024, 512, 256]