diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index e1c7c862..207bd1c5 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,7 +4,8 @@ Changelog
 current
 --------------------
-*
+* Add the creation module and the create entry point: YAML-based metadata creation, a template feature to keep metadata definitions DRY, scaffolding of the metadata structure, generation of metadata from existing sources such as datapackages and CSV files, and assembly of the full datapackage.json with saving to file (`#127 <https://github.com/rl-institut/super-repo/pull/127>`_)
+
 1.1.0 (2025-03-25)
 --------------------
diff --git a/docs/create.md b/docs/create.md
new file mode 100644
index 00000000..20b816e7
--- /dev/null
+++ b/docs/create.md
@@ -0,0 +1,159 @@
+# OMI “Create” Entry Point
+
+This mini-guide explains how to use the **programmatic entry points** that turn your split YAML metadata (dataset + template + resources) into a single OEMetadata JSON document.
+
+> If you’re looking for how to author the YAML files and how templating works, see the main **Assembly Guide** in the `creation` module directory. This page just shows how to *call* the entry points.
+
+---
+
+## What it does
+
+The functions in `omi.create` wrap the full assembly pipeline:
+
+1. **Discover / load** your YAML parts (dataset, optional template, resources).
+2. **Apply the template** to each resource (deep merge; resource wins; keywords/topics/languages concatenate).
+3. **Generate & validate** the final OEMetadata JSON using the official schema (via `OEMetadataCreator`).
+4. **Write** the result to disk (`build_from_yaml`) or many results to a directory (`build_many_from_yaml`).
+
+---
+
+## API
+
+```python
+from omi.create import build_from_yaml, build_many_from_yaml
+```
+
+### `build_from_yaml(base_dir, dataset_id, output_file, *, index_file=None) -> None`
+
+Assemble **one** dataset and write it to `output_file` (JSON).
+
+* `base_dir` (`str | Path`): Root that contains:
+
+  * `datasets/<dataset_id>.dataset.yaml`
+  * `datasets/<dataset_id>.template.yaml` *(optional)*
+  * `resources/<dataset_id>/*.resource.yaml`
+* `dataset_id` (`str`): Logical dataset name (e.g. `"powerplants"`).
+* `output_file` (`str | Path`): Path to write the generated OEMetadata JSON.
+* `index_file` (`str | Path | None`): Optional explicit mapping file (`metadata_index.yaml`). If provided, paths are taken from the index instead of convention.
+
+### `build_many_from_yaml(base_dir, output_dir, *, dataset_ids=None, index_file=None) -> None`
+
+Assemble **multiple** datasets and write each as `<output_dir>/<dataset_id>.json`.
+
+* `base_dir` (`str | Path`): Same as above.
+* `output_dir` (`str | Path`): Destination directory for one JSON file per dataset.
+* `dataset_ids` (`list[str] | None`): Limit to specific datasets. If `None`, we:
+
+  * Use keys from `index_file` when provided, **else**
+  * Discover all `datasets/*.dataset.yaml` in `base_dir`.
+* `index_file` (`str | Path | None`): Optional `metadata_index.yaml`.
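+
+For reference, `index_file` points to an explicit mapping file whose format is documented in the Assembly Guide in the `creation` module. A minimal sketch (paths are relative to `base_dir`; names are illustrative):
+
+```yaml
+# metadata/metadata_index.yaml
+datasets:
+  powerplants:
+    dataset: datasets/powerplants.dataset.yaml
+    template: datasets/powerplants.template.yaml   # optional
+    resources:
+      - resources/powerplants/oemetadata_table.resource.yaml
+```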
+ +--- + +## Quick examples + +### One dataset (convention-based discovery) + +```python +from omi.create import build_from_yaml + +build_from_yaml( + base_dir="./metadata", + dataset_id="powerplants", + output_file="./out/powerplants.json", +) +``` + +Directory layout: + +```bash +metadata/ + datasets/ + powerplants.dataset.yaml + powerplants.template.yaml # optional + resources/ + powerplants/ + *.resource.yaml +``` + +### One dataset (explicit index) + +```python +from omi.create import build_from_yaml + +build_from_yaml( + base_dir="./metadata", + dataset_id="powerplants", + output_file="./out/powerplants.json", + index_file="./metadata/metadata_index.yaml", +) +``` + +### Many datasets (discover all) + +```python +from omi.create import build_many_from_yaml + +build_many_from_yaml( + base_dir="./metadata", + output_dir="./out", +) +# writes ./out/.json for each dataset found +``` + +### Many datasets (index + subset) + +```python +from omi.create import build_many_from_yaml + +build_many_from_yaml( + base_dir="./metadata", + output_dir="./out", + dataset_ids=["powerplants", "households"], + index_file="./metadata/metadata_index.yaml", +) +``` + +--- + +## Notes & behavior + +* Output JSON is written with `indent=2` and **`ensure_ascii=False`** to preserve characters like `©`. +* Validation happens via `OEMetadataCreator` using the official schema provided by `oemetadata` (imported through `omi.base.get_metadata_specification`). +* If a dataset YAML is missing, `FileNotFoundError` is raised. +* If schema validation fails, you’ll get an exception from `omi.validation`. Catch it where you call the entry point if you want to handle/report errors. + +--- + +## Using in 3rd Party code like data pipelines + +```python +from pathlib import Path +from omi.create import build_from_yaml + +def build_oemetadata_callable(**context): + base = Path("/project/metadata") + out = Path("/project/metadata/out/powerplants.json") + build_from_yaml(base, "powerplants", out) + # optionally push to airflow XCom, publish, upload, etc. +``` + +--- + +## Testing tips + +* For **unit tests** of `omi.create`, patch `omi.create.assemble_metadata_dict` / `assemble_many_metadata` and verify files are written. +* For **integration tests**, put real example YAMLs under `tests/test_data/create/metadata/` and call `build_from_yaml` end-to-end. + +--- + +## Troubleshooting + +* **“Dataset YAML not found”** + Check `base_dir/datasets/.dataset.yaml` exists, or supply the correct `index_file`. + +* **Unicode characters appear escaped (`\u00a9`)** + Ensure you’re not re-writing the JSON elsewhere with `ensure_ascii=True`. + +* **Template not applied** + Confirm your template file name matches `.template.yaml` (or is correctly referenced from the index), and the keys you expect to inherit aren’t already set in the resource (resource values win). diff --git a/pyproject.toml b/pyproject.toml index cd3fadfd..585b49fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,3 +78,6 @@ unfixable = ["UP007", "I001"] "*/__init__.py" = [ "D104", # Missing docstring in public package ] + +[omi.scripts] +omi = "omi.cli:main" diff --git a/src/omi/cli.py b/src/omi/cli.py index 6b4d0aac..fbfb722e 100644 --- a/src/omi/cli.py +++ b/src/omi/cli.py @@ -1,29 +1,118 @@ """ -Module that contains the command line app. +Command line interface for OMI. -Why does this file exist, and why not put this in __main__? 
+This CLI only supports the split-files layout: +- datasets/.dataset.yaml +- datasets/.template.yaml (optional) +- resources//*.resource.yaml +(optionally wired via metadata_index.yaml) - You might be tempted to import things from __main__ later, but that will cause - problems: the code will get executed twice: +Usage: +omi assemble \ + --base-dir ./metadata \ + --dataset-id powerplants \ + --output-file ./out/powerplants.json \ + --index-file ./metadata/metadata_index.yaml # optional - - When you run `python -m omi` python will execute - ``__main__.py`` as a script. That means there won't be any - ``omi.__main__`` in ``sys.modules``. - - When you import __main__ it will get executed again (as a module) because - there's no ``omi.__main__`` in ``sys.modules``. - - Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration """ +from __future__ import annotations + +from pathlib import Path +from typing import Optional + import click +from omi.creation.creator import OEMetadataCreator +from omi.creation.init import init_dataset, init_resources_from_files +from omi.creation.utils import apply_template_to_resources, load_parts + @click.group() def grp() -> None: - """Init click group.""" + """OMI CLI.""" + + +@grp.command("assemble") +@click.option( + "--base-dir", + required=True, + type=click.Path(file_okay=False, path_type=Path), + help="Root directory containing 'datasets/' and 'resources/'.", +) +@click.option("--dataset-id", required=True, help="Logical dataset id (e.g. 'powerplants').") +@click.option( + "--output-file", + required=True, + type=click.Path(dir_okay=False, path_type=Path), + help="Path to write the generated OEMetadata JSON.", +) +@click.option( + "--index-file", + default=None, + type=click.Path(dir_okay=False, path_type=Path), + help="Optional metadata index YAML for explicit mapping.", +) +def assemble_cmd(base_dir: Path, dataset_id: str, output_file: Path, index_file: Optional[Path]) -> None: + """Assemble OEMetadata from split YAML files and write JSON to OUTPUT_FILE.""" + # Load pieces + version, dataset, resources, template = load_parts(base_dir, dataset_id, index_file=index_file) + merged_resources = apply_template_to_resources(resources, template) + + # Build & save with the correct spec version + creator = OEMetadataCreator(oem_version=version) + creator.save(dataset, merged_resources, output_file, ensure_ascii=False, indent=2) + + +@click.group() +def init() -> None: + """Scaffold OEMetadata split-files layout.""" + + +@init.command("dataset") +@click.argument("base_dir", type=click.Path(file_okay=False, path_type=Path)) +@click.argument("dataset_id") +@click.option("--oem-version", default="OEMetadata-2.0", show_default=True) +@click.option("--resource", "resources", multiple=True, help="Initial resource names (repeatable).") +@click.option("--overwrite", is_flag=True, help="Overwrite existing files.") +def init_dataset_cmd( + base_dir: Path, + dataset_id: str, + oem_version: str, + resources: tuple[str, ...], + *, + overwrite: bool, +) -> None: + """Initialize a split-files OEMetadata dataset layout under BASE_DIR.""" + res = init_dataset(base_dir, dataset_id, oem_version=oem_version, resources=resources, overwrite=overwrite) + click.echo(f"dataset: {res.dataset_yaml}") + click.echo(f"template: {res.template_yaml}") + for p in res.resource_yamls: + click.echo(f"resource: {p}") + + +@init.command("resources") +@click.argument("base_dir", type=click.Path(file_okay=False, path_type=Path)) +@click.argument("dataset_id") 
+@click.argument("files", nargs=-1, type=click.Path(exists=True, dir_okay=False, path_type=Path)) +@click.option("--oem-version", default="OEMetadata-2.0", show_default=True) +@click.option("--overwrite", is_flag=True, help="Overwrite existing files.") +def init_resources_cmd( + base_dir: Path, + dataset_id: str, + files: tuple[Path, ...], + oem_version: str, + *, + overwrite: bool, +) -> None: + """Create resource YAML files for DATASET_ID from the given FILES.""" + outs = init_resources_from_files(base_dir, dataset_id, files, oem_version=oem_version, overwrite=overwrite) + for p in outs: + click.echo(p) -cli = click.CommandCollection(sources=[grp]) +# Keep CommandCollection for backwards compatibility with your entry point +cli = click.CommandCollection(sources=[grp, init]) def main() -> None: diff --git a/src/omi/create.py b/src/omi/create.py new file mode 100644 index 00000000..2f8faaaf --- /dev/null +++ b/src/omi/create.py @@ -0,0 +1,75 @@ +"""Entry point for OEMetadata creation (split-files layout only).""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Optional, Union + +from omi.creation.assembler import assemble_many_metadata, assemble_metadata_dict + + +def build_from_yaml( + base_dir: Union[str, Path], + dataset_id: str, + output_file: Union[str, Path], + *, + index_file: Optional[Union[str, Path]] = None, +) -> None: + """ + Assemble one dataset and write the resulting OEMetadata JSON to a file. + + Parameters + ---------- + base_dir : Union[str, Path] + Base directory containing the split-files dataset structure. + dataset_id : str + The dataset ID to assemble. + output_file : Union[str, Path] + Path to write the resulting OEMetadata JSON file. + index_file : Optional[Union[str, Path]], optional + Optional path to an index file for resolving cross-dataset references, + by default None. + """ + md = assemble_metadata_dict(base_dir, dataset_id, index_file=index_file) + Path(output_file).parent.mkdir(parents=True, exist_ok=True) + Path(output_file).write_text(json.dumps(md, indent=2, ensure_ascii=False), encoding="utf-8") + + +def build_many_from_yaml( + base_dir: Union[str, Path], + output_dir: Union[str, Path], + *, + dataset_ids: Optional[list[str]] = None, + index_file: Optional[Union[str, Path]] = None, +) -> None: + """ + Assemble multiple datasets and write each as .json to output_dir. + + Parameters + ---------- + base_dir : Union[str, Path] + Base directory containing the split-files dataset structure. + output_dir : Union[str, Path] + Directory to write the resulting OEMetadata JSON files. + dataset_ids : Optional[list[str]], optional + Optional list of dataset IDs to assemble. If None, all datasets found + in base_dir will be assembled, by default None. + index_file : Optional[Union[str, Path]], optional + Optional path to an index file for resolving cross-dataset references, + by default None. 
+ """ + out_dir = Path(output_dir) + out_dir.mkdir(parents=True, exist_ok=True) + + results = assemble_many_metadata( + base_dir, + dataset_ids=dataset_ids, + index_file=index_file, + as_dict=True, # keep it as a mapping id -> metadata + ) + for ds_id, md in results.items(): + (out_dir / f"{ds_id}.json").write_text( + json.dumps(md, indent=2, ensure_ascii=False), + encoding="utf-8", + ) diff --git a/src/omi/creation/README.md b/src/omi/creation/README.md new file mode 100644 index 00000000..7fedbfef --- /dev/null +++ b/src/omi/creation/README.md @@ -0,0 +1,496 @@ +# OMI OEMetadata Assembly Guide + +This guide explains how to author, assemble, and validate **OEMetadata** using **YAML files** with OMI. It covers file structure, templating behavior, discovery vs. explicit mapping, Python APIs, multi-dataset usage, initialization scaffolding, testing, and common pitfalls. + +--- + +## Table of Contents + +1. [Overview](#overview) +2. [Concepts & Data Flow](#concepts--data-flow) +3. [Repository Layout](#repository-layout) +4. [YAML File Formats](#yaml-file-formats) + + * [Dataset YAML](#dataset-yaml) + * [Template YAML (optional)](#template-yaml-optional) + * [Resource YAML](#resource-yaml) + * [Index YAML (optional)](#index-yaml-optional) +5. [Templating Rules](#templating-rules) +6. [Discovery vs. Index Mapping](#discovery-vs-index-mapping) +7. [Programmatic Usage](#programmatic-usage) + + * [Minimal Usage](#minimal-usage) + * [With Index Mapping](#with-index-mapping) + * [Manual Loading (No Discovery)](#manual-loading-no-discovery) +8. [Multi-dataset Assembly](#multi-dataset-assembly) +9. [Spec-Driven Output Ordering](#spec-driven-output-ordering) +10. [Project Initialization (Scaffolding)](#project-initialization-scaffolding) +11. [Airflow Integration Example](#airflow-integration-example) +12. [Testing](#testing) +13. [Validation & Error Handling](#validation--error-handling) +14. [Auto-Generation From Directory (Optional Onboarding)](#auto-generation-from-directory-optional-onboarding) +15. [Filtering Irrelevant Files (Optional)](#filtering-irrelevant-files-optional) +16. [Design Notes & Extensibility](#design-notes--extensibility) +17. [FAQ](#faq) + +--- + +## Overview + +* **Goal:** Author OEMetadata as **YAML** (dataset + resources), keep it **DRY** via **templates**, assemble into a single **JSON** metadata document, and **validate** it with the official schema. +* **Core ideas:** + + * Maintain a dataset YAML, an optional template YAML (applied to all resources), and one or more resource YAMLs. + * OMI assembles + validates metadata into final OEMetadata JSON. + * Works in pipelines (e.g., Airflow) and plain Python. + +--- + +## Concepts & Data Flow + +1. **Authoring:** + + * `datasets/.dataset.yaml` + * `datasets/.template.yaml` *(optional)* + * `resources//*.resource.yaml` +2. **Assembly:** + + * Load dataset, template, and resource YAML files. + * Apply template → deep merge; resource overrides. + * Create OEMetadata JSON via `OEMetadataCreator` and validate. +3. **Storage:** + + * Assembly returns a Python `dict`. Store wherever you like (file/DB/API). + +--- + +## Repository Layout + +```bash +metadata/ + datasets/ + .dataset.yaml + .template.yaml # optional + resources/ + / + .resource.yaml + .resource.yaml + metadata_index.yaml # optional explicit mapping +``` + +Use the **convention** above or an **index** file for explicit mapping. 
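+
+Under the convention, discovery is handled by helpers in `omi.creation.utils`; the sketch below shows what `discover_paths` resolves for a given dataset id (the `./metadata` path is illustrative):
+
+```python
+from omi.creation.utils import discover_paths
+
+dataset, template, resources = discover_paths("./metadata", "powerplants")
+# dataset   -> Path to datasets/powerplants.dataset.yaml, or None if missing
+# template  -> Path to datasets/powerplants.template.yaml, or None if missing
+# resources -> sorted list of resources/powerplants/*.resource.yaml paths
+```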
+ +--- + +## YAML File Formats + +### Dataset YAML + +```yaml +# metadata/datasets/powerplants.dataset.yaml +version: "OEMetadata-2.0.4" # optional (default: OEMetadata-2.0.4) +dataset: + name: oep_oemetadata + title: OEP OEMetadata + description: A dataset for the OEMetadata examples. + "@id": https://databus.openenergyplatform.org/oeplatform/supply/wri_global_power_plant_database/ +``` + +> Backwards compatibility: dataset fields can also be at top-level; OMI treats that as `dataset: {...}`. + +--- + +### Template YAML (optional) + +Applied to **every** resource (unless overridden). Keeps YAML DRY. + +```yaml +# metadata/datasets/powerplants.template.yaml +licenses: + - name: ODbL-1.0 + title: Open Data Commons Open Database License 1.0 + path: https://opendatacommons.org/licenses/odbl/1-0/index.html + instruction: > + You are free to share and change, but you must attribute, and + share derivations under the same license. See https://tldrlegal.com/license/odc-open-database-license-(odbl) + for further information. + attribution: © Reiner Lemoine Institut + copyrightStatement: https://github.com/OpenEnergyPlatform/oemetadata/blob/production/LICENSE.txt + +context: + title: NFDI4Energy + homepage: https://nfdi4energy.uol.de/ + documentation: https://nfdi4energy.uol.de/sites/about_us/ + sourceCode: https://github.com/NFDI4Energy + publisher: Open Energy Platform (OEP) + publisherLogo: https://github.com/OpenEnergyPlatform/organisation/blob/production/logo/OpenEnergyFamily_Logo_OpenEnergyPlatform.svg + contact: contact@example.com + fundingAgency: " Deutsche Forschungsgemeinschaft (DFG)" + fundingAgencyLogo: https://upload.wikimedia.org/wikipedia/commons/8/86/DFG-logo-blau.svg + grantNo: "501865131" + +topics: [model_draft] +languages: [en-GB, de-DE] +keywords: [example, ODbL-1.0, NFDI4Energy] +``` + +--- + +### Resource YAML + +```yaml +# metadata/resources/powerplants/oemetadata_table.resource.yaml +name: oemetadata_table +type: table +title: OEMetadata Table Template +description: Example table used to illustrate the OEMetadata structure and features. + +# Resource-specific attributes +path: http://openenergyplatform.org/dataedit/view/model_draft/oemetadata_table +scheme: http +format: CSV +encoding: UTF-8 + +dialect: + decimalSeparator: "." + csv: + delimiter: ";" + +schema: + fields: + - name: id + type: integer + description: Unique identifier + nullable: false + # ... more fields ... + primaryKey: [id] + foreignKeys: + - fields: [id, version] + reference: + resource: model_draft.oep_oemetadata_table_example_version + fields: [id, version] + +"@id": https://databus.openenergyplatform.org/oeplatform/supply/wri_global_power_plant_database/2022-11-07/wri_global_power_plant_database_variant=data.csv + +sources: + - title: IPCC Sixth Assessment Report (AR6) - Climate Change 2023 - Synthesis Report + authors: [Hoesung Lee, José Romero, The Core Writing Team] + publicationYear: "2023" + path: https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_FullVolume.pdf + sourceLicenses: + - name: CC-BY-4.0 + title: Creative Commons Attribution 4.0 International + path: https://creativecommons.org/licenses/by/4.0/legalcode + instruction: > + You are free to share and change, but you must attribute. + See https://tldrlegal.com/license/odc-open-database-license-odbl for further information. 
+ attribution: © Intergovernmental Panel on Climate Change 2023 + copyrightStatement: https://www.ipcc.ch/copyright/ +``` + +Second resource: + +```yaml +# metadata/resources/powerplants/data_2.resource.yaml +name: data_2 +type: table +title: My Second Resource +path: reGon/metadata/data_2.csv +scheme: file +format: csv +mediatype: text/csv +encoding: utf-8 +schema: + fields: + - name: id + type: integer + nullable: true + - name: i + type: integer + nullable: true + - name: o + type: string + nullable: true + primaryKey: [id] +``` + +--- + +### Index YAML (optional) + +Explicit mappings instead of convention: + +```yaml +# metadata/metadata_index.yaml +datasets: + powerplants: + dataset: datasets/powerplants.dataset.yaml + template: datasets/powerplants.template.yaml + resources: + - resources/powerplants/oemetadata_table.resource.yaml + - resources/powerplants/data_2.resource.yaml +``` + +--- + +## Templating Rules + +* **Deep merge** for dictionaries (e.g., `context`): + Resource **overrides**; missing nested keys are **filled** from template. +* **Lists**: + **Concatenate** for `keywords`, `topics`, `languages` (resource first, then template-only items). + For other lists (e.g., `licenses`, `contributors`): **resource wins** (no concat). + *(Modify via `DEFAULT_CONCAT_LIST_KEYS` if you want different behavior.)* +* **Scalars**: resource value **wins**. + +--- + +## Discovery vs. Index Mapping + +* **Discovery (convention):** + `datasets/.dataset.yaml`, `datasets/.template.yaml`, `resources//*.resource.yaml` + → No index needed. +* **Index (explicit):** + Provide `metadata_index.yaml` with explicit paths relative to your base directory. + +--- + +## Programmatic Usage + +### Minimal Usage + +```python +from omi.creation.assembly import assemble_metadata_dict + +metadata = assemble_metadata_dict(base_dir="./metadata", dataset_id="powerplants") +``` + +### With Index Mapping + +```python +metadata = assemble_metadata_dict( + base_dir="./metadata", + dataset_id="powerplants", + index_file="./metadata/metadata_index.yaml", +) +``` + +### Manual Loading (No Discovery) + +```python +from pathlib import Path +from omi.creation.creator import OEMetadataCreator +from omi.creation.utils import load_yaml, apply_template_to_resources + +dataset = load_yaml(Path("./metadata/datasets/powerplants.dataset.yaml")).get("dataset", {}) +template = load_yaml(Path("./metadata/datasets/powerplants.template.yaml")) +resources = [ + load_yaml(Path("./metadata/resources/powerplants/oemetadata_table.resource.yaml")), + load_yaml(Path("./metadata/resources/powerplants/data_2.resource.yaml")), +] +resources = apply_template_to_resources(resources, template) + +creator = OEMetadataCreator(oem_version="OEMetadata-2.0.4") +metadata = creator.generate_metadata(dataset, resources) +``` + +> `OEMetadataCreator` injects `@context` and `metaMetadata` from the spec and validates the result. + +--- + +## Multi-dataset Assembly + +Assemble **N datasets** in one call: + +```python +from omi.creation.assembly import assemble_many_metadata + +# Discover by convention (datasets/*.dataset.yaml) +all_metadata = assemble_many_metadata(base_dir="./metadata") + +# From explicit index +all_metadata = assemble_many_metadata( + base_dir="./metadata", index_file="./metadata/metadata_index.yaml" +) + +# Subset +some = assemble_many_metadata(base_dir="./metadata", dataset_ids=["powerplants", "households"]) +``` + +Result is a dict `{dataset_id: metadata}` by default. 
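+
+To persist each assembled document, iterate over the mapping yourself; this mirrors what `omi.create.build_many_from_yaml` does (the `./out` path is illustrative):
+
+```python
+import json
+from pathlib import Path
+
+out_dir = Path("./out")
+out_dir.mkdir(parents=True, exist_ok=True)
+for ds_id, md in all_metadata.items():
+    # one JSON file per dataset, unicode preserved
+    (out_dir / f"{ds_id}.json").write_text(
+        json.dumps(md, indent=2, ensure_ascii=False),
+        encoding="utf-8",
+    )
+```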
+ +--- + +## Spec-Driven Output Ordering + +For human-friendly JSON key order without hard-coded lists, order by the **official example** (fallback: schema `properties`): + +```python +from omi.creation.assembly import assemble_metadata_dict +from omi.creation.creator import OEMetadataCreator +from omi.creation.utils import order_with_spec + +creator = OEMetadataCreator(oem_version="OEMetadata-2.0.4") +metadata = assemble_metadata_dict("./metadata", "powerplants") + +ordered = order_with_spec(metadata, creator.oem_spec) # uses spec.example and schema +``` + +Write with preserved unicode: + +```python +import json, pathlib +out = pathlib.Path("./out/powerplants.json") +out.parent.mkdir(parents=True, exist_ok=True) +out.write_text(json.dumps(ordered, indent=2, ensure_ascii=False), encoding="utf-8") +``` + +--- + +## Project Initialization (Scaffolding) + +Create a metadata skeleton **from the spec** (no inline templates): + +```python +from omi.creation.scaffold import init_skeleton_from_spec + +paths = init_skeleton_from_spec( + base_dir="./metadata", + dataset_id="powerplants", + oem_version="OEMetadata-2.0.4", + resource_name="oemetadata_table", + with_index=True, # creates metadata_index.yaml + force=False, # do not overwrite +) +``` + +This imports the spec via: + +```python +from omi.base import get_metadata_specification +``` + +…and derives: + +* `datasets/.dataset.yaml` (with version from spec) +* `datasets/.template.yaml` (from `oem_spec.template` or pruned example resource) +* `resources//sample.resource.yaml` (sanitized from example) +* optional `metadata_index.yaml` + +You can expose a CLI command `omi init` that wraps `init_skeleton_from_spec`. + +--- + +## Airflow Integration Example + +```python +from omi.creation.assembly import assemble_metadata_dict + +def build_oemetadata_for_powerplants(**context): + md = assemble_metadata_dict( + base_dir="/opt/airflow/dags/metadata", + dataset_id="powerplants", + index_file="/opt/airflow/dags/metadata/metadata_index.yaml", + ) + context["ti"].xcom_push(key="oemetadata", value=md) +``` + +--- + +## Testing + +* **Assembly test** (uses a fake creator): see `tests/test_assembly.py` example in this doc. +* **Utils tests** (I/O, discovery, merging): see `tests/test_creation_utils.py`. + It covers: + + * `load_parts` (template application) + * `_merge_lists`, `deep_apply_template_to_resource`, `apply_template_to_resources` + * `load_yaml` + * `discover_paths`, `resolve_from_index`, `load_parts` + * `discover_dataset_ids`, `discover_dataset_ids_from_index` + +Run: + +```bash +pytest -q +``` + +--- + +## Validation & Error Handling + +`OEMetadataCreator.generate_metadata()` validates with the official schema: + +```python +from omi.validation import ValidationError + +try: + metadata = assemble_metadata_dict("./metadata", "powerplants") +except ValidationError as e: + print("Validation failed:", e) +``` + +**Common causes**: + +* Missing required field keys (e.g., a schema field without `"nullable"`). +* Wrong types (e.g., non-URI where `format: uri` is required). +* Invalid list shapes (e.g., `primaryKey`, `foreignKeys`). + +--- + +## Auto-Generation From Directory (Optional Onboarding) + +You can bootstrap YAMLs from a directory or zip: + +* infer resources from file names/extensions +* for CSV, infer a table schema +* emit dataset YAML + one resource YAML per file + +Use filters to skip temp/log/backup files (see next section). 
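+
+The `omi.creation.generator` module provides a helper for this: it scans a directory (or zip), infers one resource entry per data file, and writes a single combined YAML. A sketch (directory, output path, and dataset metadata are illustrative):
+
+```python
+from omi.creation.generator import FileFilterOptions, generate_oemetadata_yaml_from_datapackage
+
+generate_oemetadata_yaml_from_datapackage(
+    directory="./raw_data",                  # directory or zip file with data files
+    output_yaml="./generated_metadata.yaml",
+    dataset_metadata={
+        "name": "example_dataset",
+        "title": "Example Dataset",
+        "description": "Autogenerated from directory content.",
+        "@id": "https://example.org/dataset/example_dataset",
+    },
+    filter_opts=FileFilterOptions(exclude_hidden=True),
+)
+```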
+ +--- + +## Filtering Irrelevant Files (Optional) + +When scanning directories, exclude noise such as backup and editor artifacts: + +```python +exclude_extensions = {".log", ".tmp", ".bak", ".DS_Store", ".md"} +exclude_patterns = {"*_backup.*", "*~", "*.old", "*.ignore"} +exclude_hidden = True +``` + +--- + +## Design Notes & Extensibility + +* **Separation of concerns**: + + * `utils`: YAML loading, discovery, deep merge, ordering by spec. + * `assembly`: Orchestrates load → merge → create → (optionally) order. + * `creator`: Pulls spec via `get_metadata_specification`, injects `@context` and `metaMetadata`, validates. + * `scaffold`: Initializes a project from the **spec/example** (no inline strings). +* **Storage-agnostic**: assembly returns a dict; saving is up to you. +* **Configurable merging**: tweak `DEFAULT_CONCAT_LIST_KEYS` to change list concat behavior. + +--- + +## FAQ + +**Q: Can resource YAML override template-provided `licenses`?** +A: Yes. By default, resource lists override template lists except for `keywords`, `topics`, `languages` (which concatenate). Add `"licenses"` to `DEFAULT_CONCAT_LIST_KEYS` if you want concatenation. + +**Q: Where do `@context` and `metaMetadata` come from?** +A: `OEMetadataCreator` loads the spec (`get_metadata_specification(oem_version)`) and injects both before validation. + +**Q: Why does JSON show `\u00a9` instead of `©`?** +A: Use `ensure_ascii=False` in `json.dump` to preserve unicode characters. + +**Q: I got a validation error: `'nullable' is a required property`.** +A: Ensure each `schema.fields[]` has **`name`**, **`type`**, **`nullable`**. If you auto-generate, set `nullable: false` unless you detect nulls. + +**Q: Can I reorder output keys to match the official example?** +A: Yes. Use `order_with_spec(metadata, creator.oem_spec)` for spec-driven ordering (no hard-coded key lists). + +**Q: Can I manage multiple datasets in one metadata module?** +A: Yes. Use `assemble_many_metadata(...)` to discover/assemble **N datasets** at once (by convention or index). diff --git a/src/omi/creation/__init__.py b/src/omi/creation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/omi/creation/assembler.py b/src/omi/creation/assembler.py new file mode 100644 index 00000000..edaa4318 --- /dev/null +++ b/src/omi/creation/assembler.py @@ -0,0 +1,96 @@ +"""Assemble OEMetadata dictionary from parts: dataset, template, and resources.""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Any, Optional, Union + +from .creator import OEMetadataCreator +from .utils import ( + apply_template_to_resources, + discover_dataset_ids, + discover_dataset_ids_from_index, + load_parts, +) + +if TYPE_CHECKING: + from collections.abc import Iterable + + +def assemble_metadata_dict( + base_dir: Union[str, Path], + dataset_id: str, + index_file: Optional[Union[str, Path]] = None, +) -> dict[str, Any]: + """ + Load dataset/template/resources; apply template; validate via creator; return dict. + + Parameters + ---------- + base_dir: Union[str, Path] + Base directory containing datasets, templates, and resources. + dataset_id: str + Identifier for the dataset to load. + index_file: Optional[Union[str, Path]] + Optional path to an index YAML file for resolving dataset parts. + + Returns + ------- + Dict[str, Any] + The assembled and validated OEMetadata dictionary. 
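+
+    Examples
+    --------
+    Illustrative usage (paths are placeholders)::
+
+        metadata = assemble_metadata_dict("./metadata", "powerplants")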
+ """ + version, dataset, resources, template = load_parts(base_dir, dataset_id, index_file) + merged_resources = apply_template_to_resources(resources, template) + creator = OEMetadataCreator(oem_version=version) + return creator.generate_metadata(dataset, merged_resources) + + +def assemble_many_metadata( + base_dir: Union[str, Path], + dataset_ids: Optional[Iterable[str]] = None, + index_file: Optional[Union[str, Path]] = None, + *, + as_dict: bool = True, +) -> Union[dict[str, dict], list[tuple[str, dict]]]: + """ + Assemble OEMetadata for multiple datasets in one call. + + - If dataset_ids is None: + * when index_file is provided -> use keys from index + * otherwise -> discover by 'datasets/*.dataset.yaml' + - Returns a mapping {dataset_id: metadata} if as_dict=True, + else a list of (dataset_id, metadata) pairs in sorted id order. + + Parameters + ---------- + base_dir : Union[str, Path] + Base directory containing datasets, templates, and resources. + dataset_ids : Optional[Iterable[str]], optional + Optional iterable of dataset IDs to assemble. If None, all datasets found + in base_dir will be assembled, by default None. + index_file : Optional[Union[str, Path]], optional + Optional path to an index YAML file for resolving dataset parts. + as_dict : bool, optional + Whether to return results as a dict mapping dataset_id to metadata. If False, + returns a list of (dataset_id, metadata) tuples, by default True. + + Returns + ------- + Union[dict[str, dict], list[tuple[str, dict]]] + Assembled OEMetadata for each dataset. + """ + base = Path(base_dir) + + if dataset_ids is None: + ids = discover_dataset_ids_from_index(index_file) if index_file else discover_dataset_ids(base) + else: + ids = list(dataset_ids) + + results_pairs: list[tuple[str, dict]] = [] + for ds_id in ids: + md = assemble_metadata_dict(base, ds_id, index_file=index_file) + results_pairs.append((ds_id, md)) + + if as_dict: + return dict(results_pairs) + return results_pairs diff --git a/src/omi/creation/creator.py b/src/omi/creation/creator.py new file mode 100644 index 00000000..9d93b2b3 --- /dev/null +++ b/src/omi/creation/creator.py @@ -0,0 +1,68 @@ +"""Create OEMetadata JSON datapackage structure and return or store it.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from omi.base import get_metadata_specification +from omi.validation import validate_metadata + + +class OEMetadataCreator: + """ + Create OEMetadata JSON datapackages. + + Output is based on dataset and resource descriptions and validated against + the official schema. + """ + + def __init__(self, oem_version: str = "OEMetadata-2.0") -> None: + """Initialize the creator with a specific OEMetadata version.""" + self.oem_spec = get_metadata_specification(oem_version) + + def generate_metadata(self, dataset: dict, resources: list[dict]) -> dict: + """Generate OEMetadata JSON datapackage from dataset and resources.""" + metadata = { + "@context": self.oem_spec.schema["properties"]["@context"]["examples"][0], + **dataset, + "resources": resources, + "metaMetadata": self.oem_spec.example["metaMetadata"], + } + + validate_metadata(metadata, check_license=False) + return metadata + + def save( + self, + dataset: dict, + resources: list[dict], + output_file: Path | str, + **dump_kwargs, + ) -> None: + """ + Generate OEMetadata and save it to a JSON file. + + Parameters + ---------- + dataset : dict + Dataset metadata. + resources : list[dict] + List of resource metadata entries. 
+ output_file : Path | str + Path to the output JSON file. + **dump_kwargs : + Extra kwargs forwarded to `json.dump`. Defaults applied here: + - indent: 2 + - ensure_ascii: False + """ + metadata = self.generate_metadata(dataset, resources) + + # Defaults, can be overridden by caller via **dump_kwargs + indent = dump_kwargs.pop("indent", 2) + ensure_ascii = dump_kwargs.pop("ensure_ascii", False) + + with Path(output_file).open("w", encoding="utf-8") as f: + json.dump(metadata, f, indent=indent, ensure_ascii=ensure_ascii, **dump_kwargs) + + print(f"OEMetadata written to {output_file}") # noqa: T201 diff --git a/src/omi/creation/generator.py b/src/omi/creation/generator.py new file mode 100644 index 00000000..a141e9ad --- /dev/null +++ b/src/omi/creation/generator.py @@ -0,0 +1,205 @@ +""" +Generate an OEMetadata configuration file. + +Module for generating metadata files from resources like directories or zip files. +This used to get started from scratch - init metadata. +""" + +import fnmatch +import zipfile +from dataclasses import dataclass +from pathlib import Path +from typing import Union + +import yaml + +from omi.inspection import infer_metadata + + +@dataclass +class FileFilterOptions: + """ + Options for filtering files when reading directories or zip files. + + Attributes + ---------- + exclude_extensions: list[str] | None + List of file extensions to exclude (e.g., ['.log', '.tmp']). + exclude_patterns: list[str] | None + List of filename patterns to exclude (e.g., ['*_backup.*', '*.bak']). + exclude_hidden: bool + Whether to exclude hidden files/directories (default True). + """ + + exclude_extensions: list[str] | None = None + exclude_patterns: list[str] | None = None + exclude_hidden: bool = True + + +def read_directory( + directory: Union[str, Path], + filter_opts: FileFilterOptions, +) -> list[Path]: + """ + Recursively read files from the directory, applying optional filters. + + Parameters + ---------- + directory: Union[str, Path] + The directory to read files from. Can be a string or a Path object. + filter_opts: FileFilterOptions + Filtering options including extensions, patterns, and hidden files. + + Returns + ------- + list[Path] + A list of Path objects representing the files that match the criteria. + """ + directory = Path(directory) + + exclude_extensions = set(filter_opts.exclude_extensions or [".log", ".tmp", ".bak", ".DS_Store", ".md"]) + exclude_patterns = filter_opts.exclude_patterns or ["*_backup.*", "*~", "*.old", "*.ignore"] + + valid_files = [] + for file_path in directory.rglob("*"): + if not file_path.is_file(): + continue + + if filter_opts.exclude_hidden and any(part.startswith(".") for part in file_path.parts): + continue + + if file_path.suffix in exclude_extensions: + continue + + if any(fnmatch.fnmatch(file_path.name, pattern) for pattern in exclude_patterns): + continue + + valid_files.append(file_path) + + return valid_files + + +def read_zipfile( + zip_path: Union[str, Path], + extract_to: Union[str, Path], + filter_opts: FileFilterOptions, +) -> list[Path]: + """Extract a zip file and return list of extracted files.""" + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(extract_to) + return read_directory(extract_to, filter_opts) + + +def infer_file_metadata(file_path: Path) -> dict: + """ + Infer basic resource metadata from file name and type. + + Parameters + ---------- + file_path: Path + Path to the file for which metadata should be inferred. 
+ + Returns + ------- + dict + A dictionary containing inferred metadata for the resource. + """ + file_name = file_path.stem + file_format = file_path.suffix.replace(".", "").upper() + + resource = { + "name": file_name.lower().replace(" ", "_"), + "title": file_name.replace("_", " ").title(), + "path": file_path.as_posix(), + "description": f"Auto-generated description for {file_name}", + "type": "table" if file_format in ["CSV", "XLSX", "JSON"] else "file", + "format": file_format, + "encoding": "UTF-8", + } + + if file_format == "CSV": + with file_path.open("r") as f: + fields = infer_metadata(f, "OEP")["resources"][0]["schema"] + + resource["schema"] = fields + resource["dialect"] = {"delimiter": fields.get("delimiter", ","), "decimalSeparator": "."} + + return resource + + +def generate_oemetadata_yaml_from_datapackage( + directory: Union[str, Path], + output_yaml: Union[str, Path], + dataset_metadata: dict, + filter_opts: FileFilterOptions, +) -> None: + """ + Generate an OEMetadata YAML configuration file based on files in a directory or zipped directory. + + Parameters + ---------- + directory: Union[str, Path] + Path to the directory or zip file containing data files. + output_yaml: Union[str, Path] + Path to the output YAML file. + dataset_metadata: dict + Metadata for the dataset, including name, title, description, and ID. + filter_opts: FileFilterOptions + Filtering options for excluding files by extension, pattern, or hidden state. + """ + temp_dir = None + directory = Path(directory) + if zipfile.is_zipfile(directory): + temp_dir = Path("temp_extracted") + files = read_zipfile(directory, temp_dir, filter_opts) + files = read_directory(temp_dir, filter_opts) # Apply filtering after extraction + else: + files = read_directory(directory, filter_opts) + + resources = [] + for file in files: + resource_meta = infer_file_metadata(file) + + resources.append(resource_meta) + + yaml_structure = { + "dataset": dataset_metadata, + "template": { # TODO @jh-RLI: This section must be defined by user # noqa: TD003 + "context": { + "title": "Your Project Title", + "homepage": "https://yourhomepage.org", + "contact": "contact@yourproject.org", + }, + }, + "resources": resources, + } + + with open(output_yaml, "w", encoding="utf-8") as yaml_file: # noqa: PTH123 + yaml.dump(yaml_structure, yaml_file, sort_keys=False, allow_unicode=True) + + if temp_dir: + import shutil + + shutil.rmtree(temp_dir) + + print(f"YAML configuration generated at: {output_yaml}") # noqa: T201 + + +# Example usage +if __name__ == "__main__": + dataset_metadata_example = { + "name": "example_dataset", + "title": "Example Dataset", + "description": "This dataset was autogenerated from directory content.", + "@id": "https://example.org/dataset/example_dataset", + } + + generate_oemetadata_yaml_from_datapackage( + directory="/home/jh/projekte/SLE/postprocessed/", + output_yaml="generated_metadata.yaml", + dataset_metadata=dataset_metadata_example, + filter_opts=FileFilterOptions( + exclude_patterns=[".snake*"], + exclude_hidden=True, + ), + ) diff --git a/src/omi/creation/init.py b/src/omi/creation/init.py new file mode 100644 index 00000000..8be170ff --- /dev/null +++ b/src/omi/creation/init.py @@ -0,0 +1,229 @@ +""" +Initialization helpers for OEMetadata split-files layout. + +Provides functions to scaffold dataset and resource YAML files and to +infer resource information from existing data files. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import yaml + +from omi.base import get_metadata_specification +from omi.inspection import InspectionError, infer_metadata + +if TYPE_CHECKING: + from collections.abc import Iterable + from pathlib import Path + + +@dataclass +class InitResult: + """Paths to created or reused YAML files for a single dataset.""" + + dataset_yaml: Path + template_yaml: Path + resource_yamls: list[Path] + + +# ----------------------------- +# helpers +# ----------------------------- + + +def _blankify(obj: object) -> object: + """ + Return a copy of `obj` with the same structure but 'empty' leaf values. + + Rules: + - dict -> recursively blankify values + - list -> [] if scalar list; if list of dicts and non-empty, keep one blankified element; else [] + - str -> "" + - bool -> False + - int/float -> "" (prefer empty so users must choose proper types) + - None -> None + - everything else -> "" + """ + if isinstance(obj, dict): + blank: object = {k: _blankify(v) for k, v in obj.items()} + elif isinstance(obj, list): + if not obj: + blank = [] + else: + first = obj[0] + # show one skeleton item so users see the structure for list-of-dicts; + # scalar lists -> show empty by default + blank = [_blankify(first)] if isinstance(first, dict) else [] + elif isinstance(obj, str): + blank = "" + elif isinstance(obj, bool): + blank = False + elif obj is None: + blank = None + else: + # numbers / other scalars -> empty + blank = "" + return blank + + +def _load_spec_template(oem_version: str) -> dict: + """Return the raw OEMetadata template document for the given version.""" + spec = get_metadata_specification(oem_version) + return spec.template or {} + + +def _dataset_stub_from_spec_template(oem_version: str, dataset_id: str) -> dict: + """ + Build datasets/.dataset.yaml from top-level template (not from resources). + + Remove @context/resources/metaMetadata and blankify the rest. 
+ """ + t = _load_spec_template(oem_version).copy() + t.pop("@context", None) + t.pop("resources", None) # <-- filter out resource-level keys + t.pop("metaMetadata", None) + + blank = _blankify(t) + blank.setdefault("name", dataset_id) + blank.setdefault("title", "") + blank.setdefault("description", "") + blank.setdefault("@id", "") + + return {"version": oem_version, "dataset": blank} + + +def _resource_template_from_spec(oem_version: str) -> dict: + """Build datasets/.template.yaml from the *first* resource template only.""" + tmpl = _load_spec_template(oem_version) + resources = tmpl.get("resources") or [] + base = resources[0] if resources else {} + return _blankify(base) + + +def _resource_stub_from_spec(oem_version: str, resource_name: str) -> dict: + """Build resources//.resource.yaml from the resource template.""" + res = _resource_template_from_spec(oem_version) + res["name"] = resource_name + return res + + +def _dump_yaml(path: Path, data: dict, *, overwrite: bool) -> Path: + """Write `data` as YAML to `path`, respecting the `overwrite` flag.""" + path.parent.mkdir(parents=True, exist_ok=True) + if path.exists() and not overwrite: + return path + path.write_text( + yaml.safe_dump(data, sort_keys=False, allow_unicode=True), + encoding="utf-8", + ) + return path + + +# ----------------------------- +# public API +# ----------------------------- + + +def init_dataset( + base_dir: Path, + dataset_id: str, + *, + oem_version: str = "OEMetadata-2.0", + resources: Iterable[str] = (), + overwrite: bool = False, +) -> InitResult: + """ + Create or extend the split-files layout for one dataset. + + Creates: + + - datasets/.dataset.yaml + - datasets/.template.yaml + - resources//.resource.yaml for each requested resource. + """ + # touch spec (also ensures the version string is valid) + _ = get_metadata_specification(oem_version) + + dataset_yaml = base_dir / "datasets" / f"{dataset_id}.dataset.yaml" + template_yaml = base_dir / "datasets" / f"{dataset_id}.template.yaml" + + dataset_doc = _dataset_stub_from_spec_template(oem_version, dataset_id) + resource_template_doc = _resource_template_from_spec(oem_version) + + out_dataset = _dump_yaml(dataset_yaml, dataset_doc, overwrite=overwrite) + out_template = _dump_yaml(template_yaml, resource_template_doc, overwrite=overwrite) + + created_resources: list[Path] = [] + for res_name in resources: + res_doc = _resource_stub_from_spec(oem_version, res_name) + res_path = base_dir / "resources" / dataset_id / f"{res_name}.resource.yaml" + created_resources.append(_dump_yaml(res_path, res_doc, overwrite=overwrite)) + + return InitResult(dataset_yaml=out_dataset, template_yaml=out_template, resource_yamls=created_resources) + + +def init_resources_from_files( + base_dir: Path, + dataset_id: str, + files: Iterable[Path], + *, + oem_version: str = "OEMetadata-2.0.4", + overwrite: bool = False, +) -> list[Path]: + """ + Create resource stubs for DATASET_ID from the given FILES. + + Uses the spec resource template structure, prefills name/path/format hints, + and for CSV files also infers a schema (fields + types) using `omi.inspection`. 
+ """ + _ = get_metadata_specification(oem_version) + + outputs: list[Path] = [] + for f in files: + name = f.stem + ext = f.suffix.lower().lstrip(".") + res = _resource_stub_from_spec(oem_version, name) + res["path"] = str(f) + + # Lightweight format hinting (non-authoritative; user should review) + if ext == "csv": + res.setdefault("format", "CSV") + res.setdefault("encoding", "UTF-8") + res.setdefault("scheme", "file") + + # Use existing inspection: "OEP" == OEMetadata in this code base + try: + inferred = infer_metadata(str(f), metadata_format="OEP") + except InspectionError: + inferred = None + + if inferred is not None: + # We only care about the *resource* part here + try: + inferred_resource = inferred["resources"][0] + inferred_schema = inferred_resource.get("schema") + except (KeyError, IndexError, TypeError): + inferred_schema = None + + if inferred_schema: + # Overwrite/attach the schema from inspection to this resource stub + res["schema"] = inferred_schema + + elif ext == "json": + res.setdefault("format", "json") + res.setdefault("scheme", "file") + elif ext == "xlsx": + res.setdefault("format", "xlsx") + res.setdefault("scheme", "file") + else: + if ext: + res.setdefault("format", ext) + res.setdefault("scheme", "file") + + out_path = base_dir / "resources" / dataset_id / f"{name}.resource.yaml" + outputs.append(_dump_yaml(out_path, res, overwrite=overwrite)) + + return outputs diff --git a/src/omi/creation/utils.py b/src/omi/creation/utils.py new file mode 100644 index 00000000..fa591863 --- /dev/null +++ b/src/omi/creation/utils.py @@ -0,0 +1,295 @@ +""" +Utility functions for the OMI creation module. + +This module provides deep-merge templating, YAML IO, and discovery helpers +for assembling OEMetadata from split YAML files (dataset/template/resources). +""" + +from __future__ import annotations + +from copy import deepcopy +from pathlib import Path +from typing import TYPE_CHECKING, Optional, Union + +import yaml + +if TYPE_CHECKING: + from collections.abc import Hashable + +# --- deep merge helpers ------------------------------------------------------- + +# List keys we concatenate (resource + template) instead of replacing. +DEFAULT_CONCAT_LIST_KEYS = {"keywords", "topics", "languages"} + + +def _hashable_key(x: object) -> Hashable | tuple: + """ + Return a hashable representation of `x` for deduplication purposes. + + - dict -> sorted tuple of (key, value) pairs + - list -> tuple(list) + - other -> value itself + """ + if isinstance(x, dict): + return tuple(sorted(x.items())) + if isinstance(x, list): + return tuple(x) + return x # type: ignore[return-value] + + +def _merge_lists( + template_list: list[object], + resource_list: list[object], + *, + deduplicate: bool = True, +) -> list[object]: + """ + Concatenate lists with resource-first priority. + + When `deduplicate` is True, only items that are not already present in + `resource_list` (by hashable representation) are appended from `template_list`. + """ + merged = list(resource_list) + if not template_list: + return merged + + if deduplicate: + existing = {_hashable_key(v) for v in merged} + for item in template_list: + k = _hashable_key(item) + if k not in existing: + merged.append(item) + else: + merged.extend(template_list) + return merged + + +def deep_apply_template_to_resource( + resource: dict[str, object], + template: dict[str, object], + concat_list_keys: Union[tuple[str, ...], set[str]] = DEFAULT_CONCAT_LIST_KEYS, +) -> dict[str, object]: + """ + Apply a resource template using deep-merge semantics. 
+ + Rules: + - Missing keys are copied from the template. + - Dicts are deep-merged (resource wins on conflicts). + - Lists are concatenated only for keys in `concat_list_keys`; otherwise, the + resource list is preserved as-is. + - Scalars: resource values win. + """ + if not template: + return resource + + result = deepcopy(resource) + for key, tval in template.items(): + if key not in result: + result[key] = deepcopy(tval) + continue + + rval = result[key] + if isinstance(rval, dict) and isinstance(tval, dict): + result[key] = deep_apply_template_to_resource(rval, tval, concat_list_keys) + continue + + if isinstance(rval, list) and isinstance(tval, list): + if key in concat_list_keys: + result[key] = _merge_lists(tval, rval, deduplicate=True) + # else: resource list stays as-is + continue + # scalar: resource value stays + return result + + +def apply_template_to_resources( + resources: list[dict[str, object]], + template: dict[str, object], +) -> list[dict[str, object]]: + """Apply the same `template` to each resource in `resources`.""" + if not template: + return resources + return [deep_apply_template_to_resource(r, template) for r in resources] + + +# --- YAML IO + discovery ------------------------------------------------------ + + +def load_yaml(path: Union[str, Path]) -> dict[str, object]: + """Load a YAML mapping from `path`, returning an empty dict for empty files.""" + with Path(path).open("r", encoding="utf-8") as f: + return yaml.safe_load(f) or {} + + +def discover_paths( + base_dir: Union[str, Path], + dataset_id: str, +) -> tuple[Optional[Path], Optional[Path], list[Path]]: + """ + Discover dataset/template/resources paths by convention. + + - dataset: datasets/{dataset_id}.dataset.yaml + - template: datasets/{dataset_id}.template.yaml (optional) + - resources: resources/{dataset_id}/*.resource.yaml + """ + base = Path(base_dir) + dataset_path = base / "datasets" / f"{dataset_id}.dataset.yaml" + template_path = base / "datasets" / f"{dataset_id}.template.yaml" + resources_dir = base / "resources" / dataset_id + + dataset = dataset_path if dataset_path.exists() else None + template = template_path if template_path.exists() else None + resources = sorted(resources_dir.glob("*.resource.yaml")) if resources_dir.exists() else [] + return dataset, template, resources + + +def resolve_from_index( + base_dir: Union[str, Path], + dataset_id: str, + index_file: Optional[Union[str, Path]], +) -> tuple[Optional[Path], Optional[Path], list[Path]]: + """ + Resolve dataset/template/resources using an explicit index YAML. + + Example YAML: + + datasets: + : + dataset: path/to/dataset.yaml + template: path/to/template.yaml # optional + resources: + - path/to/res1.yaml + - path/to/res2.yaml + + Paths are interpreted as relative to `base_dir`. + + Parameters + ---------- + base_dir : Union[str, Path] + Base directory containing datasets, templates, and resources. + dataset_id : str + Identifier for the dataset to load. + index_file : Optional[Union[str, Path]] + Optional path to an index YAML file for resolving dataset parts. + + Returns + ------- + tuple[Optional[Path], Optional[Path], list[Path]] + A tuple containing: + - dataset_path: Optional[Path] + Path to the dataset YAML (or None if not found). + - template_path: Optional[Path] + Path to the template YAML (or None if not found). + - resource_paths: list[Path] + List of paths to resource YAMLs. 
+ """ + if not index_file: + return discover_paths(base_dir, dataset_id) + + base = Path(base_dir) + index_path = Path(index_file) + index = load_yaml(index_path) + entry = (index.get("datasets") or {}).get(dataset_id, {}) + dataset = base / entry["dataset"] if "dataset" in entry else None + template = base / entry["template"] if "template" in entry else None + resources = [base / p for p in entry.get("resources", [])] + return dataset, template, resources + + +def load_parts( + base_dir: Union[str, Path], + dataset_id: str, + index_file: Optional[Union[str, Path]] = None, +) -> tuple[str, dict[str, object], list[dict[str, object]], dict[str, object]]: + """ + Load dataset YAML, optional template YAML, and all resource YAMLs. + + Returns a tuple: (version, dataset, resources, template). + + Parameters + ---------- + base_dir : Union[str, Path] + Base directory containing datasets, templates, and resources. + dataset_id : str + Identifier for the dataset to load. + index_file : Optional[Union[str, Path]], optional + Optional path to an index YAML file for resolving dataset parts, + by default None. + + Returns + ------- + tuple[str, dict[str, object], list[dict[str, object]], dict[str, object]] + A tuple containing: + - version: str + The OEMetadata version from the dataset YAML (default "OEMetadata-2.0.4"). + - dataset: dict[str, object] + The dataset mapping from the dataset YAML. + - resources: list[dict[str, object]] + A list of resource mappings from the resource YAMLs. + - template: dict[str, object] + The template mapping from the template YAML (empty dict if none). + """ + dataset_path, template_path, resource_paths = resolve_from_index(base_dir, dataset_id, index_file) + + if dataset_path is None or not dataset_path.exists(): + raise FileNotFoundError(f"Dataset YAML not found for '{dataset_id}'") + + dataset_yaml = load_yaml(dataset_path) + version = str(dataset_yaml.get("version", "OEMetadata-2.0.4")) + # Support either dataset: {...} or flat style with top-level dataset keys. + dataset = dataset_yaml.get("dataset", dataset_yaml) + + template: dict[str, object] = {} + if template_path and template_path.exists(): + template = load_yaml(template_path) + + resources: list[dict[str, object]] = [load_yaml(p) for p in resource_paths] + return version, dataset, resources, template + + +def discover_dataset_ids(base_dir: Union[str, Path]) -> list[str]: + """ + Discover dataset ids by scanning datasets/*.dataset.yaml. + + For 'datasets/powerplants.dataset.yaml' returns 'powerplants'. + + Parameters + ---------- + base_dir : Union[str, Path] + Base directory containing datasets, templates, and resources. + + Returns + ------- + list[str] + Sorted list of discovered dataset IDs. + """ + base = Path(base_dir) + datasets_dir = base / "datasets" + if not datasets_dir.exists(): + return [] + return sorted([p.stem.replace(".dataset", "") for p in datasets_dir.glob("*.dataset.yaml")]) + + +def discover_dataset_ids_from_index(index_file: Union[str, Path]) -> list[str]: + """ + Discover dataset ids from an explicit metadata_index.yaml. + + Returns the sorted list of top-level keys under `datasets`. + + Parameters + ---------- + index_file : Union[str, Path] + Path to an index YAML file for resolving dataset parts. + + Returns + ------- + list[str] + Sorted list of discovered dataset IDs. 
+ """ + idx_path = Path(index_file) + if not idx_path.exists(): + return [] + with idx_path.open("r", encoding="utf-8") as f: + data = yaml.safe_load(f) or {} + ds = data.get("datasets") or {} + return sorted(ds.keys()) diff --git a/src/omi/inspection.py b/src/omi/inspection.py index f7b4dd52..e7b2b526 100644 --- a/src/omi/inspection.py +++ b/src/omi/inspection.py @@ -1,6 +1,7 @@ """Module to inspect data and create metadata from it.""" from collections.abc import Callable +from copy import deepcopy from typing import Any from frictionless import Detector, Dialect, Resource @@ -121,7 +122,9 @@ def convert_field(field: dict[str, str]) -> dict[str, str]: return {"name": field["name"], "type": f"array {type_mapping[item_type]}"} # All arrays are empty - so no further subtype can be detected return {"name": field["name"], "type": "array"} - return field + oem_field = deepcopy(metadata["resources"][0]["schema"]["fields"][0]) + oem_field.update(field) + return oem_field rows = resource.read_rows() fields = [convert_field(field) for field in fields] diff --git a/tests/test_assembler.py b/tests/test_assembler.py new file mode 100644 index 00000000..318c73e1 --- /dev/null +++ b/tests/test_assembler.py @@ -0,0 +1,316 @@ +""" +Assembly integration tests for split-files OEMetadata authoring. + +This module exercises the public assembler entry point by building a small +on-disk YAML tree, applying a template, and verifying the merged OEMetadata. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import yaml + +# We test the public assembler entry point +from omi.creation.assembler import assemble_many_metadata, assemble_metadata_dict + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +# ---------- helpers ---------- + + +def write_yaml(p: Path, data: object) -> None: + """Write `data` (any YAML-serializable object) to path `p`.""" + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text( + yaml.safe_dump(data, sort_keys=False, allow_unicode=True), + encoding="utf-8", + ) + + +class FakeCreator: + """ + Minimal stand-in for OEMetadataCreator used via monkeypatching. + + It mimics `generate_metadata(dataset, resources)` and skips validation. + The constructor accepts the OEMetadata version to embed in metaMetadata. + """ + + def __init__(self, oem_version: str = "OEMetadata-2.0") -> None: + """Initialize the fake creator with a specific OEMetadata version.""" + self.oem_version = oem_version + + def generate_metadata(self, dataset: dict, resources: list[dict]) -> dict: + """Return a small OEMetadata-like dict sufficient for assertions.""" + return { + "@context": "https://example.org/context.json", + **dataset, + "resources": resources, + "metaMetadata": {"metadataVersion": self.oem_version}, + } + + +# ---------- tests ---------- + + +def test_assemble_by_convention_with_template_merge( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """ + Assemble via convention and verify deep merge semantics. 
+ + Asserts: + - dataset is loaded from datasets/{id}.dataset.yaml + - template is applied deeply (resource wins on conflicts) + - keywords are concatenated (resource first, then template-only) + - licenses remain resource-provided if present (no concat by default) + - creator is invoked and returns a full dict + """ + # dataset + write_yaml( + tmp_path / "datasets" / "demo.dataset.yaml", + { + "version": "OEMetadata-2.0.4", + "dataset": {"name": "demo", "title": "Demo", "description": "Demo dataset"}, + }, + ) + + # template + write_yaml( + tmp_path / "datasets" / "demo.template.yaml", + { + "context": {"publisher": "OEP", "contact": "a@b"}, + "keywords": ["k1"], + "topics": ["model_draft"], + "languages": ["en-GB"], + "licenses": [{"name": "L1"}], # applies only if resource doesn't provide licenses + }, + ) + + # resources + write_yaml( + tmp_path / "resources" / "demo" / "r1.resource.yaml", + { + "name": "r1", + "title": "R1", + # overrides nested key, should still inherit contact from template + "context": {"publisher": "Other"}, + # resource provides its own licenses -> should NOT be concatenated by default + "licenses": [{"name": "R1-license"}], + # own keywords -> should concat with template keywords + "keywords": ["r1k"], + }, + ) + write_yaml( + tmp_path / "resources" / "demo" / "r2.resource.yaml", + { + "name": "r2", + "title": "R2", + # no licenses provided -> should get template licenses + }, + ) + + # Patch the creator used inside assembler to our Fake + monkeypatch.setattr("omi.creation.assembler.OEMetadataCreator", FakeCreator) + + md = assemble_metadata_dict(tmp_path, "demo") + + # dataset propagated + assert md["name"] == "demo" + assert md["title"] == "Demo" + + # resources present + assert isinstance(md["resources"], list) + assert len(md["resources"]) == 2 + r1, r2 = md["resources"] + + # deep merge for context: resource wins on conflicts, template fills missing keys + assert r1["context"]["publisher"] == "Other" + assert r1["context"]["contact"] == "a@b" + + # keywords/topics/languages concatenate (resource first, then template-only) + assert r1["keywords"] == ["r1k", "k1"] + # topics/languages inherited if missing + assert r1["topics"] == ["model_draft"] + assert r1["languages"] == ["en-GB"] + + # licenses: resource list wins (no concat by default) + assert r1["licenses"] == [{"name": "R1-license"}] + + # r2 inherits licenses from template (since none provided) + assert r2["licenses"] == [{"name": "L1"}] + # r2 inherits keywords/topics/languages from template + assert r2["keywords"] == ["k1"] + assert r2["topics"] == ["model_draft"] + assert r2["languages"] == ["en-GB"] + + # metaMetadata present from FakeCreator (assembler passes through the version) + assert md["metaMetadata"]["metadataVersion"] == "OEMetadata-2.0.4" + + +def test_assemble_with_index_mapping( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Assemble using an explicit metadata_index.yaml mapping.""" + base = tmp_path + + # index mapping + write_yaml( + base / "metadata_index.yaml", + { + "datasets": { + "pp": { + "dataset": "datasets/powerplants.dataset.yaml", + "template": "datasets/powerplants.template.yaml", + "resources": [ + "resources/powerplants/a.resource.yaml", + "resources/powerplants/b.resource.yaml", + ], + }, + }, + }, + ) + + write_yaml( + base / "datasets" / "powerplants.dataset.yaml", + {"dataset": {"name": "pp", "title": "PP"}}, + ) + write_yaml( + base / "datasets" / "powerplants.template.yaml", + {"keywords": ["t-k"]}, + ) + write_yaml( + base / "resources" / 
"powerplants" / "a.resource.yaml", + {"name": "a", "title": "A", "keywords": ["a-k"]}, + ) + write_yaml( + base / "resources" / "powerplants" / "b.resource.yaml", + {"name": "b", "title": "B"}, + ) + + monkeypatch.setattr("omi.creation.assembler.OEMetadataCreator", FakeCreator) + + # Use the index explicitly + md = assemble_metadata_dict(base, "pp", index_file=base / "metadata_index.yaml") + + assert md["name"] == "pp" + names = [r["name"] for r in md["resources"]] + assert names == ["a", "b"] + + # keywords concatenated for 'a', inherited for 'b' + r_a = md["resources"][0] + r_b = md["resources"][1] + assert r_a["keywords"] == ["a-k", "t-k"] + assert r_b["keywords"] == ["t-k"] + + +def test_assemble_many_metadata_convention_as_dict( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Assemble all datasets by convention; expect a dict keyed by dataset id.""" + # Dataset A + write_yaml( + tmp_path / "datasets" / "a.dataset.yaml", + {"version": "OEMetadata-2.0.4", "dataset": {"name": "a", "title": "A"}}, + ) + write_yaml( + tmp_path / "resources" / "a" / "r1.resource.yaml", + {"name": "r1", "title": "R1"}, + ) + + # Dataset B (with template) + write_yaml( + tmp_path / "datasets" / "b.dataset.yaml", + {"version": "OEMetadata-2.0.4", "dataset": {"name": "b", "title": "B"}}, + ) + write_yaml( + tmp_path / "datasets" / "b.template.yaml", + {"keywords": ["tk"]}, + ) + write_yaml( + tmp_path / "resources" / "b" / "r2.resource.yaml", + {"name": "r2", "title": "R2", "keywords": ["rk"]}, + ) + + # Use the FakeCreator inside the assembler + monkeypatch.setattr("omi.creation.assembler.OEMetadataCreator", FakeCreator) + + out = assemble_many_metadata(tmp_path) # dict[str, dict] + # discover_dataset_ids returns sorted ids + assert list(out.keys()) == ["a", "b"] + + # Dataset A checks + md_a = out["a"] + assert md_a["name"] == "a" + assert [r["name"] for r in md_a["resources"]] == ["r1"] + + # Dataset B checks (template applied with concat) + md_b = out["b"] + assert md_b["name"] == "b" + r2 = md_b["resources"][0] + assert r2["name"] == "r2" + assert r2["keywords"] == ["rk", "tk"] + + +def test_assemble_many_metadata_with_index_as_list( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Assemble all datasets declared in index; expect a list of (id, md) pairs sorted by id.""" + base = tmp_path + + # Index with two datasets (note: keys will be sorted by helper) + write_yaml( + base / "metadata_index.yaml", + { + "datasets": { + "x": { + "dataset": "datasets/x.dataset.yaml", + "resources": ["resources/x/x1.resource.yaml"], + }, + "y": { + "dataset": "datasets/y.dataset.yaml", + "template": "datasets/y.template.yaml", + "resources": ["resources/y/y1.resource.yaml"], + }, + }, + }, + ) + + # Dataset x + write_yaml(base / "datasets" / "x.dataset.yaml", {"dataset": {"name": "x", "title": "X"}}) + write_yaml(base / "resources" / "x" / "x1.resource.yaml", {"name": "x1"}) + + # Dataset y (with template) + write_yaml(base / "datasets" / "y.dataset.yaml", {"dataset": {"name": "y", "title": "Y"}}) + write_yaml(base / "datasets" / "y.template.yaml", {"keywords": ["t"]}) + write_yaml(base / "resources" / "y" / "y1.resource.yaml", {"name": "y1", "keywords": ["r"]}) + + monkeypatch.setattr("omi.creation.assembler.OEMetadataCreator", FakeCreator) + + pairs = assemble_many_metadata( + base, + index_file=base / "metadata_index.yaml", + as_dict=False, + ) # list[tuple[str, dict]] + + # Expect sorted ids: ['x', 'y'] + ids = [ds_id for ds_id, _ in pairs] + assert ids == ["x", "y"] + + md_x = 
pairs[0][1] + md_y = pairs[1][1] + + assert md_x["name"] == "x" + assert [r["name"] for r in md_x["resources"]] == ["x1"] + + # Template concat for y + r_y1 = md_y["resources"][0] + assert r_y1["keywords"] == ["r", "t"] diff --git a/tests/test_create.py b/tests/test_create.py new file mode 100644 index 00000000..7fe2dd07 --- /dev/null +++ b/tests/test_create.py @@ -0,0 +1,147 @@ +""" +Integration tests for OEMetadata assembly and entry point using YAML test data. + +This test suite consumes the example YAML tree located at: +tests/test_data/create/metadata/ +and verifies that OMI assembles and writes a valid OEMetadata document. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import TYPE_CHECKING + +from omi.create import build_from_yaml +from omi.creation.assembler import assemble_metadata_dict + +if TYPE_CHECKING: + import pytest + + +def _fixture_metadata_root() -> Path: + """Return the absolute path to tests/test_data/create/metadata.""" + here = Path(__file__).resolve().parent + return here / "test_data" / "create" / "metadata" + + +def test_assemble_metadata_dict_with_fixture() -> None: + """Assemble OEMetadata dict from the real fixture and assert key content.""" + base = _fixture_metadata_root() + dataset_id = "powerplants" + + md = assemble_metadata_dict(base, dataset_id) + + # dataset-level checks (from powerplants.dataset.yaml) + assert md["name"] == "oep_oemetadata" + assert md["title"] == "OEP OEMetadata" + assert md["@id"].startswith("https://databus.openenergyplatform.org/") + + # context injected from template if not overridden in resource + assert "resources" in md + assert isinstance(md["resources"], list) + assert md["resources"] + r_names = {r["name"] for r in md["resources"]} + # Both resources from your example exist + assert {"oemetadata_table", "data_2"}.issubset(r_names) + + # Check one resource that should have inherited from template + r1 = next(r for r in md["resources"] if r["name"] == "oemetadata_table") + assert r1["context"]["title"] == "NFDI4Energy" # from template + assert "licenses" in r1 + assert isinstance(r1["licenses"], list) + assert r1["licenses"] + assert r1["licenses"][0]["name"] in {"ODbL-1.0", "ODbL-1.0".upper(), "ODBL-1.0"} + + # Meta metadata is present + assert "metaMetadata" in md + assert md["metaMetadata"]["metadataVersion"].startswith("OEMetadata-2.0") + + +def test_entrypoint_build_from_yaml_writes_file(tmp_path: Path) -> None: + """Use the real entry point to write JSON and compare basic structure.""" + base = _fixture_metadata_root() + out = tmp_path / "out" / "powerplants.json" + + build_from_yaml(base, "powerplants", out) + + assert out.exists(), "Entry point did not write the output file." 
+ written = json.loads(out.read_text(encoding="utf-8")) + + # Sanity checks on written JSON + assert written["name"] == "oep_oemetadata" + assert isinstance(written["resources"], list) + assert written["resources"] + # Ensure unicode is preserved (© should not be escaped) + licenses = written["resources"][0].get("licenses", []) + if licenses: + # stringify to inspect the character; ensure_ascii=False in writer preserves it + text = json.dumps(licenses[0], ensure_ascii=False) + assert "©" in text + + +def test_build_from_yaml_writes_file_when_output_is_file( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure build_from_yaml writes to the exact file path provided.""" + from omi import create as create_mod + + expected: dict[str, object] = {"name": "pp", "resources": []} + + # Avoid needing real YAML on disk + def fake_assemble( + base_dir: Path, + dataset_id: str, + index_file: Path | None = None, + ) -> dict[str, object]: + # use args to avoid ARG001 + _ = base_dir, index_file + assert dataset_id == "powerplants" + return expected + + monkeypatch.setattr(create_mod, "assemble_metadata_dict", fake_assemble) + + out = tmp_path / "out.json" + create_mod.build_from_yaml(tmp_path / "meta", "powerplants", out) + + assert out.exists() + assert json.loads(out.read_text(encoding="utf-8")) == expected + + +def test_build_many_from_yaml_writes_many_default_names( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure build_many_from_yaml writes .json files into output_dir.""" + from omi import create as create_mod + + canned: dict[str, dict[str, object]] = { + "a": {"name": "a", "resources": []}, + "b": {"name": "b", "resources": []}, + } + + def fake_many( + base_dir: Path, + *, + dataset_ids: list[str] | None = None, + index_file: Path | None = None, + as_dict: bool = True, + ) -> dict[str, dict[str, object]]: + # Called by build_many_from_yaml; return mapping id -> md + _ = base_dir, dataset_ids, index_file # avoid ARG001 + assert as_dict is True + return canned + + monkeypatch.setattr(create_mod, "assemble_many_metadata", fake_many) + + out_dir = tmp_path / "out" + create_mod.build_many_from_yaml(tmp_path / "meta", out_dir) + + a_path = out_dir / "a.json" + b_path = out_dir / "b.json" + assert a_path.exists() + assert b_path.exists() + + assert json.loads(a_path.read_text(encoding="utf-8")) == canned["a"] + assert json.loads(b_path.read_text(encoding="utf-8")) == canned["b"] diff --git a/tests/test_creation_utils.py b/tests/test_creation_utils.py new file mode 100644 index 00000000..de3c7a67 --- /dev/null +++ b/tests/test_creation_utils.py @@ -0,0 +1,202 @@ +"""Unit tests for the OMI creation utils (templating, IO, discovery).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest +import yaml + +# Functions under test +from omi.creation.utils import ( + DEFAULT_CONCAT_LIST_KEYS, + _merge_lists, + apply_template_to_resources, + deep_apply_template_to_resource, + discover_dataset_ids, + discover_dataset_ids_from_index, + discover_paths, + load_parts, + load_yaml, + resolve_from_index, +) + +if TYPE_CHECKING: + from pathlib import Path + + +# ---------- helpers ---------- + + +def _write_yaml(p: Path, data: object) -> None: + """Write a YAML-serializable `data` object to `p`, creating parents.""" + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(yaml.safe_dump(data, sort_keys=False, allow_unicode=True), encoding="utf-8") + + +# ---------- tests: list merging + deep template ---------- + + +def 
test_merge_lists_deduplicates_and_respects_resource_first() -> None: + """`_merge_lists` keeps resource-first order and de-duplicates template items.""" + resource_list = ["a", "b"] + template_list = ["b", "c"] + merged = _merge_lists(template_list, resource_list, deduplicate=True) + assert merged == ["a", "b", "c"] + + +def test_deep_apply_template_to_resource_concat_for_keywords_topics_languages() -> None: + """Default concat keys (keywords/topics/languages) are concatenated; others are not.""" + resource = { + "name": "r", + "keywords": ["rk"], + "topics": ["rt"], + "languages": ["rl"], + "context": {"publisher": "R"}, + "list_no_concat": [1, 2], + } + template = { + "keywords": ["tk"], + "topics": ["tt"], + "languages": ["tl"], + "context": {"publisher": "T", "contact": "a@b"}, + "list_no_concat": [3, 4], + } + + out = deep_apply_template_to_resource(resource, template) + # concat lists for default concat keys + assert out["keywords"] == ["rk", "tk"] + assert out["topics"] == ["rt", "tt"] + assert out["languages"] == ["rl", "tl"] + # resource list wins for non-concat keys + assert out["list_no_concat"] == [1, 2] + # deep dict merge: resource wins on conflict, template fills missing + assert out["context"]["publisher"] == "R" + assert out["context"]["contact"] == "a@b" + + +def test_deep_apply_template_to_resource_custom_concat_keys() -> None: + """Custom concat set allows concatenating lists like `licenses`.""" + resource = {"licenses": [{"name": "R1"}]} + template = {"licenses": [{"name": "T1"}]} + # By default, 'licenses' is NOT concatenated + out_default = deep_apply_template_to_resource(resource, template) + assert out_default["licenses"] == [{"name": "R1"}] + + # If we opt-in, it concatenates (resource first, then template-only) + custom_keys = set(DEFAULT_CONCAT_LIST_KEYS) | {"licenses"} + out_custom = deep_apply_template_to_resource(resource, template, concat_list_keys=custom_keys) + assert out_custom["licenses"] == [{"name": "R1"}, {"name": "T1"}] + + +def test_apply_template_to_resources_applies_per_item() -> None: + """Template is applied to each resource; concat for `keywords` by default.""" + resources = [{"name": "a"}, {"name": "b", "keywords": ["bk"]}] + template = {"keywords": ["tk"]} + out = apply_template_to_resources(resources, template) + assert out[0]["keywords"] == ["tk"] # inherited from template + assert out[1]["keywords"] == ["bk", "tk"] # concatenated: resource first, then template-only + + +# ---------- tests: YAML IO + discovery ---------- + + +def test_load_yaml_reads_empty_as_empty_dict(tmp_path: Path) -> None: + """Empty YAML file is read as an empty dict.""" + p = tmp_path / "empty.yaml" + p.write_text("", encoding="utf-8") + data = load_yaml(p) + assert data == {} + + +def test_discover_paths_and_resolve_from_index(tmp_path: Path) -> None: + """Discovery by convention and resolution by index both return expected paths.""" + base = tmp_path + # convention files + ds = base / "datasets" / "powerplants.dataset.yaml" + tp = base / "datasets" / "powerplants.template.yaml" + rdir = base / "resources" / "powerplants" + r1 = rdir / "a.resource.yaml" + r2 = rdir / "b.resource.yaml" + + _write_yaml(ds, {"version": "OEMetadata-2.0.4", "dataset": {"name": "pp"}}) + _write_yaml(tp, {"keywords": ["k1"]}) + _write_yaml(r1, {"name": "a"}) + _write_yaml(r2, {"name": "b"}) + + dspath, tpath, rpaths = discover_paths(base, "powerplants") + assert dspath == ds + assert tpath == tp + assert rpaths == [r1, r2] + + # index mapping (deliberately flips resource order) + idx = 
base / "metadata_index.yaml" + _write_yaml( + idx, + { + "datasets": { + "powerplants": { + "dataset": "datasets/powerplants.dataset.yaml", + "template": "datasets/powerplants.template.yaml", + "resources": [ + "resources/powerplants/b.resource.yaml", + "resources/powerplants/a.resource.yaml", + ], + }, + }, + }, + ) + d2, t2, rs2 = resolve_from_index(base, "powerplants", idx) + assert d2 == ds + assert t2 == tp + assert rs2 == [base / "resources/powerplants/b.resource.yaml", base / "resources/powerplants/a.resource.yaml"] + + +def test_load_parts_returns_all_sections(tmp_path: Path) -> None: + """`load_parts` returns (version, dataset, resources, template) with expected contents.""" + base = tmp_path + ds = base / "datasets" / "households.dataset.yaml" + tp = base / "datasets" / "households.template.yaml" + rdir = base / "resources" / "households" + r1 = rdir / "hh1.resource.yaml" + + _write_yaml(ds, {"version": "OEMetadata-2.0.4", "dataset": {"name": "households", "title": "HH"}}) + _write_yaml(tp, {"context": {"publisher": "OEP"}}) + _write_yaml(r1, {"name": "hh1"}) + + version, dataset, resources, template = load_parts(base, "households") + assert version == "OEMetadata-2.0.4" + assert dataset == {"name": "households", "title": "HH"} + assert resources == [{"name": "hh1"}] + assert template == {"context": {"publisher": "OEP"}} + + +def test_load_parts_raises_when_dataset_missing(tmp_path: Path) -> None: + """`load_parts` raises FileNotFoundError if the dataset YAML is missing.""" + with pytest.raises(FileNotFoundError): + load_parts(tmp_path, "missing") + + +# ---------- tests: dataset id discovery ---------- + + +def test_discover_dataset_ids(tmp_path: Path) -> None: + """`discover_dataset_ids` finds dataset ids by scanning datasets/*.dataset.yaml.""" + _write_yaml(tmp_path / "datasets" / "a.dataset.yaml", {"dataset": {"name": "a"}}) + _write_yaml(tmp_path / "datasets" / "b.dataset.yaml", {"dataset": {"name": "b"}}) + ids = discover_dataset_ids(tmp_path) + assert ids == ["a", "b"] + + +def test_discover_dataset_ids_from_index(tmp_path: Path) -> None: + """`discover_dataset_ids_from_index` returns top-level 'datasets' keys in index YAML.""" + idx = tmp_path / "metadata_index.yaml" + _write_yaml(idx, {"datasets": {"x": {}, "y": {}}}) + ids = discover_dataset_ids_from_index(idx) + assert ids == ["x", "y"] + + +def test_discover_dataset_ids_from_index_missing_file(tmp_path: Path) -> None: + """Missing index file yields an empty list of dataset ids.""" + ids = discover_dataset_ids_from_index(tmp_path / "nope.yaml") + assert ids == [] diff --git a/tests/test_data/create/metadata/datasets/powerplants.dataset.yaml b/tests/test_data/create/metadata/datasets/powerplants.dataset.yaml new file mode 100644 index 00000000..38bb43a2 --- /dev/null +++ b/tests/test_data/create/metadata/datasets/powerplants.dataset.yaml @@ -0,0 +1,6 @@ +version: "OEMetadata-2.0" +dataset: + name: oep_oemetadata + title: OEP OEMetadata + description: A dataset for the OEMetadata examples. 
+ "@id": https://databus.openenergyplatform.org/oeplatform/supply/wri_global_power_plant_database/ diff --git a/tests/test_data/create/metadata/datasets/powerplants.template.yaml b/tests/test_data/create/metadata/datasets/powerplants.template.yaml new file mode 100644 index 00000000..1b60853a --- /dev/null +++ b/tests/test_data/create/metadata/datasets/powerplants.template.yaml @@ -0,0 +1,26 @@ +licenses: + - name: ODbL-1.0 + title: Open Data Commons Open Database License 1.0 + path: https://opendatacommons.org/licenses/odbl/1-0/index.html + instruction: > + You are free to share and change, but you must attribute, and + share derivations under the same license. See https://tldrlegal.com/license/odc-open-database-license-(odbl) + for further information. + attribution: © Reiner Lemoine Institut + copyrightStatement: https://github.com/OpenEnergyPlatform/oemetadata/blob/production/LICENSE.txt + +context: + title: NFDI4Energy + homepage: https://nfdi4energy.uol.de/ + documentation: https://nfdi4energy.uol.de/sites/about_us/ + sourceCode: https://github.com/NFDI4Energy + publisher: Open Energy Platform (OEP) + publisherLogo: https://github.com/OpenEnergyPlatform/organisation/blob/production/logo/OpenEnergyFamily_Logo_OpenEnergyPlatform.svg + contact: contact@example.com + fundingAgency: " Deutsche Forschungsgemeinschaft (DFG)" + fundingAgencyLogo: https://upload.wikimedia.org/wikipedia/commons/8/86/DFG-logo-blau.svg + grantNo: "501865131" + +topics: [model_draft] +languages: [en-GB, de-DE] +keywords: [example, ODbL-1.0, NFDI4Energy] diff --git a/tests/test_data/create/metadata/resources/powerplants/data_2.resource.yaml b/tests/test_data/create/metadata/resources/powerplants/data_2.resource.yaml new file mode 100644 index 00000000..a03ee242 --- /dev/null +++ b/tests/test_data/create/metadata/resources/powerplants/data_2.resource.yaml @@ -0,0 +1,22 @@ +name: data_2 +type: table +title: My Second Resource + +path: reGon/metadata/data_2.csv +scheme: file +format: csv +mediatype: text/csv +encoding: utf-8 + +schema: + fields: + - name: h + type: integer + nullable: true + - name: i + type: integer + nullable: true + - name: o + type: string + nullable: true + primaryKey: [id] diff --git a/tests/test_data/create/metadata/resources/powerplants/oemetadata_table.resource.yaml b/tests/test_data/create/metadata/resources/powerplants/oemetadata_table.resource.yaml new file mode 100644 index 00000000..f28b8392 --- /dev/null +++ b/tests/test_data/create/metadata/resources/powerplants/oemetadata_table.resource.yaml @@ -0,0 +1,191 @@ +name: oemetadata_table +type: table +title: OEMetadata Table Template +description: Example table used to illustrate the OEMetadata structure and features. +"@id": https://databus.openenergyplatform.org/oeplatform/supply/wri_global_power_plant_database/2022-11-07/wri_global_power_plant_database_variant=data.csv + +# Resource-specific attributes (template will add licenses/context/topics/languages/keywords) +path: http://openenergyplatform.org/dataedit/view/model_draft/oemetadata_table +scheme: http +format: CSV +encoding: UTF-8 + +dialect: + decimalSeparator: "." 
+ delimiter: ";" + +schema: + fields: + - name: id + type: integer + description: Unique identifier + nullable: false + unit: null + isAbout: + - name: identifier + "@id": http://purl.obolibrary.org/obo/IAO_0020000 + valueReference: + - value: null + name: null + "@id": null + - name: name + type: string + description: Technology Name + nullable: true + unit: null + isAbout: + - name: power generation technology + "@id": http://openenergy-platform.org/ontology/oeo/OEO_00010423 + valueReference: + - value: wind + name: wind power technology + "@id": http://openenergyplatform.org/ontology/oeo/OEO_00010424 + - name: type + type: string + description: Type of wind farm + nullable: true + unit: null + isAbout: + - name: wind farm + "@id": https://openenergyplatform.org/ontology/oeo/OEO_00000447/ + valueReference: + - value: onshore + name: onshore wind farm + "@id": https://openenergyplatform.org/ontology/oeo/OEO_00000311/ + - value: offshore + name: offshore wind farm + "@id": https://openenergyplatform.org/ontology/oeo/OEO_00000308/ + - name: year + type: integer + description: Reference year + nullable: true + unit: null + isAbout: + - name: year + "@id": https://openenergyplatform.org/ontology/oeo/UO_0000036/ + valueReference: + - value: null + name: null + "@id": null + - name: value + type: number + description: Bruttoleistung + nullable: true + unit: MW + isAbout: + - name: nameplate capacity + "@id": https://openenergyplatform.org/ontology/oeo/OEO_00230003/ + valueReference: + - value: null + name: null + "@id": null + - name: is_active + type: boolean + description: Betriebsstatus + nullable: false + unit: null + isAbout: + - name: Operating Mode Status + "@id": https://ontology.brickschema.org/brick/Operating_Mode_Status + valueReference: + - value: null + name: null + "@id": null + - name: version + type: integer + description: Version + nullable: true + unit: null + isAbout: + - name: version number + "@id": http://purl.obolibrary.org/obo/IAO_0000129 + valueReference: + - value: null + name: null + "@id": null + - name: comment + type: string + description: "" + nullable: true + unit: null + isAbout: + - name: comment + "@id": http://semanticscience.org/resource/SIO_001167 + valueReference: + - value: null + name: null + "@id": null + primaryKey: [id] + foreignKeys: + - fields: [id, version] + reference: + resource: model_draft.oep_oemetadata_table_example_version + fields: [id, version] + + +sources: + - title: IPCC Sixth Assessment Report (AR6) - Climate Change 2023 - Synthesis Report + authors: [Hoesung Lee, José Romero, The Core Writing Team] + description: A Report of the Intergovernmental Panel on Climate Change. + publicationYear: "2023" + path: https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_FullVolume.pdf + sourceLicenses: + - name: CC-BY-4.0 + title: Creative Commons Attribution 4.0 International + path: https://creativecommons.org/licenses/by/4.0/legalcode + instruction: > + You are free to share and change, but you must attribute. + See https://tldrlegal.com/license/odc-open-database-license-odbl for further information. 
+ attribution: © Intergovernmental Panel on Climate Change 2023 + copyrightStatement: https://www.ipcc.ch/copyright/ + +subject: + - name: energy + "@id": https://openenergyplatform.org/ontology/oeo/OEO_00000150 + +publicationDate: "2024-10-15" + +# embargoPeriod: +# start: "2024-10-11" +# end: "2025-01-01" +# isActive: true + +spatial: + location: + address: Rudower Chaussee 12, 12489 Berlin + "@id": https://www.wikidata.org/wiki/Q77077223 + latitude: "52.432822" + longitude: "13.5351004" + extent: + name: Berlin + "@id": https://www.wikidata.org/wiki/Q64 + resolutionValue: "100" + resolutionUnit: m + boundingBox: [13.08825, 52.33859, 13.76104, 52.6754] + crs: EPSG:4326 + +temporal: + referenceDate: "2020-01-01" + timeseries: + - start: "2020-01-01T00:00:00+01:00" + end: "2020-01-01T23:59:30+01:00" + resolutionValue: "15" + resolutionUnit: min + alignment: left + aggregationType: current + +contributors: + - title: Ludwig Hülk + path: https://github.com/Ludee + organization: Reiner Lemoine Institut + roles: [DataCollector] + date: "2024-11-19" + object: data + comment: Date of data creation + - title: Ludwig Hülk + path: https://github.com/Ludee + organization: Reiner Lemoine Institut + roles: [DataCurator] + date: "2024-11-30" + object: metadata + comment: Date of metadata creation diff --git a/tests/test_inspection.py b/tests/test_inspection.py index 8cf504ba..27d1afb1 100644 --- a/tests/test_inspection.py +++ b/tests/test_inspection.py @@ -34,3 +34,9 @@ def test_inspection(): assert metadata["resources"][0]["schema"]["fields"][6]["type"] == "object" assert metadata["resources"][0]["schema"]["fields"][7]["type"] == "date" assert metadata["resources"][0]["schema"]["fields"][8]["type"] == "boolean" + + +# TODO @jh-RLI: Add test for special cases in csv as e.g. this data will cause issues # noqa: TD003 +# cat objective.csv +# ;0 +# objective;97356714.15339188 diff --git a/tests/test_metadata_creation.py b/tests/test_metadata_creation.py new file mode 100644 index 00000000..0387906d --- /dev/null +++ b/tests/test_metadata_creation.py @@ -0,0 +1,141 @@ +"""Test suite for the OEMetadataCreator class in the OMI creation module (split-files layout).""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +import pytest +import yaml + +from omi.creation.creator import OEMetadataCreator +from omi.creation.utils import apply_template_to_resources, load_parts + +if TYPE_CHECKING: + from pathlib import Path + + +@pytest.fixture() +def sample_tree(tmp_path: Path) -> tuple[Path, str]: + """ + Create a split-files metadata tree. 
+ + metadata/ + datasets/ + demo.dataset.yaml + demo.template.yaml + resources/ + demo/ + table.resource.yaml + """ + base = tmp_path / "metadata" + ds_dir = base / "datasets" + rs_dir = base / "resources" / "demo" + + ds_dir.mkdir(parents=True, exist_ok=True) + rs_dir.mkdir(parents=True, exist_ok=True) + + # dataset yaml + (ds_dir / "demo.dataset.yaml").write_text( + yaml.safe_dump( + { + "version": "OEMetadata-2.0", + "dataset": { + "name": "test_dataset", + "title": "Test Dataset", + "description": "For unit testing", + "@id": "https://example.org/test_dataset", + }, + }, + sort_keys=False, + allow_unicode=True, + ), + encoding="utf-8", + ) + + # template yaml (applied to every resource) + (ds_dir / "demo.template.yaml").write_text( + yaml.safe_dump( + { + "languages": ["en-GB"], + "keywords": ["example"], + "context": {"publisher": "OEP", "contact": "contact@example.org"}, + }, + sort_keys=False, + allow_unicode=True, + ), + encoding="utf-8", + ) + + # one resource yaml + (rs_dir / "table.resource.yaml").write_text( + yaml.safe_dump( + { + "name": "test_resource", + "title": "Test Resource", + "type": "table", + "format": "CSV", + "schema": { + "fields": [ + {"name": "id", "type": "integer", "nullable": False}, + ], + "primaryKey": ["id"], + }, + }, + sort_keys=False, + allow_unicode=True, + ), + encoding="utf-8", + ) + + return base, "demo" + + +def test_generate_oemetadata_from_split_files(sample_tree: tuple[Path, str]) -> None: + """End-to-end: load parts, apply template, generate metadata via creator.""" + base_dir, dataset_id = sample_tree + + # Load version/dataset/resources/template from split-files layout + version, dataset, resources, template = load_parts(base_dir, dataset_id) + + # Deep-apply template to resources (dicts merge, lists concat for keywords/topics/languages) + merged_resources = apply_template_to_resources(resources, template) + + creator = OEMetadataCreator(oem_version=version) + result = creator.generate_metadata(dataset, merged_resources) + + # Basic assertions + assert result["@context"].startswith("https://") + assert result["name"] == "test_dataset" + assert "resources" in result + assert isinstance(result["resources"], list) + assert result["resources"][0]["name"] == "test_resource" + + # Template has been applied deeply (languages concatenated / context merged) + r0 = result["resources"][0] + assert r0["languages"] == ["en-GB"] + assert r0["keywords"] == ["example"] + assert r0["context"]["publisher"] == "OEP" + assert r0["context"]["contact"] == "contact@example.org" + + # Schema minimally intact + assert r0["schema"]["primaryKey"] == ["id"] + assert r0["schema"]["fields"][0]["name"] == "id" + assert r0["schema"]["fields"][0]["nullable"] is False + + +def test_creator_save_writes_json(sample_tree: tuple[Path, str]) -> None: + """Ensure creator.save writes JSON and preserves unicode.""" + base_dir, dataset_id = sample_tree + version, dataset, resources, template = load_parts(base_dir, dataset_id) + merged_resources = apply_template_to_resources(resources, template) + + out = base_dir / "out.json" + creator = OEMetadataCreator(oem_version=version) + creator.save(dataset, merged_resources, out, ensure_ascii=False, indent=2) + + assert out.exists() + data = json.loads(out.read_text(encoding="utf-8")) + assert data["name"] == "test_dataset" + # unicode preserved (no \u escapes because ensure_ascii=False) + assert "©" not in out.read_text(encoding="utf-8") # sanity check; none present here by default diff --git a/tests/test_metadata_validation.py 
b/tests/test_metadata_validation.py index 2a0de492..278f251d 100644
--- a/tests/test_metadata_validation.py
+++ b/tests/test_metadata_validation.py
@@ -110,11 +110,10 @@ def deactivate__test_metadata_against_oep_table():
     validation.validate_oep_table_against_metadata(oep_table=table, oep_schema="model_draft", metadata=metadata)
-def test_metadata_against_oep_table_using_metadata_from_oep():
-    """Test OEP table definition against OEP metadata, where metadata is taken from OEP."""
-    table = "x2x_p2gas_soec_1"
-    with pytest.raises(validation.ValidationError, match="None is not of type 'object'"):
-        validation.validate_oep_table_against_metadata(oep_table=table, oep_schema="model_draft")
+# Test always fails as the table no longer exists on the OEP.
+# def test_metadata_against_oep_table_using_metadata_from_oep():
+# """Test OEP table definition against OEP metadata, where metadata is taken from OEP."""
+# with pytest.raises(validation.ValidationError, match="None is not of type 'object'"):
 def test_metadata_against_oep_table_invalid_name():
diff --git a/tests/test_metadata_yaml_generation.py b/tests/test_metadata_yaml_generation.py
new file mode 100644
index 00000000..21131af8
--- /dev/null
+++ b/tests/test_metadata_yaml_generation.py
@@ -0,0 +1 @@
+"""Test for metadata yaml generation."""
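
The new `tests/test_metadata_yaml_generation.py` currently contains only a module docstring. A minimal sketch of a test it could grow into is shown below; it reuses the existing `tests/test_data/create/metadata/` fixture and the public `assemble_metadata_dict` entry point, while the YAML round-trip idea is an assumption about this placeholder's intended scope, not part of the changeset.

```python
"""Test for metadata yaml generation."""

from pathlib import Path

import yaml

from omi.creation.assembler import assemble_metadata_dict


def test_assembled_metadata_survives_yaml_round_trip(tmp_path: Path) -> None:
    """Dump assembled OEMetadata to YAML and reload it without losing content."""
    base = Path(__file__).resolve().parent / "test_data" / "create" / "metadata"
    metadata = assemble_metadata_dict(base, "powerplants")

    # Write the assembled document as YAML, preserving key order and unicode.
    out = tmp_path / "powerplants.yaml"
    out.write_text(yaml.safe_dump(metadata, sort_keys=False, allow_unicode=True), encoding="utf-8")

    # Reloading must reproduce the exact same mapping.
    assert yaml.safe_load(out.read_text(encoding="utf-8")) == metadata
```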