From 84de502955a8a0ac3ec2be4a88fe15d4a5ab8b64 Mon Sep 17 00:00:00 2001 From: Juha Huiskonen Date: Sun, 12 Oct 2025 18:28:40 +0300 Subject: [PATCH 1/3] Support exporting multiple segmentation meshes --- docs/Data_Preparation.md | 9 ++++--- src/membrain_pick/cli/convert_meshes_cli.py | 18 +++++++++++++- src/membrain_pick/cli/mesh_conversion_cli.py | 18 +++++++++++++- .../mesh_conversion_wrappers.py | 13 +++++++++- .../mesh_projections/mesh_projection.py | 24 ++++++++++++++++--- 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/docs/Data_Preparation.md b/docs/Data_Preparation.md index 07793cb..43f7b81 100644 --- a/docs/Data_Preparation.md +++ b/docs/Data_Preparation.md @@ -59,8 +59,9 @@ This command will generate mesh representations of the membrane segmentations an - `--out-folder` (TEXT): Path to the folder where mesh projections should be stored. [default: ./mesh_data] - `--step-numbers` (INTEGER): Step numbers for the normal vectors. [default: (-10, 10)] - `--step-size` (FLOAT): Step size for the normal vectors. [default: 2.5] -- `--mesh-smoothing` (INTEGER): Smoothing factor for the mesh. [default: 1000] -- `--barycentric-area` (FLOAT): Barycentric area for the mesh. [default: 400.0] +- `--mesh-smoothing` (INTEGER): Smoothing factor for the mesh. [default: 1000] +- `--barycentric-area` (FLOAT): Barycentric area for the mesh. [default: 400.0] +- `--max-segmentations` (INTEGER): Maximum number of connected segmentations to convert per file. Use `None` to export all components. [default: None] Note: options may change faster than these docs. You can check the available options by running `membrain_pick convert_mb_folder`. @@ -76,7 +77,9 @@ This command will generate a mesh representation of the membrane segmentation an #### More Options -Other options are similar to the `convert_mb_folder` command. You can check the available options by running `membrain-pick convert_file`. 
+Other options mirror the `convert_mb_folder` command, including `--max-segmentations` +to control how many connected segmentations are exported per membrane file. You can +check the available options by running `membrain-pick convert_file`. ### Outputs diff --git a/src/membrain_pick/cli/convert_meshes_cli.py b/src/membrain_pick/cli/convert_meshes_cli.py index 5a1c993..b6c8c4c 100644 --- a/src/membrain_pick/cli/convert_meshes_cli.py +++ b/src/membrain_pick/cli/convert_meshes_cli.py @@ -1,5 +1,5 @@ import os -from typing import List +from typing import List, Optional from typer import Option from .cli import OPTION_PROMPT_KWARGS as PKWARGS @@ -45,6 +45,13 @@ def convert_single_file( False, help="Should the mesh be generated using PyMeshLab? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -69,6 +76,7 @@ def convert_single_file( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) @@ -109,6 +117,13 @@ def convert_mb_folder( False, help="Should the mesh be generated using PyMeshLab? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a folder of membrane segmentations to meshes. 
@@ -131,4 +146,5 @@ def convert_mb_folder( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) diff --git a/src/membrain_pick/cli/mesh_conversion_cli.py b/src/membrain_pick/cli/mesh_conversion_cli.py index 51ca068..5e2b226 100644 --- a/src/membrain_pick/cli/mesh_conversion_cli.py +++ b/src/membrain_pick/cli/mesh_conversion_cli.py @@ -3,7 +3,7 @@ import typer from click import Context from typer.core import TyperGroup -from typing import List +from typing import List, Optional from typer import Option @@ -82,6 +82,13 @@ def convert_single_file( False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -103,6 +110,7 @@ def convert_single_file( barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, + max_segmentations=max_segmentations, ) @@ -139,6 +147,13 @@ def convert_mb_folder( False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a folder of membrane segmentations to meshes. 
@@ -158,6 +173,7 @@ def convert_mb_folder( barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, + max_segmentations=max_segmentations, ) diff --git a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py index 311dae0..2eb2eae 100644 --- a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py +++ b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py @@ -1,4 +1,6 @@ import os +from typing import Optional + import numpy as np from membrain_seg.segmentation.dataloading.data_utils import load_tomogram @@ -20,6 +22,7 @@ def meshes_for_folder_structure( min_connected_size=1e4, imod_meshing=False, pymeshlab_meshing=False, + max_segmentations: Optional[int] = None, ): """ This assumes the following folder structure: @@ -110,6 +113,7 @@ def meshes_for_folder_structure( min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) @@ -130,6 +134,7 @@ def mesh_for_tomo_mb_folder( min_connected_size=1e4, imod_meshing=False, pymeshlab_meshing=False, + max_segmentations: Optional[int] = None, ): """ This function assumes the following folder structure: @@ -188,6 +193,7 @@ def mesh_for_tomo_mb_folder( min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) @@ -208,6 +214,7 @@ def mesh_for_single_mb_file( min_connected_size=1e4, imod_meshing=False, pymeshlab_meshing=False, + max_segmentations: Optional[int] = None, ): """ """ os.makedirs(out_folder, exist_ok=True) @@ -224,6 +231,8 @@ def mesh_for_single_mb_file( if tomo_token is None: tomo_token = "Tomo" + # Allow exporting multiple connected components when requested. 
+ multiple_components_requested = max_segmentations is None or max_segmentations > 1 convert_to_mesh( mb_file, tomo_file, @@ -237,8 +246,10 @@ def mesh_for_single_mb_file( barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, crop_box_flag=crop_box_flag, - only_largest_component=only_largest_component, + only_largest_component= + only_largest_component and not multiple_components_requested, min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) diff --git a/src/membrain_pick/mesh_projections/mesh_projection.py b/src/membrain_pick/mesh_projections/mesh_projection.py index 4dc0268..9c7d1be 100644 --- a/src/membrain_pick/mesh_projections/mesh_projection.py +++ b/src/membrain_pick/mesh_projections/mesh_projection.py @@ -1,4 +1,6 @@ import os +from typing import Optional + import numpy as np from scipy.ndimage import zoom @@ -172,6 +174,7 @@ def convert_to_mesh( min_connected_size: float = 1e4, imod_meshing: bool = False, pymeshlab_meshing: bool = False, + max_segmentations: Optional[int] = None, ) -> None: """ Converts segmentation data into a mesh format and stores it. @@ -208,6 +211,9 @@ def convert_to_mesh( Flag to process only the largest connected component. min_connected_size : float, optional Minimum size for connected components to be processed. + max_segmentations : int, optional + Maximum number of connected components to export. ``None`` exports all + available components. 
Returns ------- @@ -220,10 +226,22 @@ def convert_to_mesh( mb_file, only_largest_component, tomo_file, tomo, rescale_seg ) + component_sizes = np.bincount(seg.reshape(-1)) + component_labels = [ + label + for label in range(1, len(component_sizes)) + if component_sizes[label] >= min_connected_size + ] + component_labels.sort(key=lambda label: component_sizes[label], reverse=True) + + if only_largest_component and component_labels: + component_labels = component_labels[:1] + if max_segmentations is not None: + component_labels = component_labels[:max(0, max_segmentations)] + sub_seg_count = 0 - for k in range(1, seg.max() + 1): - if np.sum(seg == k) < min_connected_size: - continue + normal_values = None + for k in component_labels: cur_mb_key, sub_seg_count = get_cur_mb_key( mb_key, only_largest_component, k, sub_seg_count, seg ) From 35994aaa9dc8e68182d908d782946433e3d19d3d Mon Sep 17 00:00:00 2001 From: Juha Huiskonen Date: Sun, 12 Oct 2025 20:02:59 +0300 Subject: [PATCH 2/3] Add merged output option for multi-mesh exports --- src/membrain_pick/cli/convert_meshes_cli.py | 16 +++ src/membrain_pick/cli/mesh_conversion_cli.py | 34 ++++- src/membrain_pick/cli/surforama_cli.py | 17 ++- .../clustering/mean_shift_inference.py | 47 +++++-- src/membrain_pick/dataloading/data_utils.py | 99 ++++++++++++-- .../dataloading/diffusionnet_dataset.py | 126 +++++++++--------- .../mesh_projections/mesh_class.py | 33 +++-- .../mesh_conversion_wrappers.py | 8 +- .../mesh_projections/mesh_projection.py | 48 ++++++- 9 files changed, 318 insertions(+), 110 deletions(-) diff --git a/src/membrain_pick/cli/convert_meshes_cli.py b/src/membrain_pick/cli/convert_meshes_cli.py index b6c8c4c..99cf847 100644 --- a/src/membrain_pick/cli/convert_meshes_cli.py +++ b/src/membrain_pick/cli/convert_meshes_cli.py @@ -52,6 +52,13 @@ def convert_single_file( "Leave empty or set to None to export all available segmentations." 
), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair)." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -77,6 +84,7 @@ def convert_single_file( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -124,6 +132,13 @@ def convert_mb_folder( "Leave empty or set to None to export all available segmentations." ), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair) per segmentation." + ), + ), ): """Convert a folder of membrane segmentations to meshes. @@ -147,4 +162,5 @@ def convert_mb_folder( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) diff --git a/src/membrain_pick/cli/mesh_conversion_cli.py b/src/membrain_pick/cli/mesh_conversion_cli.py index 5e2b226..7deb408 100644 --- a/src/membrain_pick/cli/mesh_conversion_cli.py +++ b/src/membrain_pick/cli/mesh_conversion_cli.py @@ -89,6 +89,13 @@ def convert_single_file( "Leave empty or set to None to export all available segmentations." ), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair)." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -111,6 +118,7 @@ def convert_single_file( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -154,6 +162,13 @@ def convert_mb_folder( "Leave empty or set to None to export all available segmentations." 
), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair) per segmentation." + ), + ), ): """Convert a folder of membrane segmentations to meshes. @@ -174,6 +189,7 @@ def convert_mb_folder( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -576,7 +592,11 @@ def surforama( from surforama.app import QtSurforama import numpy as np from matplotlib.pyplot import get_cmap - from membrain_pick.dataloading.data_utils import load_mesh_from_hdf5, get_csv_data + from membrain_pick.dataloading.data_utils import ( + get_csv_data, + iter_mesh_entries, + load_mesh_from_hdf5, + ) from membrain_seg.segmentation.dataloading.data_utils import load_tomogram from membrain_pick.scalar_selection import ScalarSelectionWidget @@ -603,17 +623,21 @@ def surforama( else: mesh_files = [h5_path] - for h5_nr, h5_path in enumerate(mesh_files): + mesh_entries = [] + for h5_path in mesh_files: mesh_data = load_mesh_from_hdf5(h5_path) + for group_name, group_data in iter_mesh_entries(mesh_data): + mesh_entries.append((h5_path, group_name, group_data)) - if h5_nr == 0: + for entry_idx, (h5_path, _group_name, mesh_data) in enumerate(mesh_entries): + if entry_idx == 0: volume_layer = display_tomo(viewer, mesh_data, tomogram_path) pixel_size = get_pixel_size(mesh_data, None) points, faces = get_points_and_faces(mesh_data, pixel_size) display_scores(viewer, mesh_data, points, faces) - if h5_nr == 0: + if entry_idx == 0: surforama_widget = initialize_surforama_widget( points, faces, volume_layer, viewer, normal_offset=normal_offset ) @@ -628,7 +652,7 @@ def surforama( viewer, mesh_data, pixel_size, point_size=point_size ) - if h5_nr == 0: + if entry_idx == 0: display_input_normal_values(viewer, mesh_data, points, faces) if return_viewer: return viewer diff --git a/src/membrain_pick/cli/surforama_cli.py 
b/src/membrain_pick/cli/surforama_cli.py index 9f9b3f0..3f57883 100644 --- a/src/membrain_pick/cli/surforama_cli.py +++ b/src/membrain_pick/cli/surforama_cli.py @@ -33,7 +33,10 @@ def surforama( """ import os import napari - from membrain_pick.dataloading.data_utils import load_mesh_from_hdf5 + from membrain_pick.dataloading.data_utils import ( + iter_mesh_entries, + load_mesh_from_hdf5, + ) from membrain_pick.napari_utils.surforama_cli_utils import ( display_tomo, get_pixel_size, @@ -57,17 +60,21 @@ def surforama( else: mesh_files = [h5_path] - for h5_nr, h5_path in enumerate(mesh_files): + mesh_entries = [] + for h5_path in mesh_files: mesh_data = load_mesh_from_hdf5(h5_path) + for group_name, group_data in iter_mesh_entries(mesh_data): + mesh_entries.append((h5_path, group_name, group_data)) - if h5_nr == 0: + for entry_idx, (h5_path, _group_name, mesh_data) in enumerate(mesh_entries): + if entry_idx == 0: volume_layer = display_tomo(viewer, mesh_data, tomogram_path) pixel_size = get_pixel_size(mesh_data, None) points, faces = get_points_and_faces(mesh_data, pixel_size) display_scores(viewer, mesh_data, points, faces) - if h5_nr == 0: + if entry_idx == 0: surforama_widget = initialize_surforama_widget( points, faces, volume_layer, viewer, normal_offset=normal_offset ) @@ -82,7 +89,7 @@ def surforama( viewer, mesh_data, pixel_size, point_size=point_size ) - if h5_nr == 0: + if entry_idx == 0: display_input_normal_values(viewer, mesh_data, points, faces) if return_viewer: return viewer diff --git a/src/membrain_pick/clustering/mean_shift_inference.py b/src/membrain_pick/clustering/mean_shift_inference.py index 79415fe..665f871 100644 --- a/src/membrain_pick/clustering/mean_shift_inference.py +++ b/src/membrain_pick/clustering/mean_shift_inference.py @@ -1,5 +1,6 @@ from membrain_pick.dataloading.data_utils import ( get_csv_data, + iter_mesh_entries, store_array_in_star, load_mesh_from_hdf5, store_mesh_in_hdf5, @@ -58,22 +59,40 @@ def mean_shift_for_h5( method: str 
= "membrain_pick", score_threshold: float = 9.0, ): - mesh_data = load_mesh_from_hdf5(h5_file) - verts = mesh_data["points"] - scores = mesh_data["scores"] - out_pos, _ = mean_shift_for_scores( - verts, scores, bandwidth, max_iter, margin, device, score_threshold, method - ) - # add positions to h5 container - mesh_data["cluster_centers"] = out_pos - out_file = os.path.join(out_dir, os.path.basename(h5_file)) + mesh_entries = list(iter_mesh_entries(load_mesh_from_hdf5(h5_file))) os.makedirs(out_dir, exist_ok=True) - store_mesh_in_hdf5( - out_file, - **mesh_data, - ) + out_file = os.path.join(out_dir, os.path.basename(h5_file)) + if os.path.exists(out_file): + os.remove(out_file) + + for group_name, mesh_data in mesh_entries: + verts = mesh_data["points"] + scores = mesh_data["scores"] + out_pos, _ = mean_shift_for_scores( + verts, scores, bandwidth, max_iter, margin, device, score_threshold, method + ) + mesh_data = dict(mesh_data) + mesh_data["cluster_centers"] = out_pos + + store_mesh_in_hdf5( + out_file, + group_name=group_name, + **mesh_data, + ) - store_clusters(h5_file, out_dir, out_pos, np.zeros((0,)), verts, mesh_data["faces"]) + group_file = h5_file + if group_name is not None: + base, ext = os.path.splitext(h5_file) + group_file = f"{base}_{group_name}{ext}" + + store_clusters( + group_file, + out_dir, + out_pos, + np.zeros((0,)), + verts, + mesh_data["faces"], + ) def mean_shift_for_csv( diff --git a/src/membrain_pick/dataloading/data_utils.py b/src/membrain_pick/dataloading/data_utils.py index d6ca9c5..f9fb3d2 100644 --- a/src/membrain_pick/dataloading/data_utils.py +++ b/src/membrain_pick/dataloading/data_utils.py @@ -1,4 +1,6 @@ import lxml.etree as ET +from typing import Optional + import pandas as pd import numpy as np import vtk @@ -158,12 +160,20 @@ def store_array_in_npy(out_file, data): np.save(out_file, data) -def store_mesh_in_hdf5(out_file: str, points: np.ndarray, faces: np.ndarray, **kwargs): +def store_mesh_in_hdf5( + out_file: str, + 
points: np.ndarray, + faces: np.ndarray, + group_name: Optional[str] = None, + **kwargs, +): """ Store mesh data in an HDF5 file. The points and vertices will be stored in separate hdf5 datasets. - Each kwargs key will be stored as a separate dataset. + Each kwargs key will be stored as a separate dataset. When ``group_name`` is + provided, the datasets are created inside that group, allowing multiple + meshes to share the same container. Parameters ---------- @@ -173,6 +183,9 @@ def store_mesh_in_hdf5(out_file: str, points: np.ndarray, faces: np.ndarray, **k The points data. faces : np.ndarray The faces data. + group_name : str, optional + Name of the group in which to place the datasets. If ``None`` (the + default) the datasets are written directly at the root of the file. kwargs : dict Additional data to store in the HDF5 file. @@ -181,20 +194,27 @@ def store_mesh_in_hdf5(out_file: str, points: np.ndarray, faces: np.ndarray, **k None """ - with h5py.File(out_file, "w") as f: - f.create_dataset("points", data=points) - f.create_dataset("faces", data=faces) + mode = "a" if group_name is not None else "w" + with h5py.File(out_file, mode) as f: + target = f + if group_name is not None: + if group_name in f: + del f[group_name] + target = f.create_group(group_name) + + target.create_dataset("points", data=points) + target.create_dataset("faces", data=faces) for key, value in kwargs.items(): if value is not None: if isinstance(value, str): # Convert string to numpy array of variable-length UTF-8 strings dt = h5py.string_dtype(encoding="utf-8") - f.create_dataset(key, data=np.array(value, dtype=dt)) + target.create_dataset(key, data=np.array(value, dtype=dt)) else: - f.create_dataset(key, data=value) + target.create_dataset(key, data=value) -def load_mesh_from_hdf5(in_file: str): +def load_mesh_from_hdf5(in_file: str, group_name: Optional[str] = None): """ Load mesh data from an HDF5 file. 
@@ -202,19 +222,76 @@ def load_mesh_from_hdf5(in_file: str): ---------- in_file : str The path to the input HDF5 file. + group_name : str, optional + Name of a group to load from the file. When ``None`` the function + returns the datasets stored at the root of the file. If the file + contains only groups, a dictionary mapping each group name to its + datasets is returned. Returns ------- dict - A dictionary containing the mesh data. + A dictionary containing the mesh data. When ``group_name`` is ``None`` + and the file stores multiple groups, a dictionary of dictionaries is + returned where each key corresponds to a group name. """ mesh_data = {} with h5py.File(in_file, "r") as f: - for key in f.keys(): - mesh_data[key] = f[key][()] + if group_name is not None: + if group_name not in f: + raise KeyError( + f"Group '{group_name}' not found in '{in_file}'. Available keys: {list(f.keys())}" + ) + node = f[group_name] + for key in node.keys(): + mesh_data[key] = node[key][()] + return mesh_data + + datasets = [key for key in f.keys() if isinstance(f[key], h5py.Dataset)] + groups = [key for key in f.keys() if isinstance(f[key], h5py.Group)] + + if datasets: + for key in datasets: + mesh_data[key] = f[key][()] + return mesh_data + + if groups: + grouped_mesh_data = {} + for group in groups: + grouped_mesh_data[group] = { + key: f[group][key][()] for key in f[group].keys() + } + return grouped_mesh_data + return mesh_data +def iter_mesh_entries(mesh_data: dict): + """Yield individual mesh entries from the loaded HDF5 content. + + Parameters + ---------- + mesh_data : dict + Output of :func:`load_mesh_from_hdf5`. + + Yields + ------ + tuple[Optional[str], dict] + Pairs of ``(group_name, mesh_dict)``. ``group_name`` is ``None`` when + the data corresponds to a single mesh stored at the root of the file. 
+ """ + + if not isinstance(mesh_data, dict): + raise TypeError("mesh_data must be a dictionary returned by load_mesh_from_hdf5") + + if "points" in mesh_data and isinstance(mesh_data["points"], np.ndarray): + yield None, mesh_data + return + + for group_name, group_data in mesh_data.items(): + yield group_name, group_data + + def store_point_and_vectors_in_vtp( out_path: str, in_points: np.ndarray, diff --git a/src/membrain_pick/dataloading/diffusionnet_dataset.py b/src/membrain_pick/dataloading/diffusionnet_dataset.py index 75c0dac..ae95616 100644 --- a/src/membrain_pick/dataloading/diffusionnet_dataset.py +++ b/src/membrain_pick/dataloading/diffusionnet_dataset.py @@ -15,6 +15,7 @@ compute_nearest_distances, ) from membrain_pick.dataloading.data_utils import ( + iter_mesh_entries, load_mesh_from_hdf5, convert_to_torch, read_star_file, @@ -290,69 +291,74 @@ def load_data(self) -> None: hdf5_path = entry[0] gt_path = entry[1] - mesh_data = load_mesh_from_hdf5(hdf5_path) - points = mesh_data["points"] - faces = mesh_data["faces"] - vert_normals = mesh_data["normals"] - normal_values = mesh_data["normal_values"] - tomo_file = ( - "" if not "tomo_file" in mesh_data.keys() else mesh_data["tomo_file"] - ) + mesh_container = load_mesh_from_hdf5(hdf5_path) + for _, mesh_data in iter_mesh_entries(mesh_container): + points = mesh_data["points"] + faces = mesh_data["faces"] + vert_normals = mesh_data["normals"] + normal_values = mesh_data["normal_values"] + tomo_file = ( + "" if not "tomo_file" in mesh_data.keys() else mesh_data["tomo_file"] + ) - points = np.concatenate([points, normal_values], axis=1) - if os.path.isfile(gt_path): - gt_data = read_star_file(gt_path) # pandas dataframe - # check size of gt_data - if gt_data.shape[0] == 0: + points = np.concatenate([points, normal_values], axis=1) + if os.path.isfile(gt_path): + gt_data = read_star_file(gt_path) # pandas dataframe + # check size of gt_data + if gt_data.shape[0] == 0: + gt_pos = np.zeros((1, 3)) + gt_classes 
= np.ones(1) * -1 + else: + gt_pos = gt_data[ + ["rlnCoordinateX", "rlnCoordinateY", "rlnCoordinateZ"] + ].values + assert ( + "rlnCoordinateX" in gt_data.columns + ), "rlnCoordinateX not in columns" + if "rlnClassNumber" in gt_data.columns: + gt_classes = gt_data["rlnClassNumber"].values + else: + gt_classes = np.ones(gt_pos.shape[0]) + gt_pos = project_points_to_nearest_hyperplane( + gt_pos, points[:, :3] + ) + else: gt_pos = np.zeros((1, 3)) gt_classes = np.ones(1) * -1 - else: - gt_pos = gt_data[ - ["rlnCoordinateX", "rlnCoordinateY", "rlnCoordinateZ"] - ].values - assert ( - "rlnCoordinateX" in gt_data.columns - ), "rlnCoordinateX not in columns" - if "rlnClassNumber" in gt_data.columns: - gt_classes = gt_data["rlnClassNumber"].values - else: - gt_classes = np.ones(gt_pos.shape[0]) - gt_pos = project_points_to_nearest_hyperplane(gt_pos, points[:, :3]) - else: - gt_pos = np.zeros((1, 3)) - gt_classes = np.ones(1) * -1 - - gt_mask = self._get_GT_mask(gt_pos, gt_classes) - gt_pos = gt_pos[gt_mask] - - points = points * self.input_pixel_size / self.process_pixel_size - gt_pos = gt_pos * self.input_pixel_size / self.process_pixel_size - - distances, nn_idcs = compute_nearest_distances(points[:, :3], gt_pos) - - # Move GT along nearest normal - _, nn_idcs_psii = compute_nearest_distances(gt_pos, points[:, :3]) - psii_normals = vert_normals[nn_idcs_psii] - - nearest_PSII_pos = gt_pos[nn_idcs] + psii_normals[nn_idcs] * 20 - connection_vectors = nearest_PSII_pos - (points[:, :3]) - - angle_to_normal = np.einsum("ij,ij->i", connection_vectors, vert_normals) - mask = angle_to_normal < 0 - - distances[distances > 10] = 10 - distances[mask] = 10.05 - points[:, :3] /= self.max_tomo_shape - points[:, :3] *= self.process_pixel_size - gt_pos /= self.max_tomo_shape - gt_pos *= self.process_pixel_size - - self.membranes.append(points) - self.labels.append(distances) - self.faces.append(faces) - self.vert_normals.append(vert_normals) - self.gt_pos.append(gt_pos) - 
self.tomo_files.append(tomo_file) + + gt_mask = self._get_GT_mask(gt_pos, gt_classes) + gt_pos = gt_pos[gt_mask] + + points = points * self.input_pixel_size / self.process_pixel_size + gt_pos = gt_pos * self.input_pixel_size / self.process_pixel_size + + distances, nn_idcs = compute_nearest_distances(points[:, :3], gt_pos) + + # Move GT along nearest normal + _, nn_idcs_psii = compute_nearest_distances(gt_pos, points[:, :3]) + psii_normals = vert_normals[nn_idcs_psii] + + nearest_PSII_pos = gt_pos[nn_idcs] + psii_normals[nn_idcs] * 20 + connection_vectors = nearest_PSII_pos - (points[:, :3]) + + angle_to_normal = np.einsum("ij,ij->i", connection_vectors, vert_normals) + mask = angle_to_normal < 0 + + distances[distances > 10] = 10 + distances[mask] = 10.05 + points[:, :3] /= self.max_tomo_shape + points[:, :3] *= self.process_pixel_size + gt_pos /= self.max_tomo_shape + gt_pos *= self.process_pixel_size + + self.membranes.append(points) + self.labels.append(distances) + self.faces.append(faces) + self.vert_normals.append(vert_normals) + self.gt_pos.append(gt_pos) + self.tomo_files.append(tomo_file) + if self.overfit: + break if self.overfit: break diff --git a/src/membrain_pick/mesh_projections/mesh_class.py b/src/membrain_pick/mesh_projections/mesh_class.py index 989061e..a229b02 100644 --- a/src/membrain_pick/mesh_projections/mesh_class.py +++ b/src/membrain_pick/mesh_projections/mesh_class.py @@ -1,7 +1,8 @@ -import numpy as np -from sklearn.neighbors import NearestNeighbors import csv +from typing import Optional +import numpy as np +from sklearn.neighbors import NearestNeighbors class Mesh(object): @@ -57,15 +58,27 @@ def find_closest_triangles_batch(self, positions): dists = self.nn_entity.kneighbors(positions) return dists - def store_in_file(self, out_file): - with open(out_file, 'w') as out_csv: - csv_writer = csv.writer(out_csv, delimiter=' ') + def store_in_file( + self, + out_file, + append: bool = False, + vertex_offset: int = 0, + object_name: 
Optional[str] = None, + ): + mode = "a" if append else "w" + with open(out_file, mode) as out_csv: + if object_name is not None: + out_csv.write(f"o {object_name}\n") + for vert in self.vertices: - row = ['v', str(vert[0]), str(vert[1]), str(vert[2])] - csv_writer.writerow(row) - for combo in self.triangle_combos: - row = ['f', str(int(combo[0])), str(int(combo[1])), str(int(combo[2]))] - csv_writer.writerow(row) + out_csv.write(f"v {vert[0]} {vert[1]} {vert[2]}\n") + + combos = np.asarray(self.triangle_combos, dtype=int) + if append and vertex_offset: + combos = combos + int(vertex_offset) + + for combo in combos: + out_csv.write(f"f {int(combo[0])} {int(combo[1])} {int(combo[2])}\n") diff --git a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py index 2eb2eae..483a3a1 100644 --- a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py +++ b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py @@ -23,6 +23,7 @@ def meshes_for_folder_structure( imod_meshing=False, pymeshlab_meshing=False, max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ): """ This assumes the following folder structure: @@ -113,7 +114,8 @@ def meshes_for_folder_structure( min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, - max_segmentations=max_segmentations, + max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -135,6 +137,7 @@ def mesh_for_tomo_mb_folder( imod_meshing=False, pymeshlab_meshing=False, max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ): """ This function assumes the following folder structure: @@ -194,6 +197,7 @@ def mesh_for_tomo_mb_folder( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -215,6 +219,7 @@ def mesh_for_single_mb_file( imod_meshing=False, pymeshlab_meshing=False, 
max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ): """ """ os.makedirs(out_folder, exist_ok=True) @@ -252,4 +257,5 @@ def mesh_for_single_mb_file( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) diff --git a/src/membrain_pick/mesh_projections/mesh_projection.py b/src/membrain_pick/mesh_projections/mesh_projection.py index 9c7d1be..b14e308 100644 --- a/src/membrain_pick/mesh_projections/mesh_projection.py +++ b/src/membrain_pick/mesh_projections/mesh_projection.py @@ -105,6 +105,15 @@ def get_cur_mb_key(mb_key, only_largest_component, k, sub_seg_count, seg): return cur_mb_key, sub_seg_count +def _count_vertices_in_obj(obj_path: str) -> int: + vertex_count = 0 + with open(obj_path, "r") as obj_file: + for line in obj_file: + if line.startswith("v "): + vertex_count += 1 + return vertex_count + + def save_mesh_data( out_file_base: str, points: np.ndarray, @@ -114,6 +123,8 @@ def save_mesh_data( only_obj: bool, tomo_file: str = None, pixel_size: float = None, + merge_outputs: bool = False, + component_name: Optional[str] = None, ) -> None: """ Save the mesh data to files. 
@@ -140,19 +151,33 @@ def save_mesh_data( None """ + h5_path = out_file_base + ".h5" + obj_path = out_file_base + ".obj" + if not only_obj: store_mesh_in_hdf5( - out_file=out_file_base + ".h5", + out_file=h5_path, points=points, faces=faces, normals=point_normals, normal_values=normal_values, - tomo_file=os.path.abspath(tomo_file), + tomo_file=os.path.abspath(tomo_file) if tomo_file is not None else None, pixel_size=pixel_size, + group_name=component_name if merge_outputs else None, ) mesh = Mesh(points, faces + 1) - mesh.store_in_file(out_file_base + ".obj") + if merge_outputs: + append = os.path.exists(obj_path) + vertex_offset = _count_vertices_in_obj(obj_path) if append else 0 + mesh.store_in_file( + obj_path, + append=append, + vertex_offset=vertex_offset, + object_name=component_name, + ) + else: + mesh.store_in_file(obj_path) # precompute spectrals and partitioning @@ -175,6 +200,7 @@ def convert_to_mesh( imod_meshing: bool = False, pymeshlab_meshing: bool = False, max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ) -> None: """ Converts segmentation data into a mesh format and stores it. @@ -214,6 +240,11 @@ def convert_to_mesh( max_segmentations : int, optional Maximum number of connected components to export. ``None`` exports all available components. + merge_outputs : bool, optional + When ``True``, store all exported components in a single pair of output + files by grouping meshes inside the HDF5 container and appending them to + the OBJ file. Each component will be written under a dedicated group or + object name. 
Returns ------- @@ -277,8 +308,15 @@ def convert_to_mesh( verts=points, normals=point_normals, ) + if merge_outputs: + out_file_base = os.path.join(out_folder, token + "_" + mb_key) + component_name = cur_mb_key + else: + out_file_base = os.path.join(out_folder, token + "_" + cur_mb_key) + component_name = None + save_mesh_data( - out_file_base=os.path.join(out_folder, token + "_" + cur_mb_key), + out_file_base=out_file_base, points=points, faces=faces, point_normals=point_normals, @@ -286,4 +324,6 @@ def convert_to_mesh( only_obj=only_obj, tomo_file=tomo_file, pixel_size=1.0, # points dimension corresponds already to tomogram dimensions + merge_outputs=merge_outputs, + component_name=component_name, ) From e27535e3071bead782e40dbbabd03c2e95c5a82e Mon Sep 17 00:00:00 2001 From: Juha Huiskonen Date: Sun, 12 Oct 2025 20:42:58 +0300 Subject: [PATCH 3/3] Add mesh export threshold option --- docs/Data_Preparation.md | 15 +++++++++++---- src/membrain_pick/cli/convert_meshes_cli.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/docs/Data_Preparation.md b/docs/Data_Preparation.md index 43f7b81..e59884e 100644 --- a/docs/Data_Preparation.md +++ b/docs/Data_Preparation.md @@ -58,11 +58,17 @@ This command will generate mesh representations of the membrane segmentations an - `--tomo-path` (TEXT, required): Path to the tomogram to be projected. [default: None] - `--out-folder` (TEXT): Path to the folder where mesh projections should be stored. [default: ./mesh_data] - `--step-numbers` (INTEGER): Step numbers for the normal vectors. [default: (-10, 10)] -- `--step-size` (FLOAT): Step size for the normal vectors. [default: 2.5] +- `--step-size` (FLOAT): Step size for the normal vectors. [default: 2.5] - `--mesh-smoothing` (INTEGER): Smoothing factor for the mesh. [default: 1000] - `--barycentric-area` (FLOAT): Barycentric area for the mesh. 
[default: 400.0]
+- `--min-segmentation-size` (INTEGER): Export connected components whose voxel count
+  is greater than or equal to this threshold. [default: 10000]
 - `--max-segmentations` (INTEGER): Maximum number of connected segmentations to convert per file. Use `None` to export all components. [default: None]
 
+When both `--min-segmentation-size` and `--max-segmentations` are set, MemBrain Pick
+exports the largest components that meet the size threshold, capped at the
+requested maximum.
+
 Note: options may change faster than these docs. You can check the available options by running `membrain_pick convert_mb_folder`.
 
 #### 2. You have a single membrane segmentation
@@ -77,9 +83,10 @@ This command will generate a mesh representation of the membrane segmentation an
 
 #### More Options
 
-Other options mirror the `convert_mb_folder` command, including `--max-segmentations`
-to control how many connected segmentations are exported per membrane file. You can
-check the available options by running `membrain-pick convert_file`.
+Other options mirror the `convert_mb_folder` command, including `--min-segmentation-size`
+to discard small connected components and `--max-segmentations` to control how many
+meshes are exported per membrane file. You can check the available options by running
+`membrain-pick convert_file`.
 
 ### Outputs
 
diff --git a/src/membrain_pick/cli/convert_meshes_cli.py b/src/membrain_pick/cli/convert_meshes_cli.py
index 99cf847..95a5783 100644
--- a/src/membrain_pick/cli/convert_meshes_cli.py
+++ b/src/membrain_pick/cli/convert_meshes_cli.py
@@ -37,6 +37,13 @@ def convert_single_file(
     barycentric_area: float = Option( # noqa: B008
         400.0, help="Barycentric area for the mesh. Default: 1.0"
     ),
+    min_segmentation_size: int = Option( # noqa: B008
+        10000,
+        help=(
+            "Export all connected components whose voxel count is greater than or "
+            "equal to this threshold."
+ ), + ), imod_meshing: bool = Option( # noqa: B008 False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", @@ -81,6 +88,7 @@ def convert_single_file( mesh_smoothing=mesh_smoothing, barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, + min_connected_size=min_segmentation_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, @@ -117,6 +125,13 @@ def convert_mb_folder( barycentric_area: float = Option( # noqa: B008 400.0, help="Barycentric area for the mesh. Default: 1.0" ), + min_segmentation_size: int = Option( # noqa: B008 + 10000, + help=( + "Export all connected components whose voxel count is greater than or " + "equal to this threshold." + ), + ), imod_meshing: bool = Option( # noqa: B008 False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", @@ -159,6 +174,7 @@ def convert_mb_folder( mesh_smoothing=mesh_smoothing, barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, + min_connected_size=min_segmentation_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations,