From 84de502955a8a0ac3ec2be4a88fe15d4a5ab8b64 Mon Sep 17 00:00:00 2001 From: Juha Huiskonen Date: Sun, 12 Oct 2025 18:28:40 +0300 Subject: [PATCH 1/3] Support exporting multiple segmentation meshes --- docs/Data_Preparation.md | 9 ++++--- src/membrain_pick/cli/convert_meshes_cli.py | 18 +++++++++++++- src/membrain_pick/cli/mesh_conversion_cli.py | 18 +++++++++++++- .../mesh_conversion_wrappers.py | 13 +++++++++- .../mesh_projections/mesh_projection.py | 24 ++++++++++++++++--- 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/docs/Data_Preparation.md b/docs/Data_Preparation.md index 07793cb..43f7b81 100644 --- a/docs/Data_Preparation.md +++ b/docs/Data_Preparation.md @@ -59,8 +59,9 @@ This command will generate mesh representations of the membrane segmentations an - `--out-folder` (TEXT): Path to the folder where mesh projections should be stored. [default: ./mesh_data] - `--step-numbers` (INTEGER): Step numbers for the normal vectors. [default: (-10, 10)] - `--step-size` (FLOAT): Step size for the normal vectors. [default: 2.5] -- `--mesh-smoothing` (INTEGER): Smoothing factor for the mesh. [default: 1000] -- `--barycentric-area` (FLOAT): Barycentric area for the mesh. [default: 400.0] +- `--mesh-smoothing` (INTEGER): Smoothing factor for the mesh. [default: 1000] +- `--barycentric-area` (FLOAT): Barycentric area for the mesh. [default: 400.0] +- `--max-segmentations` (INTEGER): Maximum number of connected segmentations to convert per file. Use `None` to export all components. [default: None] Note: options may change faster than these docs. You can check the available options by running `membrain_pick convert_mb_folder`. @@ -76,7 +77,9 @@ This command will generate a mesh representation of the membrane segmentation an #### More Options -Other options are similar to the `convert_mb_folder` command. You can check the available options by running `membrain-pick convert_file`. 
+Other options mirror the `convert_mb_folder` command, including `--max-segmentations` +to control how many connected segmentations are exported per membrane file. You can +check the available options by running `membrain-pick convert_file`. ### Outputs diff --git a/src/membrain_pick/cli/convert_meshes_cli.py b/src/membrain_pick/cli/convert_meshes_cli.py index 5a1c993..b6c8c4c 100644 --- a/src/membrain_pick/cli/convert_meshes_cli.py +++ b/src/membrain_pick/cli/convert_meshes_cli.py @@ -1,5 +1,5 @@ import os -from typing import List +from typing import List, Optional from typer import Option from .cli import OPTION_PROMPT_KWARGS as PKWARGS @@ -45,6 +45,13 @@ def convert_single_file( False, help="Should the mesh be generated using PyMeshLab? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -69,6 +76,7 @@ def convert_single_file( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) @@ -109,6 +117,13 @@ def convert_mb_folder( False, help="Should the mesh be generated using PyMeshLab? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a folder of membrane segmentations to meshes. 
@@ -131,4 +146,5 @@ def convert_mb_folder( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) diff --git a/src/membrain_pick/cli/mesh_conversion_cli.py b/src/membrain_pick/cli/mesh_conversion_cli.py index 51ca068..5e2b226 100644 --- a/src/membrain_pick/cli/mesh_conversion_cli.py +++ b/src/membrain_pick/cli/mesh_conversion_cli.py @@ -3,7 +3,7 @@ import typer from click import Context from typer.core import TyperGroup -from typing import List +from typing import List, Optional from typer import Option @@ -82,6 +82,13 @@ def convert_single_file( False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -103,6 +110,7 @@ def convert_single_file( barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, + max_segmentations=max_segmentations, ) @@ -139,6 +147,13 @@ def convert_mb_folder( False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", ), + max_segmentations: Optional[int] = Option( # noqa: B008 + None, + help=( + "Maximum number of connected segmentations to convert to meshes. " + "Leave empty or set to None to export all available segmentations." + ), + ), ): """Convert a folder of membrane segmentations to meshes. 
@@ -158,6 +173,7 @@ def convert_mb_folder( barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, + max_segmentations=max_segmentations, ) diff --git a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py index 311dae0..2eb2eae 100644 --- a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py +++ b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py @@ -1,4 +1,6 @@ import os +from typing import Optional + import numpy as np from membrain_seg.segmentation.dataloading.data_utils import load_tomogram @@ -20,6 +22,7 @@ def meshes_for_folder_structure( min_connected_size=1e4, imod_meshing=False, pymeshlab_meshing=False, + max_segmentations: Optional[int] = None, ): """ This assumes the following folder structure: @@ -110,6 +113,7 @@ def meshes_for_folder_structure( min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) @@ -130,6 +134,7 @@ def mesh_for_tomo_mb_folder( min_connected_size=1e4, imod_meshing=False, pymeshlab_meshing=False, + max_segmentations: Optional[int] = None, ): """ This function assumes the following folder structure: @@ -188,6 +193,7 @@ def mesh_for_tomo_mb_folder( min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) @@ -208,6 +214,7 @@ def mesh_for_single_mb_file( min_connected_size=1e4, imod_meshing=False, pymeshlab_meshing=False, + max_segmentations: Optional[int] = None, ): """ """ os.makedirs(out_folder, exist_ok=True) @@ -224,6 +231,8 @@ def mesh_for_single_mb_file( if tomo_token is None: tomo_token = "Tomo" + # Allow exporting multiple connected components when requested. 
+ multiple_components_requested = max_segmentations is None or max_segmentations > 1 convert_to_mesh( mb_file, tomo_file, @@ -237,8 +246,10 @@ def mesh_for_single_mb_file( barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, crop_box_flag=crop_box_flag, - only_largest_component=only_largest_component, + only_largest_component= + only_largest_component and not multiple_components_requested, min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, + max_segmentations=max_segmentations, ) diff --git a/src/membrain_pick/mesh_projections/mesh_projection.py b/src/membrain_pick/mesh_projections/mesh_projection.py index 4dc0268..9c7d1be 100644 --- a/src/membrain_pick/mesh_projections/mesh_projection.py +++ b/src/membrain_pick/mesh_projections/mesh_projection.py @@ -1,4 +1,6 @@ import os +from typing import Optional + import numpy as np from scipy.ndimage import zoom @@ -172,6 +174,7 @@ def convert_to_mesh( min_connected_size: float = 1e4, imod_meshing: bool = False, pymeshlab_meshing: bool = False, + max_segmentations: Optional[int] = None, ) -> None: """ Converts segmentation data into a mesh format and stores it. @@ -208,6 +211,9 @@ def convert_to_mesh( Flag to process only the largest connected component. min_connected_size : float, optional Minimum size for connected components to be processed. + max_segmentations : int, optional + Maximum number of connected components to export. ``None`` exports all + available components. 
Returns ------- @@ -220,10 +226,22 @@ def convert_to_mesh( mb_file, only_largest_component, tomo_file, tomo, rescale_seg ) + component_sizes = np.bincount(seg.reshape(-1)) + component_labels = [ + label + for label in range(1, len(component_sizes)) + if component_sizes[label] >= min_connected_size + ] + component_labels.sort(key=lambda label: component_sizes[label], reverse=True) + + if only_largest_component and component_labels: + component_labels = component_labels[:1] + if max_segmentations is not None: + component_labels = component_labels[:max(0, max_segmentations)] + sub_seg_count = 0 - for k in range(1, seg.max() + 1): - if np.sum(seg == k) < min_connected_size: - continue + normal_values = None + for k in component_labels: cur_mb_key, sub_seg_count = get_cur_mb_key( mb_key, only_largest_component, k, sub_seg_count, seg ) From 35994aaa9dc8e68182d908d782946433e3d19d3d Mon Sep 17 00:00:00 2001 From: Juha Huiskonen Date: Sun, 12 Oct 2025 20:02:59 +0300 Subject: [PATCH 2/3] Add merged output option for multi-mesh exports --- src/membrain_pick/cli/convert_meshes_cli.py | 16 +++ src/membrain_pick/cli/mesh_conversion_cli.py | 34 ++++- src/membrain_pick/cli/surforama_cli.py | 17 ++- .../clustering/mean_shift_inference.py | 47 +++++-- src/membrain_pick/dataloading/data_utils.py | 99 ++++++++++++-- .../dataloading/diffusionnet_dataset.py | 126 +++++++++--------- .../mesh_projections/mesh_class.py | 33 +++-- .../mesh_conversion_wrappers.py | 8 +- .../mesh_projections/mesh_projection.py | 48 ++++++- 9 files changed, 318 insertions(+), 110 deletions(-) diff --git a/src/membrain_pick/cli/convert_meshes_cli.py b/src/membrain_pick/cli/convert_meshes_cli.py index b6c8c4c..99cf847 100644 --- a/src/membrain_pick/cli/convert_meshes_cli.py +++ b/src/membrain_pick/cli/convert_meshes_cli.py @@ -52,6 +52,13 @@ def convert_single_file( "Leave empty or set to None to export all available segmentations." 
), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair)." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -77,6 +84,7 @@ def convert_single_file( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -124,6 +132,13 @@ def convert_mb_folder( "Leave empty or set to None to export all available segmentations." ), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair) per segmentation." + ), + ), ): """Convert a folder of membrane segmentations to meshes. @@ -147,4 +162,5 @@ def convert_mb_folder( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) diff --git a/src/membrain_pick/cli/mesh_conversion_cli.py b/src/membrain_pick/cli/mesh_conversion_cli.py index 5e2b226..7deb408 100644 --- a/src/membrain_pick/cli/mesh_conversion_cli.py +++ b/src/membrain_pick/cli/mesh_conversion_cli.py @@ -89,6 +89,13 @@ def convert_single_file( "Leave empty or set to None to export all available segmentations." ), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair)." + ), + ), ): """Convert a single membrane segmentation to a mesh. @@ -111,6 +118,7 @@ def convert_single_file( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -154,6 +162,13 @@ def convert_mb_folder( "Leave empty or set to None to export all available segmentations." 
), ), + merge_outputs: bool = Option( # noqa: B008 + False, + help=( + "Store all exported connected components in a single set of output " + "files (one H5/OBJ pair) per segmentation." + ), + ), ): """Convert a folder of membrane segmentations to meshes. @@ -174,6 +189,7 @@ def convert_mb_folder( input_pixel_size=input_pixel_size, imod_meshing=imod_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -576,7 +592,11 @@ def surforama( from surforama.app import QtSurforama import numpy as np from matplotlib.pyplot import get_cmap - from membrain_pick.dataloading.data_utils import load_mesh_from_hdf5, get_csv_data + from membrain_pick.dataloading.data_utils import ( + get_csv_data, + iter_mesh_entries, + load_mesh_from_hdf5, + ) from membrain_seg.segmentation.dataloading.data_utils import load_tomogram from membrain_pick.scalar_selection import ScalarSelectionWidget @@ -603,17 +623,21 @@ def surforama( else: mesh_files = [h5_path] - for h5_nr, h5_path in enumerate(mesh_files): + mesh_entries = [] + for h5_path in mesh_files: mesh_data = load_mesh_from_hdf5(h5_path) + for group_name, group_data in iter_mesh_entries(mesh_data): + mesh_entries.append((h5_path, group_name, group_data)) - if h5_nr == 0: + for entry_idx, (h5_path, _group_name, mesh_data) in enumerate(mesh_entries): + if entry_idx == 0: volume_layer = display_tomo(viewer, mesh_data, tomogram_path) pixel_size = get_pixel_size(mesh_data, None) points, faces = get_points_and_faces(mesh_data, pixel_size) display_scores(viewer, mesh_data, points, faces) - if h5_nr == 0: + if entry_idx == 0: surforama_widget = initialize_surforama_widget( points, faces, volume_layer, viewer, normal_offset=normal_offset ) @@ -628,7 +652,7 @@ def surforama( viewer, mesh_data, pixel_size, point_size=point_size ) - if h5_nr == 0: + if entry_idx == 0: display_input_normal_values(viewer, mesh_data, points, faces) if return_viewer: return viewer diff --git a/src/membrain_pick/cli/surforama_cli.py 
b/src/membrain_pick/cli/surforama_cli.py index 9f9b3f0..3f57883 100644 --- a/src/membrain_pick/cli/surforama_cli.py +++ b/src/membrain_pick/cli/surforama_cli.py @@ -33,7 +33,10 @@ def surforama( """ import os import napari - from membrain_pick.dataloading.data_utils import load_mesh_from_hdf5 + from membrain_pick.dataloading.data_utils import ( + iter_mesh_entries, + load_mesh_from_hdf5, + ) from membrain_pick.napari_utils.surforama_cli_utils import ( display_tomo, get_pixel_size, @@ -57,17 +60,21 @@ def surforama( else: mesh_files = [h5_path] - for h5_nr, h5_path in enumerate(mesh_files): + mesh_entries = [] + for h5_path in mesh_files: mesh_data = load_mesh_from_hdf5(h5_path) + for group_name, group_data in iter_mesh_entries(mesh_data): + mesh_entries.append((h5_path, group_name, group_data)) - if h5_nr == 0: + for entry_idx, (h5_path, _group_name, mesh_data) in enumerate(mesh_entries): + if entry_idx == 0: volume_layer = display_tomo(viewer, mesh_data, tomogram_path) pixel_size = get_pixel_size(mesh_data, None) points, faces = get_points_and_faces(mesh_data, pixel_size) display_scores(viewer, mesh_data, points, faces) - if h5_nr == 0: + if entry_idx == 0: surforama_widget = initialize_surforama_widget( points, faces, volume_layer, viewer, normal_offset=normal_offset ) @@ -82,7 +89,7 @@ def surforama( viewer, mesh_data, pixel_size, point_size=point_size ) - if h5_nr == 0: + if entry_idx == 0: display_input_normal_values(viewer, mesh_data, points, faces) if return_viewer: return viewer diff --git a/src/membrain_pick/clustering/mean_shift_inference.py b/src/membrain_pick/clustering/mean_shift_inference.py index 79415fe..665f871 100644 --- a/src/membrain_pick/clustering/mean_shift_inference.py +++ b/src/membrain_pick/clustering/mean_shift_inference.py @@ -1,5 +1,6 @@ from membrain_pick.dataloading.data_utils import ( get_csv_data, + iter_mesh_entries, store_array_in_star, load_mesh_from_hdf5, store_mesh_in_hdf5, @@ -58,22 +59,40 @@ def mean_shift_for_h5( method: str 
= "membrain_pick", score_threshold: float = 9.0, ): - mesh_data = load_mesh_from_hdf5(h5_file) - verts = mesh_data["points"] - scores = mesh_data["scores"] - out_pos, _ = mean_shift_for_scores( - verts, scores, bandwidth, max_iter, margin, device, score_threshold, method - ) - # add positions to h5 container - mesh_data["cluster_centers"] = out_pos - out_file = os.path.join(out_dir, os.path.basename(h5_file)) + mesh_entries = list(iter_mesh_entries(load_mesh_from_hdf5(h5_file))) os.makedirs(out_dir, exist_ok=True) - store_mesh_in_hdf5( - out_file, - **mesh_data, - ) + out_file = os.path.join(out_dir, os.path.basename(h5_file)) + if os.path.exists(out_file): + os.remove(out_file) + + for group_name, mesh_data in mesh_entries: + verts = mesh_data["points"] + scores = mesh_data["scores"] + out_pos, _ = mean_shift_for_scores( + verts, scores, bandwidth, max_iter, margin, device, score_threshold, method + ) + mesh_data = dict(mesh_data) + mesh_data["cluster_centers"] = out_pos + + store_mesh_in_hdf5( + out_file, + group_name=group_name, + **mesh_data, + ) - store_clusters(h5_file, out_dir, out_pos, np.zeros((0,)), verts, mesh_data["faces"]) + group_file = h5_file + if group_name is not None: + base, ext = os.path.splitext(h5_file) + group_file = f"{base}_{group_name}{ext}" + + store_clusters( + group_file, + out_dir, + out_pos, + np.zeros((0,)), + verts, + mesh_data["faces"], + ) def mean_shift_for_csv( diff --git a/src/membrain_pick/dataloading/data_utils.py b/src/membrain_pick/dataloading/data_utils.py index d6ca9c5..f9fb3d2 100644 --- a/src/membrain_pick/dataloading/data_utils.py +++ b/src/membrain_pick/dataloading/data_utils.py @@ -1,4 +1,6 @@ import lxml.etree as ET +from typing import Optional + import pandas as pd import numpy as np import vtk @@ -158,12 +160,20 @@ def store_array_in_npy(out_file, data): np.save(out_file, data) -def store_mesh_in_hdf5(out_file: str, points: np.ndarray, faces: np.ndarray, **kwargs): +def store_mesh_in_hdf5( + out_file: str, + 
points: np.ndarray, + faces: np.ndarray, + group_name: Optional[str] = None, + **kwargs, +): """ Store mesh data in an HDF5 file. The points and vertices will be stored in separate hdf5 datasets. - Each kwargs key will be stored as a separate dataset. + Each kwargs key will be stored as a separate dataset. When ``group_name`` is + provided, the datasets are created inside that group, allowing multiple + meshes to share the same container. Parameters ---------- @@ -173,6 +183,9 @@ def store_mesh_in_hdf5(out_file: str, points: np.ndarray, faces: np.ndarray, **k The points data. faces : np.ndarray The faces data. + group_name : str, optional + Name of the group in which to place the datasets. If ``None`` (the + default) the datasets are written directly at the root of the file. kwargs : dict Additional data to store in the HDF5 file. @@ -181,20 +194,27 @@ def store_mesh_in_hdf5(out_file: str, points: np.ndarray, faces: np.ndarray, **k None """ - with h5py.File(out_file, "w") as f: - f.create_dataset("points", data=points) - f.create_dataset("faces", data=faces) + mode = "a" if group_name is not None else "w" + with h5py.File(out_file, mode) as f: + target = f + if group_name is not None: + if group_name in f: + del f[group_name] + target = f.create_group(group_name) + + target.create_dataset("points", data=points) + target.create_dataset("faces", data=faces) for key, value in kwargs.items(): if value is not None: if isinstance(value, str): # Convert string to numpy array of variable-length UTF-8 strings dt = h5py.string_dtype(encoding="utf-8") - f.create_dataset(key, data=np.array(value, dtype=dt)) + target.create_dataset(key, data=np.array(value, dtype=dt)) else: - f.create_dataset(key, data=value) + target.create_dataset(key, data=value) -def load_mesh_from_hdf5(in_file: str): +def load_mesh_from_hdf5(in_file: str, group_name: Optional[str] = None): """ Load mesh data from an HDF5 file. 
@@ -202,19 +222,76 @@ def load_mesh_from_hdf5(in_file: str): ---------- in_file : str The path to the input HDF5 file. + group_name : str, optional + Name of a group to load from the file. When ``None`` the function + returns the datasets stored at the root of the file. If the file + contains only groups, a dictionary mapping each group name to its + datasets is returned. Returns ------- dict - A dictionary containing the mesh data. + A dictionary containing the mesh data. When ``group_name`` is ``None`` + and the file stores multiple groups, a dictionary of dictionaries is + returned where each key corresponds to a group name. """ mesh_data = {} with h5py.File(in_file, "r") as f: - for key in f.keys(): - mesh_data[key] = f[key][()] + if group_name is not None: + if group_name not in f: + raise KeyError( + f"Group '{group_name}' not found in '{in_file}'. Available keys: {list(f.keys())}" + ) + node = f[group_name] + for key in node.keys(): + mesh_data[key] = node[key][()] + return mesh_data + + datasets = [key for key in f.keys() if isinstance(f[key], h5py.Dataset)] + groups = [key for key in f.keys() if isinstance(f[key], h5py.Group)] + + if datasets: + for key in datasets: + mesh_data[key] = f[key][()] + return mesh_data + + if groups: + grouped_mesh_data = {} + for group in groups: + grouped_mesh_data[group] = { + key: f[group][key][()] for key in f[group].keys() + } + return grouped_mesh_data + return mesh_data +def iter_mesh_entries(mesh_data: dict): + """Yield individual mesh entries from the loaded HDF5 content. + + Parameters + ---------- + mesh_data : dict + Output of :func:`load_mesh_from_hdf5`. + + Yields + ------ + tuple[Optional[str], dict] + Pairs of ``(group_name, mesh_dict)``. ``group_name`` is ``None`` when + the data corresponds to a single mesh stored at the root of the file. 
+ """ + + if not isinstance(mesh_data, dict): + raise TypeError("mesh_data must be a dictionary returned by load_mesh_from_hdf5") + + if "points" in mesh_data and isinstance(mesh_data["points"], np.ndarray): + yield None, mesh_data + return + + for group_name, group_data in mesh_data.items(): + yield group_name, group_data + + def store_point_and_vectors_in_vtp( out_path: str, in_points: np.ndarray, diff --git a/src/membrain_pick/dataloading/diffusionnet_dataset.py b/src/membrain_pick/dataloading/diffusionnet_dataset.py index 75c0dac..ae95616 100644 --- a/src/membrain_pick/dataloading/diffusionnet_dataset.py +++ b/src/membrain_pick/dataloading/diffusionnet_dataset.py @@ -15,6 +15,7 @@ compute_nearest_distances, ) from membrain_pick.dataloading.data_utils import ( + iter_mesh_entries, load_mesh_from_hdf5, convert_to_torch, read_star_file, @@ -290,69 +291,74 @@ def load_data(self) -> None: hdf5_path = entry[0] gt_path = entry[1] - mesh_data = load_mesh_from_hdf5(hdf5_path) - points = mesh_data["points"] - faces = mesh_data["faces"] - vert_normals = mesh_data["normals"] - normal_values = mesh_data["normal_values"] - tomo_file = ( - "" if not "tomo_file" in mesh_data.keys() else mesh_data["tomo_file"] - ) + mesh_container = load_mesh_from_hdf5(hdf5_path) + for _, mesh_data in iter_mesh_entries(mesh_container): + points = mesh_data["points"] + faces = mesh_data["faces"] + vert_normals = mesh_data["normals"] + normal_values = mesh_data["normal_values"] + tomo_file = ( + "" if not "tomo_file" in mesh_data.keys() else mesh_data["tomo_file"] + ) - points = np.concatenate([points, normal_values], axis=1) - if os.path.isfile(gt_path): - gt_data = read_star_file(gt_path) # pandas dataframe - # check size of gt_data - if gt_data.shape[0] == 0: + points = np.concatenate([points, normal_values], axis=1) + if os.path.isfile(gt_path): + gt_data = read_star_file(gt_path) # pandas dataframe + # check size of gt_data + if gt_data.shape[0] == 0: + gt_pos = np.zeros((1, 3)) + gt_classes 
= np.ones(1) * -1 + else: + gt_pos = gt_data[ + ["rlnCoordinateX", "rlnCoordinateY", "rlnCoordinateZ"] + ].values + assert ( + "rlnCoordinateX" in gt_data.columns + ), "rlnCoordinateX not in columns" + if "rlnClassNumber" in gt_data.columns: + gt_classes = gt_data["rlnClassNumber"].values + else: + gt_classes = np.ones(gt_pos.shape[0]) + gt_pos = project_points_to_nearest_hyperplane( + gt_pos, points[:, :3] + ) + else: gt_pos = np.zeros((1, 3)) gt_classes = np.ones(1) * -1 - else: - gt_pos = gt_data[ - ["rlnCoordinateX", "rlnCoordinateY", "rlnCoordinateZ"] - ].values - assert ( - "rlnCoordinateX" in gt_data.columns - ), "rlnCoordinateX not in columns" - if "rlnClassNumber" in gt_data.columns: - gt_classes = gt_data["rlnClassNumber"].values - else: - gt_classes = np.ones(gt_pos.shape[0]) - gt_pos = project_points_to_nearest_hyperplane(gt_pos, points[:, :3]) - else: - gt_pos = np.zeros((1, 3)) - gt_classes = np.ones(1) * -1 - - gt_mask = self._get_GT_mask(gt_pos, gt_classes) - gt_pos = gt_pos[gt_mask] - - points = points * self.input_pixel_size / self.process_pixel_size - gt_pos = gt_pos * self.input_pixel_size / self.process_pixel_size - - distances, nn_idcs = compute_nearest_distances(points[:, :3], gt_pos) - - # Move GT along nearest normal - _, nn_idcs_psii = compute_nearest_distances(gt_pos, points[:, :3]) - psii_normals = vert_normals[nn_idcs_psii] - - nearest_PSII_pos = gt_pos[nn_idcs] + psii_normals[nn_idcs] * 20 - connection_vectors = nearest_PSII_pos - (points[:, :3]) - - angle_to_normal = np.einsum("ij,ij->i", connection_vectors, vert_normals) - mask = angle_to_normal < 0 - - distances[distances > 10] = 10 - distances[mask] = 10.05 - points[:, :3] /= self.max_tomo_shape - points[:, :3] *= self.process_pixel_size - gt_pos /= self.max_tomo_shape - gt_pos *= self.process_pixel_size - - self.membranes.append(points) - self.labels.append(distances) - self.faces.append(faces) - self.vert_normals.append(vert_normals) - self.gt_pos.append(gt_pos) - 
self.tomo_files.append(tomo_file) + + gt_mask = self._get_GT_mask(gt_pos, gt_classes) + gt_pos = gt_pos[gt_mask] + + points = points * self.input_pixel_size / self.process_pixel_size + gt_pos = gt_pos * self.input_pixel_size / self.process_pixel_size + + distances, nn_idcs = compute_nearest_distances(points[:, :3], gt_pos) + + # Move GT along nearest normal + _, nn_idcs_psii = compute_nearest_distances(gt_pos, points[:, :3]) + psii_normals = vert_normals[nn_idcs_psii] + + nearest_PSII_pos = gt_pos[nn_idcs] + psii_normals[nn_idcs] * 20 + connection_vectors = nearest_PSII_pos - (points[:, :3]) + + angle_to_normal = np.einsum("ij,ij->i", connection_vectors, vert_normals) + mask = angle_to_normal < 0 + + distances[distances > 10] = 10 + distances[mask] = 10.05 + points[:, :3] /= self.max_tomo_shape + points[:, :3] *= self.process_pixel_size + gt_pos /= self.max_tomo_shape + gt_pos *= self.process_pixel_size + + self.membranes.append(points) + self.labels.append(distances) + self.faces.append(faces) + self.vert_normals.append(vert_normals) + self.gt_pos.append(gt_pos) + self.tomo_files.append(tomo_file) + if self.overfit: + break if self.overfit: break diff --git a/src/membrain_pick/mesh_projections/mesh_class.py b/src/membrain_pick/mesh_projections/mesh_class.py index 989061e..a229b02 100644 --- a/src/membrain_pick/mesh_projections/mesh_class.py +++ b/src/membrain_pick/mesh_projections/mesh_class.py @@ -1,7 +1,8 @@ -import numpy as np -from sklearn.neighbors import NearestNeighbors import csv +from typing import Optional +import numpy as np +from sklearn.neighbors import NearestNeighbors class Mesh(object): @@ -57,15 +58,27 @@ def find_closest_triangles_batch(self, positions): dists = self.nn_entity.kneighbors(positions) return dists - def store_in_file(self, out_file): - with open(out_file, 'w') as out_csv: - csv_writer = csv.writer(out_csv, delimiter=' ') + def store_in_file( + self, + out_file, + append: bool = False, + vertex_offset: int = 0, + object_name: 
Optional[str] = None, + ): + mode = "a" if append else "w" + with open(out_file, mode) as out_csv: + if object_name is not None: + out_csv.write(f"o {object_name}\n") + for vert in self.vertices: - row = ['v', str(vert[0]), str(vert[1]), str(vert[2])] - csv_writer.writerow(row) - for combo in self.triangle_combos: - row = ['f', str(int(combo[0])), str(int(combo[1])), str(int(combo[2]))] - csv_writer.writerow(row) + out_csv.write(f"v {vert[0]} {vert[1]} {vert[2]}\n") + + combos = np.asarray(self.triangle_combos, dtype=int) + if append and vertex_offset: + combos = combos + int(vertex_offset) + + for combo in combos: + out_csv.write(f"f {int(combo[0])} {int(combo[1])} {int(combo[2])}\n") diff --git a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py index 2eb2eae..483a3a1 100644 --- a/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py +++ b/src/membrain_pick/mesh_projections/mesh_conversion_wrappers.py @@ -23,6 +23,7 @@ def meshes_for_folder_structure( imod_meshing=False, pymeshlab_meshing=False, max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ): """ This assumes the following folder structure: @@ -113,7 +114,8 @@ def meshes_for_folder_structure( min_connected_size=min_connected_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, - max_segmentations=max_segmentations, + max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -135,6 +137,7 @@ def mesh_for_tomo_mb_folder( imod_meshing=False, pymeshlab_meshing=False, max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ): """ This function assumes the following folder structure: @@ -194,6 +197,7 @@ def mesh_for_tomo_mb_folder( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) @@ -215,6 +219,7 @@ def mesh_for_single_mb_file( imod_meshing=False, pymeshlab_meshing=False, 
max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ): """ """ os.makedirs(out_folder, exist_ok=True) @@ -252,4 +257,5 @@ def mesh_for_single_mb_file( imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, + merge_outputs=merge_outputs, ) diff --git a/src/membrain_pick/mesh_projections/mesh_projection.py b/src/membrain_pick/mesh_projections/mesh_projection.py index 9c7d1be..b14e308 100644 --- a/src/membrain_pick/mesh_projections/mesh_projection.py +++ b/src/membrain_pick/mesh_projections/mesh_projection.py @@ -105,6 +105,15 @@ def get_cur_mb_key(mb_key, only_largest_component, k, sub_seg_count, seg): return cur_mb_key, sub_seg_count +def _count_vertices_in_obj(obj_path: str) -> int: + vertex_count = 0 + with open(obj_path, "r") as obj_file: + for line in obj_file: + if line.startswith("v "): + vertex_count += 1 + return vertex_count + + def save_mesh_data( out_file_base: str, points: np.ndarray, @@ -114,6 +123,8 @@ def save_mesh_data( only_obj: bool, tomo_file: str = None, pixel_size: float = None, + merge_outputs: bool = False, + component_name: Optional[str] = None, ) -> None: """ Save the mesh data to files. 
@@ -140,19 +151,33 @@ def save_mesh_data( None """ + h5_path = out_file_base + ".h5" + obj_path = out_file_base + ".obj" + if not only_obj: store_mesh_in_hdf5( - out_file=out_file_base + ".h5", + out_file=h5_path, points=points, faces=faces, normals=point_normals, normal_values=normal_values, - tomo_file=os.path.abspath(tomo_file), + tomo_file=os.path.abspath(tomo_file) if tomo_file is not None else None, pixel_size=pixel_size, + group_name=component_name if merge_outputs else None, ) mesh = Mesh(points, faces + 1) - mesh.store_in_file(out_file_base + ".obj") + if merge_outputs: + append = os.path.exists(obj_path) + vertex_offset = _count_vertices_in_obj(obj_path) if append else 0 + mesh.store_in_file( + obj_path, + append=append, + vertex_offset=vertex_offset, + object_name=component_name, + ) + else: + mesh.store_in_file(obj_path) # precompute spectrals and partitioning @@ -175,6 +200,7 @@ def convert_to_mesh( imod_meshing: bool = False, pymeshlab_meshing: bool = False, max_segmentations: Optional[int] = None, + merge_outputs: bool = False, ) -> None: """ Converts segmentation data into a mesh format and stores it. @@ -214,6 +240,11 @@ def convert_to_mesh( max_segmentations : int, optional Maximum number of connected components to export. ``None`` exports all available components. + merge_outputs : bool, optional + When ``True``, store all exported components in a single pair of output + files by grouping meshes inside the HDF5 container and appending them to + the OBJ file. Each component will be written under a dedicated group or + object name. 
Returns ------- @@ -277,8 +308,15 @@ def convert_to_mesh( verts=points, normals=point_normals, ) + if merge_outputs: + out_file_base = os.path.join(out_folder, token + "_" + mb_key) + component_name = cur_mb_key + else: + out_file_base = os.path.join(out_folder, token + "_" + cur_mb_key) + component_name = None + save_mesh_data( - out_file_base=os.path.join(out_folder, token + "_" + cur_mb_key), + out_file_base=out_file_base, points=points, faces=faces, point_normals=point_normals, @@ -286,4 +324,6 @@ def convert_to_mesh( only_obj=only_obj, tomo_file=tomo_file, pixel_size=1.0, # points dimension corresponds already to tomogram dimensions + merge_outputs=merge_outputs, + component_name=component_name, ) From e27535e3071bead782e40dbbabd03c2e95c5a82e Mon Sep 17 00:00:00 2001 From: Juha Huiskonen Date: Sun, 12 Oct 2025 20:42:58 +0300 Subject: [PATCH 3/3] Add mesh export threshold option --- docs/Data_Preparation.md | 15 +++++++++++---- src/membrain_pick/cli/convert_meshes_cli.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/docs/Data_Preparation.md b/docs/Data_Preparation.md index 43f7b81..e59884e 100644 --- a/docs/Data_Preparation.md +++ b/docs/Data_Preparation.md @@ -58,11 +58,17 @@ This command will generate mesh representations of the membrane segmentations an - `--tomo-path` (TEXT, required): Path to the tomogram to be projected. [default: None] - `--out-folder` (TEXT): Path to the folder where mesh projections should be stored. [default: ./mesh_data] - `--step-numbers` (INTEGER): Step numbers for the normal vectors. [default: (-10, 10)] -- `--step-size` (FLOAT): Step size for the normal vectors. [default: 2.5] +- `--step-size` (FLOAT): Step size for the normal vectors. [default: 2.5] - `--mesh-smoothing` (INTEGER): Smoothing factor for the mesh. [default: 1000] - `--barycentric-area` (FLOAT): Barycentric area for the mesh. 
[default: 400.0]
+- `--min-segmentation-size` (INTEGER): Export connected components whose voxel count
+  is greater than or equal to this threshold. [default: 10000]
 - `--max-segmentations` (INTEGER): Maximum number of connected segmentations to convert per file. Use `None` to export all components. [default: None]
 
+When both `--min-segmentation-size` and `--max-segmentations` are set, MemBrain Pick
+exports the largest components that meet the size threshold, capped at the
+requested maximum.
+
 Note: options may change faster than these docs. You can check the available options by running `membrain_pick convert_mb_folder`.
 
 #### 2. You have a single membrane segmentation
@@ -77,9 +83,10 @@ This command will generate a mesh representation of the membrane segmentation an
 
 #### More Options
 
-Other options mirror the `convert_mb_folder` command, including `--max-segmentations`
-to control how many connected segmentations are exported per membrane file. You can
-check the available options by running `membrain-pick convert_file`.
+Other options mirror the `convert_mb_folder` command, including `--min-segmentation-size`
+to discard small connected components and `--max-segmentations` to control how many
+meshes are exported per membrane file. You can check the available options by running
+`membrain-pick convert_file`.
 
 ### Outputs
 
diff --git a/src/membrain_pick/cli/convert_meshes_cli.py b/src/membrain_pick/cli/convert_meshes_cli.py
index 99cf847..95a5783 100644
--- a/src/membrain_pick/cli/convert_meshes_cli.py
+++ b/src/membrain_pick/cli/convert_meshes_cli.py
@@ -37,6 +37,13 @@ def convert_single_file(
     barycentric_area: float = Option( # noqa: B008
         400.0, help="Barycentric area for the mesh. Default: 1.0"
     ),
+    min_segmentation_size: int = Option( # noqa: B008
+        10000,
+        help=(
+            "Export all connected components whose voxel count is greater than or "
+            "equal to this threshold."
+ ), + ), imod_meshing: bool = Option( # noqa: B008 False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", @@ -81,6 +88,7 @@ def convert_single_file( mesh_smoothing=mesh_smoothing, barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, + min_connected_size=min_segmentation_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations, @@ -117,6 +125,13 @@ def convert_mb_folder( barycentric_area: float = Option( # noqa: B008 400.0, help="Barycentric area for the mesh. Default: 1.0" ), + min_segmentation_size: int = Option( # noqa: B008 + 10000, + help=( + "Export all connected components whose voxel count is greater than or " + "equal to this threshold." + ), + ), imod_meshing: bool = Option( # noqa: B008 False, help="Should the mesh be generated using IMOD? WARNING: This is highly experimental.", @@ -159,6 +174,7 @@ def convert_mb_folder( mesh_smoothing=mesh_smoothing, barycentric_area=barycentric_area, input_pixel_size=input_pixel_size, + min_connected_size=min_segmentation_size, imod_meshing=imod_meshing, pymeshlab_meshing=pymeshlab_meshing, max_segmentations=max_segmentations,