diff --git a/eitprocessing/roi_selection/gridselection.py b/eitprocessing/roi_selection/gridselection.py index f01e2a12b..63f966c0b 100644 --- a/eitprocessing/roi_selection/gridselection.py +++ b/eitprocessing/roi_selection/gridselection.py @@ -3,17 +3,27 @@ import warnings from dataclasses import dataclass from dataclasses import field +from enum import auto from typing import Literal import numpy as np from numpy.typing import NDArray +from strenum import LowercaseStrEnum from . import ROISelection +class DivisionMethod(LowercaseStrEnum): + geometrical = auto() + geometrical_split_pixels = auto() + physiological = auto() + + +# DivisionMethod.geometrical_split_pixels == "geometrical_split_pixels" + @dataclass class GridSelection(ROISelection): """Create regions of interest by division into a grid. - GridSelection allows for the creation a list of 2D arrays that can be used to divide a two- or + GridSelection allows for the creation of a list of 2D arrays that can be used to divide a two- or higher-dimensional array into several regions structured in a grid. An instance of GridSelection contains information about how to subdivide an input matrix. Calling `find_grid(data)`, where data is a 2D array, results in a list of arrays with the same @@ -26,18 +36,22 @@ class GridSelection(ROISelection): only contain NaN are split as if it is a (28, 28) array. The resulting arrays have the shape (32, 32) with the same cells as the input data containing NaN values. - If the number of rows or columns can not split evenly, a row or column can be split among two + If the number of rows or columns can not be split evenly, a row or column can be split among two regions. This behaviour is controlled by `split_rows` and `split_columns`. - If `split_rows` is `False` (default), rows will not be split between two groups. A warning will + If `split_rows` is `geometrical` (default), rows will not be split between two groups. A warning will be shown stating regions don't contain equal numbers of rows. The regions towards the top will be larger. E.g., when a (5, 2) array is split in two vertical regions, the first region will contain the first three rows, and the second region the last two rows. - If `split_rows` is `True`, e.g. a (5, 2) array that is split in two vertical regions, the first - region will contain the first two rows and half of each pixel of the third row. The second + If `split_rows` is `geometrical_split_pixels`, e.g. a (5, 2) array that is split in two vertical regions, + the first region will contain the first two rows and half of each pixel of the third row. The second region contains half of each pixel in the third row, and the last two rows. + If `split_rows` is `physiological`, an array is split so that the cumulative sum of the rows in each region + is exactly equal. For instance, if an array is split in two regions, a pixel row can contribute for 20% to + the first region and for 80% to the second region. + `split_columns` has the same effect on columns as `split_rows` has on rows. Regions are ordered according to C indexing order. The `matrix_layout()` method provides a map @@ -92,8 +106,8 @@ class GridSelection(ROISelection): v_split: int h_split: int - split_rows: bool = False - split_columns: bool = False + split_rows: DivisionMethod = DivisionMethod.geometrical + split_columns: DivisionMethod = DivisionMethod.geometrical ignore_nan_rows: bool = True ignore_nan_columns: bool = True @@ -115,8 +129,8 @@ def __post_init__(self): if self.h_split < 1: raise InvalidHorizontalDivision("`h_split` can't be smaller than 1.") - self._check_attribute_type("split_columns", bool) - self._check_attribute_type("split_rows", bool) + self._check_attribute_type("split_columns", DivisionMethod) + self._check_attribute_type("split_rows", DivisionMethod) self._check_attribute_type("ignore_nan_columns", bool) self._check_attribute_type("ignore_nan_rows", bool) @@ -136,8 +150,9 @@ def find_grid(self, data: NDArray) -> list[NDArray]: h_split`. """ grouping_method = { - True: self._create_grouping_vector_split_pixels, - False: self._create_grouping_vector_no_split_pixels, + DivisionMethod.geometrical: self._create_grouping_vector_no_split_pixels, + DivisionMethod.geometrical_split_pixels: self._create_grouping_vector_split_pixels, + DivisionMethod.physiological: self._create_grouping_vector_physiological } horizontal_grouping_vectors = grouping_method[self.split_columns]( @@ -306,6 +321,105 @@ def _create_grouping_vector_split_pixels( # pylint: disable=too-many-locals return final + def _create_grouping_vector_physiological( # pylint: disable=too-many-locals + self, + matrix: NDArray, + horizontal: bool, + n_groups: int, + ) -> list[NDArray]: + + """Create a grouping vector to split vector into `n` groups allowing + split elements.""" + + axis = 0 if horizontal else 1 + + # create a vector that is nan if the entire column/row is nan, 1 otherwise + vector_is_nan = np.all(np.isnan(matrix), axis=axis) + vector = np.ones(vector_is_nan.shape) + + if (horizontal and self.ignore_nan_columns) or ( + not horizontal and self.ignore_nan_rows + ): + vector[vector_is_nan] = np.nan + + # remove non-numeric (nan) elements at vector ends + # nan elements between numeric elements are kept + numeric_element_indices = np.argwhere(~np.isnan(vector)) + first_num_element = numeric_element_indices.min() + last_num_element = numeric_element_indices.max() + else: + first_num_element = 0 + last_num_element = len(vector) - 1 + + n_elements = last_num_element - first_num_element + 1 + + group_size = n_elements / n_groups + + if group_size < 1: + if horizontal: + warnings.warn( + f"The number horizontal regions ({n_groups}) is larger than the " + f"number of available columns ({n_elements}).", + MoreHorizontalGroupsThanColumns, + ) + else: + warnings.warn( + f"The number vertical regions ({n_groups}) is larger than the " + f"number of available rows ({n_elements}).", + MoreVerticalGroupsThanRows, + ) + + sum_along_axis = np.nansum(matrix, axis=axis) + relative_sum_along_axis = sum_along_axis / np.nansum(matrix) + relative_cumsum_along_axis = np.cumsum(relative_sum_along_axis) + + lower_bounds = np.arange(n_groups) / n_groups + upper_bounds = (np.arange(n_groups) + 1) / n_groups + + # Otherwise the first row will not fall in the first region (because they are 0) + # and last rows will not fall in the last region, because they reach 1.0 + lower_bounds[0] = -np.inf + upper_bounds[-1] = np.inf + + row_in_region = [] + + for lower_bound, upper_bound in zip(lower_bounds, upper_bounds): + row_in_region.append( + np.logical_and( + relative_cumsum_along_axis > lower_bound, relative_cumsum_along_axis <= upper_bound + ) + ) + + row_in_region = np.array(row_in_region).T + final = row_in_region.astype(float) + + # find initial region for each row + initial_regions = np.apply_along_axis(np.flatnonzero, 1, row_in_region).flatten() + + # find transitions between regions + region_borders = np.flatnonzero(np.diff(initial_regions)) + + # finds overlap in transition region + for previous_region, (ventral_row, upper_bound) in enumerate( + zip(region_borders, upper_bounds) + ): + dorsal_row = ventral_row + 1 + next_region = previous_region + 1 + a, b = relative_cumsum_along_axis[ventral_row], relative_cumsum_along_axis[dorsal_row] + diff = b - a + to_a = upper_bound - a + fraction_to_a = to_a / diff + fraction_to_b = 1 - fraction_to_a + + final[dorsal_row, previous_region] = fraction_to_a + final[dorsal_row, next_region] = fraction_to_b + final = final.T + final = final * vector + # convert to list of vectors + final = [final[n, :] for n in range(final.shape[0])] + + return final + def matrix_layout(self) -> NDArray: """Returns a 2D array showing the layout of the matrices returned by `find_grid`.""" diff --git a/notebooks/physiological_roi.ipynb b/notebooks/physiological_roi.ipynb new file mode 100644 index 000000000..6eac629cb --- /dev/null +++ b/notebooks/physiological_roi.ipynb @@ -0,0 +1,333 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from eitprocessing.binreader import Sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "file_path = '' #input path to test file \n", + "FRAMERATE = 50 # Standard framerate\n", + "VENDOR = 'Timpel' # Vendor of EIT device used for data acquisition " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "ignore_nan_columns = True\n", + "ignore_nan_rows = True" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/juliettefrancovich/Documents/KT/TM3/eitprocessing/eitprocessing/binreader/sequence.py:515: UserWarning: The number of frames requested (None) is largerthan the available number (180288) of frames after the first frame selected (0).\n", + "180288 frames have been loaded.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "measurement = Sequence.from_path(path=file_path, vendor=VENDOR, framerate=FRAMERATE)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "short_section = measurement[80140:81600]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/juliettefrancovich/Documents/KT/TM3/eitprocessing/eitprocessing/binreader/frameset.py:82: RuntimeWarning: All-NaN slice encountered\n", + " return np.nanmin(self.pixel_values, axis=0)\n" + ] + } + ], + "source": [ + "bc = short_section.framesets['raw'].deepcopy()\n", + "bc.name = 'baseline_corrected'\n", + "bc.description = 'raw data with a baseline correction for this section'\n", + "bc.pixel_values = bc.pixel_values - bc.pixel_baseline\n", + "bc.params.update({'pre-processing': 'pixel baseline correction'})\n", + "short_section.framesets['bc'] = bc" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "pixel_values = short_section.framesets['bc'].pixel_values" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "matrix = pixel_values[300, :, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "horizontal = True" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "n_groups = 4" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "## Test geometrical split\n", + "\n", + "axis = 0 if horizontal else 1\n", + "\n", + "# create a vector that is nan if the entire column/row is nan, 1 otherwise\n", + "vector_is_nan = np.all(np.isnan(matrix), axis=axis)\n", + "vector = np.ones(vector_is_nan.shape)\n", + "\n", + "if (horizontal and ignore_nan_columns) or (\n", + " not horizontal and ignore_nan_rows\n", + "):\n", + " vector[vector_is_nan] = np.nan\n", + "\n", + " # remove non-numeric (nan) elements at vector ends\n", + " # nan elements between numeric elements are kept\n", + " numeric_element_indices = np.argwhere(~np.isnan(vector))\n", + " first_num_element = numeric_element_indices.min()\n", + " last_num_element = numeric_element_indices.max()\n", + "else:\n", + " first_num_element = 0\n", + " last_num_element = len(vector) - 1\n", + "\n", + "n_elements = last_num_element - first_num_element + 1\n", + "\n", + "group_size = n_elements / n_groups\n", + "\n", + "if group_size < 1:\n", + " if horizontal:\n", + " print(f\"The number of horizontal regions ({n_groups}) is larger than the number of available columns ({n_elements}).\")\n", + " else:\n", + " print(f\"The number of vertical regions ({n_groups}) is larger than the number of available columns ({n_elements}).\")\n", + "\n", + "# find the right boundaries (upper values) of each group\n", + "right_boundaries = (np.arange(n_groups) + 1) * group_size\n", + "right_boundaries = right_boundaries[:, np.newaxis] # converts to row vector\n", + "\n", + "# each row in the base represents one group\n", + "base = np.tile(np.arange(n_elements), (n_groups, 1))\n", + "\n", + "# if the element number is higher than the split, it does not belong in this group\n", + "element_contribution_to_group = right_boundaries - base\n", + "element_contribution_to_group[element_contribution_to_group < 0] = 0\n", + "\n", + "# if the element to the right is a full group size, this element is ruled out\n", + "rule_out = element_contribution_to_group[:, 1:] >= group_size\n", + "element_contribution_to_group[:, :-1][rule_out] = 0\n", + "\n", + "# elements have a maximum value of 1\n", + "element_contribution_to_group = np.fmin(element_contribution_to_group, 1)\n", + "\n", + "# if this element is already represented in the previous group (row), subtract that\n", + "element_contribution_to_group[1:] -= element_contribution_to_group[:-1]\n", + "element_contribution_to_group[element_contribution_to_group < 0] = 0\n", + "\n", + "# element_contribution_to_group only represents non-nan elements\n", + "# insert into final including non-nan elements\n", + "final = np.full((n_groups, len(vector)), np.nan)\n", + "final[\n", + " :, first_num_element : last_num_element + 1\n", + "] = element_contribution_to_group\n", + "\n", + "# convert to list of vectors\n", + "final = [final[n, :] for n in range(final.shape[0])]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "## Test physiological split\n", + "\n", + "axis = 0 if horizontal else 1\n", + "\n", + "# create a vector that is nan if the entire column/row is nan, 1 otherwise\n", + "vector_is_nan = np.all(np.isnan(matrix), axis=axis)\n", + "vector = np.ones(vector_is_nan.shape)\n", + "\n", + "if (horizontal and ignore_nan_columns) or (\n", + " not horizontal and ignore_nan_rows\n", + "):\n", + " vector[vector_is_nan] = np.nan\n", + "\n", + " # remove non-numeric (nan) elements at vector ends\n", + " # nan elements between numeric elements are kept\n", + " numeric_element_indices = np.argwhere(~np.isnan(vector))\n", + " first_num_element = numeric_element_indices.min()\n", + " last_num_element = numeric_element_indices.max()\n", + "else:\n", + " first_num_element = 0\n", + " last_num_element = len(vector) - 1\n", + "\n", + "n_elements = last_num_element - first_num_element + 1\n", + "\n", + "group_size = n_elements / n_groups\n", + "\n", + "if group_size < 1:\n", + " if horizontal:\n", + " print(f\"The number of horizontal regions ({n_groups}) is larger than the number of available columns ({n_elements}).\")\n", + " else:\n", + " print(f\"The number of vertical regions ({n_groups}) is larger than the number of available columns ({n_elements}).\")\n", + "\n", + "sum_along_axis = np.nansum(matrix, axis=axis)\n", + "relative_sum_along_axis = sum_along_axis / np.nansum(matrix)\n", + "relative_cumsum_along_axis = np.cumsum(relative_sum_along_axis)\n", + "\n", + "lower_bounds = np.arange(n_groups) / n_groups\n", + "upper_bounds = (np.arange(n_groups) + 1) / n_groups\n", + "\n", + "# Otherwise the first row will not fall in the first region (because they are 0) \n", + "# and last rows will not fall in the last region, because they reach 1.0\n", + "lower_bounds[0] = -np.inf\n", + "upper_bounds[-1] = np.inf\n", + "\n", + "row_in_region = []\n", + "\n", + "for lower_bound, upper_bound in zip(lower_bounds, upper_bounds):\n", + " row_in_region.append(\n", + " np.logical_and(\n", + " relative_cumsum_along_axis > lower_bound, relative_cumsum_along_axis <= upper_bound\n", + " )\n", + " )\n", + "\n", + "row_in_region = np.array(row_in_region).T\n", + "final = row_in_region.astype(float)\n", + "\n", + "# find initial region for each row\n", + "initial_regions = np.apply_along_axis(np.flatnonzero, 1, row_in_region).flatten()\n", + "\n", + "# find transitions between regions\n", + "region_borders = np.flatnonzero(np.diff(initial_regions))\n", + "\n", + "divide_y_positions = []\n", + "\n", + "# finds overlap in transition region\n", + "for previous_region, (ventral_row, upper_bound) in enumerate(\n", + " zip(region_borders, upper_bounds)\n", + "):\n", + " dorsal_row = ventral_row + 1\n", + " next_region = previous_region + 1\n", + " a, b = relative_cumsum_along_axis[ventral_row], relative_cumsum_along_axis[dorsal_row]\n", + " diff = b - a\n", + " to_a = upper_bound - a\n", + " fraction_to_a = to_a / diff\n", + " fraction_to_b = 1 - fraction_to_a\n", + "\n", + " final[dorsal_row, previous_region] = fraction_to_a\n", + " final[dorsal_row, next_region] = fraction_to_b\n", + "final = final.T\n", + "final = final * vector\n", + "# convert to list of vectors\n", + "final = [final[n, :] for n in range(final.shape[0])]" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "from eitprocessing.roi_selection.gridselection import GridSelection, DivisionMethod\n", + "\n", + "grid_select = GridSelection(2,2,DivisionMethod.physiological, DivisionMethod.physiological)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "matrices = grid_select.find_grid(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "a = matrices[0]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ALIVE", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}