diff --git a/thicket/tests/test_concat_thickets.py b/thicket/tests/test_concat_thickets.py index 132e86e0..7c2a3030 100644 --- a/thicket/tests/test_concat_thickets.py +++ b/thicket/tests/test_concat_thickets.py @@ -134,3 +134,31 @@ def test_query_concat_thickets_columns(thicket_axis_columns): ) check_query(combined_th, hnids, query) + + +def test_filter_profile_concat_thickets_columns(thicket_axis_columns): + thickets, thickets_cp, combined_th = thicket_axis_columns + + rm_profs = [ + (1048576.0, "default"), + (1048576.0, "block_128"), + (1048576.0, "block_256"), + ] + keep_profs = [ + (2097152.0, "block_256"), + (2097152.0, "default"), + (2097152.0, "block_128"), + ] + + tk_filt = combined_th.filter_profile(keep_profs) + + for component in [tk_filt.profile, tk_filt.profile_mapping.keys()]: + assert all([prof not in component for prof in rm_profs]) + assert all([prof in component for prof in keep_profs]) + + assert 1048576.0 not in tk_filt.dataframe.index.get_level_values( + "ProblemSizeRunParam" + ) + assert 2097152.0 in tk_filt.dataframe.index.get_level_values("ProblemSizeRunParam") + assert 1048576.0 not in tk_filt.metadata.index + assert 2097152.0 in tk_filt.metadata.index diff --git a/thicket/tests/test_filter_profile.py b/thicket/tests/test_filter_profile.py new file mode 100644 index 00000000..216b1d7a --- /dev/null +++ b/thicket/tests/test_filter_profile.py @@ -0,0 +1,26 @@ +# Copyright 2022 Lawrence Livermore National Security, LLC and other +# Thicket Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +from thicket import Thicket + + +def test_filter_profile(rajaperf_cali_1trial): + tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True) + + # Split profile list into two halves + rm_profs = tk.profile[len(tk.profile) // 2 :] + keep_profs = tk.profile[: len(tk.profile) // 2] + + tk_filt = tk.filter_profile(keep_profs) + + # Check each component that uses profiles + for component in [ + tk_filt.profile, + tk_filt.profile_mapping.keys(), + tk_filt.metadata.index, + tk_filt.dataframe.index.get_level_values("profile"), + ]: + assert all([prof not in component for prof in rm_profs]) + assert all([prof in component for prof in keep_profs]) diff --git a/thicket/thicket.py b/thicket/thicket.py index 438118aa..22859ba0 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1198,6 +1198,27 @@ def filter_metadata(self, select_function): return new_thicket + def filter_profile(self, profile_list): + """Filter thicket object based on a list of profiles. + + Arguments: + profile_list (list): list of profiles to filter on + + Returns: + (thicket): new thicket object with selected profiles + """ + new_thicket = self.deepcopy() + + new_thicket._sync_profile_components(profile_list) + validate_profile(new_thicket) + + if len(new_thicket.graph) != len( + new_thicket.dataframe.index.get_level_values("node").unique() + ): + new_thicket = new_thicket.squash() + + return new_thicket + def filter(self, filter_func): """Overloaded generic filter function. @@ -1468,22 +1489,21 @@ def get_unique_metadata(self): def _sync_profile_components(self, component): """Synchronize the Performance DataFrame, Metadata Dataframe, profile and - profile mapping objects based on the component's index. This is useful when a - non-Thicket function modifies the profiles in an object and those changes need - to be reflected in the other objects. + profile mapping objects based on the component's index or a list of profiles. + This is useful when a non-Thicket function modifies the profiles in an object + and those changes need to be reflected in the other objects. Arguments: - component (DataFrame) -> (Thicket.dataframe or Thicket.metadata): The index + component (list or DataFrame) -> (list, Thicket.dataframe, or Thicket.metadata): The index of this component is used to synchronize the other objects. - - Returns: - (thicket): self """ def _profile_truth_from_component(component): """Derive the profiles from the component index.""" + if isinstance(component, list): + return component # Option A: Columnar-indexed Thicket - if isinstance(component.columns, pd.MultiIndex): + elif isinstance(component.columns, pd.MultiIndex): # Performance DataFrame if isinstance(component.index, pd.MultiIndex): row_idx = component.index.droplevel(level="node") @@ -1505,7 +1525,7 @@ def _profile_truth_from_component(component): profile_truth = component.index return list(set(profile_truth)) - def _sync_indices(component, profile_truth): + def _sync_indices(profile_truth): """Sync the Thicket attributes""" self.profile = profile_truth self.profile_mapping = OrderedDict( @@ -1517,12 +1537,12 @@ def _sync_indices(component, profile_truth): ) # For Columnar-indexed Thicket - if isinstance(component.columns, pd.MultiIndex): + if isinstance(self.dataframe.columns, pd.MultiIndex): # Create powerset from all profiles pset = set() for p in profile_truth: pset.update(helpers._powerset_from_tuple(p)) - profile_truth = pset + profile_truth = list(pset) self.dataframe = self.dataframe[ self.dataframe.index.droplevel(level="node").isin(profile_truth) @@ -1531,16 +1551,14 @@ def _sync_indices(component, profile_truth): return self - if not isinstance(component, pd.DataFrame): + if isinstance(component, list) or isinstance(component, pd.DataFrame): + profile_truth = _profile_truth_from_component(component) + self = _sync_indices(profile_truth) + else: raise ValueError( - "Component must be either Thicket.dataframe or Thicket.metadata" + "Component must be either list, Thicket.dataframe, or Thicket.metadata" ) - profile_truth = _profile_truth_from_component(component) - self = _sync_indices(component, profile_truth) - - return self - class InvalidFilter(Exception): """Raised when an invalid argument is passed to the filter function."""