From ffdb4c8d1f4fbd3531e14009383b69a996a9a3ba Mon Sep 17 00:00:00 2001 From: Michael Richard Mckinsey Date: Tue, 11 Jun 2024 18:07:28 -0700 Subject: [PATCH 01/13] Add new function --- thicket/thicket.py | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 438118aa..f7628600 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1198,6 +1198,27 @@ def filter_metadata(self, select_function): return new_thicket + def filter_profile(self, profile_list): + """Filter thicket object based on a list of profiles. + + Arguments: + profile_list (list): list of profiles to filter on + + Returns: + (thicket): new thicket object with selected profiles + """ + new_thicket = self.copy() + + new_thicket._sync_profile_components(profile_list) + validate_profile(new_thicket) + + if len(new_thicket.graph) != len( + new_thicket.dataframe.index.get_level_values("node").unique() + ): + new_thicket.squash() + + return new_thicket + def filter(self, filter_func): """Overloaded generic filter function. @@ -1468,12 +1489,12 @@ def get_unique_metadata(self): def _sync_profile_components(self, component): """Synchronize the Performance DataFrame, Metadata Dataframe, profile and - profile mapping objects based on the component's index. This is useful when a - non-Thicket function modifies the profiles in an object and those changes need - to be reflected in the other objects. + profile mapping objects based on the component's index or a list of profiles. + This is useful when a non-Thicket function modifies the profiles in an object + and those changes need to be reflected in the other objects. Arguments: - component (DataFrame) -> (Thicket.dataframe or Thicket.metadata): The index + component (list or DataFrame) -> (list, Thicket.dataframe, or Thicket.metadata): The index of this component is used to synchronize the other objects. Returns: @@ -1516,8 +1537,10 @@ def _sync_indices(component, profile_truth): } ) + if isinstance(component, list): + pass # For Columnar-indexed Thicket - if isinstance(component.columns, pd.MultiIndex): + elif isinstance(component.columns, pd.MultiIndex): # Create powerset from all profiles pset = set() for p in profile_truth: @@ -1531,14 +1554,16 @@ def _sync_indices(component, profile_truth): return self - if not isinstance(component, pd.DataFrame): + if isinstance(component, list): + self = _sync_indices(component, component) + elif isinstance(component, pd.DataFrame): + profile_truth = _profile_truth_from_component(component) + self = _sync_indices(component, profile_truth) + else: raise ValueError( - "Component must be either Thicket.dataframe or Thicket.metadata" + "Component must be either list, Thicket.dataframe, or Thicket.metadata" ) - profile_truth = _profile_truth_from_component(component) - self = _sync_indices(component, profile_truth) - return self From d3da41a0ce4cba876f96fda3bda7d4f389425f66 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 13 Jun 2024 13:31:30 -0500 Subject: [PATCH 02/13] Change copy to deepcopy --- thicket/thicket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index f7628600..fed5d72d 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1207,7 +1207,7 @@ def filter_profile(self, profile_list): Returns: (thicket): new thicket object with selected profiles """ - new_thicket = self.copy() + new_thicket = self.deepcopy() new_thicket._sync_profile_components(profile_list) validate_profile(new_thicket) From a1725f46dd75233c1365ff86d40c7e5a0dc79bb7 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Thu, 13 Jun 2024 14:27:36 -0500 Subject: [PATCH 03/13] Cast type for consistency --- thicket/thicket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index fed5d72d..957a63cb 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1545,7 +1545,7 @@ def _sync_indices(component, profile_truth): pset = set() for p in profile_truth: pset.update(helpers._powerset_from_tuple(p)) - profile_truth = pset + profile_truth = list(pset) self.dataframe = self.dataframe[ self.dataframe.index.droplevel(level="node").isin(profile_truth) From 13f920797715d5f619a0337468822ac820bd4be9 Mon Sep 17 00:00:00 2001 From: Michael Richard Mckinsey Date: Tue, 18 Jun 2024 10:30:01 -0700 Subject: [PATCH 04/13] Change _sync_profile_components to void --- thicket/thicket.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index 957a63cb..e50d975d 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1496,9 +1496,6 @@ def _sync_profile_components(self, component): Arguments: component (list or DataFrame) -> (list, Thicket.dataframe, or Thicket.metadata): The index of this component is used to synchronize the other objects. - - Returns: - (thicket): self """ def _profile_truth_from_component(component): @@ -1564,8 +1561,6 @@ def _sync_indices(component, profile_truth): "Component must be either list, Thicket.dataframe, or Thicket.metadata" ) - return self - class InvalidFilter(Exception): """Raised when an invalid argument is passed to the filter function.""" From d82a365ba06acae800833f3fa51a649ab15f3e22 Mon Sep 17 00:00:00 2001 From: Michael Richard Mckinsey Date: Thu, 20 Jun 2024 22:56:51 -0700 Subject: [PATCH 05/13] Fix bug --- thicket/thicket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index e50d975d..e6cfd7f5 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1215,7 +1215,7 @@ def filter_profile(self, profile_list): if len(new_thicket.graph) != len( new_thicket.dataframe.index.get_level_values("node").unique() ): - new_thicket.squash() + new_thicket = new_thicket.squash() return new_thicket From 8386e1df2c675471ac80e31e97dcd55aeafde002 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 1 Jul 2024 22:23:36 -0500 Subject: [PATCH 06/13] Fix bug --- thicket/thicket.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index e6cfd7f5..cdae7c22 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1523,7 +1523,7 @@ def _profile_truth_from_component(component): profile_truth = component.index return list(set(profile_truth)) - def _sync_indices(component, profile_truth): + def _sync_indices(profile_truth): """Sync the Thicket attributes""" self.profile = profile_truth self.profile_mapping = OrderedDict( @@ -1534,10 +1534,8 @@ def _sync_indices(component, profile_truth): } ) - if isinstance(component, list): - pass # For Columnar-indexed Thicket - elif isinstance(component.columns, pd.MultiIndex): + if isinstance(self.dataframe.columns, pd.MultiIndex): # Create powerset from all profiles pset = set() for p in profile_truth: From a270ed9346ad30d32daae17d74444ff69c7df819 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 1 Jul 2024 22:24:07 -0500 Subject: [PATCH 07/13] Add unit tests --- thicket/tests/test_concat_thickets.py | 21 +++++++++++++++++++++ thicket/tests/test_filter_profile.py | 25 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 thicket/tests/test_filter_profile.py diff --git a/thicket/tests/test_concat_thickets.py b/thicket/tests/test_concat_thickets.py index 132e86e0..4e2e239b 100644 --- a/thicket/tests/test_concat_thickets.py +++ b/thicket/tests/test_concat_thickets.py @@ -134,3 +134,24 @@ def test_query_concat_thickets_columns(thicket_axis_columns): ) check_query(combined_th, hnids, query) + + +def test_filter_profile_concat_thickets_columns(thicket_axis_columns): + thickets, thickets_cp, combined_th = thicket_axis_columns + + rm_profs = [ + (2097152.0, "block_128"), + (1048576.0, "block_128"), + (1048576.0, "block_256"), + ] + keep_profs = [ + (2097152.0, "block_256"), + (2097152.0, "default"), + (1048576.0, "default"), + ] + + tk_filt = combined_th.filter_profile(keep_profs) + + for component in [tk_filt.profile, tk_filt.profile_mapping.keys()]: + assert all([prof not in component for prof in rm_profs]) + assert all([prof in component for prof in keep_profs]) diff --git a/thicket/tests/test_filter_profile.py b/thicket/tests/test_filter_profile.py new file mode 100644 index 00000000..a0f75d00 --- /dev/null +++ b/thicket/tests/test_filter_profile.py @@ -0,0 +1,25 @@ +# Copyright 2022 Lawrence Livermore National Security, LLC and other +# Thicket Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +from thicket import Thicket + + +def test_filter_profile(rajaperf_cali_1trial): + tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True) + + rm_profs = [2139808518, 2783439032, 1389420692] + keep_profs = [3031003747, 847237663, 3347816895] + + tk_filt = tk.filter_profile(keep_profs) + + # Check each component that uses profiles + for component in [ + tk_filt.profile, + tk_filt.profile_mapping.keys(), + tk_filt.metadata.index, + tk_filt.dataframe.index.get_level_values("profile"), + ]: + assert all([prof not in component for prof in rm_profs]) + assert all([prof in component for prof in keep_profs]) From 8b1f8c3fe8861945ebd6f1788e0f51c171ba5a6e Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 1 Jul 2024 22:28:34 -0500 Subject: [PATCH 08/13] Fix bug --- thicket/thicket.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index cdae7c22..cff670db 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1550,10 +1550,10 @@ def _sync_indices(profile_truth): return self if isinstance(component, list): - self = _sync_indices(component, component) + self = _sync_indices(component) elif isinstance(component, pd.DataFrame): profile_truth = _profile_truth_from_component(component) - self = _sync_indices(component, profile_truth) + self = _sync_indices(profile_truth) else: raise ValueError( "Component must be either list, Thicket.dataframe, or Thicket.metadata" From 0237d5a48c39b1215b16779dc84e1be1f9156813 Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 1 Jul 2024 22:56:20 -0500 Subject: [PATCH 09/13] improve unit test --- thicket/tests/test_concat_thickets.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/thicket/tests/test_concat_thickets.py b/thicket/tests/test_concat_thickets.py index 4e2e239b..db1f9f24 100644 --- a/thicket/tests/test_concat_thickets.py +++ b/thicket/tests/test_concat_thickets.py @@ -140,14 +140,14 @@ def test_filter_profile_concat_thickets_columns(thicket_axis_columns): thickets, thickets_cp, combined_th = thicket_axis_columns rm_profs = [ - (2097152.0, "block_128"), + (1048576.0, "default"), (1048576.0, "block_128"), (1048576.0, "block_256"), ] keep_profs = [ (2097152.0, "block_256"), (2097152.0, "default"), - (1048576.0, "default"), + (2097152.0, "block_128"), ] tk_filt = combined_th.filter_profile(keep_profs) @@ -155,3 +155,8 @@ def test_filter_profile_concat_thickets_columns(thicket_axis_columns): for component in [tk_filt.profile, tk_filt.profile_mapping.keys()]: assert all([prof not in component for prof in rm_profs]) assert all([prof in component for prof in keep_profs]) + + assert 1048576.0 not in tk_filt.dataframe.index.get_level_values("ProblemSizeRunParam") + assert 2097152.0 in tk_filt.dataframe.index.get_level_values("ProblemSizeRunParam") + assert 1048576.0 not in tk_filt.metadata.index + assert 2097152.0 in tk_filt.metadata.index From 28e19fafd94132a229241201061f1805210818de Mon Sep 17 00:00:00 2001 From: Michael McKinsey Date: Mon, 1 Jul 2024 22:58:59 -0500 Subject: [PATCH 10/13] black --- thicket/tests/test_concat_thickets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/thicket/tests/test_concat_thickets.py b/thicket/tests/test_concat_thickets.py index db1f9f24..7c2a3030 100644 --- a/thicket/tests/test_concat_thickets.py +++ b/thicket/tests/test_concat_thickets.py @@ -156,7 +156,9 @@ def test_filter_profile_concat_thickets_columns(thicket_axis_columns): assert all([prof not in component for prof in rm_profs]) assert all([prof in component for prof in keep_profs]) - assert 1048576.0 not in tk_filt.dataframe.index.get_level_values("ProblemSizeRunParam") + assert 1048576.0 not in tk_filt.dataframe.index.get_level_values( + "ProblemSizeRunParam" + ) assert 2097152.0 in tk_filt.dataframe.index.get_level_values("ProblemSizeRunParam") assert 1048576.0 not in tk_filt.metadata.index assert 2097152.0 in tk_filt.metadata.index From afdc3178481eb17b3132cbdb6d3468604daa5f1d Mon Sep 17 00:00:00 2001 From: Michael Richard Mckinsey Date: Tue, 2 Jul 2024 10:41:16 -0700 Subject: [PATCH 11/13] Simplify logic --- thicket/thicket.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/thicket/thicket.py b/thicket/thicket.py index cff670db..22859ba0 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1500,8 +1500,10 @@ def _sync_profile_components(self, component): def _profile_truth_from_component(component): """Derive the profiles from the component index.""" + if isinstance(component, list): + return component # Option A: Columnar-indexed Thicket - if isinstance(component.columns, pd.MultiIndex): + elif isinstance(component.columns, pd.MultiIndex): # Performance DataFrame if isinstance(component.index, pd.MultiIndex): row_idx = component.index.droplevel(level="node") @@ -1549,9 +1551,7 @@ def _sync_indices(profile_truth): return self - if isinstance(component, list): - self = _sync_indices(component) - elif isinstance(component, pd.DataFrame): + if isinstance(component, list) or isinstance(component, pd.DataFrame): profile_truth = _profile_truth_from_component(component) self = _sync_indices(profile_truth) else: From ff8fa3596250ebe6e6fd70d737d28c0ba7dd9b34 Mon Sep 17 00:00:00 2001 From: Michael Richard Mckinsey Date: Tue, 2 Jul 2024 10:45:58 -0700 Subject: [PATCH 12/13] Fix unit test --- thicket/tests/test_filter_profile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/thicket/tests/test_filter_profile.py b/thicket/tests/test_filter_profile.py index a0f75d00..6af04cc9 100644 --- a/thicket/tests/test_filter_profile.py +++ b/thicket/tests/test_filter_profile.py @@ -9,8 +9,9 @@ def test_filter_profile(rajaperf_cali_1trial): tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True) - rm_profs = [2139808518, 2783439032, 1389420692] - keep_profs = [3031003747, 847237663, 3347816895] + # Split profile list into two halves + rm_profs = tk.profile[len(tk.profile)//2:] + keep_profs = tk.profile[:len(tk.profile)//2] tk_filt = tk.filter_profile(keep_profs) From 7b776bd3a67f2b26307ccb39ac9e5789a4f86cb3 Mon Sep 17 00:00:00 2001 From: Michael Richard Mckinsey Date: Tue, 2 Jul 2024 10:48:35 -0700 Subject: [PATCH 13/13] Black --- thicket/tests/test_filter_profile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thicket/tests/test_filter_profile.py b/thicket/tests/test_filter_profile.py index 6af04cc9..216b1d7a 100644 --- a/thicket/tests/test_filter_profile.py +++ b/thicket/tests/test_filter_profile.py @@ -10,8 +10,8 @@ def test_filter_profile(rajaperf_cali_1trial): tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True) # Split profile list into two halves - rm_profs = tk.profile[len(tk.profile)//2:] - keep_profs = tk.profile[:len(tk.profile)//2] + rm_profs = tk.profile[len(tk.profile) // 2 :] + keep_profs = tk.profile[: len(tk.profile) // 2] tk_filt = tk.filter_profile(keep_profs)