Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions thicket/tests/test_concat_thickets.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,31 @@ def test_query_concat_thickets_columns(thicket_axis_columns):
)

check_query(combined_th, hnids, query)


def test_filter_profile_concat_thickets_columns(thicket_axis_columns):
    """Check filter_profile on the combined thicket from the fixture.

    Profiles are (problem size, variant name) tuples. The profiles passed to
    ``filter_profile`` must remain in the profile list and profile mapping,
    while the profiles of the other problem size must be gone. The problem
    size must likewise be filtered in the performance data and the metadata.
    """
    # Only the combined thicket (third fixture element) is exercised here
    _, _, combined_th = thicket_axis_columns

    size_removed = 1048576.0
    size_kept = 2097152.0
    expected_absent = [
        (size_removed, variant)
        for variant in ("default", "block_128", "block_256")
    ]
    expected_present = [
        (size_kept, variant)
        for variant in ("block_256", "default", "block_128")
    ]

    filtered = combined_th.filter_profile(expected_present)

    # Components that store whole profile tuples
    profile_views = (filtered.profile, filtered.profile_mapping.keys())
    for view in profile_views:
        for prof in expected_absent:
            assert prof not in view
        for prof in expected_present:
            assert prof in view

    # Components indexed by the problem size alone
    perf_sizes = filtered.dataframe.index.get_level_values("ProblemSizeRunParam")
    assert size_removed not in perf_sizes
    assert size_kept in perf_sizes

    meta_sizes = filtered.metadata.index
    assert size_removed not in meta_sizes
    assert size_kept in meta_sizes
26 changes: 26 additions & 0 deletions thicket/tests/test_filter_profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2022 Lawrence Livermore National Security, LLC and other
# Thicket Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: MIT

from thicket import Thicket


def test_filter_profile(rajaperf_cali_1trial):
    """Check that filter_profile keeps the requested profiles and drops the rest.

    The thicket's profile list is cut at its midpoint: the first half is
    passed to ``filter_profile`` and the second half is expected to vanish
    from every component that carries profiles.
    """
    tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True)

    # First half is retained, second half is filtered out
    midpoint = len(tk.profile) // 2
    dropped = tk.profile[midpoint:]
    kept = tk.profile[:midpoint]

    filtered = tk.filter_profile(kept)

    # Every place a profile can appear in the filtered thicket
    profile_holders = (
        filtered.profile,
        filtered.profile_mapping.keys(),
        filtered.metadata.index,
        filtered.dataframe.index.get_level_values("profile"),
    )
    for holder in profile_holders:
        for prof in dropped:
            assert prof not in holder
        for prof in kept:
            assert prof in holder
54 changes: 36 additions & 18 deletions thicket/thicket.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,6 +1198,27 @@ def filter_metadata(self, select_function):

return new_thicket

def filter_profile(self, profile_list):
    """Return a copy of this thicket restricted to the given profiles.

    The work is done on the object returned by ``self.deepcopy()``: its
    profile-related components are synchronized to ``profile_list``, the
    result is passed to ``validate_profile``, and the copy is squashed when
    its graph no longer has the same number of nodes as the performance
    data.

    Arguments:
        profile_list (list): profiles to keep

    Returns:
        (thicket): new thicket object containing the selected profiles
    """
    filtered = self.deepcopy()

    # Propagate the profile selection to the profile-related components
    filtered._sync_profile_components(profile_list)
    validate_profile(filtered)

    graph_size = len(filtered.graph)
    remaining_nodes = filtered.dataframe.index.get_level_values("node").unique()

    # Graph and performance data agree on node count: nothing to squash
    if graph_size == len(remaining_nodes):
        return filtered

    return filtered.squash()

def filter(self, filter_func):
"""Overloaded generic filter function.

Expand Down Expand Up @@ -1468,22 +1489,21 @@ def get_unique_metadata(self):

def _sync_profile_components(self, component):
"""Synchronize the Performance DataFrame, Metadata Dataframe, profile and
profile mapping objects based on the component's index. This is useful when a
non-Thicket function modifies the profiles in an object and those changes need
to be reflected in the other objects.
profile mapping objects based on the component's index or a list of profiles.
This is useful when a non-Thicket function modifies the profiles in an object
and those changes need to be reflected in the other objects.

Arguments:
component (DataFrame) -> (Thicket.dataframe or Thicket.metadata): The index
component (list or DataFrame) -> (list, Thicket.dataframe, or Thicket.metadata): The index
of this component is used to synchronize the other objects.

Returns:
(thicket): self
"""

def _profile_truth_from_component(component):
"""Derive the profiles from the component index."""
if isinstance(component, list):
return component
# Option A: Columnar-indexed Thicket
if isinstance(component.columns, pd.MultiIndex):
elif isinstance(component.columns, pd.MultiIndex):
# Performance DataFrame
if isinstance(component.index, pd.MultiIndex):
row_idx = component.index.droplevel(level="node")
Expand All @@ -1505,7 +1525,7 @@ def _profile_truth_from_component(component):
profile_truth = component.index
return list(set(profile_truth))

def _sync_indices(component, profile_truth):
def _sync_indices(profile_truth):
"""Sync the Thicket attributes"""
self.profile = profile_truth
self.profile_mapping = OrderedDict(
Expand All @@ -1517,12 +1537,12 @@ def _sync_indices(component, profile_truth):
)

# For Columnar-indexed Thicket
if isinstance(component.columns, pd.MultiIndex):
if isinstance(self.dataframe.columns, pd.MultiIndex):
# Create powerset from all profiles
pset = set()
for p in profile_truth:
pset.update(helpers._powerset_from_tuple(p))
profile_truth = pset
profile_truth = list(pset)

self.dataframe = self.dataframe[
self.dataframe.index.droplevel(level="node").isin(profile_truth)
Expand All @@ -1531,16 +1551,14 @@ def _sync_indices(component, profile_truth):

return self

if not isinstance(component, pd.DataFrame):
if isinstance(component, list) or isinstance(component, pd.DataFrame):
profile_truth = _profile_truth_from_component(component)
self = _sync_indices(profile_truth)
else:
raise ValueError(
"Component must be either Thicket.dataframe or Thicket.metadata"
"Component must be either list, Thicket.dataframe, or Thicket.metadata"
)

profile_truth = _profile_truth_from_component(component)
self = _sync_indices(component, profile_truth)

return self


class InvalidFilter(Exception):
"""Raised when an invalid argument is passed to the filter function."""
Expand Down