From 048d91d57740d0130b97feeae77f623ddac4aebe Mon Sep 17 00:00:00 2001 From: shunsuke-iwashita Date: Tue, 18 Nov 2025 13:14:49 +0900 Subject: [PATCH 1/3] Adds tracking header formatting for single-header output Introduces a utility to flatten multi-index tracking headers into a single-header format for easier downstream processing and analysis. Applies the header formatting step to both home and away team tracking data, ensuring column consistency and clarity. Facilitates integration with systems expecting simplified header naming conventions. --- .../ultimate/ultimate_space_class.py | 6 +- .../ultimate/ultimate_space_preprocessing.py | 55 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/preprocessing/sports/space_data/ultimate/ultimate_space_class.py b/preprocessing/sports/space_data/ultimate/ultimate_space_class.py index dbd5576..89f78f7 100644 --- a/preprocessing/sports/space_data/ultimate/ultimate_space_class.py +++ b/preprocessing/sports/space_data/ultimate/ultimate_space_class.py @@ -43,6 +43,7 @@ def preprocessing(self): from .ultimate_space_preprocessing import ( convert_to_metrica_format, create_intermediate_file, + format_tracking_headers, ) home_tracking_dict = {} @@ -55,7 +56,7 @@ def preprocessing(self): os.path.splitext(os.path.basename(tracking_path_i))[0] )[0] match_tracking_df = pd.read_csv(tracking_path_i) - print(match_tracking_df) + # Create intermediate DataFrame with all required columns intermidiate_df = create_intermediate_file(match_tracking_df) @@ -64,6 +65,9 @@ def preprocessing(self): intermidiate_df, self.tracking_herz ) + home_df = format_tracking_headers(home_df, team_prefix="Home") + away_df = format_tracking_headers(away_df, team_prefix="Away") + home_tracking_dict[match_i] = home_df away_tracking_dict[match_i] = away_df event_data_dict[match_i] = events_df diff --git a/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py index bc6fa1f..6a29336 100644 --- a/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py +++ b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py @@ -353,3 +353,58 @@ def create_tracking_metrica(df, team, tracking_herz): tracking_df.columns = multi_columns return tracking_df + + +def format_tracking_headers(tracking_df, team_prefix="Home"): + """Convert the multi-index tracking output into a single-header format.""" + if tracking_df.empty: + return tracking_df + + flattened_columns = [] + active_columns = [] + player_counts = {} + + for column in tracking_df.columns: + # MultiIndex columns are returned as tuples + level2_name = column[2] if isinstance(column, tuple) else column + + if level2_name == "Frame": + continue + + if level2_name == "Period": + flattened_columns.append("Period") + active_columns.append(column) + continue + + if level2_name == "Time [s]": + flattened_columns.append("Time [s]") + active_columns.append(column) + continue + + if level2_name == "Disc__": + disc_count = player_counts.get("disc", 0) + flattened_columns.append("disc_x" if disc_count == 0 else "disc_y") + active_columns.append(column) + player_counts["disc"] = disc_count + 1 + continue + + if ( + isinstance(column, tuple) + and column[0] == team_prefix + and level2_name.startswith("Player") + ): + player_index = int(level2_name.replace("Player", "")) + 1 + count = player_counts.get(player_index, 0) + suffix = "_x" if count == 0 else "_y" + flattened_columns.append(f"{team_prefix}_{player_index}{suffix}") + active_columns.append(column) + player_counts[player_index] = count + 1 + continue + + formatted_df = tracking_df[active_columns].copy() + formatted_df.columns = flattened_columns + + if "Period" in formatted_df.columns and formatted_df["Period"].isna().all(): + formatted_df["Period"] = 1 + + return formatted_df From 08eabcf4b944517db233d63264338c7335978f66 Mon Sep 17 00:00:00 2001 From: shunsuke-iwashita Date: Tue, 18 Nov 2025 13:29:26 +0900 Subject: [PATCH 2/3] Simplifies header flattening for tracking data Removes unnecessary player and disc counting logic, ensuring columns for disc and players are consistently added for both x and y coordinates. Prevents duplicate columns using a set to track processed columns, improving header formatting reliability and future maintainability. --- .../ultimate/ultimate_space_preprocessing.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py index 6a29336..6978eff 100644 --- a/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py +++ b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py @@ -362,30 +362,35 @@ def format_tracking_headers(tracking_df, team_prefix="Home"): flattened_columns = [] active_columns = [] - player_counts = {} + seen_columns = set() for column in tracking_df.columns: # MultiIndex columns are returned as tuples level2_name = column[2] if isinstance(column, tuple) else column + if column in seen_columns: + continue + if level2_name == "Frame": continue if level2_name == "Period": flattened_columns.append("Period") active_columns.append(column) + seen_columns.add(column) continue if level2_name == "Time [s]": flattened_columns.append("Time [s]") active_columns.append(column) + seen_columns.add(column) continue if level2_name == "Disc__": - disc_count = player_counts.get("disc", 0) - flattened_columns.append("disc_x" if disc_count == 0 else "disc_y") + flattened_columns.append("disc_x") + flattened_columns.append("disc_y") active_columns.append(column) - player_counts["disc"] = disc_count + 1 + seen_columns.add(column) continue if ( @@ -394,11 +399,10 @@ def format_tracking_headers(tracking_df, team_prefix="Home"): and level2_name.startswith("Player") ): player_index = int(level2_name.replace("Player", "")) + 1 - count = player_counts.get(player_index, 0) - suffix = "_x" if count == 0 else "_y" - flattened_columns.append(f"{team_prefix}_{player_index}{suffix}") + for suffix in ["_x", "_y"]: + flattened_columns.append(f"{team_prefix}_{player_index}{suffix}") active_columns.append(column) - player_counts[player_index] = count + 1 + seen_columns.add(column) continue formatted_df = tracking_df[active_columns].copy() From a62a8d0338bc0ba9f48fa1653a1a577e5bebecd0 Mon Sep 17 00:00:00 2001 From: shunsuke-iwashita Date: Tue, 18 Nov 2025 14:16:24 +0900 Subject: [PATCH 3/3] Corrects offense ID mapping to use 1-based index Updates the offense ID mapping logic to increment the index by one, ensuring alignment with expected 1-based indexing for downstream processes. Prevents off-by-one errors during event creation. --- .../sports/space_data/ultimate/ultimate_space_preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py index 6978eff..a6e9253 100644 --- a/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py +++ b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py @@ -219,7 +219,7 @@ def create_events_metrica(df, tracking_herz): if not holder_data.empty: to_id = ( holder_data["id"] - .map(lambda x: offense_ids.index(x) if x in offense_ids else np.nan) + .map(lambda x: offense_ids.index(x) + 1 if x in offense_ids else np.nan) .reset_index(drop=True) ) else: