From 8ce1bb36b7f5fe0df1f03b6c2683b70070c90d61 Mon Sep 17 00:00:00 2001
From: shunsuke-iwashita
Date: Tue, 11 Nov 2025 11:16:30 +0900
Subject: [PATCH] Adds Ultimate sport data provider support

Introduces comprehensive Ultimate frisbee tracking data processing
capabilities with support for UltimateTrack and UFA data providers.

Implements motion feature calculations including velocity/acceleration
magnitudes and angles, plus differential angle tracking for enhanced
movement analysis.

Converts Ultimate tracking data to standardized Metrica format for
consistent downstream processing with existing basketball and soccer
workflows.

Enables processing of both individual files and directory batches with
optional testing mode for development efficiency.
---
Review notes (respin of 8ce1bb3):
- Align disc, holder and player columns by frame number instead of row
  position, so frames with no disc/holder/player row no longer shift values.
- Reject unsupported providers in Ultimate_space_data.__init__.
- Sort directory listings; drop the debug print in preprocessing().
- Replace the per-frame table scan in create_intermediate_file with one
  stable sort.
- Blob ids on the "index" lines below are carried over from the original
  commit and were not regenerated.

 .../sports/space_data/space_class.py          |  14 +-
 .../sports/space_data/ultimate/__init__.py    |   0
 .../ultimate/ultimate_space_class.py          | 119 ++++++
 .../ultimate/ultimate_space_preprocessing.py  | 335 ++++++++++++++++++
 4 files changed, 465 insertions(+), 3 deletions(-)
 create mode 100644 preprocessing/sports/space_data/ultimate/__init__.py
 create mode 100644 preprocessing/sports/space_data/ultimate/ultimate_space_class.py
 create mode 100644 preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py

diff --git a/preprocessing/sports/space_data/space_class.py b/preprocessing/sports/space_data/space_class.py
index aef2173..d7ded26 100644
--- a/preprocessing/sports/space_data/space_class.py
+++ b/preprocessing/sports/space_data/space_class.py
@@ -1,17 +1,25 @@
 class Space_data:
     # Modified the sports list to only include fully supported providers
-    basketball_data_provider = ['SportVU_NBA']
-    soccer_data_provider = ['fifa_wc_2022']
+    basketball_data_provider = ["SportVU_NBA"]
+    soccer_data_provider = ["fifa_wc_2022"]
+    ultimate_data_provider = ["UltimateTrack", "UFA"]
 
     def __new__(cls, data_provider, *args, **kwargs):
         if data_provider in cls.basketball_data_provider:
             from .basketball.basketball_space_class import Basketball_space_data
+
             # If the data_provider is in the supported list, return an instance of Basketball_space_data
             return Basketball_space_data(data_provider, *args, **kwargs)
         elif data_provider in cls.soccer_data_provider:
             from .soccer.soccer_space_class import Soccer_space_data
+
             # If the data_provider is in the supported list, return an instance of Soccer_space_data
             return Soccer_space_data(data_provider, *args, **kwargs)
+        elif data_provider in cls.ultimate_data_provider:
+            from .ultimate.ultimate_space_class import Ultimate_space_data
+
+            # If the data_provider is in the supported list, return an instance of Ultimate_space_data
+            return Ultimate_space_data(data_provider, *args, **kwargs)
         else:
             # If the data_provider is unrecognized, raise a ValueError
-            raise ValueError(f'Unknown data provider: {data_provider}')
+            raise ValueError(f"Unknown data provider: {data_provider}")
diff --git a/preprocessing/sports/space_data/ultimate/__init__.py b/preprocessing/sports/space_data/ultimate/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/preprocessing/sports/space_data/ultimate/ultimate_space_class.py b/preprocessing/sports/space_data/ultimate/ultimate_space_class.py
new file mode 100644
index 0000000..dbd5576
--- /dev/null
+++ b/preprocessing/sports/space_data/ultimate/ultimate_space_class.py
@@ -0,0 +1,119 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+
+class Ultimate_space_data:
+    """Load Ultimate tracking CSV files and convert them to Metrica format"""
+
+    # Tracking frequency (frames per second) of each supported provider
+    _TRACKING_HERZ = {"UltimateTrack": 15, "UFA": 10}
+
+    def __init__(
+        self,
+        data_provider,
+        tracking_data_path,
+        out_path=None,
+        testing_mode=False,
+    ):
+        """
+        Args:
+            data_provider (str): Provider name, "UltimateTrack" or "UFA"
+            tracking_data_path (str): Path to a .csv file or to a directory of them
+            out_path (str, optional): Directory to write the converted CSVs into;
+                nothing is written when it is None or empty
+            testing_mode (bool): If True, only the first two files are processed
+
+        Raises:
+            ValueError: If data_provider is not a supported provider
+        """
+        if data_provider not in self._TRACKING_HERZ:
+            # Fail here rather than leave tracking_herz unset, which would only
+            # surface later as an AttributeError inside preprocessing()
+            raise ValueError(f"Unknown data provider: {data_provider}")
+        self.data_provider = data_provider
+        self.tracking_path = tracking_data_path
+        self.testing_mode = testing_mode
+        self.out_path = out_path
+        self.tracking_herz = self._TRACKING_HERZ[data_provider]
+
+    def get_files(self):
+        """
+        Collect the tracking CSV files to process
+
+        Returns:
+            list[str]: Paths of the .csv files, sorted when tracking_path is a directory
+
+        Raises:
+            ValueError: If tracking_path is neither a directory nor a .csv file
+        """
+        if os.path.isdir(self.tracking_path):
+            # os.listdir returns entries in arbitrary order; sort them so the
+            # testing-mode subset and the processing order are reproducible
+            return sorted(
+                os.path.join(self.tracking_path, f)
+                for f in os.listdir(self.tracking_path)
+                if f.endswith(".csv")
+            )
+        if os.path.isfile(self.tracking_path) and self.tracking_path.endswith(".csv"):
+            return [self.tracking_path]
+        raise ValueError(f"Invalid data path: {self.tracking_path}")
+
+    def preprocessing(self):
+        """
+        Convert every tracking file to Metrica format
+
+        Returns:
+            Tuple of (event_data_dict, home_tracking_dict, away_tracking_dict):
+            each maps a match id (file name with up to two extensions stripped) to a DataFrame
+        """
+        tracking_files = self.get_files()
+        if self.testing_mode:
+            tracking_files = tracking_files[:2]
+            print("Running in testing mode. Limited files will be processed.")
+
+        from .ultimate_space_preprocessing import (
+            convert_to_metrica_format,
+            create_intermediate_file,
+        )
+
+        home_tracking_dict = {}
+        away_tracking_dict = {}
+        event_data_dict = {}
+        for tracking_path_i in tqdm(
+            tracking_files, total=len(tracking_files), desc="Processing tracking files"
+        ):
+            match_i = os.path.splitext(
+                os.path.splitext(os.path.basename(tracking_path_i))[0]
+            )[0]
+            match_tracking_df = pd.read_csv(tracking_path_i)
+            # Create intermediate DataFrame with all required columns
+            intermediate_df = create_intermediate_file(match_tracking_df)
+
+            # Convert to Metrica format
+            home_df, away_df, events_df = convert_to_metrica_format(
+                intermediate_df, self.tracking_herz
+            )
+
+            home_tracking_dict[match_i] = home_df
+            away_tracking_dict[match_i] = away_df
+            event_data_dict[match_i] = events_df
+
+        if self.out_path:
+            outputs = {
+                "event": event_data_dict,
+                "home_tracking": home_tracking_dict,
+                "away_tracking": away_tracking_dict,
+            }
+            for subdir, dfs_by_match in outputs.items():
+                target_dir = os.path.join(self.out_path, subdir)
+                # create output directory if not exists
+                os.makedirs(target_dir, exist_ok=True)
+                for match_id, df in dfs_by_match.items():
+                    df.to_csv(
+                        os.path.join(target_dir, f"{match_id}.csv"),
+                        index=False,
+                    )
+
+        return event_data_dict, home_tracking_dict, away_tracking_dict
diff --git a/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py
new file mode 100644
index 0000000..bc6fa1f
--- /dev/null
+++ b/preprocessing/sports/space_data/ultimate/ultimate_space_preprocessing.py
@@ -0,0 +1,335 @@
+import numpy as np
+import pandas as pd
+
+# Number of player slots per team in the Metrica tracking output
+_NUM_PLAYERS = 7
+
+
+def create_intermediate_file(raw_data):
+    """
+    Create intermediate file with calculated motion features from raw Ultimate Track data
+
+    Processes rows in ascending frame order to calculate velocity/acceleration
+    magnitudes and angles, including differential angle features for each
+    tracked entity.
+
+    Args:
+        raw_data: Raw Ultimate Track data DataFrame with columns:
+            frame, id, x, y, vx, vy, ax, ay, class, holder, closest
+
+    Returns:
+        DataFrame: Intermediate data with calculated features including:
+            v_mag, a_mag, v_angle, a_angle, diff_v_a_angle, diff_v_angle, diff_a_angle
+    """
+    intermediate_data = []
+
+    # Previous (v_angle, a_angle) of each entity, keyed by "<id>_<class>"
+    entity_prev_angles = {}
+
+    # A stable sort keeps the original row order inside each frame and avoids
+    # scanning the whole table once per frame. Rows without a frame number
+    # cannot be ordered and are skipped.
+    ordered = raw_data.dropna(subset=["frame"]).sort_values("frame", kind="stable")
+
+    for _, row in ordered.iterrows():
+        entity_key = f"{row['id']}_{row['class']}"
+
+        # Get previous angles for this entity
+        prev_v_angle, prev_a_angle = entity_prev_angles.get(entity_key, (None, None))
+
+        # Calculate magnitude and angle features
+        (
+            v_mag,
+            a_mag,
+            v_angle,
+            a_angle,
+            diff_v_a_angle,
+            diff_v_angle,
+            diff_a_angle,
+        ) = calculate_magnitude_angle_features(
+            row["vx"],
+            row["vy"],
+            row["ax"],
+            row["ay"],
+            prev_v_angle,
+            prev_a_angle,
+        )
+
+        # Create intermediate row
+        intermediate_data.append(
+            {
+                "frame": row["frame"],
+                "id": row["id"],
+                "x": row["x"],
+                "y": row["y"],
+                "vx": row["vx"],
+                "vy": row["vy"],
+                "ax": row["ax"],
+                "ay": row["ay"],
+                "v_mag": v_mag,
+                "a_mag": a_mag,
+                "v_angle": v_angle,
+                "a_angle": a_angle,
+                "diff_v_a_angle": diff_v_a_angle,
+                "diff_v_angle": diff_v_angle,
+                "diff_a_angle": diff_a_angle,
+                "class": row["class"],
+                "holder": row["holder"],
+                "closest": row["closest"],
+            }
+        )
+
+        # Update previous angles
+        entity_prev_angles[entity_key] = (v_angle, a_angle)
+
+    return pd.DataFrame(intermediate_data)
+
+
+def _wrap_angle_diff(angle, reference):
+    """Return angle - reference wrapped into [-pi, pi], rounded to 2 decimals"""
+    delta = angle - reference
+    return round(np.arctan2(np.sin(delta), np.cos(delta)), 2)
+
+
+def calculate_magnitude_angle_features(
+    vx, vy, ax, ay, prev_v_angle=None, prev_a_angle=None
+):
+    """
+    Calculate magnitude and angle features
+
+    Args:
+        vx, vy: Velocity components
+        ax, ay: Acceleration components
+        prev_v_angle: Velocity angle of the previous sample, or None if unknown
+        prev_a_angle: Acceleration angle of the previous sample, or None if unknown
+
+    Returns:
+        Tuple of (v_mag, a_mag, v_angle, a_angle, diff_v_a_angle, diff_v_angle,
+        diff_a_angle). Angles are in radians (np.arctan2); every value is rounded
+        to 2 decimals and is NaN when one of its inputs is NaN or missing.
+    """
+    # Velocity magnitude and angle
+    has_velocity = not (np.isnan(vx) or np.isnan(vy))
+    v_mag = round(np.sqrt(vx**2 + vy**2), 2) if has_velocity else np.nan
+    v_angle = round(np.arctan2(vy, vx), 2) if has_velocity else np.nan
+
+    # Acceleration magnitude and angle
+    has_acceleration = not (np.isnan(ax) or np.isnan(ay))
+    a_mag = round(np.sqrt(ax**2 + ay**2), 2) if has_acceleration else np.nan
+    a_angle = round(np.arctan2(ay, ax), 2) if has_acceleration else np.nan
+
+    # Angle differences
+    diff_v_a_angle = np.nan
+    if not (np.isnan(v_angle) or np.isnan(a_angle)):
+        diff_v_a_angle = _wrap_angle_diff(v_angle, a_angle)
+
+    diff_v_angle = np.nan
+    if prev_v_angle is not None and not (np.isnan(v_angle) or np.isnan(prev_v_angle)):
+        diff_v_angle = _wrap_angle_diff(v_angle, prev_v_angle)
+
+    diff_a_angle = np.nan
+    if prev_a_angle is not None and not (np.isnan(a_angle) or np.isnan(prev_a_angle)):
+        diff_a_angle = _wrap_angle_diff(a_angle, prev_a_angle)
+
+    return (
+        v_mag,
+        a_mag,
+        v_angle,
+        a_angle,
+        diff_v_a_angle,
+        diff_v_angle,
+        diff_a_angle,
+    )
+
+
+def convert_to_metrica_format(intermediate_df, tracking_herz):
+    """
+    Convert Ultimate Track intermediate data to Metrica format
+
+    Args:
+        intermediate_df: DataFrame with intermediate format containing calculated motion features
+        tracking_herz: Frequency of tracking data (frames per second)
+
+    Returns:
+        Tuple of (home_df, away_df, events_df): Metrica format DataFrames
+        - home_df: Home team tracking data with MultiIndex columns
+        - away_df: Away team tracking data with MultiIndex columns
+        - events_df: Events data with disc position and holder information
+    """
+    # Create the Metrica DataFrame for events
+    events_df = create_events_metrica(intermediate_df, tracking_herz)
+
+    # Create the Metrica DataFrame for Home and Away
+    home_df = create_tracking_metrica(intermediate_df, "Home", tracking_herz)
+    away_df = create_tracking_metrica(intermediate_df, "Away", tracking_herz)
+
+    # Drop rows that carry no frame number
+    events_df.dropna(subset=["Start Frame"], inplace=True)
+    home_df.dropna(subset=[("", "", "Frame")], inplace=True)
+    away_df.dropna(subset=[("", "", "Frame")], inplace=True)
+
+    return home_df, away_df, events_df
+
+
+def _align_to_frames(rows, column, frames):
+    """
+    Return rows[column] as a Series with one value per entry of frames
+
+    Values are looked up through the "frame" column instead of by row position,
+    so a frame without a row yields NaN rather than shifting every later value.
+    When a frame has several rows, the first one is used.
+    """
+    by_frame = rows.drop_duplicates(subset="frame").set_index("frame")[column]
+    return by_frame.reindex(frames).reset_index(drop=True)
+
+
+def create_events_metrica(df, tracking_herz):
+    """
+    Create the Metrica DataFrame for events
+
+    Args:
+        df (DataFrame): The DataFrame containing the data
+        tracking_herz (int): Frequency of tracking data (frames per second)
+
+    Returns:
+        DataFrame: The DataFrame containing the events, one row per frame
+            between the first and the last frame of df
+    """
+    # Define the columns of the DataFrame
+    columns = [
+        "Team",
+        "Type",
+        "Subtype",
+        "Period",
+        "Start Frame",
+        "Start Time [s]",
+        "End Frame",
+        "End Time [s]",
+        "From",
+        "To",
+        "Start X",
+        "Start Y",
+        "End X",
+        "End Y",
+    ]
+
+    if df.empty:
+        return pd.DataFrame(columns=columns)
+
+    # Get the frame range from the min and max frame
+    frames = np.arange(int(df["frame"].min()), int(df["frame"].max()) + 1)
+
+    # Create NaN column
+    nan_column = pd.Series(np.full(len(frames), np.nan))
+
+    # Create columns
+    start_frame = pd.Series(frames)
+    start_time = (start_frame / tracking_herz).round(6)
+
+    # Disc position of each frame
+    disc_df = df[df["class"] == "disc"]
+    start_x = _align_to_frames(disc_df, "x", frames).round(2)
+    start_y = _align_to_frames(disc_df, "y", frames).round(2)
+
+    # Get holder information: index of the holder among the sorted offense ids.
+    # eq(True) treats a missing holder flag as "not holding"
+    offense_ids = sorted(df.loc[df["class"] == "offense", "id"].unique())
+    offense_index = {player_id: i for i, player_id in enumerate(offense_ids)}
+    holder_data = df.loc[df["holder"].eq(True)]
+    from_id = _align_to_frames(holder_data, "id", frames).map(offense_index)
+
+    # Create the DataFrame for events
+    events_df = pd.concat(
+        [
+            nan_column,
+            nan_column,
+            nan_column,
+            nan_column,
+            start_frame,
+            start_time,
+            nan_column,
+            nan_column,
+            from_id,
+            nan_column,
+            start_x,
+            start_y,
+            nan_column,
+            nan_column,
+        ],
+        axis=1,
+    )
+    events_df.columns = columns
+
+    return events_df
+
+
+def create_tracking_metrica(df, team, tracking_herz):
+    """
+    Create the Metrica format DataFrame for team tracking data
+
+    Args:
+        df (DataFrame): The intermediate DataFrame containing tracking data
+            with columns: frame, class, x, y, id, closest
+        team (str): Team designation ("Home" for offense, "Away" for defense)
+        tracking_herz (int): Frequency of tracking data (frames per second)
+
+    Returns:
+        DataFrame: Tracking DataFrame in Metrica format with MultiIndex columns:
+            - Level 0: "" for general columns, team name for player columns
+            - Level 1: Player indices for player columns
+            - Level 2: "Period", "Frame", "Time [s]", player position names, "Disc__"
+        Contains position data for up to 7 players plus disc position, one row
+        per frame between the first and the last frame of df.
+    """
+    # Define the levels of the MultiIndex
+    player_columns = _NUM_PLAYERS * 2  # x, y for each player
+    level_0 = [""] * 3 + [team] * player_columns + [""] * 2
+    level_1 = [""] * 3 + [i // 2 for i in range(player_columns)] + [""] * 2
+    player_names = [f"Player{i // 2}" for i in range(player_columns)]
+    level_2 = ["Period", "Frame", "Time [s]"] + player_names + ["Disc__", "Disc__"]
+
+    # Create the MultiIndex
+    multi_columns = pd.MultiIndex.from_arrays([level_0, level_1, level_2])
+
+    if df.empty:
+        return pd.DataFrame(columns=multi_columns)
+
+    frames = np.arange(int(df["frame"].min()), int(df["frame"].max()) + 1)
+    nan_column = pd.Series(np.full(len(frames), np.nan))
+
+    frame = pd.Series(frames)
+    time = (frame / tracking_herz).round(6)
+
+    offense_df = df[df["class"] == "offense"]
+    offense_ids = sorted(offense_df["id"].unique())
+    if team == "Home":
+        player_class = "offense"
+        player_ids = offense_ids
+    else:
+        # For Away team, use the defender recorded as closest in the first row
+        # of each offense player
+        player_class = "defense"
+        player_ids = []
+        for offense_id in offense_ids:
+            closest = offense_df.loc[offense_df["id"] == offense_id, "closest"]
+            if not closest.empty:
+                player_ids.append(closest.iloc[0])
+
+    positions = []
+    for player_id in player_ids[:_NUM_PLAYERS]:
+        player_df = df[(df["id"] == player_id) & (df["class"] == player_class)]
+        positions.append(_align_to_frames(player_df, "x", frames).round(2))
+        positions.append(_align_to_frames(player_df, "y", frames).round(2))
+
+    # Add remaining player columns if less than 7 players
+    while len(positions) < player_columns:
+        positions.append(nan_column)
+
+    disc_df = df[df["class"] == "disc"]
+    positions.append(_align_to_frames(disc_df, "x", frames).round(2))
+    positions.append(_align_to_frames(disc_df, "y", frames).round(2))
+
+    tracking_df = pd.concat([nan_column, frame, time, *positions], axis=1)
+    tracking_df.columns = multi_columns
+
+    return tracking_df