class CDFCoordinateSystem(ProviderCoordinateSystem):
    """
    Coordinate system of the Common Data Format (CDF).

    The origin is the centre of the pitch, the y-axis runs from bottom to
    top, and coordinates are expressed in meters. The pitch length and width
    are copied from the coordinate system this one is built from.
    """

    def __init__(self, base_coordinate_system: ProviderCoordinateSystem):
        # Only the pitch size of the source coordinate system is retained.
        # NOTE(review): the constructor takes a coordinate system rather than
        # pitch_length/pitch_width values - confirm this matches how
        # build_coordinate_system instantiates registered classes.
        self._pitch_length = (
            base_coordinate_system.pitch_dimensions.pitch_length
        )
        self._pitch_width = base_coordinate_system.pitch_dimensions.pitch_width

    @property
    def provider(self) -> Provider:
        """Provider this coordinate system belongs to."""
        return Provider.CDF

    @property
    def origin(self) -> Origin:
        """Location of the (0, 0) point: the centre of the pitch."""
        return Origin.CENTER

    @property
    def vertical_orientation(self) -> VerticalOrientation:
        """Direction in which y grows: from the bottom to the top."""
        return VerticalOrientation.BOTTOM_TO_TOP

    @property
    def pitch_dimensions(self) -> PitchDimensions:
        """Pitch extents, symmetric around the centre on both axes."""
        length = self._pitch_length
        width = self._pitch_width
        # NOTE(review): if the source system reported no pitch size (None),
        # the arithmetic below raises TypeError - confirm callers always
        # provide dimensions.
        x_range = Dimension(-1 * length / 2, length / 2)
        y_range = Dimension(-1 * width / 2, width / 2)
        # NOTE(review): NormalizedPitchDimensions is used although the
        # coordinates are documented as meters - confirm this is intended.
        return NormalizedPitchDimensions(
            x_dim=x_range,
            y_dim=y_range,
            pitch_length=length,
            pitch_width=width,
            standardized=False,
        )
class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]):
    """Serialize a ``TrackingDataset`` into Common Data Format (CDF) files.

    ``serialize`` writes one JSON metadata document to ``outputs.meta_data``
    and one single-line ``.jsonl`` temporary file per frame; every frame file
    is appended to ``outputs.tracking_data``.
    """

    provider = Provider.CDF

    # CDF period names keyed by the period id used in the dataset.
    _PERIOD_NAMES = {
        1: "first_half",
        2: "second_half",
        3: "first_half_extratime",
        4: "second_half_extratime",
        5: "shootout",
    }
    # Written when a period id has no entry in _PERIOD_NAMES.
    _UNKNOWN_PERIOD = "unknown"
    # Placeholder written for x, y and z when the ball is not alive or the
    # frame has no ball coordinates.
    _MISSING_BALL_COORDINATE = 404

    @staticmethod
    def get_starting_formation(team_players) -> str:
        """Infer a ``defenders-midfielders-attackers`` formation string.

        Args:
            team_players: Players of one team to count by position group.

        Returns:
            The formation (for example ``"4-4-2"``) when exactly ten players
            fall into the defender, midfielder or attacker groups; otherwise
            an empty string.
        """
        defenders = midfielders = attackers = 0
        for player in team_players:
            position = player.starting_position
            # A player without a starting position cannot be counted.
            if position is None:
                continue
            group = position.position_group
            if group == PositionType.Attacker:
                attackers += 1
            elif group == PositionType.Midfielder:
                midfielders += 1
            elif group == PositionType.Defender:
                defenders += 1
        if defenders + midfielders + attackers == 10:
            return f"{defenders}-{midfielders}-{attackers}"
        return ""

    @staticmethod
    def _player_entry(player, coordinates) -> dict:
        """Build the per-frame CDF record of one tracked player."""
        position = player.starting_position
        return {
            "id": player.player_id,
            "x": round(coordinates.x, 3),
            "y": round(coordinates.y, 3),
            # None when the player has no starting position.
            "position": position.code if position is not None else None,
        }

    @classmethod
    def _ball_entry(cls, ball_coordinates, ball_alive: bool) -> dict:
        """Build the CDF ball record, using the placeholder when unusable."""
        if not ball_alive or ball_coordinates is None:
            return dict.fromkeys(("x", "y", "z"), cls._MISSING_BALL_COORDINATE)
        # The height may be absent (2D ball position); emit None for it.
        z = getattr(ball_coordinates, "z", None)
        return {
            "x": round(ball_coordinates.x, 3),
            "y": round(ball_coordinates.y, 3),
            "z": round(z, 3) if z is not None else None,
        }

    @staticmethod
    def _team_entry(team, players: list, formation) -> dict:
        """Build the CDF team record shared by frames and metadata."""
        return {
            "id": team.team_id,
            "players": players,
            "jersey_color": " ",
            "name": team.name,
            "formation": formation,
        }

    @staticmethod
    def _present_players(team, present_ids: set) -> list:
        """Return the players of ``team`` whose id is in ``present_ids``."""
        return [p for p in team.players if p.player_id in present_ids]

    @staticmethod
    def _meta_players(team, starter_ids: set) -> list:
        """Build the metadata player records of ``team``."""
        return [
            {
                "id": player.player_id,
                "team_id": team.team_id,
                "jersey_number": player.jersey_no,
                # A starter is a player tracked in the first frame.
                "is_starter": player.player_id in starter_ids,
            }
            for player in team.players
        ]

    def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool:
        """Serialize a TrackingDataset to Common Data Format.

        The dataset is first transformed to the CDF coordinate system with a
        static home-away orientation. Each frame is then written to its own
        ``.jsonl`` temporary file (created with ``delete=False``), and the
        metadata document is written to ``outputs.meta_data``.

        Args:
            dataset: The tracking dataset to serialize.
            outputs: CDFOutputs holding the metadata file handle and the list
                that receives one file object per frame. The frame files are
                closed after writing so that long matches do not exhaust file
                descriptors; use each object's ``name`` to reopen or delete
                the file. Deleting them is the caller's responsibility.

        Returns:
            bool: True once both outputs have been written.
        """
        # Imported here, as in the original implementation.
        from kloppy.domain import BallState, Orientation
        from kloppy.domain.models.common import CDFCoordinateSystem

        dataset = dataset.transform(
            to_coordinate_system=CDFCoordinateSystem(
                dataset.metadata.coordinate_system
            ),
            to_orientation=Orientation.STATIC_HOME_AWAY,
        )
        metadata = dataset.metadata
        home_team, away_team = metadata.teams
        match_id = str(metadata.game_id)

        # Sets give constant-time membership tests inside the frame loop.
        home_ids = {player.player_id for player in home_team.players}
        away_ids = {player.player_id for player in away_team.players}

        # (first, last) emitted CDF frame id per period id, so the metadata
        # refers to the same ids that are written to the tracking files.
        period_frame_ids = {}
        # Ids of the players tracked in the first frame (the starters).
        starter_ids = None

        # ---- tracking data: one JSON line (and file) per frame ----
        for cdf_frame_id, frame in enumerate(dataset.frames):
            period_id = frame.period.id
            span = period_frame_ids.get(period_id)
            period_frame_ids[period_id] = (
                cdf_frame_id if span is None else span[0],
                cdf_frame_id,
            )

            home_players = []
            away_players = []
            present_ids = set()
            for player, coordinates in frame.players_coordinates.items():
                present_ids.add(player.player_id)
                if player.player_id in home_ids:
                    home_players.append(
                        self._player_entry(player, coordinates)
                    )
                if player.player_id in away_ids:
                    away_players.append(
                        self._player_entry(player, coordinates)
                    )
            if starter_ids is None:
                starter_ids = present_ids

            # NOTE(review): formation values are handed to json.dumps as-is;
            # confirm the objects returned by at_start() are serializable.
            home_formation = (
                home_team.formations.at_start()
                if home_team.formations.items
                else self.get_starting_formation(
                    self._present_players(home_team, present_ids)
                )
            )
            away_formation = (
                away_team.formations.at_start()
                if away_team.formations.items
                else self.get_starting_formation(
                    self._present_players(away_team, present_ids)
                )
            )

            ball_alive = frame.ball_state == BallState.ALIVE
            frame_data = {
                # Frame id specified by the CDF (sequential from zero).
                "frame_id": cdf_frame_id,
                # Frame id as found in the source dataset.
                "Original_frame_id": frame.frame_id,
                "timestamp": str(metadata.date + frame.timestamp),
                "period": self._PERIOD_NAMES.get(
                    period_id, self._UNKNOWN_PERIOD
                ),
                "match": {"id": match_id},
                "ball_status": ball_alive,
                "teams": {
                    "home": self._team_entry(
                        home_team, home_players, home_formation
                    ),
                    "away": self._team_entry(
                        away_team, away_players, away_formation
                    ),
                },
                "ball": self._ball_entry(frame.ball_coordinates, ball_alive),
            }

            # The context manager closes the handle; delete=False keeps the
            # file on disk so it stays reachable through its name.
            with tempfile.NamedTemporaryFile(
                mode="w+b", suffix=".jsonl", delete=False
            ) as frame_file:
                frame_file.write(
                    (json.dumps(frame_data) + "\n").encode("utf-8")
                )
            outputs.tracking_data.append(frame_file)

        # An empty dataset has no first frame and therefore no starters.
        if starter_ids is None:
            starter_ids = set()

        # ---- metadata ----
        periods_info = []
        for period in metadata.periods:
            start_frame_id, end_frame_id = period_frame_ids.get(
                period.id, (None, None)
            )
            periods_info.append(
                {
                    "period": self._PERIOD_NAMES.get(
                        period.id, self._UNKNOWN_PERIOD
                    ),
                    "play_direction": "left_right",
                    "start_time": str(metadata.date + period.start_timestamp),
                    "end_time": str(metadata.date + period.end_timestamp),
                    "start_frame_id": start_frame_id,
                    "end_frame_id": end_frame_id,
                    "left_team_id": home_team.team_id,
                    "right_team_id": away_team.team_id,
                }
            )

        # Whistles are derived from the periods: one start and one end each.
        whistles = []
        for period_info in periods_info:
            whistles.append(
                {
                    "type": period_info["period"],
                    "sub_type": "start",
                    "time": period_info["start_time"],
                }
            )
            whistles.append(
                {
                    "type": period_info["period"],
                    "sub_type": "end",
                    "time": period_info["end_time"],
                }
            )

        metadata_json = {
            "competition": {
                "id": "MISSING_MANDATORY_COMPETITION_ID",
                "name": "",
                "format": "",
                "age_restriction": "",
                "type": "",
            },
            "season": {
                "id": "MISSING_MANDATORY_SEASON_ID",
                "name": "",
            },
            "match": {
                "id": match_id,
                "kickoff_time": str(
                    metadata.date + metadata.periods[0].start_timestamp
                ),
                "periods": periods_info,
                "whistles": whistles,
                "round": "",
                "scheduled_kickoff_time": str(metadata.date),
                "local_kickoff_time": "",
                "misc": {
                    "country": "",
                    "city": "",
                    "percipitation": 0,
                    "is_open_roof": True,  # assumed default value
                },
            },
            "teams": {
                "home": self._team_entry(
                    home_team,
                    self._meta_players(home_team, starter_ids),
                    home_team.starting_formation
                    or self.get_starting_formation(
                        self._present_players(home_team, starter_ids)
                    ),
                ),
                "away": self._team_entry(
                    away_team,
                    self._meta_players(away_team, starter_ids),
                    away_team.starting_formation
                    or self.get_starting_formation(
                        self._present_players(away_team, starter_ids)
                    ),
                ),
            },
            "stadium": {
                "id": "MISSING_MANDATORY_STADIUM_ID",
                "pitch_length": metadata.pitch_dimensions.pitch_length,
                "pitch_width": metadata.pitch_dimensions.pitch_width,
                "name": "",
                "turf": "",
            },
            "meta": {
                "video": None,
                "tracking": None,
                "landmarks": None,
                "meta": None,
                "cdf": None,
            },
        }

        outputs.meta_data.write(
            (json.dumps(metadata_json) + "\n").encode("utf-8")
        )

        return True
class TestCDFSerializer:
    """Tests for ``CDFTrackingDataSerializer`` based on Sportec sample files."""

    @pytest.fixture
    def raw_data(self, base_dir) -> Path:
        """Path to the Sportec positional (tracking) sample file."""
        return base_dir / "files/sportec_positional.xml"

    @pytest.fixture
    def meta_data(self, base_dir) -> Path:
        """Path to the Sportec metadata sample file."""
        return base_dir / "files/sportec_meta.xml"

    @pytest.fixture
    def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset:
        """Load a small Sportec tracking data snippet for testing CDF serialization."""
        return sportec.load_tracking(
            raw_data=raw_data,
            meta_data=meta_data,
            coordinates="sportec",
            limit=None,
            only_alive=False,
        )

    def test_produces_valid_cdf_output(self, dataset):
        """Test that CDFTrackingDataSerializer produces valid CDF output."""
        serializer = CDFTrackingDataSerializer()

        # The schema paths must be f-strings: without the prefix the literal
        # text "{cdf.VERSION}" would be passed to the validators.
        meta_validator = cdf.MetaSchemaValidator(
            schema=f"cdf/files/v{cdf.VERSION}/schema/meta.json"
        )
        tracking_validator = cdf.TrackingSchemaValidator(
            schema=f"cdf/files/v{cdf.VERSION}/schema/tracking.json"
        )

        # The serializer appends one file object per frame to this list.
        tracking_files = []
        meta_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w+b", suffix=".json", delete=False
            ) as meta_file:
                meta_path = meta_file.name
                outputs = CDFOutputs(
                    meta_data=meta_file,
                    tracking_data=tracking_files,
                )
                success = serializer.serialize(dataset, outputs)
                assert success is True

            # Close the frame files so their content is on disk before the
            # validators open them by path (closing twice is harmless).
            for frame_file in tracking_files:
                frame_file.close()

            meta_validator.validate_schema(sample=meta_path)
            for frame_file in tracking_files:
                tracking_validator.validate_schema(sample=frame_file.name)
        finally:
            # Remove the temporary files even when an assertion fails.
            for frame_file in tracking_files:
                frame_file.close()
                Path(frame_file.name).unlink(missing_ok=True)
            if meta_path is not None:
                Path(meta_path).unlink(missing_ok=True)
"development": ["pre-commit==2.6.0"], "query": ["networkx>=2.4,<3"],