From 4749a78c13ef2b1c664b4a158bbc276e153ef73b Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 25 May 2025 15:22:58 -0300 Subject: [PATCH 1/3] feat: add load_event function for deserializing pff event data --- kloppy/_providers/pff.py | 35 ++++ .../infra/serializers/event/pff/__init__.py | 6 + .../serializers/event/pff/deserializer.py | 153 ++++++++++++++++++ kloppy/pff.py | 4 +- main.py | 12 ++ 5 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 kloppy/infra/serializers/event/pff/__init__.py create mode 100644 kloppy/infra/serializers/event/pff/deserializer.py create mode 100644 main.py diff --git a/kloppy/_providers/pff.py b/kloppy/_providers/pff.py index 71c2fbe6e..ca6aebe3e 100644 --- a/kloppy/_providers/pff.py +++ b/kloppy/_providers/pff.py @@ -3,8 +3,43 @@ PFF_TrackingDeserializer, PFF_TrackingInputs, ) +from kloppy.domain import EventDataset from kloppy.io import FileLike, open_as_file +from kloppy.infra.serializers.event.pff import ( + PFFEventDeserializer, + PFFEventDataInput +) + +def load_event( + event_data: FileLike, + meta_data: FileLike, + roster_data: FileLike, + coordinates: Optional[str] = None, +) -> EventDataset: + """ + Load and deserialize event data from the provided event data, lineup data, and optional three-sixty data files. + Args: + event_data (FileLike): A file-like object containing the event data. + meta_data (FileLike): A file-like object containing metadata about the tracking data. + roster_data (FileLike): A file-like object containing roster metadata, such as player details. + coordinates (Optional[str], optional): The coordinate system to use for the tracking data (e.g., "pff"). Defaults to None. + Returns: + EventDataset: A deserialized EventDataset object containing the processed event data. + """ + deserializer = PFFEventDeserializer( + coordinate_system=coordinates + ) + with open_as_file(event_data) as event_data_fp, open_as_file( + roster_data + ) as roster_data_fp, open_as_file(meta_data) as meta_data_fp: + return deserializer.deserialize( + inputs=PFFEventDataInput( + event_data=event_data_fp, + meta_data=meta_data_fp, + roster_data=roster_data_fp, + ) + ) def load_tracking( meta_data: FileLike, diff --git a/kloppy/infra/serializers/event/pff/__init__.py b/kloppy/infra/serializers/event/pff/__init__.py new file mode 100644 index 000000000..effb76051 --- /dev/null +++ b/kloppy/infra/serializers/event/pff/__init__.py @@ -0,0 +1,6 @@ +from .deserializer import PFFEventDeserializer, PFFEventDataInput + +__all__ = [ + "PFFEventDeserializer", + "PFFEventDataInput", +] \ No newline at end of file diff --git a/kloppy/infra/serializers/event/pff/deserializer.py b/kloppy/infra/serializers/event/pff/deserializer.py new file mode 100644 index 000000000..1b7db13b7 --- /dev/null +++ b/kloppy/infra/serializers/event/pff/deserializer.py @@ -0,0 +1,153 @@ +import json + +from typing import Any, Dict, List, NamedTuple, IO, Optional, Tuple, Union + +from kloppy.domain import Provider, EventDataset, Metadata, Team, Player +from kloppy.infra.serializers.event.deserializer import EventDataDeserializer +from kloppy.exceptions import DeserializationError + + +class PFFEventDataInput(NamedTuple): + """ + Input data for PFF event deserialization. + """ + + event_data: IO[bytes] + meta_data: IO[bytes] + roster_data: IO[bytes] + + +class PFFEventDeserializer(EventDataDeserializer[PFFEventDataInput]): + """ + Deserialize PFF events. + """ + + def __init__(self, coordinate_system: Optional[Union[str, Provider]] = None): + super().__init__( + coordinate_system=coordinate_system, + ) + + def load_data( + self, inputs: PFFEventDataInput + ) -> tuple[IO[bytes], IO[bytes], IO[bytes]]: + """ + Load data from the input files. + """ + return ( + json.load(inputs.event_data), + json.load(inputs.meta_data), + json.load(inputs.roster_data), + ) + + def get_match_information(self, metadata: List) -> Dict[str, Any]: + """ + Get metadata from the input files. + """ + return { + "home_team": metadata["homeTeam"], + "away_team": metadata["awayTeam"], + "stadium": metadata["stadium"], + "game_week": metadata["week"], + } + + def get_pitch_information( + self, stadium_metadata: Dict[str, Any] + ) -> Tuple[float, float]: + """ + Get pitch information from the metadata. + """ + + pitches: Dict[str, Any] = stadium_metadata["pitches"].pop() + pitch_size_length = pitches["length"] + pitch_size_width = pitches["width"] + + return pitch_size_width, pitch_size_length + + + def build_player(self, player: Dict[str, Any], team: Team) -> Player: + + player = Player( + player_id=player["player"]["id"], + team=team, + name=player["player"]["nickname"], + jersey_no=int(player["shirtNumber"]), + starting=player["started"], + starting_position=player["positionGroupType"], + ) + + return player + + def build_squad(self, rooster_data: Dict[str, Any], team: Team) -> List[Player]: + team_id = team.team_id + + players: List[Player] = [ + self.build_player(player_data, team_id) + for player_data in rooster_data + if player_data["team"]["id"] == team_id + ] + return players + + def build_team(self, team_data: Dict[str, Any], rooster_data: Dict[str, Any], ground_type: str) -> Team: + + team_id = team_data["id"] + + team = Team( + team_id=team_id, + name=team_data["name"], + ground=ground_type, + ) + + team.players = self.build_squad(rooster_data, team) + + return team + + # def get_metadata_information(self) -> Metadata: + + # metadata = Metadata( + # teams=teams, + # periods=periods, + # pitch_dimensions=self.transformer.get_to_coordinate_system().pitch_dimensions, + # frame_rate=None, + # orientation=Orientation.ACTION_EXECUTING_TEAM, + # flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, + # score=None, + # provider=Provider.STATSBOMB, + # coordinate_system=self.transformer.get_to_coordinate_system(), + # **additional_metadata, + # ) + + # return metadata + + @property + def provider(self) -> Provider: + return Provider.PFF + + def deserialize(self, inputs: PFFEventDataInput) -> EventDataset: + """ + Deserialize the PFF event. + """ + try: + raw_events, meta_data, roster_data = self.load_data(inputs) + + metadata_information = self.get_match_information(meta_data.pop()) + + pitch_size_width, pitch_size_length = self.get_pitch_information( + metadata_information["stadium"] + ) + + self.transformer = self.get_transformer( + pitch_length=pitch_size_length, + pitch_width=pitch_size_width, + provider=self.provider, + ) + + home_team = self.build_team( + team_data=metadata_information["home_team"], + rooster_data=roster_data, + ground_type="home", + ) + + except Exception as e: + raise DeserializationError( + "Failed to create transformer for PFF event data" + ) from e diff --git a/kloppy/pff.py b/kloppy/pff.py index a0ce8a58c..77287fc40 100644 --- a/kloppy/pff.py +++ b/kloppy/pff.py @@ -1,5 +1,5 @@ """Functions for loading PFF FC data.""" -from ._providers.pff import load_tracking +from ._providers.pff import load_tracking, load_event -__all__ = ["load_tracking"] +__all__ = ["load_tracking", "load_event"] diff --git a/main.py b/main.py new file mode 100644 index 000000000..1a5bec3a7 --- /dev/null +++ b/main.py @@ -0,0 +1,12 @@ +from kloppy import pff + +def main(): + dataset = pff.load_event( + event_data="/home/jupiter/ufmg/thesis/gandula-expected-goals/data/01_raw/event_data/3812.json", + meta_data="/home/jupiter/ufmg/thesis/gandula-expected-goals/data/01_raw/metadata/3812.json", + roster_data="/home/jupiter/ufmg/thesis/gandula-expected-goals/data/01_raw/rosters/3812.json", + coordinates="pff" + ) + +if __name__ == "__main__": + main() \ No newline at end of file From 58cdc7df0aa110a60c4c4254c2ca8f2c0ffa4816 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 25 May 2025 16:00:43 -0300 Subject: [PATCH 2/3] feat(PFFEventDeserializer): enhance metadata handling and add period data extraction --- .../serializers/event/pff/deserializer.py | 139 ++++++++++++++---- 1 file changed, 110 insertions(+), 29 deletions(-) diff --git a/kloppy/infra/serializers/event/pff/deserializer.py b/kloppy/infra/serializers/event/pff/deserializer.py index 1b7db13b7..716b44f19 100644 --- a/kloppy/infra/serializers/event/pff/deserializer.py +++ b/kloppy/infra/serializers/event/pff/deserializer.py @@ -2,7 +2,15 @@ from typing import Any, Dict, List, NamedTuple, IO, Optional, Tuple, Union -from kloppy.domain import Provider, EventDataset, Metadata, Team, Player +from kloppy.domain import ( + Provider, + EventDataset, + Metadata, + Team, + Player, + DatasetFlag, + Period, +) from kloppy.infra.serializers.event.deserializer import EventDataDeserializer from kloppy.exceptions import DeserializationError @@ -27,6 +35,10 @@ def __init__(self, coordinate_system: Optional[Union[str, Provider]] = None): coordinate_system=coordinate_system, ) + @property + def provider(self) -> Provider: + return Provider.PFF + def load_data( self, inputs: PFFEventDataInput ) -> tuple[IO[bytes], IO[bytes], IO[bytes]]: @@ -39,7 +51,7 @@ def load_data( json.load(inputs.roster_data), ) - def get_match_information(self, metadata: List) -> Dict[str, Any]: + def get_match_information(self, metadata: Dict[str, Any]) -> Dict[str, Any]: """ Get metadata from the input files. """ @@ -48,6 +60,8 @@ def get_match_information(self, metadata: List) -> Dict[str, Any]: "away_team": metadata["awayTeam"], "stadium": metadata["stadium"], "game_week": metadata["week"], + "game_id": metadata["id"], + "game_date": metadata["date"], } def get_pitch_information( @@ -63,9 +77,7 @@ def get_pitch_information( return pitch_size_width, pitch_size_length - def build_player(self, player: Dict[str, Any], team: Team) -> Player: - player = Player( player_id=player["player"]["id"], team=team, @@ -76,7 +88,7 @@ def build_player(self, player: Dict[str, Any], team: Team) -> Player: ) return player - + def build_squad(self, rooster_data: Dict[str, Any], team: Team) -> List[Player]: team_id = team.team_id @@ -87,8 +99,9 @@ def build_squad(self, rooster_data: Dict[str, Any], team: Team) -> List[Player]: ] return players - def build_team(self, team_data: Dict[str, Any], rooster_data: Dict[str, Any], ground_type: str) -> Team: - + def build_team( + self, team_data: Dict[str, Any], rooster_data: Dict[str, Any], ground_type: str + ) -> Team: team_id = team_data["id"] team = Team( @@ -101,26 +114,74 @@ def build_team(self, team_data: Dict[str, Any], rooster_data: Dict[str, Any], gr return team - # def get_metadata_information(self) -> Metadata: + def get_orientation(self, metadata: Dict[str, Any]) -> str: + """ + Get the orientation of the event data. + """ - # metadata = Metadata( - # teams=teams, - # periods=periods, - # pitch_dimensions=self.transformer.get_to_coordinate_system().pitch_dimensions, - # frame_rate=None, - # orientation=Orientation.ACTION_EXECUTING_TEAM, - # flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, - # score=None, - # provider=Provider.STATSBOMB, - # coordinate_system=self.transformer.get_to_coordinate_system(), - # **additional_metadata, - # ) + is_home_team_left = metadata["homeTeamStartLeft"] + + orientation = "home-away" if is_home_team_left else "away-home" + + return orientation + + def get_metadata_information( + self, + match_information: Dict[str, Any], + teams: List[Team], + orientation: str, + periods: str, + ) -> Metadata: + additional_metadata = {} + + metadata = Metadata( + game_id=match_information["game_id"], + game_week=match_information["game_week"], + date=match_information["game_date"], + teams=teams, + pitch_dimensions=self.transformer.get_to_coordinate_system().pitch_dimensions, + frame_rate=None, + orientation=orientation, + flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, + score=None, + provider=self.provider, + coordinate_system=self.transformer.get_to_coordinate_system(), + periods=periods, + **additional_metadata, + ) - # return metadata + return metadata - @property - def provider(self) -> Provider: - return Provider.PFF + def get_period_data(self, metadata: Dict[str, Any]) -> Dict[str, Any]: + """ + Get the period data from the metadata. + """ + + period_data = { + "first_period": { + "id": 1, + "start_timestamp": metadata["startPeriod1"], + "end_timestamp": metadata["endPeriod1"], + }, + "second_period": { + "id": 2, + "start_timestamp": metadata["startPeriod2"], + "end_timestamp": metadata["endPeriod2"], + }, + } + + return period_data + + def build_periods(self, metadata: Dict[str, Any]) -> List[Period]: + """ + Get the periods of the event data. + """ + + period_data = self.get_period_data(metadata) + + periods = [Period(**data) for _, data in period_data.items()] + + return periods def deserialize(self, inputs: PFFEventDataInput) -> EventDataset: """ @@ -129,7 +190,9 @@ def deserialize(self, inputs: PFFEventDataInput) -> EventDataset: try: raw_events, meta_data, roster_data = self.load_data(inputs) - metadata_information = self.get_match_information(meta_data.pop()) + actual_meta_data = meta_data.pop() + + metadata_information = self.get_match_information(actual_meta_data) pitch_size_width, pitch_size_length = self.get_pitch_information( metadata_information["stadium"] @@ -141,12 +204,30 @@ def deserialize(self, inputs: PFFEventDataInput) -> EventDataset: provider=self.provider, ) - home_team = self.build_team( - team_data=metadata_information["home_team"], - rooster_data=roster_data, - ground_type="home", + teams = [ + self.build_team( + team_data=metadata_information["home_team"], + rooster_data=roster_data, + ground_type="home" if team == "home_team" else "away", + ) + for team in ["home_team", "away_team"] + ] + + orientation = self.get_orientation(actual_meta_data) + + periods = self.build_periods( + metadata=actual_meta_data, ) + metadata = self.get_metadata_information( + match_information=metadata_information, + teams=teams, + orientation=orientation, + periods=periods, + ) + + return EventDataset(events=None, metadata=metadata) + except Exception as e: raise DeserializationError( "Failed to create transformer for PFF event data" From f96696f0a3431e1a11d6be5faa43b05d2a0133c4 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 25 May 2025 20:13:51 -0300 Subject: [PATCH 3/3] feat(PFFParser): add parser for pass events in PFF data --- .../serializers/event/pff/deserializer.py | 145 +++++++++++++++++- kloppy/infra/serializers/event/pff/parser.py | 17 ++ 2 files changed, 154 insertions(+), 8 deletions(-) create mode 100644 kloppy/infra/serializers/event/pff/parser.py diff --git a/kloppy/infra/serializers/event/pff/deserializer.py b/kloppy/infra/serializers/event/pff/deserializer.py index 716b44f19..c514f76cf 100644 --- a/kloppy/infra/serializers/event/pff/deserializer.py +++ b/kloppy/infra/serializers/event/pff/deserializer.py @@ -5,11 +5,14 @@ from kloppy.domain import ( Provider, EventDataset, + EventFactory, Metadata, Team, Player, DatasetFlag, Period, + Event, + Point, ) from kloppy.infra.serializers.event.deserializer import EventDataDeserializer from kloppy.exceptions import DeserializationError @@ -183,6 +186,121 @@ def build_periods(self, metadata: Dict[str, Any]) -> List[Period]: return periods + def _extract_coordinates( + self, event: Dict[str, Any], player_id: str, team_id: str, home_team_id: str + ) -> Point: + """ + Extract the coordinates from the event data. + """ + + player_list_index = "homePlayers" if team_id == home_team_id else "awayPlayers" + + player_list = event[player_list_index] + + coordinates_list = [ + { + "x": player["x"], + "y": player["y"], + } + for player in player_list + if player["playerId"] == player_id + ] + + if coordinates_list: + coordinates = coordinates_list.pop() + return Point(x=coordinates["x"], y=coordinates["y"]) + return Point(x=None, y=None) + + def _extract_possession_team( + self, + team_id: str, + home_team: Team, + away_team: Team, + ) -> Team: + return home_team if home_team.team_id == team_id else away_team + + def _extract_ball_state(self, event: Dict[str, Any]) -> str: + return "dead" if event["gameEvents"]["gameEventType"] == "OUT" else "alive" + + def _extract_player(self, player_id: str, team: Team) -> Player: + player_list = [ + player for player in team.players if player.player_id == player_id + ] + return player_list.pop() if player_list else None + + def build_generic_event_kwargs( + self, + event: Dict[str, Any], + home_team: Team, + away_team: Team, + ) -> Dict[str, Any]: + team_id = str(event["gameEvents"]["teamId"]) + team = home_team if team_id == home_team.team_id else away_team + player_id = event["gameEvents"]["playerId"] + coordinates = self._extract_coordinates( + event=event, + player_id=player_id, + team_id=team_id, + home_team_id=home_team.team_id, + ) + + return { + "period": event["gameEvents"]["period"], + "timestamp": event["possessionEvents"]["gameClock"], + "ball_owning_team": self._extract_possession_team( + team_id=event, + home_team=home_team, + away_team=away_team, + ), + "ball_state": self._extract_ball_state(event=event), + "event_id": event["gameEventId"], + "team": team, + "player": self._extract_player(str(player_id), team), + "coordinates": coordinates, + "raw_event": event, + } + + def transform_event( + self, + event: Dict[str, Any], + home_team: Team, + away_team: Team, + ) -> Event: + """ + Transform the event data to the desired format. + """ + + generic_event_kwargs = self.build_generic_event_kwargs( + event=event, + home_team=home_team, + away_team=away_team, + ) + + return generic_event_kwargs + + def build_events( + self, + raw_events: List[Dict[str, Any]], + home_team: Team, + away_team: Team, + ) -> List[Dict[str, Any]]: + """ + Build the events from the raw event data. + """ + + event_factory = EventFactory() + + events = [ + self.transform_event( + event=event, + home_team=home_team, + away_team=away_team, + ) + for event in raw_events + ] + + return events + def deserialize(self, inputs: PFFEventDataInput) -> EventDataset: """ Deserialize the PFF event. @@ -204,14 +322,19 @@ def deserialize(self, inputs: PFFEventDataInput) -> EventDataset: provider=self.provider, ) - teams = [ - self.build_team( - team_data=metadata_information["home_team"], - rooster_data=roster_data, - ground_type="home" if team == "home_team" else "away", - ) - for team in ["home_team", "away_team"] - ] + home_team = self.build_team( + team_data=metadata_information["home_team"], + rooster_data=roster_data, + ground_type="home", + ) + + away_team = self.build_team( + team_data=metadata_information["away_team"], + rooster_data=roster_data, + ground_type="away", + ) + + teams = [home_team, away_team] orientation = self.get_orientation(actual_meta_data) @@ -226,6 +349,12 @@ def deserialize(self, inputs: PFFEventDataInput) -> EventDataset: periods=periods, ) + events = self.build_events( + raw_events=raw_events, + home_team=home_team, + away_team=away_team, + ) + return EventDataset(events=None, metadata=metadata) except Exception as e: diff --git a/kloppy/infra/serializers/event/pff/parser.py b/kloppy/infra/serializers/event/pff/parser.py new file mode 100644 index 000000000..6f8280b13 --- /dev/null +++ b/kloppy/infra/serializers/event/pff/parser.py @@ -0,0 +1,17 @@ +from typing import Any, Dict + +class PFFParser: + + def _parse_pass(self, event: Dict[str, Any]) -> Dict[str, Any]: + """ + Parse a pass event from the PFF data. + """ + return { + "type": "pass", + "player_id": event.get("player_id"), + "start_location": event.get("start_location"), + "end_location": event.get("end_location"), + "outcome": event.get("outcome"), + "distance": event.get("distance"), + "angle": event.get("angle"), + } \ No newline at end of file