From ff86817d74c0158069354af9e01ca2a4e48540a8 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Mon, 2 Jun 2025 09:40:12 +0200 Subject: [PATCH 01/17] to_df(orient=row) --- kloppy/domain/models/common.py | 207 ++++++++++++------ .../domain/services/transformers/attribute.py | 101 +++++++++ .../services/transformers/data_record.py | 21 +- kloppy/tests/test_sportec.py | 31 +++ 4 files changed, 296 insertions(+), 64 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index e0ab88ce1..9888b49cd 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -680,12 +680,16 @@ def to_mplsoccer(self): dim = BaseDims( left=self.pitch_dimensions.x_dim.min, right=self.pitch_dimensions.x_dim.max, - bottom=self.pitch_dimensions.y_dim.min - if not invert_y - else self.pitch_dimensions.y_dim.max, - top=self.pitch_dimensions.y_dim.max - if not invert_y - else self.pitch_dimensions.y_dim.min, + bottom=( + self.pitch_dimensions.y_dim.min + if not invert_y + else self.pitch_dimensions.y_dim.max + ), + top=( + self.pitch_dimensions.y_dim.max + if not invert_y + else self.pitch_dimensions.y_dim.min + ), width=self.pitch_dimensions.x_dim.max - self.pitch_dimensions.x_dim.min, length=self.pitch_dimensions.y_dim.max @@ -734,14 +738,16 @@ def to_mplsoccer(self): - self.pitch_dimensions.x_dim.min ), pad_multiplier=1, - aspect_equal=False - if self.pitch_dimensions.unit == Unit.NORMED - else True, + aspect_equal=( + False if self.pitch_dimensions.unit == Unit.NORMED else True + ), pitch_width=pitch_width, pitch_length=pitch_length, - aspect=pitch_width / pitch_length - if self.pitch_dimensions.unit == Unit.NORMED - else 1.0, + aspect=( + pitch_width / pitch_length + if self.pitch_dimensions.unit == Unit.NORMED + else 1.0 + ), ) return dim @@ -1798,8 +1804,7 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[True] = True, **named_columns: NamedColumns, - ) -> List[Dict[str, Any]]: - ... 
+ ) -> List[Dict[str, Any]]: ... @overload def to_records( @@ -1807,8 +1812,7 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[False] = False, **named_columns: NamedColumns, - ) -> Iterable[Dict[str, Any]]: - ... + ) -> Iterable[Dict[str, Any]]: ... def to_records( self, @@ -1853,6 +1857,28 @@ def to_dict( f"Orient {orient} is not supported. Only orient='list' is supported" ) + def to_dict_rowwise( + self, + *columns: Unpack[tuple[Column]], + **named_columns: NamedColumns, + ) -> List[Dict[str, Any]]: + if self.dataset_type != DatasetType.TRACKING: + raise KloppyParameterError( + f"Row-wise format is only supported for tracking datasets, " + f"got {self.dataset_type}" + ) + + from ..services.transformers.data_record import RowWiseFrameTransformer + + transformer = RowWiseFrameTransformer(*columns, **named_columns) + + all_rows = [] + for record in self.records: + rows = transformer(record) + all_rows.extend(rows) + + return all_rows + def to_df( self, *columns: Unpack[tuple[Column]], @@ -1863,6 +1889,7 @@ def to_df( Literal["pandas[pyarrow]"], ] ] = None, + orient: Literal["column", "row"] = "column", **named_columns: NamedColumns, ): from kloppy.config import get_config @@ -1870,61 +1897,119 @@ def to_df( if not engine: engine = get_config("dataframe.engine") - if engine == "pandas[pyarrow]": - try: - import pandas as pd + if orient == "column": + # Original column-wise behavior using to_dict + if engine == "pandas[pyarrow]": + try: + import pandas as pd - types_mapper = pd.ArrowDtype - except ImportError: - raise ImportError( - "Seems like you don't have pandas installed. Please" - " install it using: pip install pandas" - ) - except AttributeError: - raise AttributeError( - "Seems like you have an older version of pandas installed. Please" - " upgrade to at least 1.5 using: pip install pandas>=1.5" - ) + types_mapper = pd.ArrowDtype + except ImportError: + raise ImportError( + "Seems like you don't have pandas installed. 
Please" + " install it using: pip install pandas" + ) + except AttributeError: + raise AttributeError( + "Seems like you have an older version of pandas installed. Please" + " upgrade to at least 1.5 using: pip install pandas>=1.5" + ) - try: - import pyarrow as pa - except ImportError: - raise ImportError( - "Seems like you don't have pyarrow installed. Please" - " install it using: pip install pyarrow" + try: + import pyarrow as pa + except ImportError: + raise ImportError( + "Seems like you don't have pyarrow installed. Please" + " install it using: pip install pyarrow" + ) + + table = pa.Table.from_pydict( + self.to_dict(*columns, orient="list", **named_columns) ) + return table.to_pandas(types_mapper=types_mapper) - table = pa.Table.from_pydict( - self.to_dict(*columns, orient="list", **named_columns) - ) - return table.to_pandas(types_mapper=types_mapper) + elif engine == "pandas": + try: + from pandas import DataFrame + except ImportError: + raise ImportError( + "Seems like you don't have pandas installed. Please" + " install it using: pip install pandas" + ) - elif engine == "pandas": - try: - from pandas import DataFrame - except ImportError: - raise ImportError( - "Seems like you don't have pandas installed. Please" - " install it using: pip install pandas" + return DataFrame.from_dict( + self.to_dict(*columns, orient="list", **named_columns) ) + elif engine == "polars": + try: + from polars import from_dict + except ImportError: + raise ImportError( + "Seems like you don't have polars installed. Please" + " install it using: pip install polars" + ) - return DataFrame.from_dict( - self.to_dict(*columns, orient="list", **named_columns) - ) - elif engine == "polars": - try: - from polars import from_dict - except ImportError: - raise ImportError( - "Seems like you don't have polars installed. 
Please" - " install it using: pip install polars" + return from_dict( + self.to_dict(*columns, orient="list", **named_columns) ) + else: + raise KloppyParameterError(f"Engine {engine} is not valid") + + elif orient == "row": + # Row-wise behavior using to_dict_rowwise + all_rows = self.to_dict_rowwise(*columns, **named_columns) + + if engine == "pandas[pyarrow]": + try: + import pandas as pd + import pyarrow as pa + + types_mapper = pd.ArrowDtype + except ImportError: + raise ImportError( + "Seems like you don't have pandas and pyarrow installed. Please" + " install them using: pip install pandas pyarrow" + ) + + # Convert list of dicts to dict of lists for pyarrow + if all_rows: + keys = all_rows[0].keys() + data_dict = { + key: [row.get(key) for row in all_rows] for key in keys + } + table = pa.Table.from_pydict(data_dict) + return table.to_pandas(types_mapper=types_mapper) + else: + return pd.DataFrame() + + elif engine == "pandas": + try: + from pandas import DataFrame + except ImportError: + raise ImportError( + "Seems like you don't have pandas installed. Please" + " install it using: pip install pandas" + ) + + return DataFrame(all_rows) + + elif engine == "polars": + try: + from polars import DataFrame + except ImportError: + raise ImportError( + "Seems like you don't have polars installed. Please" + " install it using: pip install polars" + ) + + return DataFrame(all_rows) + else: + raise KloppyParameterError(f"Engine {engine} is not valid") - return from_dict( - self.to_dict(*columns, orient="list", **named_columns) - ) else: - raise KloppyParameterError(f"Engine {engine} is not valid") + raise KloppyParameterError( + f"Orient '{orient}' is not valid. 
Must be 'column' or 'row'" + ) def __repr__(self): return f"<{self.__class__.__name__} record_count={len(self.records)}>" diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index 18b99bc14..f6dd66df9 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ b/kloppy/domain/services/transformers/attribute.py @@ -363,6 +363,107 @@ def __call__(self, frame: Frame) -> Dict[str, Any]: return row +class RowWiseFrameTransformer: + def __init__( + self, + *include: str, + exclude: Optional[List[str]] = None, + ): + if include and exclude: + raise KloppyParameterError( + "Cannot specify both include as exclude" + ) + + self.exclude = exclude or [] + self.include = include or [] + + def __call__(self, frame: Frame) -> List[Dict[str, Any]]: + rows = [] + + base_data = { + "period_id": frame.period.id if frame.period else None, + "timestamp": frame.timestamp, + "frame_id": frame.frame_id, + "ball_state": frame.ball_state.value if frame.ball_state else None, + "ball_owning_team_id": ( + frame.ball_owning_team.team_id + if frame.ball_owning_team + else None + ), + } + + if frame.other_data: + base_data.update(frame.other_data) + + ball_row = base_data.copy() + ball_row.update( + { + "team_id": "ball", + "player_id": "ball", + "x": ( + frame.ball_coordinates.x + if frame.ball_coordinates + else None + ), + "y": ( + frame.ball_coordinates.y + if frame.ball_coordinates + else None + ), + "z": ( + getattr(frame.ball_coordinates, "z", None) + if frame.ball_coordinates + else None + ), + "d": None, + "s": frame.ball_speed, + } + ) + rows.append(ball_row) + + for i, (player, player_data) in enumerate(frame.players_data.items()): + player_row = base_data.copy() + player_row.update( + { + "team_id": player.team.team_id if player else None, + "player_id": player.player_id if player else None, + "x": ( + player_data.coordinates.x + if player_data.coordinates + else None + ), + "y": ( + player_data.coordinates.y + if 
player_data.coordinates + else None + ), + "z": ( + getattr(player_data.coordinates, "z", None) + if player_data.coordinates + else None + ), + "d": player_data.distance, + "s": player_data.speed, + } + ) + + if player_data.other_data: + player_row.update(player_data.other_data) + + rows.append(player_row) + + if self.include: + rows = [ + {k: row[k] for k in self.include if k in row} for row in rows + ] + elif self.exclude: + rows = [ + {k: v for k, v in row.items() if k not in self.exclude} + for row in rows + ] + return rows + + class DefaultCodeTransformer: def __init__( self, diff --git a/kloppy/domain/services/transformers/data_record.py b/kloppy/domain/services/transformers/data_record.py index 5685df07a..eff72c778 100644 --- a/kloppy/domain/services/transformers/data_record.py +++ b/kloppy/domain/services/transformers/data_record.py @@ -1,7 +1,17 @@ import sys from abc import ABC, abstractmethod from fnmatch import fnmatch -from typing import Any, Callable, Dict, Generic, Tuple, Type, TypeVar, Union +from typing import ( + Any, + Callable, + Dict, + Generic, + Tuple, + Type, + TypeVar, + Union, + List, +) if sys.version_info >= (3, 11): from typing import Unpack @@ -13,6 +23,7 @@ DefaultCodeTransformer, DefaultEventTransformer, DefaultFrameTransformer, + RowWiseFrameTransformer, ) from kloppy.exceptions import KloppyError @@ -23,8 +34,7 @@ class DataRecordToDictTransformer(ABC, Generic[T]): @abstractmethod - def default_transformer(self) -> Callable[[T], Dict]: - ... + def default_transformer(self) -> Callable[[T], Dict]: ... 
def __init__( self, @@ -98,6 +108,11 @@ def default_transformer(self) -> Callable[[Code], Dict]: return DefaultCodeTransformer() +class RowWiseFrameToDictTransformer(DataRecordToDictTransformer[Frame]): + def default_transformer(self) -> Callable[[Frame], List[Dict]]: + return RowWiseFrameTransformer() + + def get_transformer_cls( dataset_type: DatasetType, ) -> Type[DataRecordToDictTransformer]: diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index 822d1fefd..de1d191e5 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -230,6 +230,37 @@ def test_limit_sample(self, raw_data: Path, meta_data: Path): ) assert len(dataset.records) == 100 + def test_rowwise_df(self, raw_data: Path, meta_data: Path): + dataset = sportec.load_tracking( + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + only_alive=True, + limit=100, + ) + + rowwise_df = dataset.to_df(orient="row") + + assert rowwise_df.frame_id.nunique() == 100 + assert len(rowwise_df) == 256 + assert list(rowwise_df.columns) == [ + "period_id", + "timestamp", + "frame_id", + "ball_state", + "ball_owning_team_id", + "team_id", + "player_id", + "x", + "y", + "z", + "d", + "s", + ] + assert len(rowwise_df.dropna(subset=["z"])) == 100 + assert len(rowwise_df[rowwise_df["team_id"] == "ball"]) == 100 + assert rowwise_df.player_id.nunique() == 4 + def test_enriched_metadata(self, raw_data: Path, meta_data: Path): dataset = sportec.load_tracking( raw_data=raw_data, From 17766d456b7ac8250e8474322037307c97543557 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Mon, 2 Jun 2025 09:45:56 +0200 Subject: [PATCH 02/17] remove enumerate --- kloppy/domain/services/transformers/attribute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index f6dd66df9..823d40f9e 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ 
b/kloppy/domain/services/transformers/attribute.py @@ -421,7 +421,7 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: ) rows.append(ball_row) - for i, (player, player_data) in enumerate(frame.players_data.items()): + for player, player_data in frame.players_data.items(): player_row = base_data.copy() player_row.update( { From 11378a915f7316833235c8cea20bb28544fba103 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Mon, 2 Jun 2025 09:54:13 +0200 Subject: [PATCH 03/17] black --- kloppy/domain/models/common.py | 6 ++++-- kloppy/domain/services/transformers/data_record.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 9888b49cd..3b517919b 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1804,7 +1804,8 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[True] = True, **named_columns: NamedColumns, - ) -> List[Dict[str, Any]]: ... + ) -> List[Dict[str, Any]]: + ... @overload def to_records( @@ -1812,7 +1813,8 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[False] = False, **named_columns: NamedColumns, - ) -> Iterable[Dict[str, Any]]: ... + ) -> Iterable[Dict[str, Any]]: + ... def to_records( self, diff --git a/kloppy/domain/services/transformers/data_record.py b/kloppy/domain/services/transformers/data_record.py index eff72c778..507e709f2 100644 --- a/kloppy/domain/services/transformers/data_record.py +++ b/kloppy/domain/services/transformers/data_record.py @@ -34,7 +34,8 @@ class DataRecordToDictTransformer(ABC, Generic[T]): @abstractmethod - def default_transformer(self) -> Callable[[T], Dict]: ... + def default_transformer(self) -> Callable[[T], Dict]: + ... 
def __init__( self, From 12f1d646fbb8e6ab48132a07b7f0758de780bdb1 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Mon, 2 Jun 2025 12:20:30 +0200 Subject: [PATCH 04/17] add event_id to other_data, add tests for statsbomb to tracking, and change row to rows and column to columns for orient --- kloppy/_providers/statsbomb.py | 3 +- .../serializers/event/statsbomb/helpers.py | 2 +- kloppy/tests/test_statsbomb.py | 66 ++++++++++++++++++- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/kloppy/_providers/statsbomb.py b/kloppy/_providers/statsbomb.py index abcc67299..02a63dd8f 100644 --- a/kloppy/_providers/statsbomb.py +++ b/kloppy/_providers/statsbomb.py @@ -19,6 +19,7 @@ def load( coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, additional_metadata: dict = {}, + as_tracking_dataset: bool = False, ) -> EventDataset: """ Load StatsBomb event data. @@ -33,7 +34,6 @@ def load( additional_metadata: A dict with additional data that will be added to the metadata. See the [`Metadata`][kloppy.domain.Metadata] entity for a list of possible keys. - Returns: The parsed event data. """ @@ -64,6 +64,7 @@ def load_open_data( event_types: Optional[List[str]] = None, coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, + as_tracking_dataset: bool = False, ) -> EventDataset: """ Load StatsBomb open data. 
diff --git a/kloppy/infra/serializers/event/statsbomb/helpers.py b/kloppy/infra/serializers/event/statsbomb/helpers.py index 521ce2ec3..817543217 100644 --- a/kloppy/infra/serializers/event/statsbomb/helpers.py +++ b/kloppy/infra/serializers/event/statsbomb/helpers.py @@ -160,7 +160,7 @@ def get_player_from_freeze_frame(player_data, team, i): timestamp=event.timestamp, ball_state=event.ball_state, ball_owning_team=event.ball_owning_team, - other_data={"visible_area": visible_area}, + other_data={"visible_area": visible_area, "event_id": event.event_id}, ) return frame diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index c2d2e9a14..5c7309683 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -36,6 +36,7 @@ SubstitutionEvent, TakeOnResult, Time, + TrackingDataset, build_coordinate_system, ) from kloppy.domain.models import PositionType @@ -49,7 +50,7 @@ PassType, UnderPressureQualifier, ) -from kloppy.exceptions import DeserializationError +from kloppy.exceptions import DeserializationError, KloppyParameterError from kloppy.infra.serializers.event.statsbomb.helpers import parse_str_ts ENABLE_PLOTTING = True @@ -607,7 +608,7 @@ def test_correct_normalized_deserialization(self): coordinates, ) in pass_event.freeze_frame.players_coordinates.items(): coordinates_per_team[player.team.name].append(coordinates) - print(coordinates_per_team) + assert coordinates_per_team == { "Belgium": [ Point(x=0.30230680550305883, y=0.5224074534269804), @@ -1257,3 +1258,64 @@ def test_player_position(self, base_dir): PositionType.LeftMidfield, ) ] + + +class TestStatsBombAsTrackingDataset: + """Tests related to deserializing 34/Tactical Shift events""" + + def test_convert_to_tracking(self, dataset: EventDataset): + sb_tracking = TrackingDataset.from_dataset( + dataset.filter(lambda event: event.freeze_frame is not None), + lambda event: event.freeze_frame, + ) + assert len(sb_tracking) == 3346 + + with pytest.raises( + 
AttributeError, + match=r"'NoneType' object has no attribute 'player_id'", + ): + sb_tracking.to_df(orient="columns") + + with pytest.raises( + KloppyParameterError, + match=r"Row-wise format is only supported for tracking datasets, got DatasetType.EVENT", + ): + dataset.to_df(orient="rows") + + df = sb_tracking.to_df(orient="rows") + assert list(df.columns) == [ + "period_id", + "timestamp", + "frame_id", + "ball_state", + "ball_owning_team_id", + "visible_area", + "event_id", + "team_id", + "player_id", + "x", + "y", + "z", + "d", + "s", + ] + + assert ( + len( + df[df["frame_id"] == 37].drop_duplicates( + subset=["period_id", "frame_id", "event_id"] + ) + ) + == 2 + ) + + assert ( + len( + df[df["frame_id"] == 37].drop_duplicates( + subset=["period_id", "frame_id", "player_id"] + ) + ) + == 40 + ) + + assert len(df) == 54540 From c8b61294b1db10f9f951f6d6a70a5d57957b7c2f Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Mon, 2 Jun 2025 12:28:47 +0200 Subject: [PATCH 05/17] rows --- kloppy/domain/models/common.py | 6 +++--- kloppy/tests/test_sportec.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 3b517919b..8c3724490 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1891,7 +1891,7 @@ def to_df( Literal["pandas[pyarrow]"], ] ] = None, - orient: Literal["column", "row"] = "column", + orient: Literal["columns", "rows"] = "colums", **named_columns: NamedColumns, ): from kloppy.config import get_config @@ -1899,7 +1899,7 @@ def to_df( if not engine: engine = get_config("dataframe.engine") - if orient == "column": + if orient == "columns": # Original column-wise behavior using to_dict if engine == "pandas[pyarrow]": try: @@ -1957,7 +1957,7 @@ def to_df( else: raise KloppyParameterError(f"Engine {engine} is not valid") - elif orient == "row": + elif orient == "rows": # Row-wise behavior using to_dict_rowwise all_rows = 
self.to_dict_rowwise(*columns, **named_columns) diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index de1d191e5..6ca3c0405 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -239,7 +239,7 @@ def test_rowwise_df(self, raw_data: Path, meta_data: Path): limit=100, ) - rowwise_df = dataset.to_df(orient="row") + rowwise_df = dataset.to_df(orient="rows") assert rowwise_df.frame_id.nunique() == 100 assert len(rowwise_df) == 256 From d315dfbfd934672adc905e6d66ca5a1e10dcea27 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Mon, 2 Jun 2025 14:45:09 +0200 Subject: [PATCH 06/17] typo --- kloppy/domain/models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 8c3724490..aa01510a1 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1891,7 +1891,7 @@ def to_df( Literal["pandas[pyarrow]"], ] ] = None, - orient: Literal["columns", "rows"] = "colums", + orient: Literal["columns", "rows"] = "columns", **named_columns: NamedColumns, ): from kloppy.config import get_config @@ -2010,7 +2010,7 @@ def to_df( else: raise KloppyParameterError( - f"Orient '{orient}' is not valid. Must be 'column' or 'row'" + f"Orient '{orient}' is not valid. 
Must be 'columns' or 'rows'" ) def __repr__(self): From c8fcb31d628ed23e07b6a55f296563af10386f30 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Mon, 2 Jun 2025 16:03:43 +0200 Subject: [PATCH 07/17] EventDataset.to_tracking_data --- kloppy/_providers/statsbomb.py | 2 -- kloppy/domain/models/event.py | 20 ++++++++++++++++++-- kloppy/tests/test_statsbomb.py | 5 +---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/kloppy/_providers/statsbomb.py b/kloppy/_providers/statsbomb.py index 02a63dd8f..9a67a80da 100644 --- a/kloppy/_providers/statsbomb.py +++ b/kloppy/_providers/statsbomb.py @@ -19,7 +19,6 @@ def load( coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, additional_metadata: dict = {}, - as_tracking_dataset: bool = False, ) -> EventDataset: """ Load StatsBomb event data. @@ -64,7 +63,6 @@ def load_open_data( event_types: Optional[List[str]] = None, coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, - as_tracking_dataset: bool = False, ) -> EventDataset: """ Load StatsBomb open data. 
diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index aea3ea8d6..80e1f824c 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -17,7 +17,6 @@ Union, cast, ) - from kloppy.domain.models.common import ( AttackingDirection, DatasetType, @@ -40,7 +39,8 @@ if TYPE_CHECKING: from ..services.transformers.data_record import NamedColumns - from .tracking import Frame + from .tracking import Frame, TrackingDataset + QualifierValueType = TypeVar("QualifierValueType") EnumQualifierType = TypeVar("EnumQualifierType", bound=Enum) @@ -1540,6 +1540,22 @@ def aggregate(self, type_: str, **aggregator_kwargs) -> List[Any]: return aggregator.aggregate(self) + def to_tracking_data(self) -> "TrackingDataset": + from .tracking import TrackingDataset + + freeze_frames = self.filter( + lambda event: event.freeze_frame is not None + ) + if len(freeze_frames.records) == 0: + raise ValueError( + "EventDataset has 0 freeze frame records making it impossible to convert to a TrackingDataset" + ) + + return TrackingDataset.from_dataset( + freeze_frames, + lambda event: event.freeze_frame, + ) + __all__ = [ "EnumQualifier", diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 5c7309683..781f38509 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -1264,10 +1264,7 @@ class TestStatsBombAsTrackingDataset: """Tests related to deserializing 34/Tactical Shift events""" def test_convert_to_tracking(self, dataset: EventDataset): - sb_tracking = TrackingDataset.from_dataset( - dataset.filter(lambda event: event.freeze_frame is not None), - lambda event: event.freeze_frame, - ) + sb_tracking = dataset.to_tracking_data() assert len(sb_tracking) == 3346 with pytest.raises( From 8806587502b8ffeee911fd157103afaebd6e6145 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Tue, 25 Nov 2025 12:59:20 +0100 Subject: [PATCH 08/17] black --- kloppy/domain/services/transformers/data_record.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kloppy/domain/services/transformers/data_record.py b/kloppy/domain/services/transformers/data_record.py index 68e301eb3..6d94956b7 100644 --- a/kloppy/domain/services/transformers/data_record.py +++ b/kloppy/domain/services/transformers/data_record.py @@ -108,7 +108,7 @@ def default_transformer(self) -> Callable[[Code], Dict]: return DefaultCodeTransformer() -class RowWiseFrameToDictTransformer(DataRecordToDictTransformer[Frame]): +class LongLayoutFrameToDictTransformer(DataRecordToDictTransformer[Frame]): def default_transformer(self) -> Callable[[Frame], List[Dict]]: return LongLayoutFrameTransformer() From 8771ff60fc5816f809499b7ee9109151300e59fa Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Tue, 25 Nov 2025 13:04:36 +0100 Subject: [PATCH 09/17] black... --- kloppy/domain/services/transformers/data_record.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kloppy/domain/services/transformers/data_record.py b/kloppy/domain/services/transformers/data_record.py index 6d94956b7..50e8cebd6 100644 --- a/kloppy/domain/services/transformers/data_record.py +++ b/kloppy/domain/services/transformers/data_record.py @@ -34,7 +34,8 @@ class DataRecordToDictTransformer(ABC, Generic[T]): @abstractmethod - def default_transformer(self) -> Callable[[T], Dict]: ... + def default_transformer(self) -> Callable[[T], Dict]: + ... def __init__( self, From 178a42701b88f2285f4fa075d3054b52f0cb2257 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Tue, 25 Nov 2025 13:07:59 +0100 Subject: [PATCH 10/17] more black... 
--- kloppy/domain/models/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index e2e1f9f85..ab7f3dc08 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1835,7 +1835,8 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[True] = True, **named_columns: NamedColumns, - ) -> List[Dict[str, Any]]: ... + ) -> List[Dict[str, Any]]: + ... @overload def to_records( @@ -1843,7 +1844,8 @@ def to_records( *columns: Unpack[tuple[Column]], as_list: Literal[False] = False, **named_columns: NamedColumns, - ) -> Iterable[Dict[str, Any]]: ... + ) -> Iterable[Dict[str, Any]]: + ... def to_records( self, From 007edb6c5fe4b588cfb10a29d46edbf40206b796 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Tue, 16 Dec 2025 17:18:26 +0100 Subject: [PATCH 11/17] missing types --- .../domain/services/transformers/attribute.py | 89 +++++-------------- 1 file changed, 22 insertions(+), 67 deletions(-) diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index 873b93589..690b158ec 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ b/kloppy/domain/services/transformers/attribute.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod import math import sys -from typing import Any, Optional, Union +from typing import Any, Optional, Union, List, Dict from kloppy.domain import ( BodyPartQualifier, @@ -101,8 +101,7 @@ def __call__(self, event: Event) -> dict[str, Any]: event_y = event.coordinates.y goal_x = metadata.pitch_dimensions.x_dim.max goal_y = ( - metadata.pitch_dimensions.y_dim.max - + metadata.pitch_dimensions.y_dim.min + metadata.pitch_dimensions.y_dim.max + metadata.pitch_dimensions.y_dim.min ) / 2 return { @@ -123,8 +122,7 @@ def __call__(self, event: Event) -> dict[str, Any]: event_y = event.coordinates.y goal_x = metadata.pitch_dimensions.x_dim.min goal_y = ( - 
metadata.pitch_dimensions.y_dim.max - + metadata.pitch_dimensions.y_dim.min + metadata.pitch_dimensions.y_dim.max + metadata.pitch_dimensions.y_dim.min ) / 2 return { @@ -184,9 +182,7 @@ def __call__(self, event: Event) -> dict[str, Any]: end_timestamp=None, ball_state=event.ball_state.value if event.ball_state else None, ball_owning_team=( - event.ball_owning_team.team_id - if event.ball_owning_team - else None + event.ball_owning_team.team_id if event.ball_owning_team else None ), team_id=event.team.team_id if event.team else None, player_id=event.player.player_id if event.player else None, @@ -219,14 +215,10 @@ def __call__(self, event: Event) -> dict[str, Any]: { "end_timestamp": event.end_timestamp, "end_coordinates_x": ( - event.end_coordinates.x - if event.end_coordinates - else None + event.end_coordinates.x if event.end_coordinates else None ), "end_coordinates_y": ( - event.end_coordinates.y - if event.end_coordinates - else None + event.end_coordinates.y if event.end_coordinates else None ), } ) @@ -234,24 +226,16 @@ def __call__(self, event: Event) -> dict[str, Any]: row.update( { "end_coordinates_x": ( - event.result_coordinates.x - if event.result_coordinates - else None + event.result_coordinates.x if event.result_coordinates else None ), "end_coordinates_y": ( - event.result_coordinates.y - if event.result_coordinates - else None + event.result_coordinates.y if event.result_coordinates else None ), } ) elif isinstance(event, CardEvent): row.update( - { - "card_type": ( - event.card_type.value if event.card_type else None - ) - } + {"card_type": (event.card_type.value if event.card_type else None)} ) if isinstance(event, QualifierMixin) and event.qualifiers: @@ -300,16 +284,10 @@ def __call__(self, frame: Frame) -> dict[str, Any]: frame_id=frame.frame_id, ball_state=frame.ball_state.value if frame.ball_state else None, ball_owning_team_id=( - frame.ball_owning_team.team_id - if frame.ball_owning_team - else None - ), - ball_x=( - 
frame.ball_coordinates.x if frame.ball_coordinates else None - ), - ball_y=( - frame.ball_coordinates.y if frame.ball_coordinates else None + frame.ball_owning_team.team_id if frame.ball_owning_team else None ), + ball_x=(frame.ball_coordinates.x if frame.ball_coordinates else None), + ball_y=(frame.ball_coordinates.y if frame.ball_coordinates else None), ball_z=( getattr(frame.ball_coordinates, "z", None) if frame.ball_coordinates @@ -321,14 +299,10 @@ def __call__(self, frame: Frame) -> dict[str, Any]: row.update( { f"{player.player_id}_x": ( - player_data.coordinates.x - if player_data.coordinates - else None + player_data.coordinates.x if player_data.coordinates else None ), f"{player.player_id}_y": ( - player_data.coordinates.y - if player_data.coordinates - else None + player_data.coordinates.y if player_data.coordinates else None ), f"{player.player_id}_d": player_data.distance, f"{player.player_id}_s": player_data.speed, @@ -366,9 +340,7 @@ def __init__( exclude: Optional[List[str]] = None, ): if include and exclude: - raise KloppyParameterError( - "Cannot specify both include as exclude" - ) + raise KloppyParameterError("Cannot specify both include as exclude") self.exclude = exclude or [] self.include = include or [] @@ -382,9 +354,7 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: "frame_id": frame.frame_id, "ball_state": frame.ball_state.value if frame.ball_state else None, "ball_owning_team_id": ( - frame.ball_owning_team.team_id - if frame.ball_owning_team - else None + frame.ball_owning_team.team_id if frame.ball_owning_team else None ), } @@ -396,16 +366,8 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: { "team_id": "ball", "player_id": "ball", - "x": ( - frame.ball_coordinates.x - if frame.ball_coordinates - else None - ), - "y": ( - frame.ball_coordinates.y - if frame.ball_coordinates - else None - ), + "x": (frame.ball_coordinates.x if frame.ball_coordinates else None), + "y": (frame.ball_coordinates.y if 
frame.ball_coordinates else None), "z": ( getattr(frame.ball_coordinates, "z", None) if frame.ball_coordinates @@ -424,14 +386,10 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: "team_id": player.team.team_id if player else None, "player_id": player.player_id if player else None, "x": ( - player_data.coordinates.x - if player_data.coordinates - else None + player_data.coordinates.x if player_data.coordinates else None ), "y": ( - player_data.coordinates.y - if player_data.coordinates - else None + player_data.coordinates.y if player_data.coordinates else None ), "z": ( getattr(player_data.coordinates, "z", None) @@ -449,13 +407,10 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: rows.append(player_row) if self.include: - rows = [ - {k: row[k] for k in self.include if k in row} for row in rows - ] + rows = [{k: row[k] for k in self.include if k in row} for row in rows] elif self.exclude: rows = [ - {k: v for k, v in row.items() if k not in self.exclude} - for row in rows + {k: v for k, v in row.items() if k not in self.exclude} for row in rows ] return rows From 3c0555eea334d2e9a2ebdb536820896044210949 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Tue, 16 Dec 2025 17:24:43 +0100 Subject: [PATCH 12/17] idk --- .../domain/services/transformers/attribute.py | 79 ++++++++++++++----- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index 690b158ec..20338265a 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ b/kloppy/domain/services/transformers/attribute.py @@ -101,7 +101,8 @@ def __call__(self, event: Event) -> dict[str, Any]: event_y = event.coordinates.y goal_x = metadata.pitch_dimensions.x_dim.max goal_y = ( - metadata.pitch_dimensions.y_dim.max + metadata.pitch_dimensions.y_dim.min + metadata.pitch_dimensions.y_dim.max + + metadata.pitch_dimensions.y_dim.min ) / 2 return { @@ -122,7 +123,8 @@ def 
__call__(self, event: Event) -> dict[str, Any]: event_y = event.coordinates.y goal_x = metadata.pitch_dimensions.x_dim.min goal_y = ( - metadata.pitch_dimensions.y_dim.max + metadata.pitch_dimensions.y_dim.min + metadata.pitch_dimensions.y_dim.max + + metadata.pitch_dimensions.y_dim.min ) / 2 return { @@ -182,7 +184,9 @@ def __call__(self, event: Event) -> dict[str, Any]: end_timestamp=None, ball_state=event.ball_state.value if event.ball_state else None, ball_owning_team=( - event.ball_owning_team.team_id if event.ball_owning_team else None + event.ball_owning_team.team_id + if event.ball_owning_team + else None ), team_id=event.team.team_id if event.team else None, player_id=event.player.player_id if event.player else None, @@ -215,10 +219,14 @@ def __call__(self, event: Event) -> dict[str, Any]: { "end_timestamp": event.end_timestamp, "end_coordinates_x": ( - event.end_coordinates.x if event.end_coordinates else None + event.end_coordinates.x + if event.end_coordinates + else None ), "end_coordinates_y": ( - event.end_coordinates.y if event.end_coordinates else None + event.end_coordinates.y + if event.end_coordinates + else None ), } ) @@ -226,16 +234,24 @@ def __call__(self, event: Event) -> dict[str, Any]: row.update( { "end_coordinates_x": ( - event.result_coordinates.x if event.result_coordinates else None + event.result_coordinates.x + if event.result_coordinates + else None ), "end_coordinates_y": ( - event.result_coordinates.y if event.result_coordinates else None + event.result_coordinates.y + if event.result_coordinates + else None ), } ) elif isinstance(event, CardEvent): row.update( - {"card_type": (event.card_type.value if event.card_type else None)} + { + "card_type": ( + event.card_type.value if event.card_type else None + ) + } ) if isinstance(event, QualifierMixin) and event.qualifiers: @@ -284,10 +300,16 @@ def __call__(self, frame: Frame) -> dict[str, Any]: frame_id=frame.frame_id, ball_state=frame.ball_state.value if frame.ball_state else 
None, ball_owning_team_id=( - frame.ball_owning_team.team_id if frame.ball_owning_team else None + frame.ball_owning_team.team_id + if frame.ball_owning_team + else None + ), + ball_x=( + frame.ball_coordinates.x if frame.ball_coordinates else None + ), + ball_y=( + frame.ball_coordinates.y if frame.ball_coordinates else None ), - ball_x=(frame.ball_coordinates.x if frame.ball_coordinates else None), - ball_y=(frame.ball_coordinates.y if frame.ball_coordinates else None), ball_z=( getattr(frame.ball_coordinates, "z", None) if frame.ball_coordinates @@ -299,10 +321,14 @@ def __call__(self, frame: Frame) -> dict[str, Any]: row.update( { f"{player.player_id}_x": ( - player_data.coordinates.x if player_data.coordinates else None + player_data.coordinates.x + if player_data.coordinates + else None ), f"{player.player_id}_y": ( - player_data.coordinates.y if player_data.coordinates else None + player_data.coordinates.y + if player_data.coordinates + else None ), f"{player.player_id}_d": player_data.distance, f"{player.player_id}_s": player_data.speed, @@ -354,7 +380,9 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: "frame_id": frame.frame_id, "ball_state": frame.ball_state.value if frame.ball_state else None, "ball_owning_team_id": ( - frame.ball_owning_team.team_id if frame.ball_owning_team else None + frame.ball_owning_team.team_id + if frame.ball_owning_team + else None ), } @@ -366,8 +394,12 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: { "team_id": "ball", "player_id": "ball", - "x": (frame.ball_coordinates.x if frame.ball_coordinates else None), - "y": (frame.ball_coordinates.y if frame.ball_coordinates else None), + "x": ( + frame.ball_coordinates.x if frame.ball_coordinates else None + ), + "y": ( + frame.ball_coordinates.y if frame.ball_coordinates else None + ), "z": ( getattr(frame.ball_coordinates, "z", None) if frame.ball_coordinates @@ -386,10 +418,14 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: "team_id": 
player.team.team_id if player else None, "player_id": player.player_id if player else None, "x": ( - player_data.coordinates.x if player_data.coordinates else None + player_data.coordinates.x + if player_data.coordinates + else None ), "y": ( - player_data.coordinates.y if player_data.coordinates else None + player_data.coordinates.y + if player_data.coordinates + else None ), "z": ( getattr(player_data.coordinates, "z", None) @@ -407,10 +443,13 @@ def __call__(self, frame: Frame) -> List[Dict[str, Any]]: rows.append(player_row) if self.include: - rows = [{k: row[k] for k in self.include if k in row} for row in rows] + rows = [ + {k: row[k] for k in self.include if k in row} for row in rows + ] elif self.exclude: rows = [ - {k: v for k, v in row.items() if k not in self.exclude} for row in rows + {k: v for k, v in row.items() if k not in self.exclude} + for row in rows ] return rows From e27dd31bd6c5cf417f6aa87468feb6f513ec69b8 Mon Sep 17 00:00:00 2001 From: "UnravelSports [JB]" Date: Wed, 17 Dec 2025 11:07:01 +0100 Subject: [PATCH 13/17] ruff --- kloppy/domain/models/common.py | 15 ++++++--------- kloppy/domain/models/event.py | 4 +++- kloppy/domain/services/transformers/attribute.py | 6 +++--- .../domain/services/transformers/data_record.py | 7 ++----- kloppy/tests/test_statsbomb.py | 1 - 5 files changed, 14 insertions(+), 19 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 0aae90f3a..dc3ba5d53 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1883,7 +1883,7 @@ def to_dict_long( self, *columns: Unpack[tuple[Column]], **named_columns: NamedColumns, - ) -> List[Dict[str, Any]]: + ) -> list[dict[str, Any]]: if self.dataset_type != DatasetType.TRACKING: raise KloppyParameterError( f"Row-wise format is only supported for tracking datasets, " @@ -1906,14 +1906,11 @@ def to_dict_long( def to_df( self, *columns: Unpack[tuple[Column]], - engine: Optional[ - Union[ - Literal["polars"], - 
Literal["pandas"], - Literal["pandas[pyarrow]"], - ] - ] = None, - layout: Literal["wide", "long"] = "wide", + engine: Literal["polars"] + | Literal["pandas"] + | Literal["pandas[pyarrow]"] + | None = None, + layout: Literal["wide"] | Literal["long"] = "wide", **named_columns: NamedColumns, ): from kloppy.config import get_config diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 571b7f206..7f63f7bc4 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -14,6 +14,8 @@ Union, cast, ) +import warnings + from kloppy.domain.models.common import ( AttackingDirection, DatasetType, @@ -1531,7 +1533,7 @@ def aggregate(self, type_: str, **aggregator_kwargs) -> list[Any]: return aggregator.aggregate(self) - def to_tracking_data(self) -> "TrackingDataset": + def to_tracking_data(self) -> TrackingDataset: from .tracking import TrackingDataset freeze_frames = self.filter( diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index 20338265a..a6d2830ba 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ b/kloppy/domain/services/transformers/attribute.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod import math import sys -from typing import Any, Optional, Union, List, Dict +from typing import Any, Optional, Union from kloppy.domain import ( BodyPartQualifier, @@ -363,7 +363,7 @@ class LongLayoutFrameTransformer: def __init__( self, *include: str, - exclude: Optional[List[str]] = None, + exclude: Optional[list[str]] = None, ): if include and exclude: raise KloppyParameterError("Cannot specify both include as exclude") @@ -371,7 +371,7 @@ def __init__( self.exclude = exclude or [] self.include = include or [] - def __call__(self, frame: Frame) -> List[Dict[str, Any]]: + def __call__(self, frame: Frame) -> list[dict[str, Any]]: rows = [] base_data = { diff --git a/kloppy/domain/services/transformers/data_record.py 
b/kloppy/domain/services/transformers/data_record.py index 677e6d671..a0d754437 100644 --- a/kloppy/domain/services/transformers/data_record.py +++ b/kloppy/domain/services/transformers/data_record.py @@ -1,15 +1,12 @@ from abc import ABC, abstractmethod from fnmatch import fnmatch +import sys from typing import ( Any, Callable, - Dict, Generic, - Tuple, - Type, TypeVar, Union, - List, ) if sys.version_info >= (3, 11): @@ -106,7 +103,7 @@ def default_transformer(self) -> Callable[[Code], dict]: class LongLayoutFrameToDictTransformer(DataRecordToDictTransformer[Frame]): - def default_transformer(self) -> Callable[[Frame], List[Dict]]: + def default_transformer(self) -> Callable[[Frame], list[dict]]: return LongLayoutFrameTransformer() diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 1d1b1ab61..8353ad74c 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -35,7 +35,6 @@ SubstitutionEvent, TakeOnResult, Time, - TrackingDataset, build_coordinate_system, ) from kloppy.domain.models.event import ( From 66f815a6e0e71f15bac7989b1f0c5e4354e024b7 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 23 Dec 2025 10:33:46 +0100 Subject: [PATCH 14/17] refactor: remove to_tracking_dataset --- kloppy/_providers/statsbomb.py | 1 + kloppy/domain/models/event.py | 18 +----- .../serializers/event/statsbomb/helpers.py | 2 +- kloppy/tests/test_statsbomb.py | 60 +------------------ 4 files changed, 4 insertions(+), 77 deletions(-) diff --git a/kloppy/_providers/statsbomb.py b/kloppy/_providers/statsbomb.py index e8d33acba..c2845d92e 100644 --- a/kloppy/_providers/statsbomb.py +++ b/kloppy/_providers/statsbomb.py @@ -34,6 +34,7 @@ def load( additional_metadata: A dict with additional data that will be added to the metadata. See the [`Metadata`][kloppy.domain.Metadata] entity for a list of possible keys. + Returns: The parsed event data. 
""" diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index 2f5731ba6..1509fa8eb 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -38,7 +38,7 @@ from pandas import DataFrame from ..services.transformers.data_record import NamedColumns - from .tracking import Frame, TrackingDataset + from .tracking import Frame QualifierValueType = TypeVar("QualifierValueType") @@ -1539,22 +1539,6 @@ def aggregate(self, type_: str, **aggregator_kwargs) -> list[Any]: return aggregator.aggregate(self) - def to_tracking_data(self) -> "TrackingDataset": - from .tracking import TrackingDataset - - freeze_frames = self.filter( - lambda event: event.freeze_frame is not None - ) - if len(freeze_frames.records) == 0: - raise ValueError( - "EventDataset has 0 freeze frame records making it impossible to convert to a TrackingDataset" - ) - - return TrackingDataset.from_dataset( - freeze_frames, - lambda event: event.freeze_frame, - ) - __all__ = [ "EnumQualifier", diff --git a/kloppy/infra/serializers/event/statsbomb/helpers.py b/kloppy/infra/serializers/event/statsbomb/helpers.py index 477e3ebe9..757e33c7d 100644 --- a/kloppy/infra/serializers/event/statsbomb/helpers.py +++ b/kloppy/infra/serializers/event/statsbomb/helpers.py @@ -156,7 +156,7 @@ def get_player_from_freeze_frame(player_data, team, i): timestamp=event.timestamp, ball_state=event.ball_state, ball_owning_team=event.ball_owning_team, - other_data={"visible_area": visible_area, "event_id": event.event_id}, + other_data={"visible_area": visible_area}, ) return frame diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 8353ad74c..3aec36dab 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -47,7 +47,7 @@ PassType, UnderPressureQualifier, ) -from kloppy.exceptions import DeserializationError, KloppyParameterError +from kloppy.exceptions import DeserializationError from 
kloppy.infra.serializers.event.statsbomb.helpers import parse_str_ts import kloppy.infra.serializers.event.statsbomb.specification as SB @@ -1264,61 +1264,3 @@ def test_player_position(self, base_dir): PositionType.LeftMidfield, ) ] - - -class TestStatsBombAsTrackingDataset: - """Tests related to deserializing 34/Tactical Shift events""" - - def test_convert_to_tracking(self, dataset: EventDataset): - sb_tracking = dataset.to_tracking_data() - assert len(sb_tracking) == 3346 - - with pytest.raises( - AttributeError, - match=r"'NoneType' object has no attribute 'player_id'", - ): - sb_tracking.to_df(layout="wide") - - with pytest.raises( - KloppyParameterError, - match=r"Row-wise format is only supported for tracking datasets, got DatasetType.EVENT", - ): - dataset.to_df(layout="long") - - df = sb_tracking.to_df(layout="long") - assert list(df.columns) == [ - "period_id", - "timestamp", - "frame_id", - "ball_state", - "ball_owning_team_id", - "visible_area", - "event_id", - "team_id", - "player_id", - "x", - "y", - "z", - "d", - "s", - ] - - assert ( - len( - df[df["frame_id"] == 37].drop_duplicates( - subset=["period_id", "frame_id", "event_id"] - ) - ) - == 2 - ) - - assert ( - len( - df[df["frame_id"] == 37].drop_duplicates( - subset=["period_id", "frame_id", "player_id"] - ) - ) - == 40 - ) - - assert len(df) == 54540 From e21b9ca3a5f77519bdbb32ec19ad497534fc825b Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 23 Dec 2025 17:06:16 +0100 Subject: [PATCH 15/17] refactor: services.transformers.data_record --- kloppy/domain/models/common.py | 217 +++---- kloppy/domain/services/__init__.py | 2 +- .../domain/services/transformers/__init__.py | 4 +- .../domain/services/transformers/attribute.py | 370 ++---------- .../services/transformers/data_record.py | 534 +++++++++++++++--- .../domain/services/transformers/dataset.py | 60 ++ kloppy/tests/test_helpers.py | 45 +- kloppy/tests/test_metadata.py | 2 +- kloppy/tests/test_sportec.py | 66 --- 9 files changed, 
677 insertions(+), 623 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 6cddeeee1..3db17539b 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1,9 +1,9 @@ from abc import ABC, abstractmethod -from collections import defaultdict from collections.abc import Iterable from dataclasses import dataclass, field, replace from datetime import datetime, timedelta from enum import Enum, Flag +from itertools import chain import sys from typing import ( TYPE_CHECKING, @@ -1829,6 +1829,7 @@ def to_records( self, *columns: Unpack[tuple["Column"]], as_list: Literal[True] = True, + layout: Optional[str] = None, **named_columns: "NamedColumns", ) -> list[dict[str, Any]]: ... @@ -1836,6 +1837,7 @@ def to_records( def to_records( self, *columns: Unpack[tuple["Column"]], + layout: Optional[str] = None, as_list: Literal[False] = False, **named_columns: "NamedColumns", ) -> Iterable[dict[str, Any]]: ... @@ -1843,15 +1845,16 @@ def to_records( def to_records( self, *columns: Unpack[tuple["Column"]], + layout: Optional[str] = None, as_list: bool = True, **named_columns: "NamedColumns", ) -> Union[list[dict[str, Any]], Iterable[dict[str, Any]]]: from ..services.transformers.data_record import get_transformer_cls - transformer = get_transformer_cls(self.dataset_type)( + transformer = get_transformer_cls(self.dataset_type, layout=layout)( *columns, **named_columns ) - iterator = map(transformer, self.records) + iterator = chain.from_iterable(map(transformer, self.records)) if as_list: return list(iterator) else: @@ -1861,57 +1864,46 @@ def to_dict( self, *columns: Unpack[tuple["Column"]], orient: Literal["list"] = "list", + layout: Optional[str] = None, **named_columns: "NamedColumns", ) -> dict[str, list[Any]]: if orient == "list": from ..services.transformers.data_record import get_transformer_cls - transformer = get_transformer_cls(self.dataset_type)( + transformer = get_transformer_cls(self.dataset_type, 
layout=layout)( *columns, **named_columns ) - c = len(self.records) - items = defaultdict(lambda: [None] * c) - for i, record in enumerate(self.records): - item = transformer(record) - for k, v in item.items(): - items[k][i] = v + result = {} + for record_idx, record in enumerate(self.records): + items = transformer(record) - return items + for item_idx, item in enumerate(items): + seen_keys = set() + for k, v in item.items(): + # If this is a new key, backfill previous records + if k not in result: + result[k] = [None] * (record_idx + item_idx) + + result[k].append(v) + seen_keys.add(k) + + # Pad keys that were not seen in this record + for k in result: + if k not in seen_keys: + result[k].append(None) + + return dict(result) else: raise KloppyParameterError( f"Orient {orient} is not supported. Only orient='list' is supported" ) - def to_dict_long( - self, - *columns: Unpack[tuple["Column"]], - **named_columns: "NamedColumns", - ) -> list[dict[str, Any]]: - if self.dataset_type != DatasetType.TRACKING: - raise KloppyParameterError( - f"Row-wise format is only supported for tracking datasets, " - f"got {self.dataset_type}" - ) - - from ..services.transformers.data_record import ( - LongLayoutFrameTransformer, - ) - - transformer = LongLayoutFrameTransformer(*columns, **named_columns) - - all_rows = [] - for record in self.records: - rows = transformer(record) - all_rows.extend(rows) - - return all_rows - def to_df( self, *columns: Unpack[tuple["Column"]], engine: Optional[Literal["polars", "pandas", "pandas[pyarrow]"]] = None, - layout: Literal["wide", "long"] = "wide", + layout: Optional[str] = None, **named_columns: "NamedColumns", ): from kloppy.config import get_config @@ -1919,118 +1911,67 @@ def to_df( if not engine: engine = get_config("dataframe.engine") - if layout == "wide": - # Original column-wise behavior using to_dict - if engine == "pandas[pyarrow]": - try: - import pandas as pd - - types_mapper = pd.ArrowDtype - except ImportError: - raise 
ImportError( - "Seems like you don't have pandas installed. Please" - " install it using: pip install pandas" - ) - except AttributeError: - raise AttributeError( - "Seems like you have an older version of pandas installed. Please" - " upgrade to at least 1.5 using: pip install pandas>=1.5" - ) - - try: - import pyarrow as pa - except ImportError: - raise ImportError( - "Seems like you don't have pyarrow installed. Please" - " install it using: pip install pyarrow" - ) + if engine == "pandas[pyarrow]": + try: + import pandas as pd - table = pa.Table.from_pydict( - self.to_dict(*columns, orient="list", **named_columns) + types_mapper = pd.ArrowDtype + except ImportError: + raise ImportError( + "Seems like you don't have pandas installed. Please" + " install it using: pip install pandas" ) - return table.to_pandas(types_mapper=types_mapper) - - elif engine == "pandas": - try: - from pandas import DataFrame - except ImportError: - raise ImportError( - "Seems like you don't have pandas installed. Please" - " install it using: pip install pandas" - ) - - return DataFrame.from_dict( - self.to_dict(*columns, orient="list", **named_columns) + except AttributeError: + raise AttributeError( + "Seems like you have an older version of pandas installed. Please" + " upgrade to at least 1.5 using: pip install pandas>=1.5" ) - elif engine == "polars": - try: - from polars import from_dict - except ImportError: - raise ImportError( - "Seems like you don't have polars installed. Please" - " install it using: pip install polars" - ) - return from_dict( - self.to_dict(*columns, orient="list", **named_columns) + try: + import pyarrow as pa + except ImportError: + raise ImportError( + "Seems like you don't have pyarrow installed. 
Please" + " install it using: pip install pyarrow" ) - else: - raise KloppyParameterError(f"Engine {engine} is not valid") - elif layout == "long": - all_rows = self.to_dict_long(*columns, **named_columns) - - if engine == "pandas[pyarrow]": - try: - import pandas as pd - import pyarrow as pa - - types_mapper = pd.ArrowDtype - except ImportError: - raise ImportError( - "Seems like you don't have pandas and pyarrow installed. Please" - " install them using: pip install pandas pyarrow" - ) - - # Convert list of dicts to dict of lists for pyarrow - if all_rows: - keys = all_rows[0].keys() - data_dict = { - key: [row.get(key) for row in all_rows] for key in keys - } - table = pa.Table.from_pydict(data_dict) - return table.to_pandas(types_mapper=types_mapper) - else: - return pd.DataFrame() - - elif engine == "pandas": - try: - from pandas import DataFrame - except ImportError: - raise ImportError( - "Seems like you don't have pandas installed. Please" - " install it using: pip install pandas" - ) - - return DataFrame(all_rows) + table = pa.Table.from_pydict( + self.to_dict( + *columns, orient="list", layout=layout, **named_columns + ) + ) + return table.to_pandas(types_mapper=types_mapper) - elif engine == "polars": - try: - from polars import DataFrame - except ImportError: - raise ImportError( - "Seems like you don't have polars installed. Please" - " install it using: pip install polars" - ) + elif engine == "pandas": + try: + from pandas import DataFrame + except ImportError: + raise ImportError( + "Seems like you don't have pandas installed. Please" + " install it using: pip install pandas" + ) - return DataFrame(all_rows) - else: - raise KloppyParameterError(f"Engine {engine} is not valid") + return DataFrame.from_dict( + self.to_dict( + *columns, orient="list", layout=layout, **named_columns + ) + ) + elif engine == "polars": + try: + from polars import from_dict + except ImportError: + raise ImportError( + "Seems like you don't have polars installed. 
Please" + " install it using: pip install polars" + ) - else: - raise KloppyParameterError( - f"layout '{layout}' is not valid. Must be 'wide' or 'long'" + return from_dict( + self.to_dict( + *columns, orient="list", layout=layout, **named_columns + ) ) + else: + raise KloppyParameterError(f"Engine {engine} is not valid") def __repr__(self): return f"<{self.__class__.__name__} record_count={len(self.records)}>" diff --git a/kloppy/domain/services/__init__.py b/kloppy/domain/services/__init__.py index d38a26cb2..9cd5b6608 100644 --- a/kloppy/domain/services/__init__.py +++ b/kloppy/domain/services/__init__.py @@ -3,7 +3,7 @@ from kloppy.domain import AttackingDirection, Frame, Ground, Period from .event_factory import EventFactory, create_event -from .transformers import DatasetTransformer, DatasetTransformerBuilder +from .transformers.dataset import DatasetTransformer, DatasetTransformerBuilder # NOT YET: from .enrichers import TrackingPossessionEnricher diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py index 18ea0c75c..4b8b66a67 100644 --- a/kloppy/domain/services/transformers/__init__.py +++ b/kloppy/domain/services/transformers/__init__.py @@ -1,3 +1,3 @@ -from .dataset import DatasetTransformer, DatasetTransformerBuilder +from . import attribute, data_record, dataset -__all__ = ["DatasetTransformer", "DatasetTransformerBuilder"] +__all__ = ["dataset", "data_record", "attribute"] diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index a6d2830ba..d27d7fee2 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ b/kloppy/domain/services/transformers/attribute.py @@ -1,28 +1,51 @@ +"""Event Attribute Transformation. + +This module provides tools to extract, calculate, and encode features from +individual `Event` objects. 
These transformers are designed to enrich event data +with derived metrics (like distance to goal) or categorical encodings (like +one-hot encoded body parts) for downstream analysis or machine learning tasks. + +Examples: + **1. Calculating Distances and Angles** + Compute spatial metrics for an event relative to the goal. + + >>> from kloppy.domain.models.event import ShotEvent + >>> # event is a ShotEvent derived from a dataset with ACTION_EXECUTING_TEAM orientation + >>> + >>> dist_transformer = DistanceToGoalTransformer() + >>> angle_transformer = AngleToGoalTransformer() + >>> + >>> features = {} + >>> features.update(dist_transformer(event)) + >>> features.update(angle_transformer(event)) + >>> # features: {'distance_to_goal': 16.5, 'angle_to_goal': 25.4} + + **2. Encoding Qualifiers (Body Parts)** + Convert categorical body part qualifiers into one-hot encoded columns. + + >>> from kloppy.domain import BodyPartQualifier + >>> # event has a qualifier BodyPartQualifier(value=BodyPart.HEAD) + >>> + >>> transformer = BodyPartTransformer() + >>> encoded = transformer(event) + >>> # encoded: {'is_body_part_head': True, 'is_body_part_foot_right': False, ...} +""" + from abc import ABC, abstractmethod import math import sys -from typing import Any, Optional, Union +from typing import Any, Union from kloppy.domain import ( BodyPartQualifier, - Code, Event, - Frame, Orientation, Point, - QualifierMixin, - ResultMixin, ) from kloppy.domain.models.event import ( - CardEvent, - CarryEvent, EnumQualifier, - EventType, - PassEvent, - ShotEvent, ) from kloppy.exceptions import ( - KloppyParameterError, OrientationError, UnknownEncoderError, ) @@ -159,329 +182,4 @@ def __call__(self, event: Event) -> dict[str, Any]: return _Transformer -class DefaultEventTransformer(EventAttributeTransformer): - def __init__( - self, - *include: str, - exclude: Optional[list[str]] = None, - ): - if include and exclude: - raise KloppyParameterError("Cannot specify both include as exclude") - 
- self.exclude = exclude or [] - self.include = include or [] - - def __call__(self, event: Event) -> dict[str, Any]: - row = dict( - event_id=event.event_id, - event_type=( - event.event_type.value - if event.event_type != EventType.GENERIC - else f"GENERIC:{event.event_name}" - ), - period_id=event.period.id, - timestamp=event.timestamp, - end_timestamp=None, - ball_state=event.ball_state.value if event.ball_state else None, - ball_owning_team=( - event.ball_owning_team.team_id - if event.ball_owning_team - else None - ), - team_id=event.team.team_id if event.team else None, - player_id=event.player.player_id if event.player else None, - coordinates_x=event.coordinates.x if event.coordinates else None, - coordinates_y=event.coordinates.y if event.coordinates else None, - ) - if isinstance(event, PassEvent): - row.update( - { - "end_timestamp": event.receive_timestamp, - "end_coordinates_x": ( - event.receiver_coordinates.x - if event.receiver_coordinates - else None - ), - "end_coordinates_y": ( - event.receiver_coordinates.y - if event.receiver_coordinates - else None - ), - "receiver_player_id": ( - event.receiver_player.player_id - if event.receiver_player - else None - ), - } - ) - elif isinstance(event, CarryEvent): - row.update( - { - "end_timestamp": event.end_timestamp, - "end_coordinates_x": ( - event.end_coordinates.x - if event.end_coordinates - else None - ), - "end_coordinates_y": ( - event.end_coordinates.y - if event.end_coordinates - else None - ), - } - ) - elif isinstance(event, ShotEvent): - row.update( - { - "end_coordinates_x": ( - event.result_coordinates.x - if event.result_coordinates - else None - ), - "end_coordinates_y": ( - event.result_coordinates.y - if event.result_coordinates - else None - ), - } - ) - elif isinstance(event, CardEvent): - row.update( - { - "card_type": ( - event.card_type.value if event.card_type else None - ) - } - ) - - if isinstance(event, QualifierMixin) and event.qualifiers: - for qualifier in 
event.qualifiers: - row.update(qualifier.to_dict()) - - if isinstance(event, ResultMixin) and event.result is not None: - row.update( - { - "result": event.result.value, - "success": event.result.is_success, - } - ) - else: - row.update( - { - "result": None, - "success": None, - } - ) - - if self.include: - return {k: row[k] for k in self.include} - elif self.exclude: - return {k: v for k, v in row.items() if k not in self.exclude} - else: - return row - - -class DefaultFrameTransformer: - def __init__( - self, - *include: str, - exclude: Optional[list[str]] = None, - ): - if include and exclude: - raise KloppyParameterError("Cannot specify both include as exclude") - - self.exclude = exclude or [] - self.include = include or [] - - def __call__(self, frame: Frame) -> dict[str, Any]: - row = dict( - period_id=frame.period.id if frame.period else None, - timestamp=frame.timestamp, - frame_id=frame.frame_id, - ball_state=frame.ball_state.value if frame.ball_state else None, - ball_owning_team_id=( - frame.ball_owning_team.team_id - if frame.ball_owning_team - else None - ), - ball_x=( - frame.ball_coordinates.x if frame.ball_coordinates else None - ), - ball_y=( - frame.ball_coordinates.y if frame.ball_coordinates else None - ), - ball_z=( - getattr(frame.ball_coordinates, "z", None) - if frame.ball_coordinates - else None - ), - ball_speed=frame.ball_speed, - ) - for player, player_data in frame.players_data.items(): - row.update( - { - f"{player.player_id}_x": ( - player_data.coordinates.x - if player_data.coordinates - else None - ), - f"{player.player_id}_y": ( - player_data.coordinates.y - if player_data.coordinates - else None - ), - f"{player.player_id}_d": player_data.distance, - f"{player.player_id}_s": player_data.speed, - } - ) - - if player_data.other_data: - for name, value in player_data.other_data.items(): - row.update( - { - f"{player.player_id}_{name}": value, - } - ) - - if frame.other_data: - for name, value in frame.other_data.items(): - 
row.update( - { - name: value, - } - ) - - if self.include: - return {k: row[k] for k in self.include} - elif self.exclude: - return {k: v for k, v in row.items() if k not in self.exclude} - else: - return row - - -class LongLayoutFrameTransformer: - def __init__( - self, - *include: str, - exclude: Optional[list[str]] = None, - ): - if include and exclude: - raise KloppyParameterError("Cannot specify both include as exclude") - - self.exclude = exclude or [] - self.include = include or [] - - def __call__(self, frame: Frame) -> list[dict[str, Any]]: - rows = [] - - base_data = { - "period_id": frame.period.id if frame.period else None, - "timestamp": frame.timestamp, - "frame_id": frame.frame_id, - "ball_state": frame.ball_state.value if frame.ball_state else None, - "ball_owning_team_id": ( - frame.ball_owning_team.team_id - if frame.ball_owning_team - else None - ), - } - - if frame.other_data: - base_data.update(frame.other_data) - - ball_row = base_data.copy() - ball_row.update( - { - "team_id": "ball", - "player_id": "ball", - "x": ( - frame.ball_coordinates.x if frame.ball_coordinates else None - ), - "y": ( - frame.ball_coordinates.y if frame.ball_coordinates else None - ), - "z": ( - getattr(frame.ball_coordinates, "z", None) - if frame.ball_coordinates - else None - ), - "d": None, - "s": frame.ball_speed, - } - ) - rows.append(ball_row) - - for player, player_data in frame.players_data.items(): - player_row = base_data.copy() - player_row.update( - { - "team_id": player.team.team_id if player else None, - "player_id": player.player_id if player else None, - "x": ( - player_data.coordinates.x - if player_data.coordinates - else None - ), - "y": ( - player_data.coordinates.y - if player_data.coordinates - else None - ), - "z": ( - getattr(player_data.coordinates, "z", None) - if player_data.coordinates - else None - ), - "d": player_data.distance, - "s": player_data.speed, - } - ) - - if player_data.other_data: - player_row.update(player_data.other_data) - 
- rows.append(player_row) - - if self.include: - rows = [ - {k: row[k] for k in self.include if k in row} for row in rows - ] - elif self.exclude: - rows = [ - {k: v for k, v in row.items() if k not in self.exclude} - for row in rows - ] - return rows - - -class DefaultCodeTransformer: - def __init__( - self, - *include: str, - exclude: Optional[list[str]] = None, - ): - if include and exclude: - raise KloppyParameterError("Cannot specify both include as exclude") - - self.exclude = exclude or [] - self.include = include or [] - - def __call__(self, code: Code) -> dict[str, Any]: - row = dict( - code_id=code.code_id, - period_id=code.period.id if code.period else None, - timestamp=code.timestamp, - end_timestamp=code.end_timestamp, - code=code.code, - ) - row.update(code.labels) - - if self.include: - return {k: row[k] for k in self.include} - elif self.exclude: - return {k: v for k, v in row.items() if k not in self.exclude} - else: - return row - - BodyPartTransformer = create_transformer_from_qualifier(BodyPartQualifier) diff --git a/kloppy/domain/services/transformers/data_record.py b/kloppy/domain/services/transformers/data_record.py index a0d754437..61ef94346 100644 --- a/kloppy/domain/services/transformers/data_record.py +++ b/kloppy/domain/services/transformers/data_record.py @@ -1,3 +1,47 @@ +"""Data Record Transformation. + +This module provides tools for transforming kloppy DataRecord objects (such as +`Event`, `Frame`, and `Code`) into alternative formats like dictionaries, JSON +strings, or custom data structures. + +It separates **data extraction** (getting values from the object) from +**formatting** (structuring the output). This allows the same underlying +extraction logic to support multiple layouts (e.g., wide vs. long for tracking +data formats) and target types (e.g., `dict` vs. `str`). + +Key Components: + TransformerRegistry: Maps `DatasetType` and layout names to transformer classes. 
+ DataRecordTransformer: Generic base class for extraction, filtering, and formatting. + register_transformer: Decorator to register new transformers. + +Examples: + **1. Basic Transformation** + Get a transformer for a specific DatasetType and convert a record to a list of dicts. + + >>> from kloppy.domain import DatasetType + >>> from kloppy.domain.services.transformers.data_record import get_transformer_cls + >>> # Default layout is implied + >>> cls = get_transformer_cls(DatasetType.EVENT) + >>> transformer = cls() + >>> data = transformer(event) + >>> # Result: [{'event_id': '...', 'timestamp': 0.1, ...}] + + **2. Column Selection & Wildcards** + Filter the output using specific column names or wildcards. + + >>> # Select 'event_id' and any column containing 'coordinates' + >>> transformer = cls("event_id", "*coordinates*") + >>> data = transformer(event) + + **3. Custom Formatting (e.g., JSON)** + Change the output type by providing a `formatter` callable. + + >>> import json + >>> # Returns a list of JSON strings instead of dicts + >>> transformer = cls(formatter=json.dumps) + >>> json_data = transformer(event) +""" + from abc import ABC, abstractmethod from fnmatch import fnmatch import sys @@ -5,6 +49,7 @@ Any, Callable, Generic, + Optional, TypeVar, Union, ) @@ -14,105 +59,438 @@ else: from typing_extensions import Unpack -from kloppy.domain import Code, DataRecord, DatasetType, Event, Frame -from kloppy.domain.services.transformers.attribute import ( - DefaultCodeTransformer, - DefaultEventTransformer, - DefaultFrameTransformer, - LongLayoutFrameTransformer, +from kloppy.domain import ( + Code, + DataRecord, + DatasetType, + Event, + Frame, + QualifierMixin, + ResultMixin, +) +from kloppy.domain.models.event import ( + CardEvent, + CarryEvent, + EventType, + PassEvent, + ShotEvent, ) from kloppy.exceptions import KloppyError -T = TypeVar("T", bound=DataRecord) -Column = Union[str, Callable[[T], Any]] -NamedColumns = dict[str, Column] +# --- 
Type Definitions --- +RecordT = TypeVar("RecordT", bound=DataRecord) +OutputT = TypeVar("OutputT") -class DataRecordToDictTransformer(ABC, Generic[T]): - @abstractmethod - def default_transformer(self) -> Callable[[T], dict]: ... +# A column can be a name (str) or a logical function +ColumnSelector = Union[str, Callable[[RecordT], Any]] +NamedColumnDefinitions = dict[str, Union[Any, Callable[[RecordT], Any]]] + + +# --- Registry System --- + + +class TransformerRegistry: + """Central registry for DataRecord transformers.""" + + def __init__(self): + # Structure: DatasetType -> Layout -> TransformerClass + self._registry: dict[ + DatasetType, dict[str, type[DataRecordTransformer]] + ] = {} + + def register( + self, + dataset_type: DatasetType, + layout: Union[str, list[str]] = "default", + ): + """Decorator to register a transformer class.""" + layouts = [layout] if isinstance(layout, str) else layout + + def wrapper(cls): + if dataset_type not in self._registry: + self._registry[dataset_type] = {} + + for layout_name in layouts: + current_map = self._registry[dataset_type] + + # Idempotency check + if layout_name in current_map: + existing = current_map[layout_name] + if existing != cls: + raise KloppyError( + f"Conflict: '{layout_name}' for {dataset_type} is already " + f"registered to {existing.__name__}." + ) + + current_map[layout_name] = cls + return cls + + return wrapper + + def get_class( + self, dataset_type: DatasetType, layout: Optional[str] = "default" + ) -> type["DataRecordTransformer"]: + """Retrieve a transformer class by type and layout.""" + layout = layout or "default" + + if dataset_type not in self._registry: + raise KloppyError( + f"No transformers registered for dataset type: {dataset_type}" + ) + + available = self._registry[dataset_type] + if layout not in available: + raise KloppyError( + f"Layout '{layout}' not found for {dataset_type}. 
" + f"Available: {list(available.keys())}" + ) + return available[layout] + + +# Global instance +_REGISTRY = TransformerRegistry() + +# Public API aliases for explicit naming +register_transformer = _REGISTRY.register +get_transformer_cls = _REGISTRY.get_class + + +# --- Base Transformer --- + + +class DataRecordTransformer(ABC, Generic[RecordT, OutputT]): + """ + Base class for transforming DataRecords into a specific OutputT. + + This class orchestrates: + 1. Extraction (to a canonical dict) + 2. Filtering (selecting specific columns) + 3. Augmentation (adding named columns) + 4. Formatting (converting dict to OutputT) + """ def __init__( self, - *columns: Unpack[tuple[Column]], - **named_columns: NamedColumns, + *columns: Unpack[tuple[ColumnSelector]], + formatter: Optional[Callable[[dict[str, Any]], OutputT]] = None, + **named_columns: NamedColumnDefinitions, ): - if not columns and not named_columns: - converter = self.default_transformer() - else: - default = self.default_transformer() - has_string_columns = any(not callable(column) for column in columns) + """ + Args: + *columns: Fields to select/compute. + formatter: Optional function to convert the final dict to OutputT. + If None, the output remains a dict (OutputT = dict). + **named_columns: New columns to append. 
+ """ + self.columns = columns + self.named_columns = named_columns + self.formatter = formatter - def converter(data_record: T) -> dict[str, Any]: - if has_string_columns: - default_row = default(data_record) - else: - default_row = {} - - row = {} - for column in columns: - if callable(column): - res = column(data_record) - if not isinstance(res, dict): - raise KloppyError( - "A function column should return a dictionary" - ) - row.update(res) - else: - if column == "*": - row.update(default_row) - elif "*" in column: - row.update( - { - k: v - for k, v in default_row.items() - if fnmatch(k, column) - } - ) - elif column in default_row: - row[column] = default_row[column] - else: - row[column] = getattr(data_record, column, None) - - for name, column in named_columns.items(): - row[name] = ( - column(data_record) if callable(column) else column + def transform_record(self, record: RecordT) -> list[OutputT]: + """Public API to transform a record.""" + # 1. Extract canonical data (List of Dictionaries) + canonical_rows = self._extract_canonical(record) + + # 2. Process rows (Filter & Augment) + processed_rows = [ + self._process_row(row, record) for row in canonical_rows + ] + + # 3. Format output + if self.formatter: + return [self.formatter(row) for row in processed_rows] + + # If no formatter, we assume OutputT is dict + return processed_rows # type: ignore + + @abstractmethod + def _extract_canonical(self, record: RecordT) -> list[dict[str, Any]]: + """ + Implementation specific logic to extract raw data from the record. + Must return a list of flat dictionaries. 
+ """ + pass + + def _process_row( + self, base_row: dict[str, Any], record: RecordT + ) -> dict[str, Any]: + """Applies column selection (filtering) and named column augmentation.""" + + # Optimization: If no columns specified, keep everything + if not self.columns: + row = base_row.copy() + else: + row = {} + for col in self.columns: + if callable(col): + # Callables merge their result into the row + res = col(record) + if not isinstance(res, dict): + raise KloppyError( + "Callable columns must return a dictionary." + ) + row.update(res) + elif col == "*": + row.update(base_row) + elif "*" in col: + # Wildcard match + row.update( + {k: v for k, v in base_row.items() if fnmatch(k, col)} ) + elif col in base_row: + row[col] = base_row[col] + else: + # Fallback to record attribute + row[col] = getattr(record, col, None) + + # Apply named columns + for name, value_or_func in self.named_columns.items(): + if callable(value_or_func): + row[name] = value_or_func(record) + else: + row[name] = value_or_func + + return row + + def __call__(self, record: RecordT) -> list[OutputT]: + return self.transform_record(record) + + +# --- Concrete Implementations --- + + +@register_transformer(DatasetType.EVENT, layout="default") +class EventTransformer(DataRecordTransformer[Event, Any]): + """Transformer for Event data.""" + + def _extract_canonical(self, record: Event) -> list[dict[str, Any]]: + row: dict[str, Any] = dict( + event_id=record.event_id, + event_type=( + record.event_type.value + if record.event_type != EventType.GENERIC + else f"GENERIC:{record.event_name}" + ), + period_id=record.period.id, + timestamp=record.timestamp, + end_timestamp=None, + ball_state=record.ball_state.value if record.ball_state else None, + ball_owning_team=( + record.ball_owning_team.team_id + if record.ball_owning_team + else None + ), + team_id=record.team.team_id if record.team else None, + player_id=record.player.player_id if record.player else None, + coordinates_x=record.coordinates.x if 
record.coordinates else None, + coordinates_y=record.coordinates.y if record.coordinates else None, + ) + + # Event-specific logic + if isinstance(record, PassEvent): + row.update( + { + "end_timestamp": record.receive_timestamp, + "end_coordinates_x": record.receiver_coordinates.x + if record.receiver_coordinates + else None, + "end_coordinates_y": record.receiver_coordinates.y + if record.receiver_coordinates + else None, + "receiver_player_id": record.receiver_player.player_id + if record.receiver_player + else None, + } + ) + elif isinstance(record, CarryEvent): + row.update( + { + "end_timestamp": record.end_timestamp, + "end_coordinates_x": record.end_coordinates.x + if record.end_coordinates + else None, + "end_coordinates_y": record.end_coordinates.y + if record.end_coordinates + else None, + } + ) + elif isinstance(record, ShotEvent): + row.update( + { + "end_coordinates_x": record.result_coordinates.x + if record.result_coordinates + else None, + "end_coordinates_y": record.result_coordinates.y + if record.result_coordinates + else None, + } + ) + elif isinstance(record, CardEvent): + row.update( + { + "card_type": record.card_type.value + if record.card_type + else None + } + ) + + if isinstance(record, QualifierMixin) and record.qualifiers: + for qualifier in record.qualifiers: + row.update(qualifier.to_dict()) + + if isinstance(record, ResultMixin): + row.update( + { + "result": record.result.value if record.result else None, + "success": record.result.is_success + if record.result + else None, + } + ) + else: + row.update( + { + "result": None, + "success": None, + } + ) + + return [row] + + +@register_transformer(DatasetType.TRACKING, layout=["wide", "default"]) +class TrackingWideTransformer(DataRecordTransformer[Frame, Any]): + """Wide-format transformer for Tracking data.""" - return row + def _extract_canonical(self, record: Frame) -> list[dict[str, Any]]: + row: dict[str, Any] = dict( + period_id=record.period.id if record.period else None, + 
timestamp=record.timestamp, + frame_id=record.frame_id, + ball_state=record.ball_state.value if record.ball_state else None, + ball_owning_team_id=( + record.ball_owning_team.team_id + if record.ball_owning_team + else None + ), + ball_x=record.ball_coordinates.x + if record.ball_coordinates + else None, + ball_y=record.ball_coordinates.y + if record.ball_coordinates + else None, + ball_z=getattr(record.ball_coordinates, "z", None) + if record.ball_coordinates + else None, + ball_speed=record.ball_speed, + ) - self.converter = converter + for player, player_data in record.players_data.items(): + # Flatten player data into columns + prefix = f"{player.player_id}" + row.update( + { + f"{prefix}_x": player_data.coordinates.x + if player_data.coordinates + else None, + f"{prefix}_y": player_data.coordinates.y + if player_data.coordinates + else None, + f"{prefix}_d": player_data.distance, + f"{prefix}_s": player_data.speed, + } + ) + if player_data.other_data: + for k, v in player_data.other_data.items(): + row[f"{prefix}_{k}"] = v - def __call__(self, data_record: T) -> dict[str, Any]: - return self.converter(data_record) + if record.other_data: + row.update(record.other_data) + return [row] -class EventToDictTransformer(DataRecordToDictTransformer[Event]): - def default_transformer(self) -> Callable[[Event], dict]: - return DefaultEventTransformer() +@register_transformer(DatasetType.TRACKING, layout="long") +class TrackingLongTransformer(DataRecordTransformer[Frame, Any]): + """Long-format transformer for Tracking data.""" -class FrameToDictTransformer(DataRecordToDictTransformer[Frame]): - def default_transformer(self) -> Callable[[Frame], dict]: - return DefaultFrameTransformer() + def _extract_canonical(self, record: Frame) -> list[dict[str, Any]]: + rows = [] + base_data = { + "period_id": record.period.id if record.period else None, + "timestamp": record.timestamp, + "frame_id": record.frame_id, + "ball_state": record.ball_state.value + if record.ball_state + 
else None, + "ball_owning_team_id": ( + record.ball_owning_team.team_id + if record.ball_owning_team + else None + ), + } + if record.other_data: + base_data.update(record.other_data) + # Ball + ball_row = base_data.copy() + ball_row.update( + { + "team_id": "ball", + "player_id": "ball", + "x": record.ball_coordinates.x + if record.ball_coordinates + else None, + "y": record.ball_coordinates.y + if record.ball_coordinates + else None, + "z": getattr(record.ball_coordinates, "z", None) + if record.ball_coordinates + else None, + "s": record.ball_speed, + } + ) + rows.append(ball_row) -class CodeToDictTransformer(DataRecordToDictTransformer[Code]): - def default_transformer(self) -> Callable[[Code], dict]: - return DefaultCodeTransformer() + # Players + for player, player_data in record.players_data.items(): + p_row = base_data.copy() + p_row.update( + { + "team_id": player.team.team_id if player.team else None, + "player_id": player.player_id, + "x": player_data.coordinates.x + if player_data.coordinates + else None, + "y": player_data.coordinates.y + if player_data.coordinates + else None, + "z": getattr(player_data.coordinates, "z", None) + if player_data.coordinates + else None, + "d": player_data.distance, + "s": player_data.speed, + } + ) + if player_data.other_data: + p_row.update(player_data.other_data) + rows.append(p_row) + return rows -class LongLayoutFrameToDictTransformer(DataRecordToDictTransformer[Frame]): - def default_transformer(self) -> Callable[[Frame], list[dict]]: - return LongLayoutFrameTransformer() +@register_transformer(DatasetType.CODE, layout="default") +class CodeTransformer(DataRecordTransformer[Code, Any]): + """Transformer for Code data.""" -def get_transformer_cls( - dataset_type: DatasetType, -) -> type[DataRecordToDictTransformer]: - if dataset_type == DatasetType.EVENT: - return EventToDictTransformer - elif dataset_type == DatasetType.TRACKING: - return FrameToDictTransformer - elif dataset_type == DatasetType.CODE: - return 
CodeToDictTransformer + def _extract_canonical(self, record: Code) -> list[dict[str, Any]]: + row = dict( + code_id=record.code_id, + period_id=record.period.id if record.period else None, + timestamp=record.timestamp, + end_timestamp=record.end_timestamp, + code=record.code, + ) + row.update(record.labels) + return [row] diff --git a/kloppy/domain/services/transformers/dataset.py b/kloppy/domain/services/transformers/dataset.py index 5de757e54..d0ab6b671 100644 --- a/kloppy/domain/services/transformers/dataset.py +++ b/kloppy/domain/services/transformers/dataset.py @@ -1,3 +1,63 @@ +"""Dataset Transformation. + +This module provides the machinery for transforming the spatial representation of +kloppy datasets (`EventDataset` and `TrackingDataset`). It addresses three key +aspects of spatial data normalization: + +1. **Coordinate Systems**: Converting data from a provider-specific system + (e.g., Opta, Wyscout) to a standardized system (e.g., Kloppy Standard, + Metric). +2. **Pitch Dimensions**: Scaling coordinates from normalized (0-1) ranges to + metric values (meters), or vice-versa, based on specific pitch dimensions. +3. **Orientation**: Flipping coordinates to ensure a consistent attacking + direction (e.g., ensuring the Home team always attacks to the right). + +Key Components: + DatasetTransformer: The core class capable of transforming entire datasets, + individual frames, or specific events. + DatasetTransformerBuilder: A helper factory to construct transformers based + on configuration or provider names. + +Examples: + **1. Standardizing a Dataset** + Convert a dataset to the standard Kloppy coordinate system (Metric, origin + at center, x pointing to opposition goal). + + >>> from kloppy.domain import KloppyCoordinateSystem + >>> # dataset is an EventDataset or TrackingDataset + >>> new_dataset = DatasetTransformer.transform_dataset( + ... dataset, + ... to_coordinate_system=KloppyCoordinateSystem() + ... ) + + **2. 
Enforcing Orientation** + Force the dataset orientation so the Home team always attacks to the Right + (and the Away team attacks Left), flipping coordinates for the second half + if necessary. + + >>> from kloppy.domain import Orientation + >>> new_dataset = DatasetTransformer.transform_dataset( + ... dataset, + ... to_orientation=Orientation.HOME_AWAY + ... ) + + **3. Scaling to Specific Pitch Dimensions** + If the source data is normalized (0-1), you can project it onto a real pitch size. + + >>> from kloppy.domain import MetricPitchDimensions, Dimension + >>> dims = MetricPitchDimensions( + ... x_dim=Dimension(0, 105), + ... y_dim=Dimension(0, 68), + ... pitch_length=105, + ... pitch_width=68, + ... standardized=False, + ...) + >>> new_dataset = DatasetTransformer.transform_dataset( + ... dataset, + ... to_pitch_dimensions=dims + ... ) +""" + from dataclasses import fields, replace from typing import Optional, Union import warnings diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py index f761ae26a..783f57589 100644 --- a/kloppy/tests/test_helpers.py +++ b/kloppy/tests/test_helpers.py @@ -25,6 +25,7 @@ TrackingDataset, ) from kloppy.domain.services.frame_factory import create_frame +from kloppy.exceptions import KloppyError class TestHelpers: @@ -411,7 +412,7 @@ def test_transform_event_data_freeze_frame(self, base_dir): assert coordinates.x == 1 - coordinates_transformed.x assert coordinates.y == 1 - coordinates_transformed.y - def test_to_pandas(self): + def test_to_pandas_wide_layout(self): tracking_data = self._get_tracking_dataset() data_frame = tracking_data.to_df(engine="pandas") @@ -437,6 +438,48 @@ def test_to_pandas(self): ) assert_frame_equal(data_frame, expected_data_frame, check_like=True) + def test_to_pandas_long_layout(self): + tracking_data = self._get_tracking_dataset() + + # Specify layout="long" + data_frame = tracking_data.to_df(engine="pandas", layout="long") + + expected_data_frame = DataFrame.from_dict( + { + # Row 
0: Frame 1 - Ball + # Row 1: Frame 2 - Ball + # Row 2: Frame 2 - Player 'home_1' + "frame_id": [1, 2, 2], + "period_id": [1, 2, 2], + "timestamp": [0.1, 0.2, 0.2], + "ball_state": [None, None, None], + "ball_owning_team_id": ["home", "away", "away"], + # Identifiers + "team_id": ["ball", "ball", "home"], + "player_id": ["ball", "ball", "home_1"], + # Coordinates & Metrics (Unified columns) + "x": [100.0, 0.0, 15.0], + "y": [-50.0, 50.0, 35.0], + "z": [0.0, 1.0, None], # Player has no Z in wide test + "d": [None, None, 0.03], + "s": [None, None, 10.5], + # Metadata + # Note: Frame-level 'extra_data' (value 1 in frame 2) propagates to all rows in that frame + "extra_data": [None, 1, 1], + } + ) + + # check_like=True ignores column order + assert_frame_equal( + data_frame, expected_data_frame, check_like=True, check_dtype=False + ) + + def test_to_pandas_invalid_layout(self): + tracking_data = self._get_tracking_dataset() + + with pytest.raises(KloppyError, match="Layout 'wrong' not found"): + tracking_data.to_df(engine="pandas", layout="wrong") + def test_to_pandas_generic_events(self, base_dir): dataset = opta.load( f7_data=base_dir / "files/opta_f7.xml", diff --git a/kloppy/tests/test_metadata.py b/kloppy/tests/test_metadata.py index a6be8e737..c7c2120ef 100644 --- a/kloppy/tests/test_metadata.py +++ b/kloppy/tests/test_metadata.py @@ -11,7 +11,7 @@ Point3D, Unit, ) -from kloppy.domain.services.transformers import DatasetTransformer +from kloppy.domain.services.transformers.dataset import DatasetTransformer class TestPitchdimensions: diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index 0040b62f0..da6c39258 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -263,72 +263,6 @@ def test_limit_sample(self, raw_data: Path, meta_data: Path): ) assert len(dataset.records) == 100 - def test_long_layout_df(self, raw_data: Path, meta_data: Path): - dataset = sportec.load_tracking( - raw_data=raw_data, - 
meta_data=meta_data, - coordinates="sportec", - only_alive=True, - limit=100, - ) - - long_df = dataset.to_df(layout="long") - - assert long_df.frame_id.nunique() == 100 - assert len(long_df) == 256 - assert list(long_df.columns) == [ - "period_id", - "timestamp", - "frame_id", - "ball_state", - "ball_owning_team_id", - "team_id", - "player_id", - "x", - "y", - "z", - "d", - "s", - ] - assert len(long_df.dropna(subset=["z"])) == 100 - assert len(long_df[long_df["team_id"] == "ball"]) == 100 - assert long_df.player_id.nunique() == 4 - - def test_enriched_metadata(self, raw_data: Path, meta_data: Path): - dataset = sportec.load_tracking( - raw_data=raw_data, - meta_data=meta_data, - coordinates="sportec", - only_alive=True, - ) - - date = dataset.metadata.date - if date: - assert isinstance(date, datetime) - assert date == datetime( - 2020, 6, 5, 18, 30, 0, 210000, tzinfo=timezone.utc - ) - - game_week = dataset.metadata.game_week - if game_week: - assert isinstance(game_week, str) - assert game_week == "30" - - game_id = dataset.metadata.game_id - if game_id: - assert isinstance(game_id, str) - assert game_id == "DFL-MAT-003BN1" - - home_coach = dataset.metadata.home_coach - if home_coach: - assert isinstance(home_coach, str) - assert home_coach == "C. Streich" - - away_coach = dataset.metadata.away_coach - if away_coach: - assert isinstance(away_coach, str) - assert away_coach == "M. 
Rose" - def test_referees(self, raw_data_referee: Path, meta_data: Path): dataset = sportec.load_tracking( raw_data=raw_data_referee, From 323a2d9971924afa1c6bea264be12155b5f35921 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 23 Dec 2025 18:14:37 +0100 Subject: [PATCH 16/17] docs: document layout option --- docs/user-guide/exporting-data/dataframes.md | 61 ++++++++++++++++---- kloppy/domain/models/common.py | 37 ++++++++++++ 2 files changed, 87 insertions(+), 11 deletions(-) diff --git a/docs/user-guide/exporting-data/dataframes.md b/docs/user-guide/exporting-data/dataframes.md index 5820d4675..b89c676fe 100644 --- a/docs/user-guide/exporting-data/dataframes.md +++ b/docs/user-guide/exporting-data/dataframes.md @@ -86,24 +86,63 @@ print(f""" ### Tracking data -For a [`TrackingDataset`][kloppy.domain.TrackingDataset], the output columns include: +The [`TrackingDataset`][kloppy.domain.TrackingDataset] supports two different layouts: **Wide** (default) and **Long**. -| Column | Description | -|-------------------------------------------------|---------------------------------------| -| frame_id | Frame number | -| period_id | Match period | -| timestamp | Frame timestamp | -| ball_x, ball_y, ball_z, ball_speed | Ball position and speed | -| _x, _y, _d, _s | Player coordinates, distance (since previous frame), and speed | -| ball_state | Current state of the ball | -| ball_owning_team | Which team owns the ball | +#### Wide layout + +In the wide layout, each row represents a single frame. Player data is flattened into columns, with a specific column for each player's x/y coordinates, speed, etc. 
+ +**Common Columns:** + +| Column | Description | +| :--- | :--- | +| `frame_id` | Frame number | +| `period_id` | Match period | +| `timestamp` | Frame timestamp | +| `ball_state` | Current state of the ball | +| `ball_owning_team_id` | Which team owns the ball | +| `ball_x`, `ball_y`, `ball_z` | Ball coordinates | +| `ball_speed` | Ball speed | +| `<player_id>_x`, `<player_id>_y` | Player coordinates | +| `<player_id>_d` | Player distance covered (since previous frame) | +| `<player_id>_s` | Player speed | + +**Example:** + +```python exec="true" html="true" session="export-df" +# Default is layout="wide" +print(f""" +
+{tracking_dataset.to_df(layout="wide").head(n=3).to_html(index=False, border="0")} +
+""") +``` + +#### Long layout + +In the long layout, each frame is "melted" into multiple rows: one for the ball and one for each player present in that frame. + +**Common Columns:** + +| Column | Description | +| :--- | :--- | +| `frame_id` | Frame number | +| `period_id` | Match period | +| `timestamp` | Frame timestamp | +| `team_id` | Team identifier (or "ball") | +| `player_id` | Player identifier (or "ball") | +| `x`, `y`, `z` | Entity coordinates | +| `d` | Entity distance covered (since previous frame) | +| `s` | Entity speed | +| `ball_state` | Current state of the ball (repeated for all rows in frame) | +| `ball_owning_team_id` | Which team owns the ball (repeated for all rows in frame) | **Example:** ```python exec="true" html="true" session="export-df" print(f""" 
-{tracking_dataset.to_df().head(n=3).to_html(index=False, border="0")} +{tracking_dataset.to_df(layout="long").head(n=6).to_html(index=False, border="0")}
""") ``` diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 3db17539b..5b849586a 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1906,6 +1906,43 @@ def to_df( layout: Optional[str] = None, **named_columns: "NamedColumns", ): + """Converts the dataset's records into a DataFrame. + + This method extracts data from the internal records and formats them into + a tabular structure using the specified dataframe engine (Pandas or Polars). + + Args: + *columns: Column names to include in the output. + - If not provided, a default set of columns is returned. + - Supports wildcards (e.g., "*coordinates*"). + - Supports callables for custom extraction logic. + engine: The dataframe engine to use. + - 'pandas': Returns a standard pandas DataFrame. + - 'pandas[pyarrow]': Returns a pandas DataFrame backed by PyArrow. + - 'polars': Returns a Polars DataFrame. + - None: Defaults to the `dataframe.engine` configuration value. + layout: The layout structure of the output. + - For Event data: Default is a flat list of events. + - For Tracking data: + - 'wide' (default): One row per frame, with players as columns. + - 'long': One row per entity (player/ball) per frame ("tidy" data). + **named_columns: Additional columns to create, where the key is the + column name and the value is a literal or a callable applied to + each record. + + Returns: + The dataset as a DataFrame. 
+ + Examples: + Basic conversion to Pandas: + >>> df = dataset.to_df() + + Using Polars and selecting specific columns: + >>> df = dataset.to_df("period_id", "timestamp", "player_id", "coordinates_*", engine="polars") + + Tracking data in long format: + >>> df = tracking_dataset.to_df(layout="long") + """ from kloppy.config import get_config if not engine: From 79b390c855f347ffe24ac0a05cc5acdcb011d96b Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 23 Dec 2025 18:31:17 +0100 Subject: [PATCH 17/17] docs: fix docstring --- kloppy/domain/models/common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 5b849586a..153aa0507 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1930,9 +1930,6 @@ def to_df( column name and the value is a literal or a callable applied to each record. - Returns: - The dataset as a DataFrame. - Examples: Basic conversion to Pandas: >>> df = dataset.to_df()