def __le__(self, other):
    """Return True when ``self`` is at or before ``other``.

    An earlier period always sorts first; within the same period the
    timestamps decide.
    """
    if self.period < other.period:
        return True
    return self.period == other.period and self.timestamp <= other.timestamp

def __gt__(self, other):
    """Return True when ``self`` is strictly after ``other``.

    A later period always sorts last; within the same period the
    timestamps decide.
    """
    if self.period > other.period:
        return True
    return self.period == other.period and self.timestamp > other.timestamp

def __ge__(self, other):
    """Return True when ``self`` is at or after ``other``.

    A later period always sorts last; within the same period the
    timestamps decide.
    """
    if self.period > other.period:
        return True
    return self.period == other.period and self.timestamp >= other.timestamp
class BreakdownKey(Enum):
    """Dimension along which minutes played can be broken down."""

    POSITION = "position"
    POSSESSION_STATE = "possession_state"


class PossessionState(Enum):
    """Possession situation used to bucket playing time."""

    IN_POSSESSION = "in-possession"
    OUT_OF_POSSESSION = "out-of-possession"
    BALL_DEAD = "ball-dead"


# Event types after which the ball is treated as dead, whatever ball state
# is recorded on the event itself.
EVENTS_CAUSING_DEAD_BALL = (
    FoulCommittedEvent,
    SubstitutionEvent,
    CardEvent,
    PlayerOnEvent,
    PlayerOffEvent,
    BallOutEvent,
)

# Event results after which the ball is treated as dead.
RESULTS_CAUSING_DEAD_BALL = (
    PassResult.OFFSIDE,
    ShotResult.GOAL,
    ShotResult.OWN_GOAL,
    ShotResult.OFF_TARGET,
    PassResult.OUT,
    InterceptionResult.OUT,
    TakeOnResult.OUT,
)


@dataclass(frozen=True)
class MinutesPlayedKey:
    """Identifies what a `MinutesPlayed` item refers to.

    Exactly one of `player` and `team` must be set. `position` and
    `possession_state` are only filled for the matching breakdown.

    Raises:
        ValueError: If neither or both of `player` and `team` are given.
    """

    player: Optional[Player] = None
    team: Optional[Team] = None
    position: Optional[PositionType] = None
    possession_state: Optional[PossessionState] = None

    def __post_init__(self):
        # Both None or both set are equally invalid.
        if (self.player is None) == (self.team is None):
            raise ValueError(
                "Either 'player' or 'team' must be provided, but not both."
            )


class MinutesPlayed(NamedTuple):
    """Playing time attributed to the subject described by `key`."""

    key: MinutesPlayedKey
    start_time: Time
    end_time: Time
    duration: timedelta


class MinutesPlayedAggregator(EventDatasetAggregator):
    """Aggregate an event dataset into playing time per team and player.

    Args:
        breakdown_key: Optional breakdown, either a `BreakdownKey` or its
            string value. Without it, one item per team and one per player
            who appeared is produced.

    Raises:
        ValueError: If `breakdown_key` is a string that is not a valid
            `BreakdownKey` value.
    """

    def __init__(
        self, breakdown_key: Optional[Union[BreakdownKey, str]] = None
    ):
        if isinstance(breakdown_key, str):
            try:
                breakdown_key = BreakdownKey(breakdown_key)
            except ValueError:
                # The enum's own lookup error adds nothing for the caller.
                raise ValueError(
                    f"BreakdownKey {breakdown_key} not found. Known keys: {', '.join(key.value for key in BreakdownKey)}"
                ) from None
        self.breakdown_key = breakdown_key

    @staticmethod
    def get_possession_state(
        ball_state: BallState, ball_owning_team: Team, team: Team
    ):
        """Return the possession state of `team` for the given ball context.

        A dead ball, or a ball without an owning team, yields `BALL_DEAD`.
        """
        if ball_state == BallState.DEAD or ball_owning_team is None:
            return PossessionState.BALL_DEAD
        return (
            PossessionState.IN_POSSESSION
            if ball_owning_team == team
            else PossessionState.OUT_OF_POSSESSION
        )

    @staticmethod
    def _flip_possession_state(
        state: PossessionState, flip: bool
    ) -> PossessionState:
        """Swap in/out of possession when `flip` is set; `BALL_DEAD` is kept."""
        if flip:
            if state == PossessionState.IN_POSSESSION:
                return PossessionState.OUT_OF_POSSESSION
            elif state == PossessionState.OUT_OF_POSSESSION:
                return PossessionState.IN_POSSESSION
        return state

    @staticmethod
    def _handle_possession_state_end(
        time_per_possession_state: Dict[PossessionState, timedelta],
        time_per_player: Dict[Player, Dict[PossessionState, timedelta]],
        players_start_end_times: Dict[Player, Tuple[Time, Time, bool]],
        start_time: Time,
        end_time: Time,
        ball_state: BallState,
        ball_owning_team: Team,
        first_team: Team,
    ):
        """Close the segment `start_time`..`end_time` and book its duration.

        The possession state is determined from the perspective of
        `first_team`. The duration is added in place to both the overall
        totals and the totals of every player on the pitch.
        """
        possession_state = MinutesPlayedAggregator.get_possession_state(
            ball_state, ball_owning_team, first_team
        )
        time_per_possession_state[possession_state] += end_time - start_time
        MinutesPlayedAggregator._accumulate_player_time(
            time_per_player,
            players_start_end_times,
            start_time,
            end_time,
            possession_state,
        )

    @staticmethod
    def _accumulate_player_time(
        time_per_player: Dict[Player, Dict[PossessionState, timedelta]],
        players_start_end_times: Dict[Player, Tuple[Time, Time, bool]],
        start_time: Time,
        end_time: Time,
        possession_state: PossessionState,
    ):
        """Add the part of a segment each player was on the pitch for.

        For every player whose playing window overlaps the segment, the
        overlap is added in place to `time_per_player`.
        """
        for player, (
            start_player_time,
            end_player_time,
            _,
        ) in players_start_end_times.items():
            if start_player_time <= end_time and end_player_time >= start_time:
                duration = min(end_time, end_player_time) - max(
                    start_time, start_player_time
                )
                time_per_player[player][possession_state] += duration

    @staticmethod
    def _playing_window(player: Player):
        """Return (start of first position range, end of last) for `player`.

        Both values are None when the player has no position ranges.
        """
        first_start = None
        last_end = None
        for range_start, range_end, _position in player.positions.ranges():
            if not first_start:
                first_start = range_start
            last_end = range_end
        return first_start, last_end

    @staticmethod
    def _period_end_time(period: Period) -> Time:
        """Return the `Time` at the end of `period`, relative to its start."""
        return Time(
            period=period,
            timestamp=(period.end_timestamp - period.start_timestamp),
        )

    def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]:
        """Compute minutes played for `dataset` using the configured breakdown.

        Returns:
            A list of `MinutesPlayed` items:

            * `BreakdownKey.POSITION`: one item per player per position range.
            * `BreakdownKey.POSSESSION_STATE`: one item per possession state
              for every team and for every player who appeared.
            * no breakdown: one item per team spanning the match, and one
              per player who appeared.
        """
        if self.breakdown_key == BreakdownKey.POSITION:
            return self._aggregate_per_position(dataset)
        if self.breakdown_key == BreakdownKey.POSSESSION_STATE:
            return self._aggregate_per_possession_state(dataset)
        return self._aggregate_totals(dataset)

    def _aggregate_per_position(
        self, dataset: EventDataset
    ) -> List[MinutesPlayed]:
        """Return one item per player per position range."""
        items = []
        for team in dataset.metadata.teams:
            for player in team.players:
                for (
                    range_start,
                    range_end,
                    position,
                ) in player.positions.ranges():
                    items.append(
                        MinutesPlayed(
                            key=MinutesPlayedKey(
                                player=player, position=position
                            ),
                            start_time=range_start,
                            end_time=range_end,
                            duration=range_end - range_start,
                        )
                    )
        return items

    def _aggregate_totals(self, dataset: EventDataset) -> List[MinutesPlayed]:
        """Return one item per team and one per player who appeared."""
        items = []
        periods = dataset.metadata.periods
        # Kept in dedicated names: the per-player window below must never
        # overwrite the match window used for the team-level items.
        match_start = periods[0].start_time
        # Last period rather than index 1, so single-period data and
        # matches with extra time are handled as well.
        match_end = periods[-1].end_time

        for team in dataset.metadata.teams:
            items.append(
                MinutesPlayed(
                    key=MinutesPlayedKey(team=team),
                    start_time=match_start,
                    end_time=match_end,
                    duration=match_end - match_start,
                )
            )
            for player in team.players:
                first_start, last_end = self._playing_window(player)
                if first_start:
                    items.append(
                        MinutesPlayed(
                            key=MinutesPlayedKey(player=player),
                            start_time=first_start,
                            end_time=last_end,
                            duration=last_end - first_start,
                        )
                    )
        return items

    def _aggregate_per_possession_state(
        self, dataset: EventDataset
    ) -> List[MinutesPlayed]:
        """Return playing time per possession state for teams and players.

        The match is walked event by event and cut into segments with a
        constant ball state and ball-owning team. Segments are classified
        from the perspective of the first team in the metadata; the result
        is mirrored for the other team when the items are built.
        """
        teams = dataset.metadata.teams
        periods = dataset.metadata.periods
        first_team = teams[0]

        # player -> (first time on pitch, last time on pitch, flip flag)
        players_start_end_times: Dict[Player, Tuple[Time, Time, bool]] = {}
        for team in teams:
            for player in team.players:
                first_start, last_end = self._playing_window(player)
                if first_start:
                    players_start_end_times[player] = (
                        first_start,
                        last_end,
                        team != first_team,
                    )

        time_per_possession_state = {
            state: timedelta(0) for state in PossessionState
        }
        time_per_player = {
            player: {state: timedelta(0) for state in PossessionState}
            for player in players_start_end_times
        }

        # State of the segment that is currently open.
        start_time: Optional[Time] = periods[0].start_time
        ball_owning_team: Optional[Team] = None
        ball_state: Optional[BallState] = None
        period: Optional[Period] = periods[0]

        def close_segment(segment_end: Time) -> None:
            # Reads the open-segment variables at call time.
            self._handle_possession_state_end(
                time_per_possession_state,
                time_per_player,
                players_start_end_times,
                start_time,
                segment_end,
                ball_state,
                ball_owning_team,
                first_team,
            )

        for event in dataset.events:
            if isinstance(event, GenericEvent):
                continue
            if event.time < start_time:
                continue

            is_set_piece = any(
                isinstance(q, SetPieceQualifier)
                for q in event.qualifiers or []
            )
            if is_set_piece and ball_state != BallState.DEAD:
                # Ball state should be dead, so we mistagged a prior event
                # (for example a clearance that went out of play).
                previous_event = event.prev(
                    lambda x: not isinstance(x, GenericEvent)
                )
                if previous_event:
                    close_segment(previous_event.time)
                    start_time = previous_event.time
                    # Set the current ball state to dead.
                    ball_state = BallState.DEAD
                    ball_owning_team = event.ball_owning_team

            result_causes_dead_ball = event.result in RESULTS_CAUSING_DEAD_BALL
            if (
                isinstance(event, EVENTS_CAUSING_DEAD_BALL)
                or result_causes_dead_ball
            ):
                actual_event_ball_state = BallState.DEAD
            else:
                actual_event_ball_state = event.ball_state

            if result_causes_dead_ball:
                # The ball goes dead when the action ends, not when it
                # starts: prefer the receive/end timestamp when available.
                receive_timestamp = getattr(event, "receive_timestamp", None)
                end_timestamp = getattr(event, "end_timestamp", None)
                if receive_timestamp is not None:
                    actual_event_timestamp = receive_timestamp
                elif end_timestamp is not None:
                    actual_event_timestamp = end_timestamp
                else:
                    actual_event_timestamp = event.timestamp
                actual_event_time = Time(event.period, actual_event_timestamp)
            else:
                actual_event_time = event.time

            if event.period != period:
                # Close the open segment at the end of the previous period
                # and start a new one at this event.
                close_segment(self._period_end_time(period))

                start_time = actual_event_time
                period = event.period
                ball_state = actual_event_ball_state
                ball_owning_team = event.ball_owning_team

            if (
                actual_event_ball_state != ball_state
                or event.ball_owning_team != ball_owning_team
            ):
                close_segment(actual_event_time)

                start_time = actual_event_time
                ball_state = actual_event_ball_state
                ball_owning_team = event.ball_owning_team

        # Close the segment that is still open at the end of the last
        # period seen.
        close_segment(self._period_end_time(period))

        match_start = periods[0].start_time
        # Last period rather than index 1, so single-period data and
        # matches with extra time are handled as well.
        match_end = periods[-1].end_time

        items = []
        for team in teams:
            flip_possession = team != first_team
            for state, duration in time_per_possession_state.items():
                items.append(
                    MinutesPlayed(
                        key=MinutesPlayedKey(
                            team=team,
                            possession_state=self._flip_possession_state(
                                state, flip_possession
                            ),
                        ),
                        start_time=match_start,
                        end_time=match_end,
                        duration=duration,
                    )
                )
            for player in team.players:
                if player not in time_per_player:
                    continue
                for state, duration in time_per_player[player].items():
                    items.append(
                        MinutesPlayed(
                            key=MinutesPlayedKey(
                                player=player,
                                possession_state=self._flip_possession_state(
                                    state, flip_possession
                                ),
                            ),
                            start_time=match_start,
                            end_time=match_end,
                            duration=duration,
                        )
                    )
        return items
def test_statsbomb_minutes_played_per_possession_state(self, base_dir):
    """Check the possession-state breakdown for teams and selected players."""
    dataset = statsbomb.load(
        # 3788741
        # 15986
        lineup_data=base_dir / "files/statsbomb_lineup.json",
        event_data=base_dir / "files/statsbomb_event.json",
    )

    minutes_played_per_possession_state = dataset.aggregate(
        "minutes_played", breakdown_key=BreakdownKey.POSSESSION_STATE
    )

    # Split the flat result into {team: {state: duration}} and
    # {player: {state: duration}}.
    player_minutes_played_map = {}
    team_minutes_played_map = {}
    for item in minutes_played_per_possession_state:
        key = item.key
        if key.team:
            team_minutes_played_map.setdefault(key.team, {})[
                key.possession_state
            ] = item.duration
        elif key.player and key.possession_state:
            player_minutes_played_map.setdefault(key.player, {})[
                key.possession_state
            ] = item.duration

    # Teams played entire match
    for per_state in team_minutes_played_map.values():
        assert sum(per_state.values(), timedelta()) == (
            dataset.metadata.periods[0].duration
            + dataset.metadata.periods[1].duration
        )

    # One team's time in possession is the other's time out of possession.
    teams = list(team_minutes_played_map.keys())
    team_a = team_minutes_played_map[teams[0]]
    team_b = team_minutes_played_map[teams[1]]
    assert (
        team_a[PossessionState.IN_POSSESSION]
        == team_b[PossessionState.OUT_OF_POSSESSION]
    )
    assert (
        team_a[PossessionState.OUT_OF_POSSESSION]
        == team_b[PossessionState.IN_POSSESSION]
    )
    assert (
        team_a[PossessionState.BALL_DEAD]
        == team_b[PossessionState.BALL_DEAD]
    )

    home_team, away_team = dataset.metadata.teams

    # 3109 - 0:00:00.000000 - Malcom
    # 3501 - 0:47:32.053000 - Coutinho
    # 5203 - 1:24:12.343000 - Busquets
    # 5211 - 1:32:37.320000 - Ramos

    # Didn't play
    malcom = home_team.get_player_by_id(3109)
    assert malcom not in player_minutes_played_map

    # Started second half
    coutinho = home_team.get_player_by_id(3501)
    coutinho_playtime = player_minutes_played_map[coutinho]
    assert sum(coutinho_playtime.values(), timedelta()) == timedelta(
        seconds=2852.053
    )
    assert coutinho_playtime == {
        PossessionState.IN_POSSESSION: timedelta(seconds=1559.075),
        PossessionState.OUT_OF_POSSESSION: timedelta(seconds=287.916),
        PossessionState.BALL_DEAD: timedelta(seconds=1005.062),
    }

    # Replaced in second half
    busquets = home_team.get_player_by_id(5203)
    busquets_playtime = player_minutes_played_map[busquets]
    assert sum(busquets_playtime.values(), timedelta()) == timedelta(
        seconds=5052.343
    )
    assert busquets_playtime == {
        PossessionState.IN_POSSESSION: timedelta(seconds=2751.232000),
        PossessionState.OUT_OF_POSSESSION: timedelta(seconds=611.662000),
        PossessionState.BALL_DEAD: timedelta(seconds=1689.449000),
    }

    # Played entire match
    ramos = home_team.get_player_by_id(5211)
    ramos_playtime = player_minutes_played_map[ramos]
    assert sum(ramos_playtime.values(), timedelta()) == (
        dataset.metadata.periods[0].duration
        + dataset.metadata.periods[1].duration
    )
    assert ramos_playtime == {
        PossessionState.IN_POSSESSION: timedelta(seconds=3016.821000),
        PossessionState.OUT_OF_POSSESSION: timedelta(seconds=677.191000),
        PossessionState.BALL_DEAD: timedelta(seconds=1863.308000),
    }
    assert ramos_playtime == team_minutes_played_map[home_team]