From 1c6b7bdbaf7f18f4ae759e4835c277b43575ab7b Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Thu, 5 Jun 2025 14:21:07 +0200 Subject: [PATCH 01/10] Extend MinutesPlayedAggregator with minutes played per possession state --- kloppy/domain/models/time.py | 13 + .../services/aggregators/minutes_played.py | 251 +++++++++++++++--- kloppy/tests/test_statsbomb.py | 10 +- kloppy/tests/test_time.py | 105 +++++++- 4 files changed, 334 insertions(+), 45 deletions(-) diff --git a/kloppy/domain/models/time.py b/kloppy/domain/models/time.py index b935373ed..cfea92365 100644 --- a/kloppy/domain/models/time.py +++ b/kloppy/domain/models/time.py @@ -210,6 +210,19 @@ def __lt__(self, other): return self.period < other.period or ( self.period == other.period and self.timestamp < other.timestamp ) + def __le__(self, other): + return self.period < other.period or ( + self.period == other.period and self.timestamp <= other.timestamp + ) + def __gt__(self, other): + return self.period > other.period or ( + self.period == other.period and self.timestamp > other.timestamp + ) + def __ge__(self, other): + return self.period > other.period or ( + self.period == other.period and self.timestamp >= other.timestamp + ) + def __str__(self): m, s = divmod(self.timestamp.total_seconds(), 60) diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 28ffcd24a..73cefd544 100644 --- a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -1,72 +1,253 @@ +from dataclasses import dataclass from datetime import timedelta -from typing import List, NamedTuple, Union +from typing import List, NamedTuple, Optional +from enum import Enum -from kloppy.domain import EventDataset, Player, Time, PositionType +from kloppy.domain import EventDataset, Player, Team, Time, PositionType, BallState, FoulCommittedEvent, \ + PassResult, SubstitutionEvent, CardEvent, PlayerOnEvent, PlayerOffEvent, 
Period, GenericEvent, ShotResult from kloppy.domain.services.aggregators.aggregator import ( EventDatasetAggregator, ) +class BreakdownKey(Enum): + POSITION = "position" + POSSESSION_STATE = "possession_state" +class PossessionState(Enum): + IN_POSSESSION = 'in-possession' + OUT_OF_POSSESSION = 'out-of-possession' + BALL_DEAD = 'ball-dead' -class MinutesPlayed(NamedTuple): - player: Player - start_time: Time - end_time: Time - duration: timedelta +EVENTS_CAUSING_DEAD_BALL = ( + FoulCommittedEvent, + SubstitutionEvent, + CardEvent, + PlayerOnEvent, + PlayerOffEvent, +) + +RESULTS_CAUSING_DEAD_BALL = ( + PassResult.OFFSIDE, + ShotResult.GOAL, + ShotResult.OWN_GOAL, +) -class MinutesPlayedPerPosition(NamedTuple): - player: Player - position: PositionType +@dataclass(frozen=True) +class MinutesPlayedKey: + player: Optional[Player] = None + team: Optional[Team] = None + position: Optional[PositionType] = None + possession_state: Optional[PossessionState] = None + + def __post_init__(self): + if (self.player is None and self.team is None) or (self.player is not None and self.team is not None): + raise ValueError("Either 'player' or 'team' must be provided, but not both.") + + + + +class MinutesPlayed(NamedTuple): + key: MinutesPlayedKey start_time: Time end_time: Time duration: timedelta class MinutesPlayedAggregator(EventDatasetAggregator): - def __init__(self, include_position: bool = False): - self.include_position = include_position + def __init__(self, breakdown_key: Optional[BreakdownKey] = None): + self.breakdown_key = breakdown_key + + def get_possession_state(self, ball_state: BallState, ball_owning_team: Team, team: Team): + """Determine the possession state.""" + if ball_state == BallState.DEAD or ball_owning_team is None: + return PossessionState.BALL_DEAD + return ( + PossessionState.IN_POSSESSION + if ball_owning_team == team + else PossessionState.OUT_OF_POSSESSION + ) + + def _flip_possession_state(self, state: PossessionState, flip: bool) -> 
PossessionState: + if flip: + if state == PossessionState.IN_POSSESSION: + return PossessionState.OUT_OF_POSSESSION + elif state == PossessionState.OUT_OF_POSSESSION: + return PossessionState.IN_POSSESSION + return state + + def _accumulate_player_time( + self, + time_per_player: dict[Player, dict[PossessionState, timedelta]], + players_start_end_times: dict[Player, tuple[Time, Time, bool]], + start_time: Time, + end_time: Time, + possession_state: PossessionState + ): + for player, (start_player_time, end_player_time, _) in players_start_end_times.items(): + if start_player_time <= end_time and end_player_time >= start_time: + duration = min(end_time, end_player_time) - max(start_time, start_player_time) + time_per_player[player][possession_state] += duration def aggregate( self, dataset: EventDataset - ) -> List[Union[MinutesPlayedPerPosition, MinutesPlayed]]: + ) -> List[MinutesPlayed]: items = [] - for team in dataset.metadata.teams: - for player in team.players: - if not self.include_position: + if self.breakdown_key == BreakdownKey.POSITION: + for team in dataset.metadata.teams: + for player in team.players: + for ( + start_timestamp, + end_time, + position, + ) in player.positions.ranges(): + items.append( + MinutesPlayed( + key=MinutesPlayedKey(player=player, position=position), + start_time=start_timestamp, + end_time=end_time, + duration=end_time - start_timestamp, + ) + ) + elif self.breakdown_key == BreakdownKey.POSSESSION_STATE: + first_team = dataset.metadata.teams[0] + + players_start_end_times = {} + for team in dataset.metadata.teams: + for player in team.players: _start_time = None end_time = None for ( + start_timestamp, + end_time, + position, + ) in player.positions.ranges(): + if not _start_time: + _start_time = start_timestamp + + if _start_time: + flip_possession_state = (team != first_team) + players_start_end_times[player] = (_start_time, end_time, flip_possession_state) + time_per_possession_state = { + state: timedelta(0) for state in 
PossessionState + } + time_per_player = { + player: {state: timedelta(0) for state in PossessionState} for player in players_start_end_times.keys() + } + start_time: Optional[Time] = dataset.metadata.periods[0].start_time + ball_owning_team: Optional[Team] = None + ball_state: Optional[BallState] = None + period: Optional[Period] = dataset.metadata.periods[0] + for event in dataset.events: + if isinstance(event, GenericEvent): + continue + actual_event_ball_state = ( + BallState.DEAD + if isinstance(event, EVENTS_CAUSING_DEAD_BALL) or + (event.result in RESULTS_CAUSING_DEAD_BALL) + else event.ball_state + ) + if event.period != period: + + possession_state = self.get_possession_state(ball_state, ball_owning_team, first_team) + end_time = Time(period=period, timestamp=(period.end_timestamp - period.start_timestamp)) + time_per_possession_state[possession_state] += end_time - start_time + self._accumulate_player_time( + time_per_player, + players_start_end_times, start_time, end_time, - position, + possession_state + ) + + start_time = event.time + period = event.period + ball_state = actual_event_ball_state + ball_owning_team = event.ball_owning_team + + if actual_event_ball_state != ball_state or event.ball_owning_team != ball_owning_team: + + possession_state = self.get_possession_state(ball_state, ball_owning_team, first_team) + time_per_possession_state[possession_state] += event.time - start_time + self._accumulate_player_time( + time_per_player, + players_start_end_times, + start_time, + event.time, + possession_state + ) + + start_time = event.time + ball_state = actual_event_ball_state + ball_owning_team = event.ball_owning_team + # Handle the last event in the period + possession_state = self.get_possession_state(ball_state, ball_owning_team, first_team) + end_time = Time(period=period, timestamp=(period.end_timestamp - period.start_timestamp)) + time_per_possession_state[possession_state] += end_time - start_time + self._accumulate_player_time( + 
time_per_player, + players_start_end_times, + start_time, + end_time, + possession_state + ) + + for team in dataset.metadata.teams: + flip_possession = (team != first_team) + for state, duration in time_per_possession_state.items(): + possession_state = self._flip_possession_state(state, flip_possession) + + items.append( + MinutesPlayed( + key=MinutesPlayedKey(team=team, possession_state=possession_state), + start_time=dataset.metadata.periods[0].start_time, + end_time=dataset.metadata.periods[1].end_time, + duration=duration, + ) + ) + for player in team.players: + if player in time_per_player: + for state, duration in time_per_player[player].items(): + possession_state = self._flip_possession_state(state, flip_possession) + items.append( + MinutesPlayed( + key=MinutesPlayedKey(player=player, possession_state=possession_state), + start_time=dataset.metadata.periods[0].start_time, + end_time=dataset.metadata.periods[1].end_time, + duration=duration, + ) + ) + else: + _start_time = dataset.metadata.periods[0].start_time + _end_time = dataset.metadata.periods[1].end_time + for team in dataset.metadata.teams: + items.append( + MinutesPlayed( + key=MinutesPlayedKey(team=team), + start_time=_start_time, + end_time=_end_time, + duration=_end_time - _start_time, + ) + ) + for player in team.players: + _start_time = None + end_time = None + for ( + start_timestamp, + end_time, + position, ) in player.positions.ranges(): if not _start_time: - _start_time = start_time + _start_time = start_timestamp if _start_time: items.append( MinutesPlayed( - player=player, + key=MinutesPlayedKey(player=player), start_time=_start_time, end_time=end_time, duration=end_time - _start_time, ) ) - else: - for ( - start_time, - end_time, - position, - ) in player.positions.ranges(): - items.append( - MinutesPlayedPerPosition( - player=player, - position=position, - start_time=start_time, - end_time=end_time, - duration=end_time - start_time, - ) - ) - return items + return items \ No newline 
at end of file diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index c2d2e9a14..4a4005edc 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -49,6 +49,7 @@ PassType, UnderPressureQualifier, ) +from kloppy.domain.services.aggregators.minutes_played import BreakdownKey from kloppy.exceptions import DeserializationError from kloppy.infra.serializers.event.statsbomb.helpers import parse_str_ts @@ -1226,10 +1227,11 @@ def test_player_position(self, base_dir): event_data=base_dir / "files/statsbomb_event.json", ) - for item in dataset.aggregate("minutes_played", include_position=True): - print( - f"{item.player} {item.player.player_id}- {item.start_time} - {item.end_time} - {item.duration} - {item.position}" - ) + for item in dataset.aggregate("minutes_played", breakdown_key = BreakdownKey.POSITION): + if item.key.player and item.key.position: + print( + f"{item.key.player} {item.key.player.player_id}- {item.start_time} - {item.end_time} - {item.duration} - {item.key.position}" + ) home_team, away_team = dataset.metadata.teams period1, period2 = dataset.metadata.periods diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index 4c14065b9..94d09bfcb 100644 --- a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -5,6 +5,7 @@ from kloppy import statsbomb from kloppy.domain import Time, Period, TimeContainer +from kloppy.domain.services.aggregators.minutes_played import BreakdownKey, PossessionState @pytest.fixture @@ -155,8 +156,8 @@ def __period_offset(period_id, dataset): home_team, away_team = dataset.metadata.teams - minutes_played_map = { - item.player: item.duration for item in minutes_played + player_minutes_played_map = { + item.key.player: item.duration for item in minutes_played if item.key.player } """ @@ -168,27 +169,35 @@ def __period_offset(period_id, dataset): # Didn't play player_malcon = home_team.get_player_by_id(3109) - assert player_malcon not in minutes_played_map + 
assert player_malcon not in player_minutes_played_map # Started second half player_coutinho = home_team.get_player_by_id(3501) - assert minutes_played_map[player_coutinho] == timedelta( + assert player_minutes_played_map[player_coutinho] == timedelta( seconds=2852.053 ) # Replaced in second half player_busquets = home_team.get_player_by_id(5203) - assert minutes_played_map[player_busquets] == timedelta( + assert player_minutes_played_map[player_busquets] == timedelta( seconds=5052.343 ) # Played entire match player_ramos = home_team.get_player_by_id(5211) - assert minutes_played_map[player_ramos] == ( + assert player_minutes_played_map[player_ramos] == ( dataset.metadata.periods[0].duration + dataset.metadata.periods[1].duration ) + # Teams played entire match + for item in minutes_played: + if item.key.team: + assert item.duration == ( + dataset.metadata.periods[0].duration + + dataset.metadata.periods[1].duration + ) + # Check if total difference between start and end time equal minutes played for item in minutes_played: assert item.duration.total_seconds() == pytest.approx( @@ -209,6 +218,90 @@ def __period_offset(period_id, dataset): ), 0.001, ) + def test_statsbomb_minutes_played_per_possession_state(self, base_dir): + dataset = statsbomb.load( + # 3788741 + # 15986 + lineup_data=base_dir / "files/statsbomb_lineup.json", + event_data=base_dir / "files/statsbomb_event.json", + ) + + minutes_played_per_possession_state = dataset.aggregate("minutes_played", breakdown_key= BreakdownKey.POSSESSION_STATE) + + + player_minutes_played_map = {} + team_minutes_played_map = {} + for item in minutes_played_per_possession_state: + if item.key.team: + team = item.key.team + if team not in team_minutes_played_map: + team_minutes_played_map[team] = {} + team_minutes_played_map[team][item.key.possession_state] = item.duration + else: + player = item.key.player + possession_state = item.key.possession_state + duration = item.duration + + if player and possession_state: + if 
player not in player_minutes_played_map: + player_minutes_played_map[player] = {} + player_minutes_played_map[player][possession_state] = duration + + + + + # Teams played entire match + for item in team_minutes_played_map.values(): + total_duration = sum(item.values(), timedelta()) + assert total_duration == ( + dataset.metadata.periods[0].duration + + dataset.metadata.periods[1].duration + ) + + home_team, away_team = dataset.metadata.teams + + """ + 3109 - 0:00:00.000000 - Malcom + 3501 - 0:47:32.053000 - Coutinho + 5203 - 1:24:12.343000 - Busquets + 5211 - 1:32:37.320000 - Ramos + """ + # Didn't play + player_malcon = home_team.get_player_by_id(3109) + assert player_malcon not in player_minutes_played_map + + # Started second half + player_coutinho = home_team.get_player_by_id(3501) + playtime_coutinho = player_minutes_played_map[player_coutinho] + assert sum(playtime_coutinho.values(), timedelta()) == timedelta( + seconds=2852.053 + ) + assert playtime_coutinho[PossessionState.IN_POSSESSION] == timedelta(seconds=1505.312) + assert playtime_coutinho[PossessionState.OUT_OF_POSSESSION] == timedelta(seconds=296.958) + assert playtime_coutinho[PossessionState.BALL_DEAD] == timedelta(seconds=1049.783) + + + # Replaced in second half + player_busquets = home_team.get_player_by_id(5203) + playtime_busquets = player_minutes_played_map[player_busquets] + assert sum(playtime_busquets.values(), timedelta()) == timedelta( + seconds=5052.343 + ) + assert playtime_busquets[PossessionState.IN_POSSESSION] == timedelta(seconds=2917.724) + assert playtime_busquets[PossessionState.OUT_OF_POSSESSION] == timedelta(seconds=652.343) + assert playtime_busquets[PossessionState.BALL_DEAD] == timedelta(seconds=1482.276) + + # Played entire match + player_ramos = home_team.get_player_by_id(5211) + playtime_ramos = player_minutes_played_map[player_ramos] + assert sum(playtime_ramos.values(), timedelta()) == ( + dataset.metadata.periods[0].duration + + dataset.metadata.periods[1].duration + 
) + assert playtime_ramos[PossessionState.IN_POSSESSION] == timedelta(seconds=3020.965) + assert playtime_ramos[PossessionState.OUT_OF_POSSESSION] == timedelta(seconds=716.177) + assert playtime_ramos[PossessionState.BALL_DEAD] == timedelta(seconds=1820.178) + class TestAbsTimeContainer: From a6e1521903b2ff538f12cb2bf2d72db06f0cca24 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Thu, 5 Jun 2025 14:43:38 +0200 Subject: [PATCH 02/10] resolve type annotation errors --- kloppy/domain/services/aggregators/minutes_played.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 73cefd544..12d9b1686 100644 --- a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from datetime import timedelta -from typing import List, NamedTuple, Optional +from typing import List, NamedTuple, Optional, Dict, Tuple from enum import Enum from kloppy.domain import EventDataset, Player, Team, Time, PositionType, BallState, FoulCommittedEvent, \ @@ -77,8 +77,8 @@ def _flip_possession_state(self, state: PossessionState, flip: bool) -> Possessi def _accumulate_player_time( self, - time_per_player: dict[Player, dict[PossessionState, timedelta]], - players_start_end_times: dict[Player, tuple[Time, Time, bool]], + time_per_player: Dict[Player, Dict[PossessionState, timedelta]], + players_start_end_times: Dict[Player, Tuple[Time, Time, bool]], start_time: Time, end_time: Time, possession_state: PossessionState From de10677137f9137635b35566b41a0f4b456700e4 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Thu, 5 Jun 2025 14:49:03 +0200 Subject: [PATCH 03/10] format with black --- kloppy/domain/models/time.py | 6 +- .../services/aggregators/minutes_played.py | 186 ++++++++++++------ kloppy/tests/test_statsbomb.py | 4 +- kloppy/tests/test_time.py | 74 ++++--- 4 files 
changed, 185 insertions(+), 85 deletions(-) diff --git a/kloppy/domain/models/time.py b/kloppy/domain/models/time.py index cfea92365..d29539449 100644 --- a/kloppy/domain/models/time.py +++ b/kloppy/domain/models/time.py @@ -210,20 +210,22 @@ def __lt__(self, other): return self.period < other.period or ( self.period == other.period and self.timestamp < other.timestamp ) + def __le__(self, other): return self.period < other.period or ( - self.period == other.period and self.timestamp <= other.timestamp + self.period == other.period and self.timestamp <= other.timestamp ) + def __gt__(self, other): return self.period > other.period or ( self.period == other.period and self.timestamp > other.timestamp ) + def __ge__(self, other): return self.period > other.period or ( self.period == other.period and self.timestamp >= other.timestamp ) - def __str__(self): m, s = divmod(self.timestamp.total_seconds(), 60) return f"P{self.period.id}T{m:02.0f}:{s:02.0f}" diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 12d9b1686..51d8f8160 100644 --- a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -3,19 +3,37 @@ from typing import List, NamedTuple, Optional, Dict, Tuple from enum import Enum -from kloppy.domain import EventDataset, Player, Team, Time, PositionType, BallState, FoulCommittedEvent, \ - PassResult, SubstitutionEvent, CardEvent, PlayerOnEvent, PlayerOffEvent, Period, GenericEvent, ShotResult +from kloppy.domain import ( + EventDataset, + Player, + Team, + Time, + PositionType, + BallState, + FoulCommittedEvent, + PassResult, + SubstitutionEvent, + CardEvent, + PlayerOnEvent, + PlayerOffEvent, + Period, + GenericEvent, + ShotResult, +) from kloppy.domain.services.aggregators.aggregator import ( EventDatasetAggregator, ) + + class BreakdownKey(Enum): POSITION = "position" POSSESSION_STATE = "possession_state" + class 
PossessionState(Enum): - IN_POSSESSION = 'in-possession' - OUT_OF_POSSESSION = 'out-of-possession' - BALL_DEAD = 'ball-dead' + IN_POSSESSION = "in-possession" + OUT_OF_POSSESSION = "out-of-possession" + BALL_DEAD = "ball-dead" EVENTS_CAUSING_DEAD_BALL = ( @@ -32,6 +50,7 @@ class PossessionState(Enum): ShotResult.OWN_GOAL, ) + @dataclass(frozen=True) class MinutesPlayedKey: player: Optional[Player] = None @@ -40,10 +59,12 @@ class MinutesPlayedKey: possession_state: Optional[PossessionState] = None def __post_init__(self): - if (self.player is None and self.team is None) or (self.player is not None and self.team is not None): - raise ValueError("Either 'player' or 'team' must be provided, but not both.") - - + if (self.player is None and self.team is None) or ( + self.player is not None and self.team is not None + ): + raise ValueError( + "Either 'player' or 'team' must be provided, but not both." + ) class MinutesPlayed(NamedTuple): @@ -57,7 +78,9 @@ class MinutesPlayedAggregator(EventDatasetAggregator): def __init__(self, breakdown_key: Optional[BreakdownKey] = None): self.breakdown_key = breakdown_key - def get_possession_state(self, ball_state: BallState, ball_owning_team: Team, team: Team): + def get_possession_state( + self, ball_state: BallState, ball_owning_team: Team, team: Team + ): """Determine the possession state.""" if ball_state == BallState.DEAD or ball_owning_team is None: return PossessionState.BALL_DEAD @@ -67,7 +90,9 @@ def get_possession_state(self, ball_state: BallState, ball_owning_team: Team, te else PossessionState.OUT_OF_POSSESSION ) - def _flip_possession_state(self, state: PossessionState, flip: bool) -> PossessionState: + def _flip_possession_state( + self, state: PossessionState, flip: bool + ) -> PossessionState: if flip: if state == PossessionState.IN_POSSESSION: return PossessionState.OUT_OF_POSSESSION @@ -76,34 +101,40 @@ def _flip_possession_state(self, state: PossessionState, flip: bool) -> Possessi return state def 
_accumulate_player_time( - self, - time_per_player: Dict[Player, Dict[PossessionState, timedelta]], - players_start_end_times: Dict[Player, Tuple[Time, Time, bool]], - start_time: Time, - end_time: Time, - possession_state: PossessionState + self, + time_per_player: Dict[Player, Dict[PossessionState, timedelta]], + players_start_end_times: Dict[Player, Tuple[Time, Time, bool]], + start_time: Time, + end_time: Time, + possession_state: PossessionState, ): - for player, (start_player_time, end_player_time, _) in players_start_end_times.items(): + for player, ( + start_player_time, + end_player_time, + _, + ) in players_start_end_times.items(): if start_player_time <= end_time and end_player_time >= start_time: - duration = min(end_time, end_player_time) - max(start_time, start_player_time) + duration = min(end_time, end_player_time) - max( + start_time, start_player_time + ) time_per_player[player][possession_state] += duration - def aggregate( - self, dataset: EventDataset - ) -> List[MinutesPlayed]: + def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: items = [] if self.breakdown_key == BreakdownKey.POSITION: for team in dataset.metadata.teams: for player in team.players: for ( - start_timestamp, - end_time, - position, + start_timestamp, + end_time, + position, ) in player.positions.ranges(): items.append( MinutesPlayed( - key=MinutesPlayedKey(player=player, position=position), + key=MinutesPlayedKey( + player=player, position=position + ), start_time=start_timestamp, end_time=end_time, duration=end_time - start_timestamp, @@ -118,21 +149,26 @@ def aggregate( _start_time = None end_time = None for ( - start_timestamp, - end_time, - position, + start_timestamp, + end_time, + position, ) in player.positions.ranges(): if not _start_time: _start_time = start_timestamp if _start_time: - flip_possession_state = (team != first_team) - players_start_end_times[player] = (_start_time, end_time, flip_possession_state) + flip_possession_state = team != 
first_team + players_start_end_times[player] = ( + _start_time, + end_time, + flip_possession_state, + ) time_per_possession_state = { state: timedelta(0) for state in PossessionState } time_per_player = { - player: {state: timedelta(0) for state in PossessionState} for player in players_start_end_times.keys() + player: {state: timedelta(0) for state in PossessionState} + for player in players_start_end_times.keys() } start_time: Optional[Time] = dataset.metadata.periods[0].start_time ball_owning_team: Optional[Team] = None @@ -143,21 +179,30 @@ def aggregate( continue actual_event_ball_state = ( BallState.DEAD - if isinstance(event, EVENTS_CAUSING_DEAD_BALL) or - (event.result in RESULTS_CAUSING_DEAD_BALL) + if isinstance(event, EVENTS_CAUSING_DEAD_BALL) + or (event.result in RESULTS_CAUSING_DEAD_BALL) else event.ball_state ) if event.period != period: - possession_state = self.get_possession_state(ball_state, ball_owning_team, first_team) - end_time = Time(period=period, timestamp=(period.end_timestamp - period.start_timestamp)) - time_per_possession_state[possession_state] += end_time - start_time + possession_state = self.get_possession_state( + ball_state, ball_owning_team, first_team + ) + end_time = Time( + period=period, + timestamp=( + period.end_timestamp - period.start_timestamp + ), + ) + time_per_possession_state[possession_state] += ( + end_time - start_time + ) self._accumulate_player_time( time_per_player, players_start_end_times, start_time, end_time, - possession_state + possession_state, ) start_time = event.time @@ -165,41 +210,59 @@ def aggregate( ball_state = actual_event_ball_state ball_owning_team = event.ball_owning_team - if actual_event_ball_state != ball_state or event.ball_owning_team != ball_owning_team: + if ( + actual_event_ball_state != ball_state + or event.ball_owning_team != ball_owning_team + ): - possession_state = self.get_possession_state(ball_state, ball_owning_team, first_team) - time_per_possession_state[possession_state] 
+= event.time - start_time + possession_state = self.get_possession_state( + ball_state, ball_owning_team, first_team + ) + time_per_possession_state[possession_state] += ( + event.time - start_time + ) self._accumulate_player_time( time_per_player, players_start_end_times, start_time, event.time, - possession_state + possession_state, ) start_time = event.time ball_state = actual_event_ball_state ball_owning_team = event.ball_owning_team # Handle the last event in the period - possession_state = self.get_possession_state(ball_state, ball_owning_team, first_team) - end_time = Time(period=period, timestamp=(period.end_timestamp - period.start_timestamp)) - time_per_possession_state[possession_state] += end_time - start_time + possession_state = self.get_possession_state( + ball_state, ball_owning_team, first_team + ) + end_time = Time( + period=period, + timestamp=(period.end_timestamp - period.start_timestamp), + ) + time_per_possession_state[possession_state] += ( + end_time - start_time + ) self._accumulate_player_time( time_per_player, players_start_end_times, start_time, end_time, - possession_state + possession_state, ) for team in dataset.metadata.teams: - flip_possession = (team != first_team) + flip_possession = team != first_team for state, duration in time_per_possession_state.items(): - possession_state = self._flip_possession_state(state, flip_possession) + possession_state = self._flip_possession_state( + state, flip_possession + ) items.append( MinutesPlayed( - key=MinutesPlayedKey(team=team, possession_state=possession_state), + key=MinutesPlayedKey( + team=team, possession_state=possession_state + ), start_time=dataset.metadata.periods[0].start_time, end_time=dataset.metadata.periods[1].end_time, duration=duration, @@ -208,12 +271,21 @@ def aggregate( for player in team.players: if player in time_per_player: for state, duration in time_per_player[player].items(): - possession_state = self._flip_possession_state(state, flip_possession) + 
possession_state = self._flip_possession_state( + state, flip_possession + ) items.append( MinutesPlayed( - key=MinutesPlayedKey(player=player, possession_state=possession_state), - start_time=dataset.metadata.periods[0].start_time, - end_time=dataset.metadata.periods[1].end_time, + key=MinutesPlayedKey( + player=player, + possession_state=possession_state, + ), + start_time=dataset.metadata.periods[ + 0 + ].start_time, + end_time=dataset.metadata.periods[ + 1 + ].end_time, duration=duration, ) ) @@ -233,9 +305,9 @@ def aggregate( _start_time = None end_time = None for ( - start_timestamp, - end_time, - position, + start_timestamp, + end_time, + position, ) in player.positions.ranges(): if not _start_time: _start_time = start_timestamp @@ -250,4 +322,4 @@ def aggregate( ) ) - return items \ No newline at end of file + return items diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 4a4005edc..563ab2483 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -1227,7 +1227,9 @@ def test_player_position(self, base_dir): event_data=base_dir / "files/statsbomb_event.json", ) - for item in dataset.aggregate("minutes_played", breakdown_key = BreakdownKey.POSITION): + for item in dataset.aggregate( + "minutes_played", breakdown_key=BreakdownKey.POSITION + ): if item.key.player and item.key.position: print( f"{item.key.player} {item.key.player.player_id}- {item.start_time} - {item.end_time} - {item.duration} - {item.key.position}" diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index 94d09bfcb..d4c273093 100644 --- a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -5,7 +5,10 @@ from kloppy import statsbomb from kloppy.domain import Time, Period, TimeContainer -from kloppy.domain.services.aggregators.minutes_played import BreakdownKey, PossessionState +from kloppy.domain.services.aggregators.minutes_played import ( + BreakdownKey, + PossessionState, +) @pytest.fixture @@ -157,7 +160,9 
@@ def __period_offset(period_id, dataset): home_team, away_team = dataset.metadata.teams player_minutes_played_map = { - item.key.player: item.duration for item in minutes_played if item.key.player + item.key.player: item.duration + for item in minutes_played + if item.key.player } """ @@ -218,6 +223,7 @@ def __period_offset(period_id, dataset): ), 0.001, ) + def test_statsbomb_minutes_played_per_possession_state(self, base_dir): dataset = statsbomb.load( # 3788741 @@ -226,8 +232,9 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): event_data=base_dir / "files/statsbomb_event.json", ) - minutes_played_per_possession_state = dataset.aggregate("minutes_played", breakdown_key= BreakdownKey.POSSESSION_STATE) - + minutes_played_per_possession_state = dataset.aggregate( + "minutes_played", breakdown_key=BreakdownKey.POSSESSION_STATE + ) player_minutes_played_map = {} team_minutes_played_map = {} @@ -236,7 +243,9 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): team = item.key.team if team not in team_minutes_played_map: team_minutes_played_map[team] = {} - team_minutes_played_map[team][item.key.possession_state] = item.duration + team_minutes_played_map[team][ + item.key.possession_state + ] = item.duration else: player = item.key.player possession_state = item.key.possession_state @@ -245,18 +254,17 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): if player and possession_state: if player not in player_minutes_played_map: player_minutes_played_map[player] = {} - player_minutes_played_map[player][possession_state] = duration - - - + player_minutes_played_map[player][ + possession_state + ] = duration # Teams played entire match for item in team_minutes_played_map.values(): total_duration = sum(item.values(), timedelta()) assert total_duration == ( - dataset.metadata.periods[0].duration - + dataset.metadata.periods[1].duration - ) + dataset.metadata.periods[0].duration + + 
dataset.metadata.periods[1].duration + ) home_team, away_team = dataset.metadata.teams @@ -276,10 +284,15 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): assert sum(playtime_coutinho.values(), timedelta()) == timedelta( seconds=2852.053 ) - assert playtime_coutinho[PossessionState.IN_POSSESSION] == timedelta(seconds=1505.312) - assert playtime_coutinho[PossessionState.OUT_OF_POSSESSION] == timedelta(seconds=296.958) - assert playtime_coutinho[PossessionState.BALL_DEAD] == timedelta(seconds=1049.783) - + assert playtime_coutinho[PossessionState.IN_POSSESSION] == timedelta( + seconds=1505.312 + ) + assert playtime_coutinho[ + PossessionState.OUT_OF_POSSESSION + ] == timedelta(seconds=296.958) + assert playtime_coutinho[PossessionState.BALL_DEAD] == timedelta( + seconds=1049.783 + ) # Replaced in second half player_busquets = home_team.get_player_by_id(5203) @@ -287,21 +300,32 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): assert sum(playtime_busquets.values(), timedelta()) == timedelta( seconds=5052.343 ) - assert playtime_busquets[PossessionState.IN_POSSESSION] == timedelta(seconds=2917.724) - assert playtime_busquets[PossessionState.OUT_OF_POSSESSION] == timedelta(seconds=652.343) - assert playtime_busquets[PossessionState.BALL_DEAD] == timedelta(seconds=1482.276) + assert playtime_busquets[PossessionState.IN_POSSESSION] == timedelta( + seconds=2917.724 + ) + assert playtime_busquets[ + PossessionState.OUT_OF_POSSESSION + ] == timedelta(seconds=652.343) + assert playtime_busquets[PossessionState.BALL_DEAD] == timedelta( + seconds=1482.276 + ) # Played entire match player_ramos = home_team.get_player_by_id(5211) playtime_ramos = player_minutes_played_map[player_ramos] assert sum(playtime_ramos.values(), timedelta()) == ( - dataset.metadata.periods[0].duration - + dataset.metadata.periods[1].duration + dataset.metadata.periods[0].duration + + dataset.metadata.periods[1].duration + ) + assert 
playtime_ramos[PossessionState.IN_POSSESSION] == timedelta( + seconds=3020.965 + ) + assert playtime_ramos[PossessionState.OUT_OF_POSSESSION] == timedelta( + seconds=716.177 + ) + assert playtime_ramos[PossessionState.BALL_DEAD] == timedelta( + seconds=1820.178 ) - assert playtime_ramos[PossessionState.IN_POSSESSION] == timedelta(seconds=3020.965) - assert playtime_ramos[PossessionState.OUT_OF_POSSESSION] == timedelta(seconds=716.177) - assert playtime_ramos[PossessionState.BALL_DEAD] == timedelta(seconds=1820.178) - class TestAbsTimeContainer: From e32cd1122ba829b4d132792973fbad183747feaf Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Tue, 10 Jun 2025 11:15:41 +0200 Subject: [PATCH 04/10] add more items in 'X_CAUSING_DEAD_BALL' arrays --- kloppy/domain/models/event.py | 12 +++--- .../services/aggregators/minutes_played.py | 41 ++++++++++++++++-- kloppy/tests/test_time.py | 43 +++++++------------ 3 files changed, 59 insertions(+), 37 deletions(-) diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index aea3ea8d6..69566599a 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -978,13 +978,13 @@ class PassEvent( event_type (EventType): `EventType.PASS` event_name (str): `"pass"` result (PassResult): The pass's outcome. - receive_timestamp (Time): The time the pass was received. + receive_timestamp (timedelta): The time the pass was received. receiver_coordinates (Point): The coordinates where the pass was received. receiver_player (Player): The intended receiver of the pass. qualifiers: A list of qualifiers providing additional information about the pass. """ - receive_timestamp: Optional[Time] = None + receive_timestamp: Optional[timedelta] = None receiver_player: Optional[Player] = None receiver_coordinates: Optional[Point] = None @@ -1036,13 +1036,13 @@ class CarryEvent( Attributes: event_type (EventType): `EventType.CARRY` event_name (str): `"carry"` - end_timestamp (Time): Duration of the carry. 
+ end_timestamp (timedelta): Duration of the carry. end_coordinates (Point): Coordinate on the pitch where the carry ended. result (CarryResult): The outcome of the carry. qualifiers: A list of qualifiers providing additional information about the carry. """ - end_timestamp: Time + end_timestamp: timedelta end_coordinates: Point @property @@ -1387,11 +1387,11 @@ class PressureEvent( Attributes: event_type (EventType): `EventType.Pressure` event_name (str): `"pressure"` - end_timestamp (Time): When the pressing ended. + end_timestamp (timedelta): When the pressing ended. qualifiers: A list of qualifiers providing additional information about the pressure event. """ - end_timestamp: Time + end_timestamp: timedelta @property def event_type(self) -> EventType: diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 51d8f8160..05ae793c7 100644 --- a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -19,6 +19,9 @@ Period, GenericEvent, ShotResult, + InterceptionResult, + TakeOnResult, + BallOutEvent, ) from kloppy.domain.services.aggregators.aggregator import ( EventDatasetAggregator, @@ -42,12 +45,17 @@ class PossessionState(Enum): CardEvent, PlayerOnEvent, PlayerOffEvent, + BallOutEvent, ) RESULTS_CAUSING_DEAD_BALL = ( PassResult.OFFSIDE, ShotResult.GOAL, ShotResult.OWN_GOAL, + ShotResult.OFF_TARGET, + PassResult.OUT, + InterceptionResult.OUT, + TakeOnResult.OUT, ) @@ -174,15 +182,38 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: ball_owning_team: Optional[Team] = None ball_state: Optional[BallState] = None period: Optional[Period] = dataset.metadata.periods[0] + for event in dataset.events: if isinstance(event, GenericEvent): continue + if event.time < start_time: + continue actual_event_ball_state = ( BallState.DEAD if isinstance(event, EVENTS_CAUSING_DEAD_BALL) or (event.result in RESULTS_CAUSING_DEAD_BALL) 
else event.ball_state ) + + if ( + actual_event_ball_state == BallState.DEAD + and event.result in RESULTS_CAUSING_DEAD_BALL + ): + + _actual_event_timestamp = ( + event.receive_timestamp + if hasattr(event, "receive_timestamp") + and event.receive_timestamp is not None + else event.end_timestamp + if hasattr(event, "end_timestamp") + and event.end_timestamp is not None + else event.timestamp + ) + actual_event_time = Time( + event.period, _actual_event_timestamp + ) + else: + actual_event_time = event.time if event.period != period: possession_state = self.get_possession_state( @@ -205,7 +236,7 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: possession_state, ) - start_time = event.time + start_time = actual_event_time period = event.period ball_state = actual_event_ball_state ball_owning_team = event.ball_owning_team @@ -219,19 +250,20 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: ball_state, ball_owning_team, first_team ) time_per_possession_state[possession_state] += ( - event.time - start_time + actual_event_time - start_time ) self._accumulate_player_time( time_per_player, players_start_end_times, start_time, - event.time, + actual_event_time, possession_state, ) - start_time = event.time + start_time = actual_event_time ball_state = actual_event_ball_state ball_owning_team = event.ball_owning_team + # Handle the last event in the period possession_state = self.get_possession_state( ball_state, ball_owning_team, first_team @@ -243,6 +275,7 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: time_per_possession_state[possession_state] += ( end_time - start_time ) + self._accumulate_player_time( time_per_player, players_start_end_times, diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index d4c273093..85812be24 100644 --- a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -284,15 +284,11 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): assert 
sum(playtime_coutinho.values(), timedelta()) == timedelta( seconds=2852.053 ) - assert playtime_coutinho[PossessionState.IN_POSSESSION] == timedelta( - seconds=1505.312 - ) - assert playtime_coutinho[ - PossessionState.OUT_OF_POSSESSION - ] == timedelta(seconds=296.958) - assert playtime_coutinho[PossessionState.BALL_DEAD] == timedelta( - seconds=1049.783 - ) + assert playtime_coutinho == { + PossessionState.IN_POSSESSION: timedelta(seconds=1617.151), + PossessionState.OUT_OF_POSSESSION: timedelta(seconds=290.732), + PossessionState.BALL_DEAD: timedelta(seconds=944.17), + } # Replaced in second half player_busquets = home_team.get_player_by_id(5203) @@ -300,15 +296,12 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): assert sum(playtime_busquets.values(), timedelta()) == timedelta( seconds=5052.343 ) - assert playtime_busquets[PossessionState.IN_POSSESSION] == timedelta( - seconds=2917.724 - ) - assert playtime_busquets[ - PossessionState.OUT_OF_POSSESSION - ] == timedelta(seconds=652.343) - assert playtime_busquets[PossessionState.BALL_DEAD] == timedelta( - seconds=1482.276 - ) + + assert playtime_busquets == { + PossessionState.IN_POSSESSION: timedelta(seconds=2859.285), + PossessionState.OUT_OF_POSSESSION: timedelta(seconds=650.604), + PossessionState.BALL_DEAD: timedelta(seconds=1542.454), + } # Played entire match player_ramos = home_team.get_player_by_id(5211) @@ -317,15 +310,11 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): dataset.metadata.periods[0].duration + dataset.metadata.periods[1].duration ) - assert playtime_ramos[PossessionState.IN_POSSESSION] == timedelta( - seconds=3020.965 - ) - assert playtime_ramos[PossessionState.OUT_OF_POSSESSION] == timedelta( - seconds=716.177 - ) - assert playtime_ramos[PossessionState.BALL_DEAD] == timedelta( - seconds=1820.178 - ) + assert playtime_busquets == { + PossessionState.IN_POSSESSION: timedelta(seconds=2859.285), + PossessionState.OUT_OF_POSSESSION: 
timedelta(seconds=650.604), + PossessionState.BALL_DEAD: timedelta(seconds=1542.454), + } class TestAbsTimeContainer: From bb80b002af4e3b3a634b57f8eee418659affbbc4 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Tue, 10 Jun 2025 13:51:17 +0200 Subject: [PATCH 05/10] handle set pieces after mistagged event --- .../services/aggregators/minutes_played.py | 103 +++++++++++++----- kloppy/tests/test_time.py | 20 ++-- 2 files changed, 84 insertions(+), 39 deletions(-) diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 05ae793c7..5fb8496b2 100644 --- a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -22,6 +22,7 @@ InterceptionResult, TakeOnResult, BallOutEvent, + SetPieceQualifier, ) from kloppy.domain.services.aggregators.aggregator import ( EventDatasetAggregator, @@ -86,10 +87,10 @@ class MinutesPlayedAggregator(EventDatasetAggregator): def __init__(self, breakdown_key: Optional[BreakdownKey] = None): self.breakdown_key = breakdown_key + @staticmethod def get_possession_state( - self, ball_state: BallState, ball_owning_team: Team, team: Team + ball_state: BallState, ball_owning_team: Team, team: Team ): - """Determine the possession state.""" if ball_state == BallState.DEAD or ball_owning_team is None: return PossessionState.BALL_DEAD return ( @@ -98,8 +99,9 @@ def get_possession_state( else PossessionState.OUT_OF_POSSESSION ) + @staticmethod def _flip_possession_state( - self, state: PossessionState, flip: bool + state: PossessionState, flip: bool ) -> PossessionState: if flip: if state == PossessionState.IN_POSSESSION: @@ -108,8 +110,31 @@ def _flip_possession_state( return PossessionState.IN_POSSESSION return state + @staticmethod + def _handle_possession_state_end( + time_per_possession_state: Dict[PossessionState, timedelta], + time_per_player: Dict[Player, Dict[PossessionState, timedelta]], + players_start_end_times: 
Dict[Player, Tuple[Time, Time, bool]], + start_time: Time, + end_time: Time, + ball_state: BallState, + ball_owning_team: Team, + first_team: Team, + ): + possession_state = MinutesPlayedAggregator.get_possession_state( + ball_state, ball_owning_team, first_team + ) + time_per_possession_state[possession_state] += end_time - start_time + MinutesPlayedAggregator._accumulate_player_time( + time_per_player, + players_start_end_times, + start_time, + end_time, + possession_state, + ) + + @staticmethod def _accumulate_player_time( - self, time_per_player: Dict[Player, Dict[PossessionState, timedelta]], players_start_end_times: Dict[Player, Tuple[Time, Time, bool]], start_time: Time, @@ -128,6 +153,7 @@ def _accumulate_player_time( time_per_player[player][possession_state] += duration def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: + items = [] if self.breakdown_key == BreakdownKey.POSITION: @@ -188,6 +214,35 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: continue if event.time < start_time: continue + + if ( + any( + isinstance(q, SetPieceQualifier) + for q in event.qualifiers or [] + ) + and ball_state != BallState.DEAD + ): + # Ball state should be dead, so we mistagged a prior event (for example a clearance that went out of play) + previous_event = event.prev( + lambda x: not isinstance(x, GenericEvent) + ) + if previous_event: + self._handle_possession_state_end( + time_per_possession_state, + time_per_player, + players_start_end_times, + start_time, + previous_event.time, + ball_state, + ball_owning_team, + first_team, + ) + start_time = previous_event.time + ball_state = ( + BallState.DEAD + ) # set current ball state to dead + ball_owning_team = event.ball_owning_team + actual_event_ball_state = ( BallState.DEAD if isinstance(event, EVENTS_CAUSING_DEAD_BALL) @@ -216,24 +271,21 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: actual_event_time = event.time if event.period != period: - possession_state = 
self.get_possession_state( - ball_state, ball_owning_team, first_team - ) end_time = Time( period=period, timestamp=( period.end_timestamp - period.start_timestamp ), ) - time_per_possession_state[possession_state] += ( - end_time - start_time - ) - self._accumulate_player_time( + self._handle_possession_state_end( + time_per_possession_state, time_per_player, players_start_end_times, start_time, end_time, - possession_state, + ball_state, + ball_owning_team, + first_team, ) start_time = actual_event_time @@ -246,18 +298,15 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: or event.ball_owning_team != ball_owning_team ): - possession_state = self.get_possession_state( - ball_state, ball_owning_team, first_team - ) - time_per_possession_state[possession_state] += ( - actual_event_time - start_time - ) - self._accumulate_player_time( + self._handle_possession_state_end( + time_per_possession_state, time_per_player, players_start_end_times, start_time, actual_event_time, - possession_state, + ball_state, + ball_owning_team, + first_team, ) start_time = actual_event_time @@ -265,23 +314,19 @@ def aggregate(self, dataset: EventDataset) -> List[MinutesPlayed]: ball_owning_team = event.ball_owning_team # Handle the last event in the period - possession_state = self.get_possession_state( - ball_state, ball_owning_team, first_team - ) end_time = Time( period=period, timestamp=(period.end_timestamp - period.start_timestamp), ) - time_per_possession_state[possession_state] += ( - end_time - start_time - ) - - self._accumulate_player_time( + self._handle_possession_state_end( + time_per_possession_state, time_per_player, players_start_end_times, start_time, end_time, - possession_state, + ball_state, + ball_owning_team, + first_team, ) for team in dataset.metadata.teams: diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index 85812be24..7aedb9d5e 100644 --- a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -285,9 +285,9 @@ def 
test_statsbomb_minutes_played_per_possession_state(self, base_dir): seconds=2852.053 ) assert playtime_coutinho == { - PossessionState.IN_POSSESSION: timedelta(seconds=1617.151), - PossessionState.OUT_OF_POSSESSION: timedelta(seconds=290.732), - PossessionState.BALL_DEAD: timedelta(seconds=944.17), + PossessionState.IN_POSSESSION: timedelta(seconds=1559.075), + PossessionState.OUT_OF_POSSESSION: timedelta(seconds=287.916), + PossessionState.BALL_DEAD: timedelta(seconds=1005.062), } # Replaced in second half @@ -298,9 +298,9 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): ) assert playtime_busquets == { - PossessionState.IN_POSSESSION: timedelta(seconds=2859.285), - PossessionState.OUT_OF_POSSESSION: timedelta(seconds=650.604), - PossessionState.BALL_DEAD: timedelta(seconds=1542.454), + PossessionState.IN_POSSESSION: timedelta(seconds=2751.232000), + PossessionState.OUT_OF_POSSESSION: timedelta(seconds=611.662000), + PossessionState.BALL_DEAD: timedelta(seconds=1689.449000), } # Played entire match @@ -310,10 +310,10 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): dataset.metadata.periods[0].duration + dataset.metadata.periods[1].duration ) - assert playtime_busquets == { - PossessionState.IN_POSSESSION: timedelta(seconds=2859.285), - PossessionState.OUT_OF_POSSESSION: timedelta(seconds=650.604), - PossessionState.BALL_DEAD: timedelta(seconds=1542.454), + assert playtime_ramos == { + PossessionState.IN_POSSESSION: timedelta(seconds=3016.821000), + PossessionState.OUT_OF_POSSESSION: timedelta(seconds=677.191000), + PossessionState.BALL_DEAD: timedelta(seconds=1863.308000), } From 35de8c0a9e1f226b68a37c843aa870adc5c50671 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Tue, 10 Jun 2025 13:52:24 +0200 Subject: [PATCH 06/10] add extra test --- kloppy/tests/test_time.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index 7aedb9d5e..e9d8e5813 100644 --- 
a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -315,6 +315,7 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): PossessionState.OUT_OF_POSSESSION: timedelta(seconds=677.191000), PossessionState.BALL_DEAD: timedelta(seconds=1863.308000), } + assert playtime_ramos == team_minutes_played_map[home_team] class TestAbsTimeContainer: From 8e8c5a29a34922d42b8e51c3791ea67d62db5915 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Mon, 16 Jun 2025 13:32:51 +0200 Subject: [PATCH 07/10] add extra sanity check in tests --- kloppy/tests/test_time.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index e9d8e5813..2405111d2 100644 --- a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -266,6 +266,12 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): + dataset.metadata.periods[1].duration ) + teams = list(team_minutes_played_map.keys()) + assert team_minutes_played_map[teams[0]][PossessionState.IN_POSSESSION] == team_minutes_played_map[teams[1]][PossessionState.OUT_OF_POSSESSION] + assert team_minutes_played_map[teams[0]][PossessionState.OUT_OF_POSSESSION] == team_minutes_played_map[teams[1]][PossessionState.IN_POSSESSION] + assert team_minutes_played_map[teams[0]][PossessionState.BALL_DEAD] == team_minutes_played_map[teams[1]][PossessionState.BALL_DEAD] + + home_team, away_team = dataset.metadata.teams """ From 63fcaf7d6f511e4e3c455dd7bc5815775534cf88 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Mon, 16 Jun 2025 13:50:21 +0200 Subject: [PATCH 08/10] allow breakdown key to be string representation --- kloppy/domain/services/aggregators/minutes_played.py | 12 +++++++++--- kloppy/tests/test_statsbomb.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 5fb8496b2..8f848845e 100644 --- 
a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from datetime import timedelta -from typing import List, NamedTuple, Optional, Dict, Tuple +from typing import List, NamedTuple, Optional, Dict, Tuple, Union from enum import Enum from kloppy.domain import ( @@ -84,8 +84,14 @@ class MinutesPlayed(NamedTuple): class MinutesPlayedAggregator(EventDatasetAggregator): - def __init__(self, breakdown_key: Optional[BreakdownKey] = None): - self.breakdown_key = breakdown_key + def __init__(self, breakdown_key: Optional[Union[BreakdownKey, str]] = None): + if isinstance(breakdown_key, str): + try: + breakdown_key = BreakdownKey(breakdown_key) + except ValueError: + raise ValueError( + f"BreakdownKey {breakdown_key} not found. Known keys: {', '.join(key.value for key in BreakdownKey)}") + self.breakdown_key = breakdown_key @staticmethod def get_possession_state( diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 563ab2483..e7b0774d6 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -1228,7 +1228,7 @@ def test_player_position(self, base_dir): ) for item in dataset.aggregate( - "minutes_played", breakdown_key=BreakdownKey.POSITION + "minutes_played", breakdown_key="position" ): if item.key.player and item.key.position: print( From 98f0fa4af33f52f3f728bc7d1d5e434f5e052c8e Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Mon, 16 Jun 2025 13:57:18 +0200 Subject: [PATCH 09/10] ensure breakdown_key is set in MinutesPlayedAggregator --- kloppy/domain/services/aggregators/minutes_played.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 8f848845e..830399bb4 100644 --- a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ 
-91,7 +91,7 @@ def __init__(self, breakdown_key: Optional[Union[BreakdownKey, str]] = None): except ValueError: raise ValueError( f"BreakdownKey {breakdown_key} not found. Known keys: {', '.join(key.value for key in BreakdownKey)}") - self.breakdown_key = breakdown_key + self.breakdown_key = breakdown_key @staticmethod def get_possession_state( From 6dd018105938f9de169ac3ee6750804652637c5f Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Mon, 16 Jun 2025 14:02:00 +0200 Subject: [PATCH 10/10] black formatting --- .../services/aggregators/minutes_played.py | 7 +++++-- kloppy/tests/test_time.py | 20 +++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py index 830399bb4..8a72b7fdf 100644 --- a/kloppy/domain/services/aggregators/minutes_played.py +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -84,13 +84,16 @@ class MinutesPlayed(NamedTuple): class MinutesPlayedAggregator(EventDatasetAggregator): - def __init__(self, breakdown_key: Optional[Union[BreakdownKey, str]] = None): + def __init__( + self, breakdown_key: Optional[Union[BreakdownKey, str]] = None + ): if isinstance(breakdown_key, str): try: breakdown_key = BreakdownKey(breakdown_key) except ValueError: raise ValueError( - f"BreakdownKey {breakdown_key} not found. Known keys: {', '.join(key.value for key in BreakdownKey)}") + f"BreakdownKey {breakdown_key} not found. 
Known keys: {', '.join(key.value for key in BreakdownKey)}" + ) self.breakdown_key = breakdown_key @staticmethod diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index 2405111d2..39740d7ae 100644 --- a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -267,10 +267,22 @@ def test_statsbomb_minutes_played_per_possession_state(self, base_dir): ) teams = list(team_minutes_played_map.keys()) - assert team_minutes_played_map[teams[0]][PossessionState.IN_POSSESSION] == team_minutes_played_map[teams[1]][PossessionState.OUT_OF_POSSESSION] - assert team_minutes_played_map[teams[0]][PossessionState.OUT_OF_POSSESSION] == team_minutes_played_map[teams[1]][PossessionState.IN_POSSESSION] - assert team_minutes_played_map[teams[0]][PossessionState.BALL_DEAD] == team_minutes_played_map[teams[1]][PossessionState.BALL_DEAD] - + assert ( + team_minutes_played_map[teams[0]][PossessionState.IN_POSSESSION] + == team_minutes_played_map[teams[1]][ + PossessionState.OUT_OF_POSSESSION + ] + ) + assert ( + team_minutes_played_map[teams[0]][ + PossessionState.OUT_OF_POSSESSION + ] + == team_minutes_played_map[teams[1]][PossessionState.IN_POSSESSION] + ) + assert ( + team_minutes_played_map[teams[0]][PossessionState.BALL_DEAD] + == team_minutes_played_map[teams[1]][PossessionState.BALL_DEAD] + ) home_team, away_team = dataset.metadata.teams