From 05ced729632ce16b8157baa4ffdb08624bb93d43 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Tue, 24 Jun 2025 16:50:42 +0200 Subject: [PATCH 1/7] more events can start sequence --- .../state_builder/builders/sequence.py | 46 +++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/kloppy/domain/services/state_builder/builders/sequence.py b/kloppy/domain/services/state_builder/builders/sequence.py index e851f4309..bd55789a8 100644 --- a/kloppy/domain/services/state_builder/builders/sequence.py +++ b/kloppy/domain/services/state_builder/builders/sequence.py @@ -1,4 +1,5 @@ from dataclasses import replace, dataclass +from typing import Optional from kloppy.domain import ( Event, @@ -11,12 +12,15 @@ FoulCommittedEvent, ShotEvent, SetPieceQualifier, + GoalkeeperEvent, + GoalkeeperActionType, + TakeOnEvent, + InterceptionEvent, + InterceptionResult, + GoalkeeperQualifier, ) from ..builder import StateBuilder -OPEN_SEQUENCE = (PassEvent, CarryEvent, RecoveryEvent) -CLOSE_SEQUENCE = (BallOutEvent, FoulCommittedEvent, ShotEvent) - @dataclass class Sequence: @@ -24,18 +28,44 @@ class Sequence: team: Team +CLOSE_SEQUENCE = (BallOutEvent, FoulCommittedEvent, ShotEvent) + + +def should_open_sequence( + event: Event, state: Optional[Sequence] = None +) -> bool: + open_sequence = False + if isinstance(event, (PassEvent, CarryEvent, RecoveryEvent, TakeOnEvent)): + open_sequence = True + if isinstance(event, GoalkeeperEvent): + open_sequence = event.get_qualifier_value(GoalkeeperQualifier) in [ + GoalkeeperActionType.PICK_UP, + GoalkeeperActionType.CLAIM, + ] + if isinstance(event, InterceptionEvent): + open_sequence = event.result == InterceptionResult.SUCCESS + return open_sequence and ( + state is None + or state.team != event.team + or event.get_qualifier_value(SetPieceQualifier) + ) + + +def should_close_sequence(event: Event) -> bool: + if isinstance(event, CLOSE_SEQUENCE): + return True + return False + + class SequenceStateBuilder(StateBuilder): def initial_state(self, dataset: EventDataset) -> Sequence: for event in dataset.events: - if isinstance(event, OPEN_SEQUENCE): + if should_open_sequence(event): return Sequence(sequence_id=0, team=event.team) return Sequence(sequence_id=0, team=None) def reduce_before(self, state: Sequence, event: Event) -> Sequence: - if isinstance(event, OPEN_SEQUENCE) and ( - state.team != event.team - or event.get_qualifier_value(SetPieceQualifier) - ): + if should_open_sequence(event, state): state = replace( state, sequence_id=state.sequence_id + 1, team=event.team ) From 00187cdd3e3ef6b5c9ddc008a4e2bb54f9fc54c9 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Wed, 25 Jun 2025 13:01:59 +0200 Subject: [PATCH 2/7] handle duels at end of sequence --- .../state_builder/builders/sequence.py | 48 +++++++++++++------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/kloppy/domain/services/state_builder/builders/sequence.py b/kloppy/domain/services/state_builder/builders/sequence.py index bd55789a8..66a67b715 100644 --- a/kloppy/domain/services/state_builder/builders/sequence.py +++ b/kloppy/domain/services/state_builder/builders/sequence.py @@ -18,6 +18,8 @@ InterceptionEvent, InterceptionResult, GoalkeeperQualifier, + DuelEvent, + DuelResult, ) from ..builder import StateBuilder @@ -31,20 +33,38 @@ class Sequence: CLOSE_SEQUENCE = (BallOutEvent, FoulCommittedEvent, ShotEvent) +def is_possessing_event(event: Event) -> bool: + if isinstance(event, (PassEvent, CarryEvent, RecoveryEvent, TakeOnEvent)): + return True + elif isinstance(event, GoalkeeperEvent) and event.get_qualifier_value( + GoalkeeperQualifier + ) in [ + GoalkeeperActionType.PICK_UP, + GoalkeeperActionType.CLAIM, + ]: + return True + elif ( + isinstance(event, InterceptionEvent) + and event.result == InterceptionResult.SUCCESS + ): + return True + else: + return False + + def should_open_sequence( - event: Event, state: Optional[Sequence] = None + event: Event, next_event: Event, state: Optional[Sequence] = None ) -> bool: - open_sequence = False - if isinstance(event, (PassEvent, CarryEvent, RecoveryEvent, TakeOnEvent)): - open_sequence = True - if isinstance(event, GoalkeeperEvent): - open_sequence = event.get_qualifier_value(GoalkeeperQualifier) in [ - GoalkeeperActionType.PICK_UP, - GoalkeeperActionType.CLAIM, - ] - if isinstance(event, InterceptionEvent): - open_sequence = event.result == InterceptionResult.SUCCESS - return open_sequence and ( + can_open_sequence = False + if is_possessing_event(event): + can_open_sequence = True + elif ( + isinstance(event, DuelEvent) + and event.result == DuelResult.WON + and is_possessing_event(next_event) + ): + can_open_sequence = True + return can_open_sequence and ( state is None or state.team != event.team or event.get_qualifier_value(SetPieceQualifier) @@ -60,12 +80,12 @@ def should_close_sequence(event: Event) -> bool: class SequenceStateBuilder(StateBuilder): def initial_state(self, dataset: EventDataset) -> Sequence: for event in dataset.events: - if should_open_sequence(event): + if should_open_sequence(event, event.next_record): return Sequence(sequence_id=0, team=event.team) return Sequence(sequence_id=0, team=None) def reduce_before(self, state: Sequence, event: Event) -> Sequence: - if should_open_sequence(event, state): + if should_open_sequence(event, event.next_record, state): state = replace( state, sequence_id=state.sequence_id + 1, team=event.team ) From a56e0766d8870bc2958d93521cb4685fb130d618 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Wed, 25 Jun 2025 15:27:12 +0200 Subject: [PATCH 3/7] exclude off ball events from sequences --- .../domain/services/state_builder/__init__.py | 3 + .../domain/services/state_builder/builder.py | 3 + .../state_builder/builders/sequence.py | 46 ++++++++++++++- kloppy/tests/test_state_builder.py | 57 +++++++++++++++---- 4 files changed, 94 insertions(+), 15 deletions(-) diff --git a/kloppy/domain/services/state_builder/__init__.py b/kloppy/domain/services/state_builder/__init__.py index d8073525a..34f27982c 100644 --- a/kloppy/domain/services/state_builder/__init__.py +++ b/kloppy/domain/services/state_builder/__init__.py @@ -48,4 +48,7 @@ def add_state(dataset: EventDataset, *builder_keys: List[str]) -> EventDataset: for builder_key, builder in builders.items() } + for builder_key, builder in builders.items(): + builder.post_process(events) + return replace(dataset, records=events) diff --git a/kloppy/domain/services/state_builder/builder.py b/kloppy/domain/services/state_builder/builder.py index dccdaec72..bc0e366ae 100644 --- a/kloppy/domain/services/state_builder/builder.py +++ b/kloppy/domain/services/state_builder/builder.py @@ -19,3 +19,6 @@ def reduce_before(self, state: T, event: Event) -> T: @abstractmethod def reduce_after(self, state: T, event: Event) -> T: pass + + def post_process(self, events: list[Event]) -> list[Event]: + pass diff --git a/kloppy/domain/services/state_builder/builders/sequence.py b/kloppy/domain/services/state_builder/builders/sequence.py index 66a67b715..6c2053130 100644 --- a/kloppy/domain/services/state_builder/builders/sequence.py +++ b/kloppy/domain/services/state_builder/builders/sequence.py @@ -1,5 +1,5 @@ from dataclasses import replace, dataclass -from typing import Optional +from typing import Optional, List from kloppy.domain import ( Event, @@ -20,16 +20,31 @@ GoalkeeperQualifier, DuelEvent, DuelResult, + GenericEvent, + PlayerOnEvent, + CardEvent, + SubstitutionEvent, + PlayerOffEvent, + FormationChangeEvent, ) from ..builder import StateBuilder @dataclass class Sequence: - sequence_id: int - team: Team + sequence_id: Optional[int] + team: Optional[Team] +EXCLUDED_OFF_BALL_EVENTS = ( + GenericEvent, + SubstitutionEvent, + CardEvent, + PlayerOnEvent, + PlayerOffEvent, + FormationChangeEvent, +) + CLOSE_SEQUENCE = (BallOutEvent, FoulCommittedEvent, ShotEvent) @@ -99,3 +114,28 @@ def reduce_after(self, state: Sequence, event: Event) -> Sequence: ) return state + + def post_process(self, events: List[Event]): + current_sequence_id = 1 + sequence_id_mapping = {} + + for event in events: + sequence = event.state["sequence"] + + if ( + isinstance(event, EXCLUDED_OFF_BALL_EVENTS) + or sequence.team is None + ): + event.state["sequence"] = Sequence(sequence_id=None, team=None) + else: + # Map old sequence IDs to new consecutive IDs + # Get or assign a new sequence ID + new_sequence_id = sequence_id_mapping.setdefault( + sequence.sequence_id, current_sequence_id + ) + if new_sequence_id == current_sequence_id: + current_sequence_id += 1 + # Assign the new sequence ID + event.state["sequence"] = Sequence( + sequence_id=new_sequence_id, team=sequence.team + ) diff --git a/kloppy/tests/test_state_builder.py b/kloppy/tests/test_state_builder.py index 10326d534..989a5318b 100644 --- a/kloppy/tests/test_state_builder.py +++ b/kloppy/tests/test_state_builder.py @@ -1,6 +1,8 @@ +import json +from collections import defaultdict from itertools import groupby -from kloppy import statsbomb +from kloppy import statsbomb, statsperform from kloppy.domain import Event, EventDataset, EventType, FormationType from kloppy.domain.services.state_builder.builder import StateBuilder from kloppy.utils import performance_logging @@ -9,14 +11,22 @@ class TestStateBuilder: """""" - def _load_dataset(self, base_dir, base_filename="statsbomb"): + def _load_dataset_statsbomb(self, base_dir, base_filename="statsbomb"): return statsbomb.load( event_data=base_dir / f"files/{base_filename}_event.json", lineup_data=base_dir / f"files/{base_filename}_lineup.json", ) + def _load_dataset_statsperform( + self, base_dir, base_filename="statsperform" + ): + return statsperform.load_event( + ma1_data=base_dir / f"files/{base_filename}_event_ma1.json", + ma3_data=base_dir / f"files/{base_filename}_event_ma3.json", + ) + def test_score_state_builder(self, base_dir): - dataset = self._load_dataset(base_dir) + dataset = self._load_dataset_statsbomb(base_dir) with performance_logging("add_state"): dataset_with_state = dataset.add_state("score") @@ -36,25 +46,44 @@ def test_score_state_builder(self, base_dir): "3-1": 2, } - def test_sequence_state_builder(self, base_dir): - dataset = self._load_dataset(base_dir) + def test_sequence_state_builder_statsbomb(self, base_dir): + dataset = self._load_dataset_statsbomb(base_dir) + + with performance_logging("add_state"): + dataset_with_state = dataset.add_state("sequence") + + events_per_sequence = defaultdict(int) + for sequence_id, events in groupby( + dataset_with_state.events, + lambda event: event.state["sequence"].sequence_id, + ): + events = list(events) + events_per_sequence[sequence_id] += len(events) + + assert events_per_sequence[1] == 3 + assert events_per_sequence[72] == 11 + + def test_sequence_state_builder_statsperform(self, base_dir): + dataset = self._load_dataset_statsperform(base_dir) with performance_logging("add_state"): dataset_with_state = dataset.add_state("sequence") - events_per_sequence = {} + events_per_sequence = defaultdict(int) for sequence_id, events in groupby( dataset_with_state.events, lambda event: event.state["sequence"].sequence_id, ): events = list(events) - events_per_sequence[sequence_id] = len(events) + events_per_sequence[sequence_id] += len(events) - assert events_per_sequence[0] == 4 - assert events_per_sequence[51] == 10 + assert events_per_sequence[1] == 5 + assert events_per_sequence[89] == 12 def test_lineup_state_builder(self, base_dir): - dataset = self._load_dataset(base_dir, base_filename="statsbomb_15986") + dataset = self._load_dataset_statsbomb( + base_dir, base_filename="statsbomb_15986" + ) with performance_logging("add_state"): dataset_with_state = dataset.add_state("lineup") @@ -79,7 +108,9 @@ def test_lineup_state_builder(self, base_dir): ] def test_formation_state_builder(self, base_dir): - dataset = self._load_dataset(base_dir, base_filename="statsbomb") + dataset = self._load_dataset_statsbomb( + base_dir, base_filename="statsbomb" + ) with performance_logging("add_state"): dataset_with_state = dataset.add_state("formation") @@ -114,7 +145,9 @@ def reduce_before(self, state: int, event: Event) -> int: def reduce_after(self, state: int, event: Event) -> int: return state + 1 - dataset = self._load_dataset(base_dir, base_filename="statsbomb_15986") + dataset = self._load_dataset_statsbomb( + base_dir, base_filename="statsbomb_15986" + ) with performance_logging("add_state"): dataset_with_state = dataset.add_state("custom") From 9663259cdbd37cabc9239afd616f871a55a22090 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Wed, 25 Jun 2025 15:46:38 +0200 Subject: [PATCH 4/7] fix type error --- kloppy/domain/services/state_builder/builder.py | 4 ++-- kloppy/domain/services/state_builder/builders/sequence.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kloppy/domain/services/state_builder/builder.py b/kloppy/domain/services/state_builder/builder.py index bc0e366ae..6927e8e66 100644 --- a/kloppy/domain/services/state_builder/builder.py +++ b/kloppy/domain/services/state_builder/builder.py @@ -1,5 +1,5 @@ from abc import abstractmethod -from typing import TypeVar +from typing import TypeVar, List from kloppy.domain import EventDataset, Event from .registered import RegisteredStateBuilder @@ -20,5 +20,5 @@ def reduce_before(self, state: T, event: Event) -> T: def reduce_after(self, state: T, event: Event) -> T: pass - def post_process(self, events: list[Event]) -> list[Event]: + def post_process(self, events: List[Event]): pass diff --git a/kloppy/domain/services/state_builder/builders/sequence.py b/kloppy/domain/services/state_builder/builders/sequence.py index 6c2053130..b807fb996 100644 --- a/kloppy/domain/services/state_builder/builders/sequence.py +++ b/kloppy/domain/services/state_builder/builders/sequence.py @@ -127,7 +127,7 @@ def post_process(self, events: List[Event]): or sequence.team is None ): event.state["sequence"] = Sequence(sequence_id=None, team=None) - else: + elif sequence.sequence_id is not None: # Map old sequence IDs to new consecutive IDs # Get or assign a new sequence ID new_sequence_id = sequence_id_mapping.setdefault( From 2abe73a54144dfaad683bd0cfaf4287810104cc1 Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Wed, 25 Jun 2025 15:58:00 +0200 Subject: [PATCH 5/7] next_record can be None --- kloppy/domain/services/state_builder/builders/sequence.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kloppy/domain/services/state_builder/builders/sequence.py b/kloppy/domain/services/state_builder/builders/sequence.py index b807fb996..7f2cf6dd6 100644 --- a/kloppy/domain/services/state_builder/builders/sequence.py +++ b/kloppy/domain/services/state_builder/builders/sequence.py @@ -68,7 +68,7 @@ def is_possessing_event(event: Event) -> bool: def should_open_sequence( - event: Event, next_event: Event, state: Optional[Sequence] = None + event: Event, next_event: Optional[Event], state: Optional[Sequence] = None ) -> bool: can_open_sequence = False if is_possessing_event(event): @@ -76,6 +76,7 @@ def should_open_sequence( elif ( isinstance(event, DuelEvent) and event.result == DuelResult.WON + and next_event is not None and is_possessing_event(next_event) ): can_open_sequence = True From 5cb023d0d31dec32e03e6b6fb3a1ceb639e3a6db Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Wed, 25 Jun 2025 16:57:20 +0200 Subject: [PATCH 6/7] defensive actions followed by possessing action can start new sequence --- .../services/state_builder/builders/sequence.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kloppy/domain/services/state_builder/builders/sequence.py b/kloppy/domain/services/state_builder/builders/sequence.py index 7f2cf6dd6..a14371899 100644 --- a/kloppy/domain/services/state_builder/builders/sequence.py +++ b/kloppy/domain/services/state_builder/builders/sequence.py @@ -26,6 +26,7 @@ SubstitutionEvent, PlayerOffEvent, FormationChangeEvent, + ClearanceEvent, ) from ..builder import StateBuilder @@ -48,6 +49,13 @@ class Sequence: CLOSE_SEQUENCE = (BallOutEvent, FoulCommittedEvent, ShotEvent) +def is_ball_winning_defensive_action(event: Event) -> bool: + if isinstance(event, DuelEvent) and event.result == DuelResult.WON: + return True + elif isinstance(event, ClearanceEvent): + return True + + def is_possessing_event(event: Event) -> bool: if isinstance(event, (PassEvent, CarryEvent, RecoveryEvent, TakeOnEvent)): return True @@ -63,8 +71,6 @@ def is_possessing_event(event: Event) -> bool: and event.result == InterceptionResult.SUCCESS ): return True - else: - return False def should_open_sequence( @@ -74,9 +80,9 @@ def should_open_sequence( if is_possessing_event(event): can_open_sequence = True elif ( - isinstance(event, DuelEvent) - and event.result == DuelResult.WON + is_ball_winning_defensive_action(event) and next_event is not None + and next_event.team == event.team and is_possessing_event(next_event) ): can_open_sequence = True @@ -90,7 +96,6 @@ def should_open_sequence( def should_close_sequence(event: Event) -> bool: if isinstance(event, CLOSE_SEQUENCE): return True - return False class SequenceStateBuilder(StateBuilder): From 407f194bf7ba9b5039c8a05e4fe9b91c1982203f Mon Sep 17 00:00:00 2001 From: lode-mgp Date: Tue, 6 Jan 2026 09:33:55 +0100 Subject: [PATCH 7/7] replace List with list --- .../domain/services/state_builder/builder.py | 4 +-- .../state_builder/builders/sequence.py | 36 +++++++++---------- kloppy/tests/test_state_builder.py | 1 - 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/kloppy/domain/services/state_builder/builder.py b/kloppy/domain/services/state_builder/builder.py index d33197a8b..34173af29 100644 --- a/kloppy/domain/services/state_builder/builder.py +++ b/kloppy/domain/services/state_builder/builder.py @@ -1,5 +1,5 @@ from abc import abstractmethod -from typing import TypeVar, List +from typing import TypeVar from kloppy.domain import Event, EventDataset @@ -21,5 +21,5 @@ def reduce_before(self, state: T, event: Event) -> T: def reduce_after(self, state: T, event: Event) -> T: pass - def post_process(self, events: List[Event]): + def post_process(self, events: list[Event]): pass diff --git a/kloppy/domain/services/state_builder/builders/sequence.py b/kloppy/domain/services/state_builder/builders/sequence.py index 29d2cc3f8..4e9fedc04 100644 --- a/kloppy/domain/services/state_builder/builders/sequence.py +++ b/kloppy/domain/services/state_builder/builders/sequence.py @@ -1,32 +1,32 @@ -from dataclasses import replace, dataclass -from typing import Optional, List +from dataclasses import dataclass, replace +from typing import Optional from kloppy.domain import ( BallOutEvent, + CardEvent, CarryEvent, + ClearanceEvent, + DuelEvent, + DuelResult, Event, EventDataset, + FormationChangeEvent, FoulCommittedEvent, - PassEvent, - RecoveryEvent, - SetPieceQualifier, - ShotEvent, - Team, - GoalkeeperEvent, + GenericEvent, GoalkeeperActionType, - TakeOnEvent, + GoalkeeperEvent, + GoalkeeperQualifier, InterceptionEvent, InterceptionResult, - GoalkeeperQualifier, - DuelEvent, - DuelResult, - GenericEvent, + PassEvent, + PlayerOffEvent, PlayerOnEvent, - CardEvent, + RecoveryEvent, + SetPieceQualifier, + ShotEvent, SubstitutionEvent, - PlayerOffEvent, - FormationChangeEvent, - ClearanceEvent, + TakeOnEvent, + Team, ) from ..builder import StateBuilder @@ -120,7 +120,7 @@ def reduce_after(self, state: Sequence, event: Event) -> Sequence: return state - def post_process(self, events: List[Event]): + def post_process(self, events: list[Event]): current_sequence_id = 1 sequence_id_mapping = {} diff --git a/kloppy/tests/test_state_builder.py b/kloppy/tests/test_state_builder.py index 989a5318b..7be865bde 100644 --- a/kloppy/tests/test_state_builder.py +++ b/kloppy/tests/test_state_builder.py @@ -1,4 +1,3 @@ -import json from collections import defaultdict from itertools import groupby