diff --git a/ghmap/cli.py b/ghmap/cli.py index eaf25c0..d834e82 100644 --- a/ghmap/cli.py +++ b/ghmap/cli.py @@ -189,6 +189,16 @@ def _parse_args() -> argparse.Namespace: default=None, help='Path to a custom action to activity mapping JSON file.' ) + parser.add_argument( + '--mapping-strategy', + choices=['strict', 'flexible'], + default='flexible', + help=( + "Mapping strategy to handle unknown actions. " + "'strict' raises an error on UnknownAction, " + "'flexible' issues a warning once and continues (default: flexible)." + ) + ) return parser.parse_args() @@ -228,7 +238,7 @@ def _apply_custom_mappings( if args.custom_action_mapping: action_mapping = load_json_file(args.custom_action_mapping) action_mapper = ActionMapper(action_mapping, progress_bar=args.progress_bar) - all_actions = action_mapper.map(events) + all_actions = action_mapper.map(events, args.mapping_strategy) if args.custom_activity_mapping: activity_mapping = load_json_file(args.custom_activity_mapping) @@ -269,7 +279,7 @@ def _process_period( # Step 1: Event to Action Mapping action_mapping = load_json_file(valid_mappings['action']) action_mapper = ActionMapper(action_mapping, progress_bar=args.progress_bar) - actions = action_mapper.map(period_events) + actions = action_mapper.map(period_events, args.mapping_strategy) # Step 2: Action to Activity Mapping activity_mapping = load_json_file(valid_mappings['activity']) diff --git a/ghmap/mapping/action_mapper.py b/ghmap/mapping/action_mapper.py index 6a15479..4b1f339 100644 --- a/ghmap/mapping/action_mapper.py +++ b/ghmap/mapping/action_mapper.py @@ -136,11 +136,20 @@ def _extract_field(event_record: Dict, field_path: str) -> Any: return None return value - def map(self, events: List[Dict]) -> List[Dict]: - """Maps events to high-level actions using mapping configuration.""" + def map(self, events: List[Dict], mapping_strategy: str = "flexible") -> List[Dict]: + """ + Maps events to high-level actions using mapping configuration. + + mapping_strategy: "strict" or "flexible" + - strict: raise an error on UnknownAction + - flexible: tolerate UnknownAction and issue a warning once + """ all_mapped_actions = [] + unknown_warning_issued = False # only warn once in flexible mode - for event_record in tqdm(events, desc="Mapping events to actions", unit="event", disable=not self.progress_bar): # pylint: disable=line-too-long + for event_record in tqdm( + events, desc="Mapping events to actions", unit="event", disable=not self.progress_bar + ): if 'payload' in event_record: event_record = self._deserialize_payload(event_record) event_record = self._convert_date_to_iso(event_record) @@ -162,11 +171,23 @@ def map(self, events: List[Dict]) -> List[Dict]: all_mapped_actions.append(mapped_action) break else: - mapped_action = self._extract_attributes( - event_record, - self.action_mapping['actions']['UnknownAction'], - 'UnknownAction' - ) - all_mapped_actions.append(mapped_action) + # Handle UnknownAction based on strategy + if mapping_strategy == "strict": + raise ValueError( + f"UnknownAction encountered for event: {event_record}" + ) + if mapping_strategy == "flexible": + if not unknown_warning_issued: + print("Warning: Some actions not identified and mapped as UnknownAction.") + unknown_warning_issued = True + + mapped_action = self._extract_attributes( + event_record, + self.action_mapping['actions']['UnknownAction'], + 'UnknownAction' + ) + all_mapped_actions.append(mapped_action) + else: + raise ValueError(f"Invalid mapping_strategy: {mapping_strategy}") return all_mapped_actions