Pabloo22 · Pabloo22 · Sep 13, 2025 · Sep 11, 2025 · Sep 12, 2025 · Sep 13, 2025
diff --git a/job_shop_lib/metaheuristics/__init__.py b/job_shop_lib/metaheuristics/__init__.py
@@ -31,12 +31,16 @@
     swap_random_operations
     ObjectiveFunction
     get_makespan_with_penalties_objective
+    compute_penalty_for_deadlines
+    compute_penalty_for_due_dates
 
 """
 
 from job_shop_lib.metaheuristics._objective_functions import (
     ObjectiveFunction,
     get_makespan_with_penalties_objective,
+    compute_penalty_for_deadlines,
+    compute_penalty_for_due_dates,
 )
 from job_shop_lib.metaheuristics._neighbor_generators import (
     NeighborGenerator,
@@ -58,4 +62,6 @@
     "swap_random_operations",
     "ObjectiveFunction",
     "get_makespan_with_penalties_objective",
+    "compute_penalty_for_deadlines",
+    "compute_penalty_for_due_dates",
 ]
diff --git a/job_shop_lib/metaheuristics/_objective_functions.py b/job_shop_lib/metaheuristics/_objective_functions.py
@@ -45,29 +45,70 @@ def get_makespan_with_penalties_objective(
 
     def objective(schedule: Schedule) -> float:
         makespan = schedule.makespan()
-        instance = schedule.instance
-
-        # Fast path: no constraint attributes present in the instance
-        if not instance.has_deadlines and not instance.has_due_dates:
-            return makespan
-
-        penalty = 0.0
-        for machine_schedule in schedule.schedule:
-            for scheduled_op in machine_schedule:
-                op = scheduled_op.operation
-                # Deadline (hard) penalty
-                if (
-                    op.deadline is not None
-                    and scheduled_op.end_time > op.deadline
-                ):
-                    penalty += deadline_penalty_factor
-                # Due date (soft) penalty
-                if (
-                    op.due_date is not None
-                    and scheduled_op.end_time > op.due_date
-                ):
-                    penalty += due_date_penalty_factor
+        penalty_for_deadlines = compute_penalty_for_deadlines(
+            schedule, deadline_penalty_factor
+        )
+        penalty_for_due_dates = compute_penalty_for_due_dates(
+            schedule, due_date_penalty_factor
+        )
+        penalty = penalty_for_deadlines + penalty_for_due_dates
 
         return makespan + penalty
 
     return objective
+
+
+def compute_penalty_for_deadlines(
+    schedule: Schedule, penalty_per_violation: float
+) -> float:
+    """Compute the total penalty for deadline violations in a schedule.
+
+    Args:
+        schedule:
+            The schedule to evaluate.
+        penalty_per_violation:
+            Penalty added per operation that finishes after its deadline.
+
+    Returns:
+        The summed penalty; 0.0 when no operation finishes past its deadline.
+    """
+    # Fast path: skip the scan when nothing can be added to the penalty.
+    if not schedule.instance.has_deadlines or penalty_per_violation == 0:
+        return 0.0
+
+    penalty = 0.0
+    for machine_schedule in schedule.schedule:
+        for scheduled_op in machine_schedule:
+            op = scheduled_op.operation
+            if op.deadline is not None and scheduled_op.end_time > op.deadline:
+                penalty += penalty_per_violation
+
+    return penalty
+
+
+def compute_penalty_for_due_dates(
+    schedule: Schedule, penalty_per_violation: float
+) -> float:
+    """Compute the total penalty for due date violations in a schedule.
+
+    Args:
+        schedule:
+            The schedule to evaluate.
+        penalty_per_violation:
+            Penalty added per operation that finishes after its due date.
+
+    Returns:
+        The summed penalty; 0.0 when no operation finishes past its due date.
+    """
+    # Fast path: skip the scan when nothing can be added to the penalty.
+    if not schedule.instance.has_due_dates or penalty_per_violation == 0:
+        return 0.0
+
+    penalty = 0.0
+    for machine_schedule in schedule.schedule:
+        for scheduled_op in machine_schedule:
+            op = scheduled_op.operation
+            if op.due_date is not None and scheduled_op.end_time > op.due_date:
+                penalty += penalty_per_violation
+
+    return penalty
diff --git a/job_shop_lib/reinforcement_learning/__init__.py b/job_shop_lib/reinforcement_learning/__init__.py
@@ -14,11 +14,14 @@
     RewardObserver
     MakespanReward
     IdleTimeReward
+    RewardWithPenalties
     RenderConfig
     add_padding
     create_edge_type_dict
     map_values
     get_optimal_actions
+    get_deadline_violation_penalty
+    get_due_date_violation_penalty
 
 """
 
@@ -32,13 +35,16 @@
     RewardObserver,
     MakespanReward,
     IdleTimeReward,
+    RewardWithPenalties,
 )
 
 from job_shop_lib.reinforcement_learning._utils import (
     add_padding,
     create_edge_type_dict,
     map_values,
     get_optimal_actions,
+    get_deadline_violation_penalty,
+    get_due_date_violation_penalty,
 )
 
 from job_shop_lib.reinforcement_learning._single_job_shop_graph_env import (
@@ -63,9 +69,12 @@
     "RewardObserver",
     "MakespanReward",
     "IdleTimeReward",
+    "RewardWithPenalties",
     "RenderConfig",
     "add_padding",
     "create_edge_type_dict",
     "map_values",
     "get_optimal_actions",
+    "get_deadline_violation_penalty",
+    "get_due_date_violation_penalty",
 ]
diff --git a/job_shop_lib/reinforcement_learning/_reward_observers.py b/job_shop_lib/reinforcement_learning/_reward_observers.py
@@ -1,6 +1,9 @@
 """Rewards functions are defined as `DispatcherObervers` and are used to
 calculate the reward for a given state."""
 
+from collections.abc import Callable
+
+from job_shop_lib.exceptions import ValidationError
 from job_shop_lib.dispatching import DispatcherObserver, Dispatcher
 from job_shop_lib import ScheduledOperation
 
@@ -83,3 +86,76 @@ def update(self, scheduled_operation: ScheduledOperation):
 
         reward = -idle_time
         self.rewards.append(reward)
+
+
+class RewardWithPenalties(RewardObserver):
+    """Reward function that adds penalties to another reward function.
+
+    The reward is the last reward of the base reward observer minus the
+    value returned by ``penalty_function`` for the operation just scheduled
+    (for example, a penalty for violating a due date or a deadline).
+
+    Attributes:
+        base_reward_observer:
+            The base reward observer to use for calculating the reward.
+        penalty_function:
+            Callable mapping (scheduled operation, dispatcher) to a penalty.
+
+    Args:
+        dispatcher:
+            The dispatcher to observe.
+        base_reward_observer:
+            Observer that computes the base reward. It must use the same
+            dispatcher; if it is subscribed to it, it will be unsubscribed.
+        penalty_function:
+            Callable ``(scheduled_operation, dispatcher) -> penalty``.
+        subscribe:
+            Whether to subscribe to the dispatcher upon initialization.
+
+    Raises:
+        ValidationError:
+            If the base reward observer does not use the same dispatcher as
+            this reward observer.
+
+    .. versionadded:: 1.7.0
+
+    .. seealso::
+        The following functions (along with ``functools.partial``) can be
+        used to create penalty functions:
+
+        - :func:`~job_shop_lib.reinforcement_learning.get_deadline_violation_penalty`
+        - :func:`~job_shop_lib.reinforcement_learning.get_due_date_violation_penalty`
+
+    """  # noqa: E501
+
+    def __init__(
+        self,
+        dispatcher: Dispatcher,
+        *,
+        base_reward_observer: RewardObserver,
+        penalty_function: Callable[[ScheduledOperation, Dispatcher], float],
+        subscribe: bool = True,
+    ) -> None:
+        # Validate before subscribing so a failed init leaves no subscriber.
+        if base_reward_observer.dispatcher is not dispatcher:
+            raise ValidationError(
+                "The base reward observer must use the same "
+                "dispatcher as this reward observer."
+            )
+        super().__init__(dispatcher, subscribe=subscribe)
+        self.base_reward_observer = base_reward_observer
+        self.penalty_function = penalty_function
+        if base_reward_observer in dispatcher.subscribers:
+            dispatcher.unsubscribe(base_reward_observer)
+
+    def reset(self) -> None:
+        """Reset this observer and the wrapped base reward observer."""
+        super().reset()
+        self.base_reward_observer.reset()
+
+    def update(self, scheduled_operation: ScheduledOperation):
+        """Update the base observer, then append its reward minus penalty."""
+        self.base_reward_observer.update(scheduled_operation)
+        base_reward = self.base_reward_observer.last_reward
+        penalty = self.penalty_function(scheduled_operation, self.dispatcher)
+        self.rewards.append(base_reward - penalty)
diff --git a/job_shop_lib/reinforcement_learning/_utils.py b/job_shop_lib/reinforcement_learning/_utils.py
@@ -5,8 +5,9 @@
 import numpy as np
 from numpy.typing import NDArray
 
+from job_shop_lib import ScheduledOperation
 from job_shop_lib.exceptions import ValidationError
-from job_shop_lib.dispatching import OptimalOperationsObserver
+from job_shop_lib.dispatching import OptimalOperationsObserver, Dispatcher
 
 T = TypeVar("T", bound=np.number)
 
@@ -193,7 +194,65 @@ def get_optimal_actions(
     return optimal_actions
 
 
-if __name__ == "__main__":
-    import doctest
+def get_deadline_violation_penalty(
+    scheduled_operation: ScheduledOperation,
+    unused_dispatcher: Dispatcher,
+    deadline_penalty_factor: float = 10_000,
+) -> float:
+    """Compute the penalty for a scheduled operation that violates its
+    deadline.
 
-    doctest.testmod()
+    Args:
+        scheduled_operation:
+            The scheduled operation to evaluate.
+        unused_dispatcher:
+            This argument is unused but included for compatibility with the
+            penalty function signature.
+        deadline_penalty_factor:
+            Penalty returned on a violation. Defaults to 10_000.
+
+    Returns:
+        ``deadline_penalty_factor`` if the operation ends after its deadline,
+        otherwise 0.0.
+
+    .. versionadded:: 1.7.0
+    """
+    if (
+        scheduled_operation.operation.deadline is not None
+        and scheduled_operation.end_time
+        > scheduled_operation.operation.deadline
+    ):
+        return deadline_penalty_factor
+    return 0.0
+
+
+def get_due_date_violation_penalty(
+    scheduled_operation: ScheduledOperation,
+    unused_dispatcher: Dispatcher,
+    due_date_penalty_factor: float = 100,
+) -> float:
+    """Compute the penalty for a scheduled operation that violates its
+    due date.
+
+    Args:
+        scheduled_operation:
+            The scheduled operation to evaluate.
+        unused_dispatcher:
+            This argument is unused but included for compatibility with the
+            penalty function signature.
+        due_date_penalty_factor:
+            Penalty returned on a violation. Defaults to 100.
+
+    Returns:
+        ``due_date_penalty_factor`` if the operation ends after its due date,
+        otherwise 0.0.
+
+    .. versionadded:: 1.7.0
+    """
+    if (
+        scheduled_operation.operation.due_date is not None
+        and scheduled_operation.end_time
+        > scheduled_operation.operation.due_date
+    ):
+        return due_date_penalty_factor
+    return 0.0
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -334,3 +334,17 @@ def ft06_instance():
 @pytest.fixture
 def seeded_rng() -> random.Random:
     return random.Random(42)
+
+
+@pytest.fixture
+def single_machine_instance() -> JobShopInstance:
+    """Instance with two single-operation jobs that share one machine."""
+    jobs = [[Operation(0, 2)], [Operation(0, 3)]]
+    return JobShopInstance(jobs, name="SingleMachine")
+
+
+@pytest.fixture
+def two_machines_instance() -> JobShopInstance:
+    """Instance with two single-operation jobs on different machines."""
+    jobs = [[Operation(0, 5)], [Operation(1, 3)]]
+    return JobShopInstance(jobs, name="TwoMachines")