From d26f2e61470c30a6722616439ee361bd4fcc0d72 Mon Sep 17 00:00:00 2001 From: Pabloo22 Date: Thu, 11 Sep 2025 19:22:41 +0200 Subject: [PATCH 1/4] [Feature] Add `RewardWithPenalties` class and penalty functions --- job_shop_lib/metaheuristics/__init__.py | 6 + .../metaheuristics/_objective_functions.py | 85 ++++++++++---- .../reinforcement_learning/__init__.py | 9 ++ .../_reward_observers.py | 75 +++++++++++++ job_shop_lib/reinforcement_learning/_utils.py | 67 ++++++++++- .../test_reward_functions.py | 36 ------ .../{test_rl_utils.py => test_utils.py} | 104 ++++++++++++++++++ 7 files changed, 320 insertions(+), 62 deletions(-) delete mode 100644 tests/reinforcement_learning/test_reward_functions.py rename tests/reinforcement_learning/{test_rl_utils.py => test_utils.py} (71%) diff --git a/job_shop_lib/metaheuristics/__init__.py b/job_shop_lib/metaheuristics/__init__.py index 6118abdd..e4acd00e 100644 --- a/job_shop_lib/metaheuristics/__init__.py +++ b/job_shop_lib/metaheuristics/__init__.py @@ -31,12 +31,16 @@ swap_random_operations ObjectiveFunction get_makespan_with_penalties_objective + compute_penalty_for_deadlines + compute_penalty_for_due_dates """ from job_shop_lib.metaheuristics._objective_functions import ( ObjectiveFunction, get_makespan_with_penalties_objective, + compute_penalty_for_deadlines, + compute_penalty_for_due_dates, ) from job_shop_lib.metaheuristics._neighbor_generators import ( NeighborGenerator, @@ -58,4 +62,6 @@ "swap_random_operations", "ObjectiveFunction", "get_makespan_with_penalties_objective", + "compute_penalty_for_deadlines", + "compute_penalty_for_due_dates", ] diff --git a/job_shop_lib/metaheuristics/_objective_functions.py b/job_shop_lib/metaheuristics/_objective_functions.py index 9b52ae08..88ffccb0 100644 --- a/job_shop_lib/metaheuristics/_objective_functions.py +++ b/job_shop_lib/metaheuristics/_objective_functions.py @@ -45,29 +45,70 @@ def get_makespan_with_penalties_objective( def objective(schedule: Schedule) -> float: 
makespan = schedule.makespan() - instance = schedule.instance - - # Fast path: no constraint attributes present in the instance - if not instance.has_deadlines and not instance.has_due_dates: - return makespan - - penalty = 0.0 - for machine_schedule in schedule.schedule: - for scheduled_op in machine_schedule: - op = scheduled_op.operation - # Deadline (hard) penalty - if ( - op.deadline is not None - and scheduled_op.end_time > op.deadline - ): - penalty += deadline_penalty_factor - # Due date (soft) penalty - if ( - op.due_date is not None - and scheduled_op.end_time > op.due_date - ): - penalty += due_date_penalty_factor + penalty_for_deadlines = compute_penalty_for_deadlines( + schedule, deadline_penalty_factor + ) + penalty_for_due_dates = compute_penalty_for_due_dates( + schedule, due_date_penalty_factor + ) + penalty = penalty_for_deadlines + penalty_for_due_dates return makespan + penalty return objective + + +def compute_penalty_for_deadlines( + schedule: Schedule, penalty_per_violation: float +) -> float: + """Compute the total penalty for deadline violations in a schedule. + + Args: + schedule: + The schedule to evaluate. + penalty_per_violation: + The penalty to apply for each operation that + finishes after its deadline. + + Returns: + The total penalty for deadline violations. + """ + if not schedule.instance.has_deadlines or penalty_per_violation == 0: + return 0.0 + + penalty = 0.0 + for machine_schedule in schedule.schedule: + for scheduled_op in machine_schedule: + op = scheduled_op.operation + if op.deadline is not None and scheduled_op.end_time > op.deadline: + penalty += penalty_per_violation + + return penalty + + +def compute_penalty_for_due_dates( + schedule: Schedule, penalty_per_violation: float +) -> float: + """Compute the total penalty for due date violations in a schedule. + + Args: + schedule: + The schedule to evaluate. + penalty_per_violation: + The penalty to apply for each operation that + finishes after its due date. 
+ + Returns: + The total penalty for due date violations. + """ + if not schedule.instance.has_due_dates or penalty_per_violation == 0: + return 0.0 + + penalty = 0.0 + for machine_schedule in schedule.schedule: + for scheduled_op in machine_schedule: + op = scheduled_op.operation + if op.due_date is not None and scheduled_op.end_time > op.due_date: + penalty += penalty_per_violation + + return penalty diff --git a/job_shop_lib/reinforcement_learning/__init__.py b/job_shop_lib/reinforcement_learning/__init__.py index 3757b3e9..c6841d75 100644 --- a/job_shop_lib/reinforcement_learning/__init__.py +++ b/job_shop_lib/reinforcement_learning/__init__.py @@ -14,11 +14,14 @@ RewardObserver MakespanReward IdleTimeReward + RewardWithPenalties RenderConfig add_padding create_edge_type_dict map_values get_optimal_actions + get_deadline_violation_penalty + get_due_date_violation_penalty """ @@ -32,6 +35,7 @@ RewardObserver, MakespanReward, IdleTimeReward, + RewardWithPenalties, ) from job_shop_lib.reinforcement_learning._utils import ( @@ -39,6 +43,8 @@ create_edge_type_dict, map_values, get_optimal_actions, + get_deadline_violation_penalty, + get_due_date_violation_penalty, ) from job_shop_lib.reinforcement_learning._single_job_shop_graph_env import ( @@ -63,9 +69,12 @@ "RewardObserver", "MakespanReward", "IdleTimeReward", + "RewardWithPenalties", "RenderConfig", "add_padding", "create_edge_type_dict", "map_values", "get_optimal_actions", + "get_deadline_violation_penalty", + "get_due_date_violation_penalty", ] diff --git a/job_shop_lib/reinforcement_learning/_reward_observers.py b/job_shop_lib/reinforcement_learning/_reward_observers.py index b1a53cea..06c9536b 100644 --- a/job_shop_lib/reinforcement_learning/_reward_observers.py +++ b/job_shop_lib/reinforcement_learning/_reward_observers.py @@ -1,6 +1,9 @@ """Rewards functions are defined as `DispatcherObervers` and are used to calculate the reward for a given state.""" +from collections.abc import Callable + +from 
job_shop_lib.exceptions import ValidationError from job_shop_lib.dispatching import DispatcherObserver, Dispatcher from job_shop_lib import ScheduledOperation @@ -83,3 +86,75 @@ def update(self, scheduled_operation: ScheduledOperation): reward = -idle_time self.rewards.append(reward) + + +class RewardWithPenalties(RewardObserver): + """Reward function that adds penalties to another reward function. + + The reward is calculated as the sum of the reward from another reward + function and a penalty for each constraint violation (due dates and + deadlines). + + Attributes: + base_reward_observer: + The base reward observer to use for calculating the reward. + penalty_per_violation: + The penalty to apply for each constraint violation. + + Args: + dispatcher: + The dispatcher to observe. + base_reward_observer: + The base reward observer to use for calculating the reward. It + must use the same dispatcher as this reward observer. If it is + subscribed to the dispatcher, it will be unsubscribed. + penalty_function: + A function that takes a scheduled operation and the + dispatcher as input and returns the penalty for that operation. + subscribe: + Whether to subscribe to the dispatcher upon initialization. + + Raises: + ValidationError: + If the base reward observer does not use the same dispatcher as + this reward observer. + + .. versionadded:: 1.7.0 + + .. 
seealso:: + The following functions (along with ``functools.partial``) can be + used to create penalty functions: + + - :class:`job_shop_lib.metaheuristics.penalty_for_deadlines` + - :class:`job_shop_lib.metaheuristics.penalty_for_due_dates` + + """ + + def __init__( + self, + dispatcher: Dispatcher, + *, + base_reward_observer: RewardObserver, + penalty_function: Callable[[ScheduledOperation, Dispatcher], float], + subscribe: bool = True, + ) -> None: + super().__init__(dispatcher, subscribe=subscribe) + self.base_reward_observer = base_reward_observer + self.penalty_function = penalty_function + if base_reward_observer.dispatcher is not dispatcher: + raise ValidationError( + "The base reward observer must use the same " + "dispatcher as this reward observer." + ) + if base_reward_observer in dispatcher.subscribers: + dispatcher.unsubscribe(base_reward_observer) + + def reset(self) -> None: + super().reset() + self.base_reward_observer.reset() + + def update(self, scheduled_operation: ScheduledOperation): + self.base_reward_observer.update(scheduled_operation) + base_reward = self.base_reward_observer.last_reward + penalty = self.penalty_function(scheduled_operation, self.dispatcher) + self.rewards.append(base_reward - penalty) diff --git a/job_shop_lib/reinforcement_learning/_utils.py b/job_shop_lib/reinforcement_learning/_utils.py index 4b15d44c..f76336f7 100644 --- a/job_shop_lib/reinforcement_learning/_utils.py +++ b/job_shop_lib/reinforcement_learning/_utils.py @@ -5,8 +5,9 @@ import numpy as np from numpy.typing import NDArray +from job_shop_lib import ScheduledOperation from job_shop_lib.exceptions import ValidationError -from job_shop_lib.dispatching import OptimalOperationsObserver +from job_shop_lib.dispatching import OptimalOperationsObserver, Dispatcher T = TypeVar("T", bound=np.number) @@ -193,7 +194,65 @@ def get_optimal_actions( return optimal_actions -if __name__ == "__main__": - import doctest +def get_deadline_violation_penalty( + 
scheduled_operation: ScheduledOperation, + unused_dispatcher: Dispatcher, + deadline_penalty_factor: float = 10_000, +) -> float: + """Compute the penalty for a scheduled operation that violates its + deadline. - doctest.testmod() + Args: + scheduled_operation: + The scheduled operation to evaluate. + unused_dispatcher: + This argument is unused but included for compatibility with the + penalty function signature. + deadline_penalty_factor: + Cost added for each operation that + finishes after its deadline. Defaults to 10_000. + Returns: + The penalty for the scheduled operation if it violates its deadline, + otherwise 0. + + .. versionadded:: 1.7.0 + """ + if ( + scheduled_operation.operation.deadline is not None + and scheduled_operation.end_time + > scheduled_operation.operation.deadline + ): + return deadline_penalty_factor + return 0.0 + + +def get_due_date_violation_penalty( + scheduled_operation: ScheduledOperation, + unused_dispatcher: Dispatcher, + due_date_penalty_factor: float = 100, +) -> float: + """Compute the penalty for a scheduled operation that violates its + due date. + + Args: + scheduled_operation: + The scheduled operation to evaluate. + unused_dispatcher: + This argument is unused but included for compatibility with the + penalty function signature. + due_date_penalty_factor: + Cost added for each operation that + finishes after its due date. Defaults to 100. + Returns: + The penalty for the scheduled operation if it violates its due date, + otherwise 0. + + .. 
versionadded:: 1.7.0 + """ + if ( + scheduled_operation.operation.due_date is not None + and scheduled_operation.end_time + > scheduled_operation.operation.due_date + ): + return due_date_penalty_factor + return 0.0 diff --git a/tests/reinforcement_learning/test_reward_functions.py b/tests/reinforcement_learning/test_reward_functions.py deleted file mode 100644 index 6632c973..00000000 --- a/tests/reinforcement_learning/test_reward_functions.py +++ /dev/null @@ -1,36 +0,0 @@ -from job_shop_lib import JobShopInstance -from job_shop_lib.reinforcement_learning import MakespanReward, IdleTimeReward -from job_shop_lib.dispatching import ( - Dispatcher, - filter_dominated_operations, -) -from job_shop_lib.dispatching.rules import DispatchingRuleSolver - - -def test_makespan_reward(example_job_shop_instance: JobShopInstance): - dispatcher = Dispatcher(example_job_shop_instance) - reward_function = MakespanReward(dispatcher) - assert not reward_function.rewards - solver = DispatchingRuleSolver("most_work_remaining") - while not dispatcher.schedule.is_complete(): - solver.step(dispatcher) - assert sum(reward_function.rewards) == -dispatcher.schedule.makespan() - - -def test_idle_time_reward(example_job_shop_instance: JobShopInstance): - dispatcher = Dispatcher( - example_job_shop_instance, - ready_operations_filter=filter_dominated_operations, - ) - reward_function = IdleTimeReward(dispatcher) - assert not reward_function.rewards - solver = DispatchingRuleSolver("most_work_remaining") - solver.solve(example_job_shop_instance, dispatcher) - - assert sum(reward_function.rewards) == -(1 + 1 + 6) - - -if __name__ == "__main__": - import pytest - - pytest.main(["-vv", __file__]) diff --git a/tests/reinforcement_learning/test_rl_utils.py b/tests/reinforcement_learning/test_utils.py similarity index 71% rename from tests/reinforcement_learning/test_rl_utils.py rename to tests/reinforcement_learning/test_utils.py index fa356835..170ea764 100644 --- 
a/tests/reinforcement_learning/test_rl_utils.py +++ b/tests/reinforcement_learning/test_utils.py @@ -3,10 +3,14 @@ from numpy.typing import NDArray from job_shop_lib.exceptions import ValidationError +from job_shop_lib import Operation, JobShopInstance, ScheduledOperation +from job_shop_lib.dispatching import Dispatcher from job_shop_lib.reinforcement_learning import ( add_padding, create_edge_type_dict, map_values, + get_deadline_violation_penalty, + get_due_date_violation_penalty, ) @@ -297,5 +301,105 @@ def test_invalid_global_id(): map_values(edge_index, mapping) +def _make_scheduled_operation( + *, + duration: int, + start_time: int, + machine: int = 0, + deadline=None, + due_date=None, +): + """Helper to build a minimal scheduled operation and dispatcher.""" + jobs = [ + [ + Operation( + machine, + duration=duration, + deadline=deadline, + due_date=due_date, + ) + ] + ] + instance = JobShopInstance(jobs, name="PenaltyTestInstance") + dispatcher = Dispatcher(instance) + op = instance.jobs[0][0] + scheduled_op = ScheduledOperation( + op, start_time=start_time, machine_id=machine + ) + return scheduled_op, dispatcher + + +# ---------------- Deadline penalty tests ---------------- # + + +def test_deadline_penalty_violation(): + scheduled_op, dispatcher = _make_scheduled_operation( + duration=10, start_time=0, deadline=5 + ) # end_time = 10 > 5 + assert get_deadline_violation_penalty(scheduled_op, dispatcher) == 10_000 + + +def test_deadline_penalty_no_violation_equal_boundary(): + scheduled_op, dispatcher = _make_scheduled_operation( + duration=5, start_time=0, deadline=5 + ) # end_time = 5 == 5 + assert get_deadline_violation_penalty(scheduled_op, dispatcher) == 0.0 + + +def test_deadline_penalty_none_deadline(): + scheduled_op, dispatcher = _make_scheduled_operation( + duration=4, start_time=0, deadline=None + ) + assert get_deadline_violation_penalty(scheduled_op, dispatcher) == 0.0 + + +def test_deadline_penalty_custom_factor(): + scheduled_op, dispatcher = 
_make_scheduled_operation( + duration=3, start_time=0, deadline=2 + ) # end_time = 3 > 2 + assert ( + get_deadline_violation_penalty( + scheduled_op, dispatcher, deadline_penalty_factor=123.45 + ) + == 123.45 + ) + + +# ---------------- Due date penalty tests ---------------- # + + +def test_due_date_penalty_violation(): + scheduled_op, dispatcher = _make_scheduled_operation( + duration=7, start_time=0, due_date=6 + ) # end_time = 7 > 6 + assert get_due_date_violation_penalty(scheduled_op, dispatcher) == 100 + + +def test_due_date_penalty_no_violation_equal_boundary(): + scheduled_op, dispatcher = _make_scheduled_operation( + duration=5, start_time=0, due_date=5 + ) # end_time = 5 == 5 + assert get_due_date_violation_penalty(scheduled_op, dispatcher) == 0.0 + + +def test_due_date_penalty_none_due_date(): + scheduled_op, dispatcher = _make_scheduled_operation( + duration=4, start_time=0, due_date=None + ) + assert get_due_date_violation_penalty(scheduled_op, dispatcher) == 0.0 + + +def test_due_date_penalty_custom_factor(): + scheduled_op, dispatcher = _make_scheduled_operation( + duration=9, start_time=0, due_date=1 + ) # end_time = 9 > 1 + assert ( + get_due_date_violation_penalty( + scheduled_op, dispatcher, due_date_penalty_factor=7.5 + ) + == 7.5 + ) + + if __name__ == "__main__": pytest.main(["-vv", __file__]) From 6ecc2c7cfd7d9f5dfb879e7f3ce0888f701ede80 Mon Sep 17 00:00:00 2001 From: Pabloo22 Date: Fri, 12 Sep 2025 18:27:45 +0200 Subject: [PATCH 2/4] [Tests] Add tests for RL utils and reward observers --- .../_reward_observers.py | 6 +- tests/conftest.py | 14 ++ .../test_reward_observers.py | 191 ++++++++++++++++++ tests/reinforcement_learning/test_utils.py | 79 ++++++++ 4 files changed, 287 insertions(+), 3 deletions(-) create mode 100644 tests/reinforcement_learning/test_reward_observers.py diff --git a/job_shop_lib/reinforcement_learning/_reward_observers.py b/job_shop_lib/reinforcement_learning/_reward_observers.py index 06c9536b..32818ba3 100644 --- 
a/job_shop_lib/reinforcement_learning/_reward_observers.py +++ b/job_shop_lib/reinforcement_learning/_reward_observers.py @@ -125,10 +125,10 @@ class RewardWithPenalties(RewardObserver): The following functions (along with ``functools.partial``) can be used to create penalty functions: - - :class:`job_shop_lib.metaheuristics.penalty_for_deadlines` - - :class:`job_shop_lib.metaheuristics.penalty_for_due_dates` + - :class:`~job_shop_lib.reinforcement_learning.get_deadline_violation_penalty` + - :class:`~job_shop_lib.reinforcement_learning.get_due_date_violation_penalty` - """ + """ # noqa: E501 def __init__( self, diff --git a/tests/conftest.py b/tests/conftest.py index 62aa2109..c65c1848 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -334,3 +334,17 @@ def ft06_instance(): @pytest.fixture def seeded_rng() -> random.Random: return random.Random(42) + + +@pytest.fixture +def single_machine_instance() -> JobShopInstance: + # Two single-op jobs on same machine + jobs = [[Operation(0, 2)], [Operation(0, 3)]] + return JobShopInstance(jobs, name="SingleMachine") + + +@pytest.fixture +def two_machines_instance() -> JobShopInstance: + # Two jobs, each with one operation on different machines + jobs = [[Operation(0, 5)], [Operation(1, 3)]] + return JobShopInstance(jobs, name="TwoMachines") diff --git a/tests/reinforcement_learning/test_reward_observers.py b/tests/reinforcement_learning/test_reward_observers.py new file mode 100644 index 00000000..adbb3377 --- /dev/null +++ b/tests/reinforcement_learning/test_reward_observers.py @@ -0,0 +1,191 @@ +# pylint: disable=missing-function-docstring, redefined-outer-name +import functools +import pytest + +from job_shop_lib import JobShopInstance, Operation +from job_shop_lib.dispatching import Dispatcher +from job_shop_lib.exceptions import ValidationError +from job_shop_lib.reinforcement_learning import ( + MakespanReward, + IdleTimeReward, + RewardWithPenalties, + get_due_date_violation_penalty, + 
get_deadline_violation_penalty, +) + + +def test_makespan_reward_basic(single_machine_instance: JobShopInstance): + dispatcher = Dispatcher(single_machine_instance) + reward_obs = MakespanReward(dispatcher) + + # Schedule first job on machine 0 + op0 = single_machine_instance.jobs[0][0] + dispatcher.dispatch(op0, 0) + assert reward_obs.rewards[-1] == -2 + + # Schedule second job on same machine + op1 = single_machine_instance.jobs[1][0] + dispatcher.dispatch(op1, 0) + # makespan increases from 2 to 5 + assert reward_obs.rewards[-1] == -3 + + # Sum of rewards equals -final_makespan + assert sum(reward_obs.rewards) == -dispatcher.schedule.makespan() == -5 + + +def test_makespan_reward_zero_when_no_increase( + two_machines_instance: JobShopInstance, +): + dispatcher = Dispatcher(two_machines_instance) + reward_obs = MakespanReward(dispatcher) + + # Schedule the longer op first -> makespan = 5 + op_long = two_machines_instance.jobs[0][0] + dispatcher.dispatch(op_long, 0) + assert reward_obs.rewards[-1] == -5 + + # Now schedule the shorter op on another machine -> ends + # at 3 < current makespan + op_short = two_machines_instance.jobs[1][0] + dispatcher.dispatch(op_short, 1) + # No makespan increase -> zero reward + assert reward_obs.rewards[-1] == 0 + + +def test_idle_time_reward_computation(): + # Construct instance that creates idle time on machine 0 + # Job1: M0(1) then M1(1) + # Job0: M1(5) then M0(1) -> causes M0 idle from t=1 to t=5 + jobs = [ + [Operation(1, 5), Operation(0, 1)], # job 0 + [Operation(0, 1), Operation(1, 1)], # job 1 + ] + instance = JobShopInstance(jobs, name="IdleTimeExample") + dispatcher = Dispatcher(instance) + idle_obs = IdleTimeReward(dispatcher) + + # 1) j1[0] on M0 at t=0..1 + dispatcher.dispatch(instance.jobs[1][0], 0) + assert idle_obs.rewards[-1] == 0 # first op on machine -> start_time 0 + + # 2) j0[0] on M1 at t=0..5 + dispatcher.dispatch(instance.jobs[0][0], 1) + assert idle_obs.rewards[-1] == 0 # first op on machine -> start_time 
0 + + # 3) j1[1] on M1 at t=5..6 (no idle on M1) + dispatcher.dispatch(instance.jobs[1][1], 1) + assert idle_obs.rewards[-1] == 0 + + # 4) j0[1] on M0 at t=5..6 (idle on M0 from 1 to 5 -> reward = -4) + dispatcher.dispatch(instance.jobs[0][1], 0) + assert idle_obs.rewards[-1] == -4 + + +def test_reward_with_penalties_due_date(): + # Build small instance where second op violates due date + jobs = [ + [Operation(0, 1)], + [ + Operation(0, 10, due_date=5) + ], # will start at 1 and end at 11 -> late + ] + instance = JobShopInstance(jobs, name="DueDatePenalty") + dispatcher = Dispatcher(instance) + + base = MakespanReward(dispatcher) + penalty_fn = functools.partial( + get_due_date_violation_penalty, due_date_penalty_factor=7 + ) + reward = RewardWithPenalties( + dispatcher, + base_reward_observer=base, + penalty_function=penalty_fn, + ) + + # First op (no penalty) + dispatcher.dispatch(instance.jobs[0][0], 0) + assert base.rewards[-1] == -1 + assert reward.rewards[-1] == -1 + + # Second op violates due date -> penalty 7 + dispatcher.dispatch(instance.jobs[1][0], 0) + assert base.rewards[-1] == -10 + assert reward.rewards[-1] == -10 - 7 + + +def test_reward_with_penalties_deadline(): + jobs = [ + [Operation(0, 1)], + [Operation(0, 10, deadline=5)], # ends at 11 -> deadline violation + ] + instance = JobShopInstance(jobs, name="DeadlinePenalty") + dispatcher = Dispatcher(instance) + + base = MakespanReward(dispatcher) + penalty_fn = functools.partial( + get_deadline_violation_penalty, deadline_penalty_factor=13 + ) + reward = RewardWithPenalties( + dispatcher, + base_reward_observer=base, + penalty_function=penalty_fn, + ) + + dispatcher.dispatch(instance.jobs[0][0], 0) + dispatcher.dispatch(instance.jobs[1][0], 0) + assert reward.rewards[-1] == -10 - 13 + + +def test_reward_with_penalties_requires_same_dispatcher(): + instance = JobShopInstance([[Operation(0, 1)]]) + d1 = Dispatcher(instance) + d2 = Dispatcher(instance) + base = MakespanReward(d1) + + with 
pytest.raises(ValidationError): + RewardWithPenalties( + d2, base_reward_observer=base, penalty_function=lambda op, d: 0.0 + ) + + +def test_reward_with_penalties_unsubscribes_base(): + instance = JobShopInstance([[Operation(0, 1)], [Operation(0, 1)]]) + dispatcher = Dispatcher(instance) + + base = MakespanReward(dispatcher) + assert base in dispatcher.subscribers + + reward = RewardWithPenalties( + dispatcher, + base_reward_observer=base, + penalty_function=lambda op, d: 0.0, + ) + # Base should be unsubscribed; wrapper is subscribed + assert base not in dispatcher.subscribers + assert reward in dispatcher.subscribers + + # test reset + reward.reset() + assert not reward.rewards + assert not base.rewards + + +def test_reward_observers_reset(): + instance = JobShopInstance([[Operation(0, 1)], [Operation(0, 1)]]) + dispatcher = Dispatcher(instance) + + m_reward = MakespanReward(dispatcher) + i_reward = IdleTimeReward(dispatcher) + + dispatcher.dispatch(instance.jobs[0][0], 0) + dispatcher.dispatch(instance.jobs[1][0], 0) + + # Ensure rewards collected + assert m_reward.rewards + assert i_reward.rewards + + # Reset and ensure cleared and internal state matches + m_reward.reset() + i_reward.reset() + assert not m_reward.rewards + assert not i_reward.rewards diff --git a/tests/reinforcement_learning/test_utils.py b/tests/reinforcement_learning/test_utils.py index 170ea764..c7516d0c 100644 --- a/tests/reinforcement_learning/test_utils.py +++ b/tests/reinforcement_learning/test_utils.py @@ -9,9 +9,12 @@ add_padding, create_edge_type_dict, map_values, + get_optimal_actions, get_deadline_violation_penalty, get_due_date_violation_penalty, ) +from job_shop_lib.dispatching import OptimalOperationsObserver +from job_shop_lib.dispatching.rules import DispatchingRuleSolver def test_add_padding_int_array(): @@ -401,5 +404,81 @@ def test_due_date_penalty_custom_factor(): ) +# ---------------- get_optimal_actions tests ---------------- # + + +def 
test_get_optimal_actions_initial_and_after_step( + example_job_shop_instance: JobShopInstance, +): + # Build a reference schedule using a simple heuristic solver + solver = DispatchingRuleSolver() + reference_schedule = solver.solve(example_job_shop_instance) + + # Fresh dispatcher and observer on same instance + dispatcher = Dispatcher(example_job_shop_instance) + optimal_obs = OptimalOperationsObserver(dispatcher, reference_schedule) + + # Build available actions tuples (operation_id, machine_id, job_id) + available_ops = dispatcher.available_operations() + actions = [ + (op.operation_id, op.machine_id, op.job_id) for op in available_ops + ] + + # Compute mapping and expected optimal ids + mapping = get_optimal_actions(optimal_obs, actions) + expected_ones = { + (op.operation_id, op.machine_id, op.job_id) + for op in optimal_obs.optimal_available + } + + # Check 1 for optimal, 0 otherwise + for a in actions: + assert mapping[a] == int(a in expected_ones) + + # Dispatch one optimal operation and validate mapping updates + op_to_dispatch = next(iter(optimal_obs.optimal_available)) + dispatcher.dispatch(op_to_dispatch) + + available_ops = dispatcher.available_operations() + actions = [ + (op.operation_id, op.machine_id, op.job_id) for op in available_ops + ] + mapping = get_optimal_actions(optimal_obs, actions) + expected_ones = { + (op.operation_id, op.machine_id, op.job_id) + for op in optimal_obs.optimal_available + } + for a in actions: + assert mapping[a] == int(a in expected_ones) + + +def test_get_optimal_actions_marks_non_optimal_zero( + example_job_shop_instance: JobShopInstance, +): + solver = DispatchingRuleSolver() + reference_schedule = solver.solve(example_job_shop_instance) + dispatcher = Dispatcher(example_job_shop_instance) + optimal_obs = OptimalOperationsObserver(dispatcher, reference_schedule) + + # Valid available actions + available_ops = dispatcher.available_operations() + actions = [ + (op.operation_id, op.machine_id, op.job_id) for op in 
available_ops + ] + + # Add an artificial non-optimal action tuple (invalid machine id) + if actions: + fake_action = (actions[0][0], actions[0][1] + 99, actions[0][2]) + actions_with_fake = actions + [fake_action] + else: + actions_with_fake = [] + + mapping = get_optimal_actions(optimal_obs, actions_with_fake) + + # Fake action should be marked as non-optimal (0) + if actions_with_fake: + assert mapping[fake_action] == 0 + + if __name__ == "__main__": pytest.main(["-vv", __file__]) From 99aa60c8493bb304ad1ef1b552fdd3adbc9bec01 Mon Sep 17 00:00:00 2001 From: Pabloo22 Date: Sat, 13 Sep 2025 21:12:22 +0200 Subject: [PATCH 3/4] [Tests] Add comprehensive tests for objective functions and penalties --- .../test_objective_functions.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 tests/metaheuristics/test_objective_functions.py diff --git a/tests/metaheuristics/test_objective_functions.py b/tests/metaheuristics/test_objective_functions.py new file mode 100644 index 00000000..852204ec --- /dev/null +++ b/tests/metaheuristics/test_objective_functions.py @@ -0,0 +1,108 @@ +# pylint: disable=missing-function-docstring, redefined-outer-name +import pytest + +from job_shop_lib import ( + JobShopInstance, + Operation, + Schedule, + ScheduledOperation, +) +from job_shop_lib.metaheuristics import ( + get_makespan_with_penalties_objective, + compute_penalty_for_deadlines, + compute_penalty_for_due_dates, +) + + +@pytest.fixture +def schedule_no_penalties() -> Schedule: + # Two machines; set due_date/deadline None + jobs = [[Operation(0, 2)], [Operation(1, 3)]] + instance = JobShopInstance(jobs, name="NoPenalties") + # Build schedule manually: M0: job0@t0..2; M1: job1@t0..3 + s0 = ScheduledOperation(instance.jobs[0][0], start_time=0, machine_id=0) + s1 = ScheduledOperation(instance.jobs[1][0], start_time=0, machine_id=1) + schedule = Schedule(instance, [[s0], [s1]]) + return schedule + + +@pytest.fixture +def schedule_with_deadlines() -> Schedule: + 
# Single machine sequence; first op violates deadline + jobs = [ + [Operation(0, 2, deadline=1)], # ends at 2 -> violation + [Operation(0, 3, deadline=5)], # ends at 5 -> boundary, no violation + ] + instance = JobShopInstance(jobs, name="Deadlines") + s0 = ScheduledOperation(instance.jobs[0][0], start_time=0, machine_id=0) + s1 = ScheduledOperation(instance.jobs[1][0], start_time=2, machine_id=0) + schedule = Schedule(instance, [[s0, s1]]) + return schedule + + +@pytest.fixture +def schedule_with_due_dates() -> Schedule: + # Single machine sequence; first op OK, second violates due date + jobs = [ + [Operation(0, 1, due_date=1)], # ends at 1 -> equal, OK + [Operation(0, 4, due_date=3)], # ends at 5 -> violation + ] + instance = JobShopInstance(jobs, name="DueDates") + s0 = ScheduledOperation(instance.jobs[0][0], start_time=0, machine_id=0) + s1 = ScheduledOperation(instance.jobs[1][0], start_time=1, machine_id=0) + schedule = Schedule(instance, [[s0, s1]]) + return schedule + + +@pytest.fixture +def schedule_with_both() -> Schedule: + # Mixed: first violates deadline, second violates due date + jobs = [ + [Operation(0, 3, deadline=2, due_date=10)], # deadline violation + [Operation(0, 4, deadline=10, due_date=6)], # due date violation + ] + instance = JobShopInstance(jobs, name="Both") + s0 = ScheduledOperation(instance.jobs[0][0], start_time=0, machine_id=0) + s1 = ScheduledOperation(instance.jobs[1][0], start_time=3, machine_id=0) + schedule = Schedule(instance, [[s0, s1]]) + return schedule + + +def test_compute_penalty_for_deadlines_none(schedule_no_penalties: Schedule): + assert compute_penalty_for_deadlines(schedule_no_penalties, 1000) == 0.0 + + +def test_compute_penalty_for_due_dates_none(schedule_no_penalties: Schedule): + assert compute_penalty_for_due_dates(schedule_no_penalties, 100) == 0.0 + + +def test_compute_penalty_for_deadlines(schedule_with_deadlines: Schedule): + # Only first op violates -> 1 penalty + assert
compute_penalty_for_deadlines(schedule_with_deadlines, 7.5) == 7.5 + + +def test_compute_penalty_for_due_dates(schedule_with_due_dates: Schedule): + # Only second op violates -> 1 penalty + assert compute_penalty_for_due_dates(schedule_with_due_dates, 3.0) == 3.0 + + +def test_objective_makespan_only_when_zero_factors( + schedule_with_both: Schedule, +): + objective = get_makespan_with_penalties_objective( + deadline_penalty_factor=0, due_date_penalty_factor=0 + ) + assert objective(schedule_with_both) == schedule_with_both.makespan() + + +def test_objective_with_penalties(schedule_with_both: Schedule): + # s0: 0..3 (violates deadline=2) -> +d_factor + # s1: 3..7 (violates due_date=6) -> +dd_factor + d_factor = 123.0 + dd_factor = 4.0 + objective = get_makespan_with_penalties_objective( + deadline_penalty_factor=d_factor, + due_date_penalty_factor=dd_factor, + ) + expected = schedule_with_both.makespan() + d_factor + dd_factor + assert objective(schedule_with_both) == expected From 976765be66e3d9bf6afbeba1239e5607a2a19499 Mon Sep 17 00:00:00 2001 From: Pabloo22 Date: Sat, 13 Sep 2025 21:12:40 +0200 Subject: [PATCH 4/4] [Docs] Fix `RewardWithPenalties` class documentation --- job_shop_lib/reinforcement_learning/_reward_observers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/job_shop_lib/reinforcement_learning/_reward_observers.py b/job_shop_lib/reinforcement_learning/_reward_observers.py index 32818ba3..d25786a0 100644 --- a/job_shop_lib/reinforcement_learning/_reward_observers.py +++ b/job_shop_lib/reinforcement_learning/_reward_observers.py @@ -98,8 +98,9 @@ class RewardWithPenalties(RewardObserver): Attributes: base_reward_observer: The base reward observer to use for calculating the reward. - penalty_per_violation: - The penalty to apply for each constraint violation. + penalty_function: + A function that takes a scheduled operation and the dispatcher as + input and returns the penalty for that operation. 
Args: dispatcher: @@ -125,8 +126,8 @@ class RewardWithPenalties(RewardObserver): The following functions (along with ``functools.partial``) can be used to create penalty functions: - - :class:`~job_shop_lib.reinforcement_learning.get_deadline_violation_penalty` - - :class:`~job_shop_lib.reinforcement_learning.get_due_date_violation_penalty` + - :func:`~job_shop_lib.reinforcement_learning.get_deadline_violation_penalty` + - :func:`~job_shop_lib.reinforcement_learning.get_due_date_violation_penalty` """ # noqa: E501