Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions job_shop_lib/metaheuristics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,16 @@
swap_random_operations
ObjectiveFunction
get_makespan_with_penalties_objective
compute_penalty_for_deadlines
compute_penalty_for_due_dates

"""

from job_shop_lib.metaheuristics._objective_functions import (
ObjectiveFunction,
get_makespan_with_penalties_objective,
compute_penalty_for_deadlines,
compute_penalty_for_due_dates,
)
from job_shop_lib.metaheuristics._neighbor_generators import (
NeighborGenerator,
Expand All @@ -58,4 +62,6 @@
"swap_random_operations",
"ObjectiveFunction",
"get_makespan_with_penalties_objective",
"compute_penalty_for_deadlines",
"compute_penalty_for_due_dates",
]
85 changes: 63 additions & 22 deletions job_shop_lib/metaheuristics/_objective_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,70 @@ def get_makespan_with_penalties_objective(

def objective(schedule: Schedule) -> float:
    """Return the makespan of ``schedule`` plus its constraint penalties.

    Args:
        schedule:
            The schedule to evaluate.

    Returns:
        The makespan alone when the instance reports neither deadlines
        nor due dates; otherwise the makespan plus the deadline and
        due date penalties.
    """
    makespan = schedule.makespan()
    instance = schedule.instance

    # Fast path: no constraint attributes present in the instance
    if not instance.has_deadlines and not instance.has_due_dates:
        return makespan

    # The penalties are computed only through the dedicated helpers; the
    # former inline loop produced a value that was overwritten right
    # after, so it only cost an extra pass over the schedule.
    penalty_for_deadlines = compute_penalty_for_deadlines(
        schedule, deadline_penalty_factor
    )
    penalty_for_due_dates = compute_penalty_for_due_dates(
        schedule, due_date_penalty_factor
    )
    penalty = penalty_for_deadlines + penalty_for_due_dates

    return makespan + penalty

return objective


def compute_penalty_for_deadlines(
    schedule: Schedule, penalty_per_violation: float
) -> float:
    """Add up a fixed penalty for every operation that misses its deadline.

    Args:
        schedule:
            The schedule whose scheduled operations are inspected.
        penalty_per_violation:
            Amount charged once for each operation whose end time is
            strictly greater than its deadline.

    Returns:
        The accumulated penalty. It is ``0.0`` when the instance reports
        no deadlines or when ``penalty_per_violation`` equals zero.
    """
    if not schedule.instance.has_deadlines:
        return 0.0
    if penalty_per_violation == 0:
        return 0.0

    total = 0.0
    for machine_row in schedule.schedule:
        for entry in machine_row:
            operation = entry.operation
            # Operations without a deadline can never violate one.
            if operation.deadline is None:
                continue
            if entry.end_time > operation.deadline:
                total += penalty_per_violation

    return total


def compute_penalty_for_due_dates(
    schedule: Schedule, penalty_per_violation: float
) -> float:
    """Add up a fixed penalty for every operation that misses its due date.

    Args:
        schedule:
            The schedule whose scheduled operations are inspected.
        penalty_per_violation:
            Amount charged once for each operation whose end time is
            strictly greater than its due date.

    Returns:
        The accumulated penalty. It is ``0.0`` when the instance reports
        no due dates or when ``penalty_per_violation`` equals zero.
    """
    if not schedule.instance.has_due_dates:
        return 0.0
    if penalty_per_violation == 0:
        return 0.0

    total = 0.0
    for machine_row in schedule.schedule:
        for entry in machine_row:
            operation = entry.operation
            # Operations without a due date can never violate one.
            if operation.due_date is None:
                continue
            if entry.end_time > operation.due_date:
                total += penalty_per_violation

    return total
9 changes: 9 additions & 0 deletions job_shop_lib/reinforcement_learning/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@
RewardObserver
MakespanReward
IdleTimeReward
RewardWithPenalties
RenderConfig
add_padding
create_edge_type_dict
map_values
get_optimal_actions
get_deadline_violation_penalty
get_due_date_violation_penalty

"""

Expand All @@ -32,13 +35,16 @@
RewardObserver,
MakespanReward,
IdleTimeReward,
RewardWithPenalties,
)

from job_shop_lib.reinforcement_learning._utils import (
add_padding,
create_edge_type_dict,
map_values,
get_optimal_actions,
get_deadline_violation_penalty,
get_due_date_violation_penalty,
)

from job_shop_lib.reinforcement_learning._single_job_shop_graph_env import (
Expand All @@ -63,9 +69,12 @@
"RewardObserver",
"MakespanReward",
"IdleTimeReward",
"RewardWithPenalties",
"RenderConfig",
"add_padding",
"create_edge_type_dict",
"map_values",
"get_optimal_actions",
"get_deadline_violation_penalty",
"get_due_date_violation_penalty",
]
76 changes: 76 additions & 0 deletions job_shop_lib/reinforcement_learning/_reward_observers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Reward functions are defined as `DispatcherObservers` and are used to
calculate the reward for a given state."""

from collections.abc import Callable

from job_shop_lib.exceptions import ValidationError
from job_shop_lib.dispatching import DispatcherObserver, Dispatcher
from job_shop_lib import ScheduledOperation

Expand Down Expand Up @@ -83,3 +86,76 @@ def update(self, scheduled_operation: ScheduledOperation):

reward = -idle_time
self.rewards.append(reward)


class RewardWithPenalties(RewardObserver):
    """Reward function that adds penalties to another reward function.

    The reward is calculated as the reward from another reward function
    minus the penalty returned by ``penalty_function`` for the operation
    that was just scheduled (for example, a penalty for violating a due
    date or a deadline).

    Attributes:
        base_reward_observer:
            The base reward observer to use for calculating the reward.
        penalty_function:
            A function that takes a scheduled operation and the dispatcher as
            input and returns the penalty for that operation.

    Args:
        dispatcher:
            The dispatcher to observe.
        base_reward_observer:
            The base reward observer to use for calculating the reward. It
            must use the same dispatcher as this reward observer. If it is
            subscribed to the dispatcher, it will be unsubscribed.
        penalty_function:
            A function that takes a scheduled operation and the
            dispatcher as input and returns the penalty for that operation.
        subscribe:
            Whether to subscribe to the dispatcher upon initialization.

    Raises:
        ValidationError:
            If the base reward observer does not use the same dispatcher as
            this reward observer. The check runs before this observer is
            initialized, so a rejected construction does not subscribe
            anything to the dispatcher.

    .. versionadded:: 1.7.0

    .. seealso::
        The following functions (along with ``functools.partial``) can be
        used to create penalty functions:

        - :func:`~job_shop_lib.reinforcement_learning.get_deadline_violation_penalty`
        - :func:`~job_shop_lib.reinforcement_learning.get_due_date_violation_penalty`

    """  # noqa: E501

    def __init__(
        self,
        dispatcher: Dispatcher,
        *,
        base_reward_observer: RewardObserver,
        penalty_function: Callable[[ScheduledOperation, Dispatcher], float],
        subscribe: bool = True,
    ) -> None:
        # Validate first: the parent constructor may subscribe this observer
        # to the dispatcher, and raising afterwards would leave a
        # half-initialized observer registered on it.
        if base_reward_observer.dispatcher is not dispatcher:
            raise ValidationError(
                "The base reward observer must use the same "
                "dispatcher as this reward observer."
            )
        super().__init__(dispatcher, subscribe=subscribe)
        self.base_reward_observer = base_reward_observer
        self.penalty_function = penalty_function
        # The base observer is driven manually from ``update``; keeping it
        # subscribed would make it process every operation twice.
        if base_reward_observer in dispatcher.subscribers:
            dispatcher.unsubscribe(base_reward_observer)

    def reset(self) -> None:
        """Reset this observer and the wrapped base reward observer."""
        super().reset()
        self.base_reward_observer.reset()

    def update(self, scheduled_operation: ScheduledOperation):
        """Append the base reward minus the penalty for the operation.

        Args:
            scheduled_operation:
                The operation that has just been scheduled.
        """
        # Forward the event so the base observer computes its own reward.
        self.base_reward_observer.update(scheduled_operation)
        base_reward = self.base_reward_observer.last_reward
        penalty = self.penalty_function(scheduled_operation, self.dispatcher)
        self.rewards.append(base_reward - penalty)
67 changes: 63 additions & 4 deletions job_shop_lib/reinforcement_learning/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import numpy as np
from numpy.typing import NDArray

from job_shop_lib import ScheduledOperation
from job_shop_lib.exceptions import ValidationError
from job_shop_lib.dispatching import OptimalOperationsObserver
from job_shop_lib.dispatching import OptimalOperationsObserver, Dispatcher

T = TypeVar("T", bound=np.number)

Expand Down Expand Up @@ -193,7 +194,65 @@ def get_optimal_actions(
return optimal_actions


if __name__ == "__main__":
import doctest
def get_deadline_violation_penalty(
    scheduled_operation: ScheduledOperation,
    unused_dispatcher: Dispatcher,
    deadline_penalty_factor: float = 10_000,
) -> float:
    """Compute the penalty for a scheduled operation that violates its
    deadline.

    Args:
        scheduled_operation:
            The scheduled operation to evaluate.
        unused_dispatcher:
            This argument is unused but included for compatibility with the
            penalty function signature.
        deadline_penalty_factor:
            Cost added for each operation that
            finishes after its deadline. Defaults to 10_000.

    Returns:
        The penalty for the scheduled operation if it violates its deadline,
        otherwise 0.

    .. versionadded:: 1.7.0
    """
    deadline = scheduled_operation.operation.deadline
    # An operation without a deadline cannot violate one; finishing exactly
    # at the deadline is not a violation either.
    if deadline is not None and scheduled_operation.end_time > deadline:
        return deadline_penalty_factor
    return 0.0


def get_due_date_violation_penalty(
    scheduled_operation: ScheduledOperation,
    unused_dispatcher: Dispatcher,
    due_date_penalty_factor: float = 100,
) -> float:
    """Return the penalty owed by one scheduled operation for missing its
    due date.

    Args:
        scheduled_operation:
            The scheduled operation to check.
        unused_dispatcher:
            Ignored; it is only present so the function matches the
            penalty function signature.
        due_date_penalty_factor:
            Value returned when the operation ends after its due date.
            Defaults to 100.

    Returns:
        ``due_date_penalty_factor`` if the operation has a due date and
        its end time is strictly greater than it, otherwise 0.

    .. versionadded:: 1.7.0
    """
    operation = scheduled_operation.operation
    if operation.due_date is None:
        return 0.0
    if scheduled_operation.end_time > operation.due_date:
        return due_date_penalty_factor
    return 0.0
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,17 @@ def ft06_instance():
@pytest.fixture
def seeded_rng() -> random.Random:
    """Provide a ``random.Random`` generator seeded with 42."""
    rng = random.Random(42)
    return rng


@pytest.fixture
def single_machine_instance() -> JobShopInstance:
    """Build the "SingleMachine" instance: two single-operation jobs that
    share the same machine."""
    first_job = [Operation(0, 2)]
    second_job = [Operation(0, 3)]
    return JobShopInstance([first_job, second_job], name="SingleMachine")


@pytest.fixture
def two_machines_instance() -> JobShopInstance:
    """Build the "TwoMachines" instance: two jobs, each with one operation
    on a different machine."""
    first_job = [Operation(0, 5)]
    second_job = [Operation(1, 3)]
    return JobShopInstance([first_job, second_job], name="TwoMachines")
Loading
Loading